Compare commits

42 Commits

| Author | SHA1 | Date |
| --- | --- | --- |
|  | d09a8cf3ef |  |
|  | a347f05b4c |  |
|  | aab4fc9f7d |  |
|  | 88e1e574c3 |  |
|  | e067155c4c |  |
|  | d701060d6e |  |
|  | 6d59b1b408 |  |
|  | c1044db093 |  |
|  | 29de6525cc |  |
|  | 2a3fad344e |  |
|  | 187ea3db43 |  |
|  | 1f460aa32a |  |
|  | 49957a64f1 |  |
|  | 7a82c46c96 |  |
|  | 58aa4d00ce |  |
|  | 184287e210 |  |
|  | 2847af917b |  |
|  | 575d8b52c7 |  |
|  | fb3bad4ce7 |  |
|  | 6045d3928e |  |
|  | 380b5eb56c |  |
|  | 3876094820 |  |
|  | 7984600608 |  |
|  | bc4a340609 |  |
|  | 0af9f38bb2 |  |
|  | 0790a15c64 |  |
|  | 7638e36e2a |  |
|  | 72e498d024 |  |
|  | 50076ebcf4 |  |
|  | a426a3a184 |  |
|  | a206e2ed77 |  |
|  | ca5c3a6938 |  |
|  | 55c018a56b |  |
|  | 849dfc203b |  |
|  | ea26cd8a5d |  |
|  | 6922879a08 |  |
|  | 2d01fdc5af |  |
|  | 0412605d6e |  |
|  | 1202dcf775 |  |
|  | 130170d94c |  |
|  | 364f578177 |  |
|  | 471883b63c |  |
@@ -2,8 +2,21 @@ name: Pre-commit

on:
pull_request:
branches:
- main
push:
branches: [main]
branches:
- main

env:
LC_ALL: en_US.UTF-8

defaults:
run:
shell: bash

permissions:
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}

@@ -11,27 +24,43 @@ concurrency:

jobs:
pre-commit:
name: pre-commit
runs-on: ubuntu-latest

steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit

- name: Checkout code
- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.11'
python-version: "3.11"
cache: pip
cache-dependency-path: |
**/requirements*.txt
.pre-commit-config.yaml

- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
- name: Run pre-commit
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
env:
SKIP: no-commit-to-branch
RUFF_OUTPUT_FORMAT: github

- name: Verify if there are any diff files after pre-commit
run: git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)

- name: Verify if there are any new files after pre-commit
run: |
git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
unstaged_files=$(git ls-files --others --exclude-standard)
if [ -n "$unstaged_files" ]; then
echo "There are uncommitted new files, run pre-commit locally and commit again"
echo "$unstaged_files"
exit 1
fi
@@ -1,6 +1,6 @@

# SPDX-License-Identifier: Apache-2.0

name: Build, test, and upload PyPI package
name: Build and publish PyPI package

on:
push:

@@ -37,23 +37,27 @@ jobs:

runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit

- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# for setuptools-scm
# https://github.com/actions/checkout/issues/249
fetch-depth: 0

- uses: hynek/build-and-inspect-python-package@b5076c307dc91924a82ad150cdd1533b444d3310 # v2.12.0
- name: Build and inspect python package
uses: hynek/build-and-inspect-python-package@c52c3a4710070b50470d903818a7b25115dcd076 # v2.13.0

- name: Run 'test-build.sh'
run: $GITHUB_WORKSPACE/tests/test-build.sh

# push to Test PyPI on
# - a new GitHub release is published
# - a PR is merged into main branch
publish-test-pypi:
name: Publish packages to test.pypi.org
# environment: publish-test-pypi
if: |
github.repository_owner == 'containers' && (
github.event.action == 'published' ||

@@ -65,10 +69,9 @@ jobs:

id-token: write
runs-on: ubuntu-latest
needs: build-package

steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit

@@ -88,7 +91,6 @@ jobs:

# - a new GitHub release is published
publish-pypi:
name: Publish release to pypi.org
# environment: publish-pypi
if: |
github.repository_owner == 'containers' && github.event.action == 'published'
permissions:

@@ -96,13 +98,11 @@ jobs:

id-token: write
# allow gh release upload
contents: write

runs-on: ubuntu-latest
needs: build-package

steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit
@@ -0,0 +1,79 @@

name: Test Container

on:
workflow_dispatch:
inputs:
inference_model:
description: Model to download and inference via RamaLama
required: false
default: llama3.2:3b
schedule:
- cron: '0 11 * * *' # Runs at 11AM UTC every morning

env:
LC_ALL: en_US.UTF-8

defaults:
run:
shell: bash

permissions:
contents: read

jobs:
test-lls-integration:
name: test-container
runs-on: ubuntu-latest
env:
INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b' }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit

- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0

- name: Install uv
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
with:
python-version: "3.11"

- name: Set Up Environment and Install Dependencies
run: |
# install podman
sudo apt-get -y install podman

# install packaged version of ramalama
uv venv
uv pip install ramalama

- name: Cache Ramalama store
id: ramalama-store-cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: ~/.local/share/ramalama
key: ramalama-store-${{ env.INFERENCE_MODEL }}

- name: Download model to serve with Ramalama
if: ${{ steps.ramalama-store-cache.outputs.cache-hit != 'true' }}
run: uv run ramalama pull ${{ env.INFERENCE_MODEL }}

- name: Run 'test-container.sh'
run: $GITHUB_WORKSPACE/tests/test-container.sh

- name: Run 'test-ui-linux.sh'
run: $GITHUB_WORKSPACE/tests/test-ui-linux.sh

- name: Upload logs
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: always()
with:
name: logs-test-container
retention-days: 5
path: |
**/*.log
@@ -1,34 +1,82 @@

name: Test External Providers

on:
workflow_dispatch:
push:
branches: [ main ]
branches:
- main
paths:
- 'src/ramalama_stack/**'
- 'tests/**'
- '.github/workflows/test-external-providers.yml'
- pyproject.toml
- requirements.txt
- uv.lock
pull_request:
branches: [ main ]
branches:
- main
paths:
- 'src/ramalama_stack/**'
- 'tests/**'
- '.github/workflows/test-external-providers.yml'
- pyproject.toml
- requirements.txt
- uv.lock

env:
LC_ALL: en_US.UTF-8

defaults:
run:
shell: bash

permissions:
contents: read

jobs:
test-external-providers:
name: test-external-providers
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
inference_model:
- 'llama3.2:3b'
- 'granite3.2:2b'
env:
INFERENCE_MODEL: llama3.2:3b-instruct-fp16
INFERENCE_MODEL: ${{ matrix.inference_model }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit

- name: Checkout repository
- name: Set INFERENCE_MODEL_NO_COLON for logging artifacts
run: echo "INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')" >> $GITHUB_ENV

- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0

- name: Install uv
uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
with:
python-version: "3.10"
python-version: "3.11"

- name: Set Up Environment and Install Dependencies
run: |
uv sync
uv pip install -e .

# temporary hack for file writing that should be done by the pip setup script
# https://github.com/containers/ramalama-stack/issues/53
mkdir -p ~/.llama/distributions/ramalama/
cp -r $GITHUB_WORKSPACE/src/ramalama_stack/providers.d/ ~/.llama/
cp $GITHUB_WORKSPACE/src/ramalama_stack/ramalama-run.yaml ~/.llama/distributions/ramalama/ramalama-run.yaml

- name: Run 'test-build.sh'
run: $GITHUB_WORKSPACE/tests/test-build.sh

- name: Cache Ramalama store
id: ramalama-store-cache

@@ -44,11 +92,14 @@ jobs:

- name: Run 'test-external-providers.sh'
run: $GITHUB_WORKSPACE/tests/test-external-providers.sh

- name: Run 'test-rag.sh'
run: $GITHUB_WORKSPACE/tests/test-rag.sh

- name: Upload logs
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: always()
with:
name: logs
name: logs-test-external-providers-${{ env.INFERENCE_MODEL_NO_COLON }}
retention-days: 5
path: |
**/*.log
@@ -0,0 +1,86 @@

name: Test LLS Integration

on:
workflow_dispatch:
inputs:
inference_model:
description: Model to download and inference via RamaLama
required: false
default: llama3.2:3b
schedule:
- cron: '0 11 * * *' # Runs at 11AM UTC every morning

env:
LC_ALL: en_US.UTF-8

defaults:
run:
shell: bash

permissions:
contents: read

jobs:
test-lls-integration:
name: test-lls-integration
runs-on: ubuntu-latest
env:
INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b' }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit

- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0

- name: Install uv
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
with:
python-version: "3.11"

- name: Set Up Environment and Install Dependencies
run: |
uv venv

# install packaged version of ramalama-stack
uv pip install ramalama-stack

# update llama-stack version to main branch
uv pip install git+https://github.com/meta-llama/llama-stack.git@main

# temporary hack for file writing that should be done by the pip setup script
# https://github.com/containers/ramalama-stack/issues/53
mkdir -p ~/.llama/distributions/ramalama/
cp -r $GITHUB_WORKSPACE/src/ramalama_stack/providers.d/ ~/.llama/
cp $GITHUB_WORKSPACE/src/ramalama_stack/ramalama-run.yaml ~/.llama/distributions/ramalama/ramalama-run.yaml

- name: Run 'test-build.sh'
run: $GITHUB_WORKSPACE/tests/test-build.sh

- name: Cache Ramalama store
id: ramalama-store-cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: ~/.local/share/ramalama
key: ramalama-store-${{ env.INFERENCE_MODEL }}

- name: Download model to serve with Ramalama
if: ${{ steps.ramalama-store-cache.outputs.cache-hit != 'true' }}
run: uv run ramalama pull ${{ env.INFERENCE_MODEL }}

- name: Run 'test-external-providers.sh'
run: $GITHUB_WORKSPACE/tests/test-external-providers.sh

- name: Upload logs
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: always()
with:
name: logs-test-lls-integration
retention-days: 5
path: |
**/*.log
@@ -176,3 +176,4 @@ cython_debug/

# Anything additional
distributions/
src/ramalama_stack/_version.py
.python-version
@@ -16,14 +16,14 @@ repos:

- id: check-shebang-scripts-are-executable

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.4
rev: v0.11.12
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format

- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.6.3
rev: 0.7.9
hooks:
- id: uv-lock
- id: uv-export

@@ -31,9 +31,20 @@ repos:

"--frozen",
"--no-hashes",
"--no-emit-project",
"--no-default-groups",
"--output-file=requirements.txt"
]

- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.10.0
hooks:
- id: shellcheck

- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
hooks:
- id: actionlint
args: [
"-shellcheck=",
"-pyflakes="
]
@@ -1 +0,0 @@

3.10
@@ -0,0 +1,316 @@

# Contributing to ramalama-stack

We'd love to have you join the community!
Below summarizes the processes that we follow.

## Topics

* [Reporting Issues](#reporting-issues)
* [Working On Issues](#working-on-issues)
* [Contributing To ramalama-stack](#contributing-to-ramalama-stack-1)
* [Submitting Pull Requests](#submitting-pull-requests)
* [Communications](#communications)
* [Code of Conduct](#code-of-conduct)

## Reporting Issues

Before reporting an issue, check our backlog of [open issues](https://github.com/containers/ramalama-stack/issues) to see if someone else has already reported it.
If so, feel free to add your scenario, or additional information, to the discussion.
Or simply "subscribe" to it to be notified when it is updated.
Please do not add comments like "+1" or "I have this issue as well" without adding any new information.
Instead, please add a thumbs-up emoji to the original report.

Note: Older closed issues/PRs are automatically locked.
If you have a similar problem please open a new issue instead of commenting.

If you find a new issue with the project we'd love to hear about it!
The most important aspect of a bug report is that it includes enough information for us to reproduce it.
To make this easier, there are three types of issue templates you can use.
* If you have a bug to report, please use the *Bug Report* template.
* If you have an idea to propose, please use the *Feature Request* template.
* If your issue is something else, please use the default *Blank issue* template.

Please include as much detail as possible, including all requested fields in the template.
Not having all requested information makes it much harder to find and fix issues.
A reproducer is the best thing you can include.
Reproducers make finding and fixing issues much easier for maintainers.
The easier it is for us to reproduce a bug, the faster it'll be fixed!

Please don't include any private/sensitive information in your issue!
Security issues should NOT be reported via GitHub and should instead be reported via the process described [here](https://github.com/containers/common/blob/main/SECURITY.md).

## Working On Issues

Once you have decided to contribute to ramalama-stack by working on an issue, check our backlog of [open issues](https://github.com/containers/ramalama-stack/issues) looking for any that are unassigned.
If you want to work on a specific issue that is already assigned but does not appear to be actively being worked on, please ping the assignee in the issue and ask if you can take over.
If they do not respond after several days, you can notify a maintainer to have the issue reassigned.
When working on an issue, please assign it to yourself.
If you lack permissions to do so, you can ping the `@containers/ramalama-stack-maintainers` group to have a maintainer set you as assignee.

## Contributing To ramalama-stack

This section describes how to make a contribution to ramalama-stack.

### Prepare your environment

The minimum version of Python required to use ramalama-stack is Python 3.11.

### Fork and clone ramalama-stack

First, you need to fork this project on GitHub.
Then clone your fork locally:

```shell
$ git clone git@github.com:<you>/ramalama-stack
$ cd ./ramalama-stack/
```

### Install required tools

We use [uv](https://github.com/astral-sh/uv) to manage Python dependencies and virtual environments.
You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).

You can install the dependencies by running:

```bash
cd ramalama-stack
uv sync
source .venv/bin/activate
```

> [!NOTE]
> You can use a specific version of Python with `uv` by adding the `--python <version>` flag (e.g. `--python 3.11`).
> Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`.
> For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/).

### Adding dependencies

Please add dependencies using the [uv-documented approach](https://docs.astral.sh/uv/concepts/projects/dependencies/#adding-dependencies).

This should update both the `pyproject.toml` and the `uv.lock` file.

The `requirements.txt` file should be updated as well by `pre-commit` - you can also do this manually via `uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt`.
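
For example, a minimal sketch of that flow, using a hypothetical `some-package` as the dependency being added:

```bash
# add the dependency; uv updates pyproject.toml and uv.lock
uv add some-package

# regenerate requirements.txt the same way the pre-commit hook does
uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
```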

## Testing

ramalama-stack provides a small suite of tests in the `tests/` directory.
Most pull requests should be accompanied by test changes covering the changes in the PR.
Pull requests without tests will receive additional scrutiny from maintainers and may be blocked from merging unless tests are added.
Maintainers will decide during review if tests are not necessary.

### Types of Tests

There are several types of tests run by ramalama-stack's upstream CI.
* Pre-commit checks
* Functional testing
* Integration testing
* PyPI build and upload testing
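
The pre-commit checks are the easiest to reproduce locally before pushing; a minimal sketch, assuming the dev dependencies from `uv sync` are installed (the hooks themselves are defined in `.pre-commit-config.yaml`):

```bash
# install the git hook scripts once
uv run pre-commit install

# run every hook against the whole tree, the same way CI does
uv run pre-commit run --all-files
```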

## Documentation

Make sure to update the documentation if needed.
ramalama-stack is documented via its [README](https://github.com/containers/ramalama-stack/blob/main/docs/README.md) and files in the `docs/` directory.

## Submitting Pull Requests

No Pull Request (PR) is too small!
Typos, additional comments in the code, new test cases, bug fixes, new features, more documentation, ... it's all welcome!

While bug fixes can first be identified via an "issue" in GitHub, that is not required.
It's ok to just open up a PR with the fix, but make sure you include the same information you would have included in an issue - like how to reproduce it.

PRs for new features should include some background on what use cases the new code is trying to address.
When possible and when it makes sense, try to break up larger PRs into smaller ones - it's easier to review smaller code changes.
But only if those smaller ones make sense as stand-alone PRs.

Regardless of the type of PR, all PRs should include:
* Well-documented code changes, both through comments in the code itself and high-quality commit messages.
* Additional tests. Ideally, they should fail w/o your code change applied.
* Documentation updates to reflect the changes made in the pull request.

Squash your commits into logical pieces of work that might want to be reviewed separately from the rest of the PRs.
Squashing down to just one commit is also acceptable since in the end the entire PR will be reviewed anyway.
When in doubt, squash.

When your PR fixes an issue, please note that by including `Fixes: #00000` in the commit description.
More details on this are below, in the "Describe your changes in Commit Messages" section.

The ramalama-stack repo follows a one-ack policy for merges.
PRs will be approved and merged by a repo owner.
Two reviews are required for a pull request to merge, including sourcery.ai.

### Describe your Changes in Commit Messages

Describe your problem.
Whether your patch is a one-line bug fix or 5000 lines of a new feature, there must be an underlying problem that motivated you to do this work.
Convince the reviewer that there is a problem worth fixing and that it makes sense for them to read past the first paragraph.

Describe user-visible impact.
Straight up crashes and lockups are pretty convincing, but not all bugs are that blatant.
Even if the problem was spotted during code review, describe the impact you think it can have on users.
Keep in mind that the majority of users run packages provided by distributions, so include anything that could help route your change downstream.

Quantify optimizations and trade-offs.
If you claim improvements in performance, memory consumption, stack footprint, or binary size, include numbers that back them up.
But also describe non-obvious costs.
Optimizations usually aren't free; they are trade-offs between CPU, memory, and readability, or, when it comes to heuristics, between different workloads.
Describe the expected downsides of your optimization so that the reviewer can weigh costs against benefits.

Once the problem is established, describe what you are actually doing about it in technical detail.
It's important to describe the change in plain English for the reviewer to verify that the code is behaving as you intend it to.

Solve only one problem per patch.
If your description starts to get long, that's a sign that you probably need to split up your patch.

If the patch fixes a logged bug entry, refer to that bug entry by number and URL.
If the patch follows from a mailing list discussion, give a URL to the mailing list archive.
Please format these lines as `Fixes:` followed by the URL or, for GitHub bugs, the bug number preceded by a #.
For example:

```
Fixes: #00000
Fixes: https://github.com/containers/ramalama-stack/issues/00000
Fixes: https://issues.redhat.com/browse/RHEL-00000
Fixes: RHEL-00000
```

However, try to make your explanation understandable without external resources.
In addition to giving a URL to a mailing list archive or bug, summarize the relevant points of the discussion that led to the patch as submitted.

If you want to refer to a specific commit, don't just refer to the SHA-1 ID of the commit.
Please also include the one-line summary of the commit, to make it easier for reviewers to know what it is about. If the commit was merged in GitHub, referring to a GitHub PR number is also a good option, as that will retain all discussion from development, and makes including a summary less critical.
Examples:

```
Commit f641c2d9384e ("fix bug in rm -fa parallel deletes") [...]
PR #00000
```

When referring to a commit by SHA, you should also be sure to use at least the first twelve characters of the SHA-1 ID.
The ramalama-stack repository holds a lot of objects, making collisions with shorter IDs a real possibility.
Bear in mind that, even if there is no collision with your six-character ID now, that condition may change five years from now.

The following git config settings can be used to add a pretty format for outputting the above style in the git log or git show commands:

```
[core]
	abbrev = 12
[pretty]
	fixes = Fixes: %h (\"%s\")
```
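
As a sketch of how the alias can then be used (assuming the config above is in place), the named pretty format works with `git show` or `git log`:

```bash
# print a Fixes:-style reference for a given commit
git show -s --pretty=fixes f641c2d9384e
# -> Fixes: f641c2d9384e ("fix bug in rm -fa parallel deletes")
```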

### Sign your PRs

The sign-off is a line at the end of the explanation for the patch.
Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch.
The rules are simple: if you can certify the below (from [developercertificate.org](https://developercertificate.org/)):

```
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.

Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
    have the right to submit it under the open source license
    indicated in the file; or

(b) The contribution is based upon previous work that, to the best
    of my knowledge, is covered under an appropriate open source
    license and I have the right under that license to submit that
    work with modifications, whether created in whole or in part
    by me, under the same open source license (unless I am
    permitted to submit under a different license), as indicated
    in the file; or

(c) The contribution was provided directly to me by some other
    person who certified (a), (b) or (c) and I have not modified
    it.

(d) I understand and agree that this project and the contribution
    are public and that a record of the contribution (including all
    personal information I submit with it, including my sign-off) is
    maintained indefinitely and may be redistributed consistent with
    this project or the open source license(s) involved.
```

Then you just add a line to every git commit message:

    Signed-off-by: Joe Smith <joe.smith@email.com>

Use your real name (sorry, no pseudonyms or anonymous contributions).

If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`.
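
For example, a minimal sketch (the name, email, and commit message are placeholders taken from the example above):

```bash
git config user.name "Joe Smith"
git config user.email "joe.smith@email.com"

# -s appends the Signed-off-by trailer using the configured identity
git commit -s -m "Fix the thing"
```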

### Continuous Integration

All pull requests automatically run ramalama-stack's test suite.

There is always additional complexity added by automation, and so it sometimes can fail for any number of reasons.
This includes post-merge testing on all branches, where you may occasionally see [red bars on the status graph](https://github.com/containers/ramalama-stack/blob/main/docs/ci.md).

Most notably, the tests will occasionally flake.
If you see a single test on your PR has failed, and you do not believe it is caused by your changes, you can rerun the tests.
If you lack permissions to rerun the tests, please ping the maintainers using the `@containers/ramalama-stack-maintainers` group and request that the failing test be rerun.

If you see multiple test failures, you may wish to check the status graph mentioned above.
When the graph shows mostly green bars on the right, it's a good indication the main branch is currently stable.
Alternating red/green bars is indicative of a testing "flake", and should be examined (anybody can do this):

* *One or a small handful of tests, on a single task (i.e. a specific distro/version), where all others ran successfully:* Frequently the cause is networking or a brief external service outage. The failed tasks may simply be re-run by pressing the corresponding button on the task details page.

* *Multiple tasks failing*: Logically this should be due to some shared/common element. If that element is identifiable as a networking or external service (e.g. packaging repository outage), a re-run should be attempted.

* *All tasks are failing*: If a common element is **not** identifiable as temporary (i.e. container registry outage), please seek assistance via [the methods below](#communications) as this may be an early indication of a more serious problem.

In the (hopefully) rare case there are multiple, contiguous red bars, this is a ***very bad*** sign. It means additional merges are occurring despite an uncorrected or persistently faulty condition. This risks additional bugs being introduced and further complication of necessary corrective measures. Most likely people are aware and working on this, but it doesn't hurt [to confirm and/or try and help if possible](#communications).

## Communications

If you need help, you can contact the maintainers using the channels mentioned in RamaLama's [communications](https://github.com/containers/ramalama/blob/main/README.md#community) document.

For discussions around issues/bugs and features, you can use the GitHub [issues](https://github.com/containers/ramalama-stack/issues) and [PRs](https://github.com/containers/ramalama-stack/pulls) tracking system.

## Code of Conduct

As contributors and maintainers of the projects under the [Containers](https://github.com/containers) repository, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities to any of the projects under the containers umbrella. The full code of conduct guidelines can be found [here](https://github.com/containers/common/blob/main/CODE-OF-CONDUCT.md).

### Bot Interactions

ramalama-stack uses [sourcery.ai](https://sourcery.ai/) for AI code reviews.

You can read their docs [here](https://docs.sourcery.ai/Code-Review/#interacting-with-sourcery) on how to interact with the bot.
LICENSE (2 changed lines)

@@ -186,7 +186,7 @@

same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2025 The RamaLama Stack Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -0,0 +1,2 @@

recursive-include ramalama_stack/providers.d *
include ramalama_stack/ramalama-run.yaml
README.md (81 changed lines)

@@ -1,5 +1,80 @@

# ramalama-stack




[](https://pypi.org/project/ramalama-stack/)
[](https://pypi.org/project/ramalama-stack/)
[](https://github.com/containers/ramalama-stack/blob/main/LICENSE)

An external provider for [Llama Stack](https://github.com/meta-llama/llama-stack) allowing for the use of [RamaLama](https://ramalama.ai/) for inference.

## Installing

You can install `ramalama-stack` from PyPI via `pip install ramalama-stack`

This will install Llama Stack and RamaLama as well if they are not installed already.

## Usage

> [!WARNING]
> The following workaround is currently needed to run this provider - see https://github.com/containers/ramalama-stack/issues/53 for more details
> ```bash
> curl --create-dirs --output ~/.llama/providers.d/remote/inference/ramalama.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.2.1/src/ramalama_stack/providers.d/remote/inference/ramalama.yaml
> curl --create-dirs --output ~/.llama/distributions/ramalama/ramalama-run.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.2.1/src/ramalama_stack/ramalama-run.yaml
> ```

1. First you will need a RamaLama server running - see [the RamaLama project](https://github.com/containers/ramalama) docs for more information.

2. Ensure you set your `INFERENCE_MODEL` environment variable to the name of the model you have running via RamaLama.

3. You can then run the RamaLama external provider via `llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml`

> [!NOTE]
> You can also run the RamaLama external provider inside of a container via [Podman](https://podman.io/)
> ```bash
> podman run \
> --net=host \
> --env RAMALAMA_URL=http://0.0.0.0:8080 \
> --env INFERENCE_MODEL=$INFERENCE_MODEL \
> quay.io/ramalama/llama-stack
> ```
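
As a rough end-to-end sketch of the three numbered steps above (the model name is just the default used in this repo's CI, and the exact `ramalama serve` invocation is an assumption; see the RamaLama docs linked in step 1):

```bash
# 1. serve a model with RamaLama
ramalama serve llama3.2:3b

# 2. point the provider at that model
export INFERENCE_MODEL=llama3.2:3b

# 3. start the Llama Stack server with the RamaLama run config
llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml
```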
This will start a Llama Stack server which will use port 8321 by default. You can test that this works by configuring the Llama Stack Client to run against this server and sending a test request.
- If your client is running on the same machine as the server, you can run `llama-stack-client configure --endpoint http://0.0.0.0:8321 --api-key none`
- If your client is running on a different machine, you can run `llama-stack-client configure --endpoint http://<hostname>:8321 --api-key none`
- The client should give you a message similar to `Done! You can now use the Llama Stack Client CLI with endpoint <endpoint>`
- You can then test the server by running `llama-stack-client inference chat-completion --message "tell me a joke"` which should return something like

```bash
ChatCompletionResponse(
    completion_message=CompletionMessage(
        content='A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?" The librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."',
        role='assistant',
        stop_reason='end_of_turn',
        tool_calls=[]
    ),
    logprobs=None,
    metrics=[
        Metric(metric='prompt_tokens', value=14.0, unit=None),
        Metric(metric='completion_tokens', value=63.0, unit=None),
        Metric(metric='total_tokens', value=77.0, unit=None)
    ]
)
```

## Llama Stack User Interface

Llama Stack includes an experimental user interface; check it out [here](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distribution/ui).

To deploy the UI, run this:

```bash
podman run -d --rm --network=container:ramalama --name=streamlit quay.io/redhat-et/streamlit_client:0.1.0
```

> [!NOTE]
> If running on MacOS (not Linux), `--network=host` doesn't work. You'll need to publish additional ports `8321:8321` and `8501:8501` with the ramalama serve command, then run with `network=container:ramalama`.
>
> If running on Linux use `--network=host` or `-p 8501:8501` instead. The streamlit container will be able to access the ramalama endpoint with either.
@@ -0,0 +1,9 @@

# ramalama-stack CI

| Job | Description | Status |
| --- | ----------- | ------ |
| [Pre-commit](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/pre-commit.yml) | Runs pre-commit checks |  |
| [Test External Providers](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-external-providers.yml) | Tests the current `ramalama-stack` branch against the latest released versions of `ramalama` and `llama-stack` |  |
| [Test LLS Integration](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-lls-integration.yml) | Tests the latest released versions of `ramalama` and `ramalama-stack` against the current `llama-stack` main branch |  |
| [Test Container](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-container.yml) | Tests the latest tagged container image of `ramalama/llama-stack` run via Podman |  |
| [Build and publish PyPI package](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/pypi.yml) | Builds, tests, and publishes `ramalama-stack` package |  |
@@ -4,31 +4,47 @@ build-backend = "setuptools.build_meta"

[project]
name = "ramalama-stack"
version = "0.1.0"
description = "Llama Stack Provider for Ramalama Inference"
description = "An external provider for Llama Stack allowing for the use of RamaLama for inference."
authors = [{ name = "The RamaLama Stack Authors" }]
readme = "README.md"
license = {file = "LICENSE"}
license = "Apache-2.0"
license-files = ["LICENSE"]
keywords = ["ramalama", "llama", "AI"]
requires-python = ">=3.10"
requires-python = ">=3.11"
dynamic = ["version"]
dependencies = [
"llama-stack>=0.2.3",
"ramalama>=0.7.5",
"urllib3",
"faiss-cpu",
"autoevals",
"six",
"pydantic",
"aiohttp",
"aiosqlite",
"datasets",
"fastapi",
"httpx",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"requests",
"uvicorn",
"aiohttp>=3.12.2",
"aiosqlite>=0.21.0",
"autoevals>=0.0.129",
"blobfile>=3.0.0",
"chardet>=3.0.0",
"datasets>=3.6.0",
"fastapi>=0.115.12",
"httpx>=0.28.1",
"llama-stack==0.2.9",
"mcp>=1.9.2",
"numpy>=2.2.6",
"openai>=1.82.0",
"opentelemetry-exporter-otlp-proto-http>=1.33.1",
"opentelemetry-sdk>=1.33.1",
"peft>=0.15.2",
"psutil>=7.0.0",
"pydantic>=2.11.5",
"pymilvus>=2.5.10",
"ramalama==0.9.0",
"requests>=2.32.3",
"sentence-transformers>=3.0.0",
"six>=1.17.0",
"sqlalchemy>=2.0.41",
"torch>=2.7.0",
"trl>=0.18.1",
"urllib3>=2.4.0",
"uvicorn>=0.34.2",
]

[dependency-groups]
dev = [
"pre-commit>=3.0.4,<4.0",
]

[project.urls]

@@ -43,9 +59,10 @@ local_scheme = "no-local-version"

[tool.setuptools]
package-dir = { "" = "src" }
include-package-data = true

[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }
[tool.setuptools.package-data]
"ramalama_stack" = ["providers.d/**/*", "ramalama-run.yaml"]

[tool.ruff]
extend-exclude = ["*.ipynb"]
@@ -1,5 +0,0 @@

# SPDX-License-Identifier: Apache-2.0

-r requirements.txt

pre-commit>=3.0.4,<4.0
requirements.txt (487 changed lines)

@@ -1,3 +1,484 @@

ramalama>=0.7.5
llama-stack>=0.2.3
llama-stack-client>=0.2.2
# This file was autogenerated by uv via the following command:
# uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
accelerate==1.7.0
# via
#   peft
#   trl
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.12.7
# via
#   fsspec
#   llama-stack
#   ramalama-stack
aiosignal==1.3.2
# via aiohttp
aiosqlite==0.21.0
# via ramalama-stack
annotated-types==0.7.0
# via pydantic
anyio==4.9.0
# via
#   httpx
#   llama-stack-client
#   mcp
#   openai
#   sse-starlette
#   starlette
argcomplete==3.6.2
# via ramalama
attrs==25.3.0
# via
#   aiohttp
#   jsonschema
#   referencing
autoevals==0.0.129
# via ramalama-stack
blobfile==3.0.0
# via ramalama-stack
braintrust-core==0.0.59
# via autoevals
certifi==2025.4.26
# via
#   httpcore
#   httpx
#   requests
chardet==5.2.0
# via ramalama-stack
charset-normalizer==3.4.2
# via requests
chevron==0.14.0
# via autoevals
click==8.2.1
# via
#   llama-stack-client
#   uvicorn
colorama==0.4.6 ; sys_platform == 'win32'
# via
#   click
#   tqdm
datasets==3.6.0
# via
#   ramalama-stack
#   trl
deprecated==1.2.18
# via
#   opentelemetry-api
#   opentelemetry-exporter-otlp-proto-http
#   opentelemetry-semantic-conventions
dill==0.3.8
# via
#   datasets
#   multiprocess
distro==1.9.0
# via
#   llama-stack-client
#   openai
ecdsa==0.19.1
# via python-jose
fastapi==0.115.12
# via ramalama-stack
filelock==3.18.0
# via
#   blobfile
#   datasets
#   huggingface-hub
#   torch
#   transformers
fire==0.7.0
# via llama-stack
frozenlist==1.6.0
# via
#   aiohttp
#   aiosignal
fsspec==2025.3.0
# via
#   datasets
#   huggingface-hub
#   torch
googleapis-common-protos==1.70.0
# via opentelemetry-exporter-otlp-proto-http
greenlet==3.2.2 ; (python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')
# via sqlalchemy
grpcio==1.67.1
# via pymilvus
h11==0.16.0
# via
#   httpcore
#   llama-stack
#   uvicorn
hf-xet==1.1.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
#   llama-stack
#   llama-stack-client
#   mcp
#   openai
#   ramalama-stack
httpx-sse==0.4.0
# via mcp
huggingface-hub==0.32.4
# via
#   accelerate
#   datasets
#   llama-stack
#   peft
#   sentence-transformers
#   tokenizers
#   transformers
idna==3.10
# via
#   anyio
#   httpx
#   requests
#   yarl
importlib-metadata==8.6.1
# via opentelemetry-api
jinja2==3.1.6
# via
#   llama-stack
#   torch
jiter==0.10.0
# via openai
joblib==1.5.1
# via scikit-learn
jsonschema==4.24.0
# via
#   autoevals
#   llama-stack
jsonschema-specifications==2025.4.1
# via jsonschema
llama-stack==0.2.9
# via ramalama-stack
llama-stack-client==0.2.9
# via llama-stack
lxml==5.4.0
# via blobfile
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via jinja2
mcp==1.9.2
# via ramalama-stack
mdurl==0.1.2
# via markdown-it-py
milvus-lite==2.4.12 ; sys_platform != 'win32'
# via pymilvus
mpmath==1.3.0
# via sympy
multidict==6.4.4
# via
#   aiohttp
#   yarl
multiprocess==0.70.16
# via datasets
networkx==3.5
# via torch
numpy==2.2.6
# via
#   accelerate
#   datasets
#   pandas
#   peft
#   ramalama-stack
#   scikit-learn
#   scipy
#   transformers
nvidia-cublas-cu12==12.6.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
#   nvidia-cudnn-cu12
#   nvidia-cusolver-cu12
#   torch
nvidia-cuda-cupti-cu12==12.6.80 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cuda-nvrtc-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cuda-runtime-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cudnn-cu12==9.5.1.17 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cufft-cu12==11.3.0.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cufile-cu12==1.11.1.6 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-curand-cu12==10.3.7.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cusolver-cu12==11.7.1.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cusparse-cu12==12.5.4.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
#   nvidia-cusolver-cu12
#   torch
nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
#   nvidia-cufft-cu12
#   nvidia-cusolver-cu12
#   nvidia-cusparse-cu12
#   torch
nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
openai==1.84.0
# via
#   llama-stack
#   ramalama-stack
opentelemetry-api==1.33.1
# via
#   opentelemetry-exporter-otlp-proto-http
#   opentelemetry-sdk
#   opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.33.1
# via opentelemetry-exporter-otlp-proto-http
opentelemetry-exporter-otlp-proto-http==1.33.1
# via ramalama-stack
opentelemetry-proto==1.33.1
# via
#   opentelemetry-exporter-otlp-proto-common
#   opentelemetry-exporter-otlp-proto-http
opentelemetry-sdk==1.33.1
# via
#   opentelemetry-exporter-otlp-proto-http
#   ramalama-stack
opentelemetry-semantic-conventions==0.54b1
# via opentelemetry-sdk
packaging==25.0
# via
#   accelerate
#   datasets
#   huggingface-hub
#   peft
#   transformers
pandas==2.2.3
# via
#   datasets
#   llama-stack-client
#   pymilvus
peft==0.15.2
# via ramalama-stack
pillow==11.2.1
# via
#   llama-stack
#   sentence-transformers
polyleven==0.9.0
# via autoevals
prompt-toolkit==3.0.51
# via
#   llama-stack
#   llama-stack-client
propcache==0.3.1
# via
#   aiohttp
#   yarl
protobuf==5.29.5
# via
#   googleapis-common-protos
#   opentelemetry-proto
#   pymilvus
psutil==7.0.0
# via
#   accelerate
#   peft
#   ramalama-stack
pyaml==25.5.0
# via llama-stack-client
pyarrow==20.0.0
# via datasets
pyasn1==0.6.1
# via
#   python-jose
#   rsa
pycryptodomex==3.23.0
# via blobfile
pydantic==2.11.5
# via
#   fastapi
#   llama-stack
#   llama-stack-client
#   mcp
#   openai
#   pydantic-settings
#   ramalama-stack
pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.9.1
# via mcp
pygments==2.19.1
# via rich
pymilvus==2.5.10
# via ramalama-stack
python-dateutil==2.9.0.post0
# via pandas
python-dotenv==1.1.0
# via
#   llama-stack
#   pydantic-settings
#   pymilvus
python-jose==3.5.0
# via llama-stack
python-multipart==0.0.20
# via mcp
pytz==2025.2
# via pandas
pyyaml==6.0.2
# via
#   accelerate
#   autoevals
#   datasets
#   huggingface-hub
#   peft
#   pyaml
#   transformers
ramalama==0.9.0
# via ramalama-stack
referencing==0.36.2
# via
#   jsonschema
#   jsonschema-specifications
regex==2024.11.6
# via
#   tiktoken
#   transformers
requests==2.32.3
# via
#   datasets
#   huggingface-hub
#   llama-stack
#   opentelemetry-exporter-otlp-proto-http
#   ramalama-stack
#   tiktoken
#   transformers
rich==14.0.0
# via
#   llama-stack
#   llama-stack-client
rpds-py==0.25.1
# via
#   jsonschema
#   referencing
rsa==4.9.1
# via python-jose
safetensors==0.5.3
# via
#   accelerate
#   peft
#   transformers
scikit-learn==1.7.0
# via sentence-transformers
scipy==1.15.3
# via
#   scikit-learn
#   sentence-transformers
sentence-transformers==4.1.0
# via ramalama-stack
setuptools==80.9.0
# via
#   llama-stack
#   pymilvus
#   torch
#   triton
six==1.17.0
# via
#   ecdsa
#   python-dateutil
#   ramalama-stack
sniffio==1.3.1
# via
#   anyio
#   llama-stack-client
#   openai
sqlalchemy==2.0.41
# via ramalama-stack
sse-starlette==2.3.6
# via mcp
starlette==0.46.2
# via
#   fastapi
#   llama-stack
#   mcp
sympy==1.14.0
# via torch
termcolor==3.1.0
# via
#   fire
#   llama-stack
#   llama-stack-client
threadpoolctl==3.6.0
# via scikit-learn
tiktoken==0.9.0
# via llama-stack
tokenizers==0.21.1
# via transformers
torch==2.7.0
# via
#   accelerate
#   peft
#   ramalama-stack
#   sentence-transformers
tqdm==4.67.1
# via
#   datasets
#   huggingface-hub
#   llama-stack-client
#   milvus-lite
#   openai
#   peft
#   sentence-transformers
#   transformers
transformers==4.52.4
# via
#   peft
#   sentence-transformers
#   trl
triton==3.3.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
trl==0.18.1
# via ramalama-stack
typing-extensions==4.14.0
# via
#   aiosqlite
#   anyio
#   fastapi
#   huggingface-hub
#   llama-stack-client
#   openai
#   opentelemetry-sdk
#   pydantic
#   pydantic-core
#   referencing
#   sentence-transformers
#   sqlalchemy
#   torch
#   typing-inspection
typing-inspection==0.4.1
# via
#   pydantic
#   pydantic-settings
tzdata==2025.2
# via pandas
ujson==5.10.0
# via pymilvus
urllib3==2.4.0
# via
#   blobfile
#   ramalama-stack
#   requests
uvicorn==0.34.3
# via
#   mcp
#   ramalama-stack
wcwidth==0.2.13
# via prompt-toolkit
wrapt==1.17.2
# via deprecated
xxhash==3.5.0
# via datasets
yarl==1.20.0
# via aiohttp
zipp==3.22.0
# via importlib-metadata
@@ -0,0 +1,37 @@

from setuptools import setup
from setuptools.command.install import install
import os
import shutil


class CustomInstallCommand(install):
    def run(self):
        # Run the standard install
        super().run()

        # Write 'providers.d' to '~/.llama/providers.d'
        # This allows users to see the remote providers
        providers_dir = os.path.join(self.install_lib, "ramalama_stack", "providers.d")
        target_dir_1 = os.path.expanduser("~/.llama/providers.d")
        try:
            os.makedirs(target_dir_1, exist_ok=True)
            shutil.copytree(providers_dir, target_dir_1, dirs_exist_ok=True)
            print(f"Copied {providers_dir} to {target_dir_1}")
        except Exception as error:
            print(f"Failed to copy {providers_dir} to {target_dir_1}. Error: {error}")
            raise

        # Write `ramalama-run.yaml` to '~/.llama/distributions/ramalama'
        # This allows users to run the stack
        run_yaml = os.path.join(self.install_lib, "ramalama_stack", "ramalama-run.yaml")
        target_dir_2 = os.path.expanduser("~/.llama/distributions/ramalama")
        try:
            os.makedirs(target_dir_2, exist_ok=True)
            shutil.copy(run_yaml, target_dir_2)
            print(f"Copied {run_yaml} to {target_dir_2}")
        except Exception as error:
            print(f"Failed to copy {run_yaml} to {target_dir_2}. Error: {error}")
            raise


setup(cmdclass={"install": CustomInstallCommand})
@@ -11,8 +11,8 @@ def get_provider_spec() -> ProviderSpec:

api=Api.inference,
adapter=AdapterSpec(
adapter_type="ramalama",
pip_packages=["ramalama>=0.7.5", "faiss-cpu"],
pip_packages=["ramalama>=0.8.5", "pymilvus"],
config_class="config.RamalamaImplConfig",
module="ramalama_adapter",
module="ramalama_stack",
),
)
@@ -1,6 +1,6 @@

adapter:
adapter_type: ramalama
pip_packages: ["ramalama>=0.7.5"]
pip_packages: ["ramalama>=0.8.5", "pymilvus"]
config_class: ramalama_stack.config.RamalamaImplConfig
module: ramalama_stack
api_dependencies: []
@ -5,6 +5,7 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- post_training
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
|
@ -20,13 +21,10 @@ providers:
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
  - provider_id: milvus
    provider_type: inline::milvus
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:distributions/ramalama}/faiss_store.db
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/milvus_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
@ -40,13 +38,16 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/agents_store.db
      responses_store:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:llamastack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama/trace_store.db}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -81,6 +82,13 @@ providers:
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  post_training:
  - provider_id: huggingface
    provider_type: inline::huggingface
    config:
      checkpoint_format: huggingface
      distributed_backend: null
      device: cpu
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
@ -92,20 +100,32 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
    config:
      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/registry.db
inference_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/inference_store.db
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ramalama
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields: []
vector_dbs: []
datasets: []
@ -116,8 +136,8 @@ tool_groups:
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
- toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
server:
  port: 8321
external_providers_dir: ./providers.d
external_providers_dir: ${env.EXTERNAL_PROVIDERS_DIR:~/.llama/providers.d}
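A hedged example of launching the stack with the environment variables this config references; the model value mirrors the default used by the RAG test further down, and the exact invocation may differ on your setup.

    export INFERENCE_MODEL="bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf"
    export EXTERNAL_PROVIDERS_DIR="$HOME/.llama/providers.d"
    export SQLITE_STORE_DIR="$HOME/.llama/distributions/ramalama"
    uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv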
@ -191,7 +191,6 @@ class RamalamaInferenceAdapter(Inference, ModelsProtocolPrivate):
        )

    async def register_model(self, model: Model) -> Model:
        model = await self.register_helper.register_model(model)
        res = await self.client.models.list()
        available_models = [m.id async for m in res]
        # RamaLama handles paths on macOS and Linux differently
@ -0,0 +1,37 @@
#!/bin/bash

function test_file_writes {
  PROVIDER_DIR="$HOME/.llama/providers.d"
  RUN_YAML="$HOME/.llama/distributions/ramalama/ramalama-run.yaml"

  # check for PROVIDER_DIR
  if [ -d "$PROVIDER_DIR" ]; then
    echo "$PROVIDER_DIR found"
  else
    echo "$PROVIDER_DIR not found"
    echo "===> test_file_writes: fail"
    exit 1
  fi

  # check for RUN_YAML
  if [ -f "$RUN_YAML" ]; then
    echo "$RUN_YAML found"
  else
    echo "$RUN_YAML not found"
    echo "===> test_file_writes: fail"
    exit 1
  fi

  # return if all checks are successful
  echo "===> test_file_writes: pass"
  return
}

main() {
  echo "===> starting 'test-build'..."
  test_file_writes
  echo "===> 'test-build' completed successfully!"
}

main "$@"
exit 0
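A sketch of how this check might be run locally (the tests/ location is an assumption based on how the other scripts in this diff reference it):

    pip install .
    bash tests/test-build.sh  # hypothetical path; adjust to where the script lives in the repo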
@ -0,0 +1,20 @@
#!/bin/bash

main() {
  echo "===> starting 'test-container'..."
  start_and_wait_for_ramalama_server
  test_ramalama_models
  test_ramalama_chat_completion
  start_and_wait_for_llama_stack_container
  test_llama_stack_models
  test_llama_stack_openai_models
  test_llama_stack_chat_completion
  test_llama_stack_openai_chat_completion
  echo "===> 'test-container' completed successfully!"
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"
main "$@"
exit 0
@ -1,96 +1,22 @@
#!/bin/bash

function start_and_wait_for_ramalama_server {
  # Start ramalama serve in background with logging to 'ramalama.log'
  nohup uv run ramalama serve "$INFERENCE_MODEL" > ramalama.log 2>&1 &
  RAMALAMA_PID=$!
  echo "Started RamaLama with PID: $RAMALAMA_PID"

  # Wait for ramalama to be ready by doing a health check
  echo "Waiting for RamaLama server..."
  for i in {1..60}; do
    echo "Attempt $i to connect to RamaLama..."
    resp=$(curl -s http://localhost:8080/health)
    if [ "$resp" == '{"status":"ok"}' ]; then
      echo "RamaLama server is up and responding!"
      break
    fi
    if [ "$i" -eq 60 ]; then
      echo "RamaLama server failed to start or respond"
      echo "RamaLama logs:"
      cat ramalama.log
      exit 1
    fi
    sleep 1
  done
}

function start_and_wait_for_llama_stack_server {
  # Start llama stack run with logging to 'lls.log'
  LLAMA_STACK_LOG_FILE=lls.log nohup uv run llama stack run run.yaml --image-type venv &
  LLS_PID=$!
  echo "Started Llama Stack with PID: $LLS_PID"

  # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
  echo "Waiting for Llama Stack server..."
  for i in {1..60}; do
    echo "Attempt $i to connect to Llama Stack..."
    resp=$(curl -s http://localhost:8321/v1/health)
    if [ "$resp" == '{"status":"OK"}' ]; then
      echo "Llama Stack server is up!"
      if grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml" lls.log; then
        echo "Llama Stack server is using RamaLama provider"
        return
      else
        echo "Llama Stack server is not using RamaLama provider"
        echo "Server logs:"
        cat lls.log
        exit 1
      fi
    fi
    sleep 1
  done
  echo "Llama Stack server failed to start"
  echo "Server logs:"
  cat lls.log
  exit 1
}

function test_ramalama_chat_completion {
  echo "===> test_ramalama_chat_completion: start"
  # shellcheck disable=SC2016
  resp=$(curl -s -X POST http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"messages": [{"role": "user", "content": "Hello"}], "model": "$INFERENCE_MODEL"}')
  if echo "$resp" | grep -q "choices"; then
    echo "===> test_ramalama_chat_completion: pass"
    return
  else
    echo "===> test_ramalama_chat_completion: fail"
    exit 1
  fi
}

function test_llama_stack_chat_completion {
  echo "===> test_llama_stack_chat_completion: start"
  nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
  if nohup uv run llama-stack-client inference chat-completion --message "tell me a joke" | grep -q "completion_message"; then
    echo "===> test_llama_stack_chat_completion: pass"
    return
  else
    echo "===> test_llama_stack_chat_completion: fail"
    exit 1
  fi
}

main() {
  echo "===> starting 'test-external-providers'..."
  start_and_wait_for_ramalama_server
  test_ramalama_models
  test_ramalama_chat_completion
  start_and_wait_for_llama_stack_server
  test_llama_stack_models
  test_llama_stack_openai_models
  test_llama_stack_chat_completion
  test_llama_stack_openai_chat_completion
  echo "===> 'test-external-providers' completed successfully!"
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC2153,SC2034
INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"
main "$@"
exit 0
@ -0,0 +1,166 @@
import os
import uuid
from llama_stack_client import LlamaStackClient, RAGDocument


def setup_client():
    """Initialize Llama Stack client with configuration"""
    base_url = "http://localhost:8321"

    client = LlamaStackClient(base_url=base_url, api_key="none", timeout=10.0)

    print(f"Connected to Llama Stack server at {base_url}")
    return client


def setup_inference_params():
    """Configure inference parameters"""
    model_id = os.getenv(
        "INFERENCE_MODEL",
        "bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf",
    )

    temperature = float(os.getenv("TEMPERATURE", 0.0))
    if temperature > 0.0:
        top_p = float(os.getenv("TOP_P", 0.95))
        strategy = {"type": "top_p", "temperature": temperature, "top_p": top_p}
    else:
        strategy = {"type": "greedy"}

    max_tokens = int(os.getenv("MAX_TOKENS", 4096))

    sampling_params = {
        "strategy": strategy,
        "max_tokens": max_tokens,
    }

    stream_env = os.getenv("STREAM", "False")
    stream = stream_env == "True"

    print("Inference Parameters:")
    print(f"\tModel: {model_id}")
    print(f"\tSampling Parameters: {sampling_params}")
    print(f"\tStream: {stream}")

    return model_id, sampling_params, stream


def setup_vector_db(client):
    """Setup vector database for RAG"""
    vector_db_id = f"test_vector_db_{uuid.uuid4().hex[:8]}"

    # Find embedding model from available models
    models = client.models.list()
    embedding_model = None
    for model in models:
        if hasattr(model, "model_type") and model.model_type == "embedding":
            embedding_model = model.identifier
            break

    if not embedding_model:
        raise Exception("No embedding model found")

    print(f"Using embedding model: {embedding_model}")

    # Register vector database
    client.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model=embedding_model,
        embedding_dimension=int(os.getenv("VDB_EMBEDDING_DIMENSION", 384)),
        provider_id=os.getenv("VDB_PROVIDER", "milvus"),
    )

    # Ingest simple test documents instead of external URLs
    test_content = [
        "RamaLama Stack is an external provider for Llama Stack that allows for the use of RamaLama for inference.",
        "Podman is a container management tool that provides a Docker-compatible command line interface without requiring a daemon.",
        "Podman can run containers rootlessly and provides robust security isolation.",
    ]

    documents = [
        RAGDocument(
            document_id=f"test_doc_{i}",
            content=content,
            mime_type="text/plain",
            metadata={"source": f"test_document_{i}"},
        )
        for i, content in enumerate(test_content)
    ]

    print(f"Ingesting {len(documents)} test documents into vector database...")
    client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=int(os.getenv("VECTOR_DB_CHUNK_SIZE", 128)),
    )

    print(f"Vector database '{vector_db_id}' setup complete")
    return vector_db_id


def run_rag_query(client, model_id, sampling_params, stream, vector_db_id, query):
    """Execute RAG query and return response"""
    print(f"\nUser> {query}")

    rag_response = client.tool_runtime.rag_tool.query(
        content=query, vector_db_ids=[vector_db_id]
    )

    messages = [{"role": "system", "content": "You are a helpful assistant."}]

    prompt_context = rag_response.content
    extended_prompt = f"Please answer the given query using the context below.\n\nCONTEXT:\n{prompt_context}\n\nQUERY:\n{query}"
    messages.append({"role": "user", "content": extended_prompt})

    response = client.inference.chat_completion(
        messages=messages,
        model_id=model_id,
        sampling_params=sampling_params,
        stream=stream,
    )

    print("inference> ", end="")
    if stream:
        for chunk in response:
            if hasattr(chunk, "event") and hasattr(chunk.event, "delta"):
                if hasattr(chunk.event.delta, "text"):
                    print(chunk.event.delta.text, end="")
        print()
    else:
        print(response.completion_message.content)


def main():
    """Main function to run RAG test"""
    print("=== Llama Stack RAG Test ===")

    try:
        client = setup_client()
        model_id, sampling_params, stream = setup_inference_params()

        vector_db_id = setup_vector_db(client)

        queries = [
            "What is RamaLama Stack?",
            "What is Podman?",
            "Can Podman run in rootless mode?",
        ]

        print("\n=== Running RAG Queries ===")
        for query in queries:
            run_rag_query(
                client, model_id, sampling_params, stream, vector_db_id, query
            )
            print()

        print("=== RAG Test Complete ===")

    except Exception as e:
        print(f"Error: {e}")
        return 1

    return 0


if __name__ == "__main__":
    exit(main())
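A hedged way to drive this script directly, using the environment variables it reads (values shown are the script's own defaults):

    export INFERENCE_MODEL="bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf"
    export VDB_PROVIDER=milvus           # matches the vector_io provider in ramalama-run.yaml
    export VDB_EMBEDDING_DIMENSION=384   # matches the all-MiniLM-L6-v2 embedding model
    export STREAM=False
    uv run python tests/test-rag.py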
@ -0,0 +1,40 @@
#!/bin/bash

function test_rag_functionality {
  echo "===> test_rag_functionality: start"

  if uv run python tests/test-rag.py; then
    echo "===> test_rag_functionality: pass"
    return 0
  else
    echo "===> test_rag_functionality: fail"
    echo "RAG test script output above shows the failure details"
    return 1
  fi
}

main() {
  echo "===> starting 'test-rag'..."

  # Check if services are already running (from previous tests)
  if curl -s http://localhost:8321/v1/health >/dev/null 2>&1 && curl -s http://localhost:8080/health >/dev/null 2>&1; then
    echo "Using existing RamaLama and Llama Stack servers"
  else
    echo "Starting fresh servers for RAG test"
    start_and_wait_for_ramalama_server
    start_and_wait_for_llama_stack_server
  fi

  if test_rag_functionality; then
    echo "===> 'test-rag' completed successfully!"
  else
    echo "===> 'test-rag' failed!"
    exit 1
  fi
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"
main "$@"
exit 0
@ -0,0 +1,77 @@
#!/bin/bash

function start_and_wait_for_streamlit_ui_linux {
  echo "Starting Streamlit UI for Linux..."

  podman run -d --rm --network=host --name=streamlit-ui quay.io/redhat-et/streamlit_client:0.1.0

  echo "Waiting for Streamlit UI to be ready..."
  for i in {1..30}; do
    echo "Attempt $i to connect to Streamlit UI..."
    if curl -s http://localhost:8501 >/dev/null 2>&1; then
      echo "Streamlit UI is up and responding on port 8501!"
      return 0
    fi
    if [ "$i" -eq 30 ]; then
      echo "Streamlit UI failed to start or respond"
      echo "Container logs:"
      podman logs streamlit-ui
      return 1
    fi
    sleep 2
  done
}

function test_streamlit_ui_linux {
  echo "===> test_streamlit_ui_linux: start"

  if start_and_wait_for_streamlit_ui_linux; then
    # Test that the UI is accessible and returns HTML content
    resp=$(curl -sS http://localhost:8501)
    if echo "$resp" | grep -q -i "streamlit\|html"; then
      echo "===> test_streamlit_ui_linux: pass"
      return 0
    else
      echo "===> test_streamlit_ui_linux: fail - UI not serving expected content"
      echo "Response: $resp"
      return 1
    fi
  else
    echo "===> test_streamlit_ui_linux: fail - UI failed to start"
    return 1
  fi
}

function cleanup_streamlit_ui {
  echo "Cleaning up Streamlit UI container..."
  podman rm -f streamlit-ui >/dev/null 2>&1 || true
}

main() {
  echo "===> starting 'test-ui-linux'..."

  # Only run on Linux
  # Need a fix to published ports in ramalama to run on macOS
  if [[ "$OSTYPE" != "linux-gnu"* ]]; then
    echo "This test is only for Linux systems. Current OS: $OSTYPE"
    echo "===> 'test-ui-linux' skipped!"
    exit 0
  fi

  trap cleanup_streamlit_ui EXIT

  start_and_wait_for_ramalama_server
  start_and_wait_for_llama_stack_server

  test_streamlit_ui_linux

  cleanup_streamlit_ui

  echo "===> 'test-ui-linux' completed successfully!"
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"
main "$@"
exit 0
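For reference, a minimal sketch of the same UI check done by hand (image tag taken from the script above; assumes the stack is already serving on port 8321):

    podman run -d --rm --network=host --name=streamlit-ui quay.io/redhat-et/streamlit_client:0.1.0
    curl -s http://localhost:8501 | grep -qi "streamlit\|html" && echo "UI responding"
    podman rm -f streamlit-ui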
@ -0,0 +1,186 @@
#!/bin/bash

function start_and_wait_for_ramalama_server {
  # Start ramalama serve in background with logging to 'ramalama-$INFERENCE_MODEL_NO_COLON.log'
  nohup uv run ramalama serve "$INFERENCE_MODEL" > "ramalama-$INFERENCE_MODEL_NO_COLON.log" 2>&1 &
  RAMALAMA_PID=$!
  echo "Started RamaLama with PID: $RAMALAMA_PID"

  # Wait for ramalama to be ready by doing a health check
  echo "Waiting for RamaLama server..."
  for i in {1..60}; do
    echo "Attempt $i to connect to RamaLama..."
    resp=$(curl -s http://localhost:8080/health)
    if [ "$resp" == '{"status":"ok"}' ]; then
      echo "RamaLama server is up and responding!"
      break
    fi
    if [ "$i" -eq 60 ]; then
      echo "RamaLama server failed to start or respond"
      echo "RamaLama logs:"
      cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
      exit 1
    fi
    sleep 1
  done
}

function start_and_wait_for_llama_stack_server {
  # Start llama stack run with logging to 'lls-$INFERENCE_MODEL_NO_COLON.log'
  LLAMA_STACK_LOG_FILE="lls-$INFERENCE_MODEL_NO_COLON.log" nohup uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv &
  LLS_PID=$!
  echo "Started Llama Stack server with PID: $LLS_PID"

  # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
  echo "Waiting for Llama Stack server..."
  for i in {1..60}; do
    echo "Attempt $i to connect to Llama Stack..."
    resp=$(curl -s http://localhost:8321/v1/health)
    if [ "$resp" == '{"status":"OK"}' ]; then
      echo "Llama Stack server is up!"
      if grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml" "lls-$INFERENCE_MODEL_NO_COLON.log"; then
        echo "Llama Stack server is using RamaLama provider"
        return
      else
        echo "Llama Stack server is not using RamaLama provider"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log"
        exit 1
      fi
    fi
    sleep 1
  done
  echo "Llama Stack server failed to start"
  echo "Server logs:"
  cat "lls-$INFERENCE_MODEL_NO_COLON.log"
  exit 1
}

function start_and_wait_for_llama_stack_container {
  # Start llama stack run
  podman run \
    -d \
    --net=host \
    --env INFERENCE_MODEL="$INFERENCE_MODEL" \
    --env RAMALAMA_URL=http://0.0.0.0:8080 \
    --name llama-stack \
    quay.io/ramalama/llama-stack:latest
  LLS_PID=$!
  echo "Started Llama Stack container with PID: $LLS_PID"

  # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
  echo "Waiting for Llama Stack server..."
  for i in {1..60}; do
    echo "Attempt $i to connect to Llama Stack..."
    resp=$(curl -s http://localhost:8321/v1/health)
    if [ "$resp" == '{"status":"OK"}' ]; then
      echo "Llama Stack server is up!"
      if podman logs llama-stack | grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml"; then
        echo "Llama Stack server is using RamaLama provider"
        return
      else
        echo "Llama Stack server is not using RamaLama provider"
        echo "Container logs:"
        podman logs llama-stack
        exit 1
      fi
    fi
    sleep 1
  done
  echo "Llama Stack server failed to start"
  echo "Container logs:"
  podman logs llama-stack
  exit 1
}

function test_ramalama_models {
  echo "===> test_ramalama_models: start"
  # shellcheck disable=SC2016
  resp=$(curl -sS http://localhost:8080/v1/models)
  if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
    echo "===> test_ramalama_models: pass"
    return
  else
    echo "===> test_ramalama_models: fail"
    echo "RamaLama logs:"
    cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
    exit 1
  fi
}

function test_ramalama_chat_completion {
  echo "===> test_ramalama_chat_completion: start"
  # shellcheck disable=SC2016
  resp=$(curl -sS -X POST http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}")
  if echo "$resp" | grep -q "choices"; then
    echo "===> test_ramalama_chat_completion: pass"
    return
  else
    echo "===> test_ramalama_chat_completion: fail"
    echo "RamaLama logs:"
    cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
    exit 1
  fi
}

function test_llama_stack_models {
  echo "===> test_llama_stack_models: start"
  nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
  if nohup uv run llama-stack-client models list | grep -q "$INFERENCE_MODEL"; then
    echo "===> test_llama_stack_models: pass"
    return
  else
    echo "===> test_llama_stack_models: fail"
    echo "Server logs:"
    cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
    exit 1
  fi
}

function test_llama_stack_openai_models {
  echo "===> test_llama_stack_openai_models: start"
  # shellcheck disable=SC2016
  resp=$(curl -sS http://localhost:8321/v1/openai/v1/models)
  if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
    echo "===> test_llama_stack_openai_models: pass"
    return
  else
    echo "===> test_llama_stack_openai_models: fail"
    echo "Server logs:"
    cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
    exit 1
  fi
}

function test_llama_stack_chat_completion {
  echo "===> test_llama_stack_chat_completion: start"
  nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
  if nohup uv run llama-stack-client inference chat-completion --message "tell me a joke" | grep -q "completion_message"; then
    echo "===> test_llama_stack_chat_completion: pass"
    return
  else
    echo "===> test_llama_stack_chat_completion: fail"
    echo "Server logs:"
    cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
    exit 1
  fi
}

function test_llama_stack_openai_chat_completion {
  echo "===> test_llama_stack_openai_chat_completion: start"
  # shellcheck disable=SC2016
  resp=$(curl -sS -X POST http://localhost:8321/v1/openai/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}")
  if echo "$resp" | grep -q "choices"; then
    echo "===> test_llama_stack_openai_chat_completion: pass"
    return
  else
    echo "===> test_llama_stack_openai_chat_completion: fail"
    echo "Server logs:"
    cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
    exit 1
  fi
}
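A hedged example of hitting the same OpenAI-compatible endpoint the helpers above exercise, outside the test harness (assumes INFERENCE_MODEL is exported and the server is listening on 8321):

    curl -sS -X POST http://localhost:8321/v1/openai/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}" \
      | grep -q "choices" && echo "openai chat completion OK"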