Compare commits

15 commits:

* d09a8cf3ef
* a347f05b4c
* aab4fc9f7d
* 88e1e574c3
* e067155c4c
* d701060d6e
* 6d59b1b408
* c1044db093
* 29de6525cc
* 2a3fad344e
* 187ea3db43
* 1f460aa32a
* 49957a64f1
* 7a82c46c96
* 58aa4d00ce
@@ -28,7 +28,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
         with:
           egress-policy: audit

@@ -41,13 +41,26 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
         with:
-          python-version: "3.10"
+          python-version: "3.11"
           cache: pip
           cache-dependency-path: |
+            **/requirements*.txt
             .pre-commit-config.yaml

       - name: Run pre-commit
         uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+        env:
+          SKIP: no-commit-to-branch
+          RUFF_OUTPUT_FORMAT: github

       - name: Verify if there are any diff files after pre-commit
         run: git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)

+      - name: Verify if there are any new files after pre-commit
+        run: |
+          unstaged_files=$(git ls-files --others --exclude-standard)
+          if [ -n "$unstaged_files" ]; then
+            echo "There are uncommitted new files, run pre-commit locally and commit again"
+            echo "$unstaged_files"
+            exit 1
+          fi
@@ -37,7 +37,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
         with:
           egress-policy: audit

@@ -48,7 +48,7 @@ jobs:
           fetch-depth: 0

       - name: Build and inspect python package
-        uses: hynek/build-and-inspect-python-package@b5076c307dc91924a82ad150cdd1533b444d3310 # v2.12.0
+        uses: hynek/build-and-inspect-python-package@c52c3a4710070b50470d903818a7b25115dcd076 # v2.13.0

       - name: Run 'test-build.sh'
         run: $GITHUB_WORKSPACE/tests/test-build.sh

@@ -71,7 +71,7 @@ jobs:
     needs: build-package
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
         with:
           egress-policy: audit

@@ -102,7 +102,7 @@ jobs:
     needs: build-package
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
         with:
           egress-policy: audit
@@ -6,7 +6,7 @@ on:
     inference_model:
       description: Model to download and inference via RamaLama
       required: false
-      default: llama3.2:3b-instruct-fp16
+      default: llama3.2:3b
   schedule:
     - cron: '0 11 * * *' # Runs at 11AM UTC every morning

@@ -25,10 +25,10 @@ jobs:
     name: test-container
    runs-on: ubuntu-latest
     env:
-      INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b-instruct-fp16' }}
+      INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b' }}
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
         with:
           egress-policy: audit

@@ -39,9 +39,9 @@ jobs:
           fetch-depth: 0

       - name: Install uv
-        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
+        uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
         with:
-          python-version: "3.10"
+          python-version: "3.11"

       - name: Set Up Environment and Install Dependencies
         run: |

@@ -66,6 +66,9 @@ jobs:
       - name: Run 'test-container.sh'
         run: $GITHUB_WORKSPACE/tests/test-container.sh

+      - name: Run 'test-ui-linux.sh'
+        run: $GITHUB_WORKSPACE/tests/test-ui-linux.sh
+
       - name: Upload logs
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         if: always()
@@ -2,17 +2,26 @@ name: Test External Providers

 on:
   workflow_dispatch:
-    inputs:
-      inference_model:
-        description: Model to download and inference via RamaLama
-        required: false
-        default: llama3.2:3b-instruct-fp16
   push:
     branches:
       - main
+    paths:
+      - 'src/ramalama_stack/**'
+      - 'tests/**'
+      - '.github/workflows/test-external-providers.yml'
+      - pyproject.toml
+      - requirements.txt
+      - uv.lock
   pull_request:
     branches:
       - main
+    paths:
+      - 'src/ramalama_stack/**'
+      - 'tests/**'
+      - '.github/workflows/test-external-providers.yml'
+      - pyproject.toml
+      - requirements.txt
+      - uv.lock

 env:
   LC_ALL: en_US.UTF-8

@@ -28,14 +37,23 @@ jobs:
   test-external-providers:
     name: test-external-providers
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        inference_model:
+          - 'llama3.2:3b'
+          - 'granite3.2:2b'
     env:
-      INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b-instruct-fp16' }}
+      INFERENCE_MODEL: ${{ matrix.inference_model }}
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
         with:
           egress-policy: audit

+      - name: Set INFERENCE_MODEL_NO_COLON for logging artifacts
+        run: echo "INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')" >> $GITHUB_ENV
+
       - name: Checkout containers/ramalama-stack
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:

@@ -43,14 +61,13 @@ jobs:
           fetch-depth: 0

       - name: Install uv
-        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
+        uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
         with:
-          python-version: "3.10"
+          python-version: "3.11"

       - name: Set Up Environment and Install Dependencies
         run: |
           uv sync
-          uv pip install -e .

           # temporary hack for file writing that should be done by the pip setup script
           # https://github.com/containers/ramalama-stack/issues/53

@@ -75,11 +92,14 @@ jobs:
       - name: Run 'test-external-providers.sh'
         run: $GITHUB_WORKSPACE/tests/test-external-providers.sh

+      - name: Run 'test-rag.sh'
+        run: $GITHUB_WORKSPACE/tests/test-rag.sh
+
       - name: Upload logs
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         if: always()
         with:
-          name: logs-test-external-providers
+          name: logs-test-external-providers-${{ env.INFERENCE_MODEL_NO_COLON }}
           retention-days: 5
           path: |
             **/*.log
@@ -6,7 +6,7 @@ on:
     inference_model:
       description: Model to download and inference via RamaLama
       required: false
-      default: llama3.2:3b-instruct-fp16
+      default: llama3.2:3b
   schedule:
     - cron: '0 11 * * *' # Runs at 11AM UTC every morning

@@ -25,10 +25,10 @@ jobs:
     name: test-lls-integration
     runs-on: ubuntu-latest
     env:
-      INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b-instruct-fp16' }}
+      INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b' }}
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
         with:
           egress-policy: audit

@@ -39,9 +39,9 @@ jobs:
           fetch-depth: 0

       - name: Install uv
-        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
+        uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
         with:
-          python-version: "3.10"
+          python-version: "3.11"

       - name: Set Up Environment and Install Dependencies
         run: |
@@ -176,3 +176,4 @@ cython_debug/
 # Anything additional
 distributions/
 src/ramalama_stack/_version.py
+.python-version
@@ -16,14 +16,14 @@ repos:
       - id: check-shebang-scripts-are-executable

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.7
+    rev: v0.11.12
     hooks:
       - id: ruff
         args: [ --fix ]
       - id: ruff-format

   - repo: https://github.com/astral-sh/uv-pre-commit
-    rev: 0.6.17
+    rev: 0.7.9
     hooks:
       - id: uv-lock
       - id: uv-export

@@ -31,6 +31,8 @@ repos:
           "--frozen",
           "--no-hashes",
           "--no-emit-project",
+          "--no-default-groups",
+          "--output-file=requirements.txt"
         ]

   - repo: https://github.com/koalaman/shellcheck-precommit
@@ -1 +0,0 @@
-3.10
@@ -0,0 +1,316 @@
# Contributing to ramalama-stack

We'd love to have you join the community!
Below is a summary of the processes that we follow.

## Topics

* [Reporting Issues](#reporting-issues)
* [Working On Issues](#working-on-issues)
* [Contributing To ramalama-stack](#contributing-to-ramalama-stack-1)
* [Submitting Pull Requests](#submitting-pull-requests)
* [Communications](#communications)
* [Code of Conduct](#code-of-conduct)

## Reporting Issues

Before reporting an issue, check our backlog of [open issues](https://github.com/containers/ramalama-stack/issues) to see if someone else has already reported it.
If so, feel free to add your scenario, or additional information, to the discussion.
Or simply "subscribe" to it to be notified when it is updated.
Please do not add comments like "+1" or "I have this issue as well" without adding any new information.
Instead, please add a thumbs-up emoji to the original report.

Note: Older closed issues/PRs are automatically locked.
If you have a similar problem please open a new issue instead of commenting.

If you find a new issue with the project we'd love to hear about it!
The most important aspect of a bug report is that it includes enough information for us to reproduce it.
To make this easier, there are three types of issue templates you can use.

* If you have a bug to report, please use the *Bug Report* template.
* If you have an idea to propose, please use the *Feature Request* template.
* If your issue is something else, please use the default *Blank issue* template.

Please include as much detail as possible, including all requested fields in the template.
Not having all requested information makes it much harder to find and fix issues.
A reproducer is the best thing you can include.
Reproducers make finding and fixing issues much easier for maintainers.
The easier it is for us to reproduce a bug, the faster it'll be fixed!

Please don't include any private/sensitive information in your issue!
Security issues should NOT be reported via GitHub and should instead be reported via the process described [here](https://github.com/containers/common/blob/main/SECURITY.md).

## Working On Issues

Once you have decided to contribute to ramalama-stack by working on an issue, check our backlog of [open issues](https://github.com/containers/ramalama-stack/issues) looking for any that are unassigned.
If you want to work on a specific issue that is already assigned but does not appear to be actively being worked on, please ping the assignee in the issue and ask if you can take over.
If they do not respond after several days, you can notify a maintainer to have the issue reassigned.
When working on an issue, please assign it to yourself.
If you lack permissions to do so, you can ping the `@containers/ramalama-stack-maintainers` group to have a maintainer set you as assignee.

## Contributing To ramalama-stack

This section describes how to make a contribution to ramalama-stack.

### Prepare your environment

The minimum version of Python required to use ramalama-stack is Python 3.11.

### Fork and clone ramalama-stack

First, you need to fork this project on GitHub.
Then clone your fork locally:

```shell
$ git clone git@github.com:<you>/ramalama-stack
$ cd ./ramalama-stack/
```

### Install required tools

We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments.
You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).

You can install the dependencies by running:

```bash
cd ramalama-stack
uv sync
source .venv/bin/activate
```

> [!NOTE]
> You can use a specific version of Python with `uv` by adding the `--python <version>` flag (e.g. `--python 3.11`).
> Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`.
> For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/).
### Adding dependencies
|
||||||
|
|
||||||
|
Please add dependencies using the [uv-documented approach](https://docs.astral.sh/uv/concepts/projects/dependencies/#adding-dependencies).
|
||||||
|
|
||||||
|
This should update both the `pyproject.toml` and the `uv.lock` file.
|
||||||
|
|
||||||
|
The `requirements.txt` file should be updated as well by `pre-commit` - you can also do this manually via `uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt`.
|
||||||
|
|
||||||
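As an illustration, a minimal sketch of that workflow; `some-package` is a hypothetical placeholder, not a real dependency of this project:

```bash
# Add a dependency; uv updates pyproject.toml and uv.lock.
uv add some-package

# Regenerate requirements.txt the same way the pre-commit hook does.
uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
```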
## Testing

ramalama-stack provides a small suite of tests in the `tests/` directory.
Most pull requests should be accompanied by test changes covering the changes in the PR.
Pull requests without tests will receive additional scrutiny from maintainers and may be blocked from merging unless tests are added.
Maintainers will decide if tests are not necessary during review.

### Types of Tests

There are several types of tests run by ramalama-stack's upstream CI.

* Pre-commit checks
* Functional testing
* Integration testing
* PyPI build and upload testing

## Documentation

Make sure to update the documentation if needed.
ramalama-stack is documented via its [README](https://github.com/containers/ramalama-stack/blob/main/docs/README.md) and files in the `docs/` directory.

## Submitting Pull Requests

No Pull Request (PR) is too small!
Typos, additional comments in the code, new test cases, bug fixes, new features, more documentation, ... it's all welcome!

While bug fixes can first be identified via an "issue" in GitHub, that is not required.
It's ok to just open up a PR with the fix, but make sure you include the same information you would have included in an issue - like how to reproduce it.

PRs for new features should include some background on what use cases the new code is trying to address.
When possible and when it makes sense, try to break up larger PRs into smaller ones - it's easier to review smaller code changes.
But only if those smaller ones make sense as stand-alone PRs.

Regardless of the type of PR, all PRs should include:

* Well-documented code changes, both through comments in the code itself and high-quality commit messages.
* Additional tests. Ideally, they should fail without your code change applied.
* Documentation updates to reflect the changes made in the pull request.

Squash your commits into logical pieces of work that might want to be reviewed separately from the rest of the PR.
Squashing down to just one commit is also acceptable since in the end the entire PR will be reviewed anyway.
When in doubt, squash.

When your PR fixes an issue, please note that by including `Fixes: #00000` in the commit description.
More details on this are below, in the "Describe your Changes in Commit Messages" section.

The ramalama-stack repo follows a one-ack policy for merges.
PRs will be approved and merged by a repo owner.
Two reviews are required for a pull request to merge, including sourcery.ai.

### Describe your Changes in Commit Messages

Describe your problem.
Whether your patch is a one-line bug fix or 5000 lines of a new feature, there must be an underlying problem that motivated you to do this work.
Convince the reviewer that there is a problem worth fixing and that it makes sense for them to read past the first paragraph.

Describe user-visible impact.
Straight up crashes and lockups are pretty convincing, but not all bugs are that blatant.
Even if the problem was spotted during code review, describe the impact you think it can have on users.
Keep in mind that the majority of users run packages provided by distributions, so include anything that could help route your change downstream.

Quantify optimizations and trade-offs.
If you claim improvements in performance, memory consumption, stack footprint, or binary size, include numbers that back them up.
But also describe non-obvious costs.
Optimizations usually aren't free but involve trade-offs between CPU, memory, and readability; or, when it comes to heuristics, between different workloads.
Describe the expected downsides of your optimization so that the reviewer can weigh costs against benefits.

Once the problem is established, describe what you are actually doing about it in technical detail.
It's important to describe the change in plain English for the reviewer to verify that the code is behaving as you intend it to.

Solve only one problem per patch.
If your description starts to get long, that's a sign that you probably need to split up your patch.

If the patch fixes a logged bug entry, refer to that bug entry by number and URL.
If the patch follows from a mailing list discussion, give a URL to the mailing list archive.
Please format these lines as `Fixes:` followed by the URL or, for GitHub bugs, the bug number preceded by a #.
For example:

```
Fixes: #00000
Fixes: https://github.com/containers/ramalama-stack/issues/00000
Fixes: https://issues.redhat.com/browse/RHEL-00000
Fixes: RHEL-00000
```

However, try to make your explanation understandable without external resources.
In addition to giving a URL to a mailing list archive or bug, summarize the relevant points of the discussion that led to the patch as submitted.

If you want to refer to a specific commit, don't just refer to the SHA-1 ID of the commit.
Please also include the one-line summary of the commit, to make it easier for reviewers to know what it is about. If the commit was merged in GitHub, referring to a GitHub PR number is also a good option, as that will retain all discussion from development, and makes including a summary less critical.
Examples:

```
Commit f641c2d9384e ("fix bug in rm -fa parallel deletes") [...]
PR #00000
```

When referring to a commit by SHA, you should also be sure to use at least the first twelve characters of the SHA-1 ID.
The ramalama-stack repository holds a lot of objects, making collisions with shorter IDs a real possibility.
Bear in mind that, even if there is no collision with your six-character ID now, that condition may change five years from now.

The following git config settings can be used to add a pretty format for outputting the above style in the git log or git show commands:

```
[core]
        abbrev = 12
[pretty]
        fixes = Fixes: %h (\"%s\")
```

### Sign your PRs

The sign-off is a line at the end of the explanation for the patch.
Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch.
The rules are simple: if you can certify the below (from [developercertificate.org](https://developercertificate.org/)):

```
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.

Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
    have the right to submit it under the open source license
    indicated in the file; or

(b) The contribution is based upon previous work that, to the best
    of my knowledge, is covered under an appropriate open source
    license and I have the right under that license to submit that
    work with modifications, whether created in whole or in part
    by me, under the same open source license (unless I am
    permitted to submit under a different license), as indicated
    in the file; or

(c) The contribution was provided directly to me by some other
    person who certified (a), (b) or (c) and I have not modified
    it.

(d) I understand and agree that this project and the contribution
    are public and that a record of the contribution (including all
    personal information I submit with it, including my sign-off) is
    maintained indefinitely and may be redistributed consistent with
    this project or the open source license(s) involved.
```

Then you just add a line to every git commit message:

    Signed-off-by: Joe Smith <joe.smith@email.com>

Use your real name (sorry, no pseudonyms or anonymous contributions).

If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`.
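For example, a minimal sketch of that setup (the name and e-mail are the placeholders used above):

```bash
# One-time identity configuration used for the Signed-off-by trailer.
git config --global user.name "Joe Smith"
git config --global user.email "joe.smith@email.com"

# -s appends "Signed-off-by: Joe Smith <joe.smith@email.com>" to the commit message.
git commit -s
```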
### Continuous Integration

All pull requests automatically run ramalama-stack's test suite.

There is always additional complexity added by automation, and so it sometimes can fail for any number of reasons.
This includes post-merge testing on all branches, where you may occasionally see [red bars on the status graph](https://github.com/containers/ramalama-stack/blob/main/docs/ci.md).

Most notably, the tests will occasionally flake.
If you see that a single test on your PR has failed, and you do not believe it is caused by your changes, you can rerun the tests.
If you lack permissions to rerun the tests, please ping the maintainers using the `@containers/ramalama-stack-maintainers` group and request that the failing test be rerun.

If you see multiple test failures, you may wish to check the status graph mentioned above.
When the graph shows mostly green bars on the right, it's a good indication the main branch is currently stable.
Alternating red/green bars is indicative of a testing "flake", and should be examined (anybody can do this):

* *One or a small handful of tests, on a single task (i.e. a specific distro/version), where all others ran successfully:* Frequently the cause is networking or a brief external service outage. The failed tasks may simply be re-run by pressing the corresponding button on the task details page.

* *Multiple tasks failing*: Logically this should be due to some shared/common element. If that element is identifiable as a networking or external service (e.g. packaging repository outage), a re-run should be attempted.

* *All tasks are failing*: If a common element is **not** identifiable as temporary (i.e. container registry outage), please seek assistance via [the methods below](#communications) as this may be an early indication of a more serious problem.

In the (hopefully) rare case there are multiple, contiguous red bars, this is a ***very bad*** sign. It means additional merges are occurring despite an uncorrected or persistently faulty condition. This risks additional bugs being introduced and further complication of necessary corrective measures. Most likely people are aware and working on this, but it doesn't hurt [to confirm and/or try and help if possible](#communications).

## Communications

If you need help, you can contact the maintainers using the channels mentioned in RamaLama's [communications](https://github.com/containers/ramalama/blob/main/README.md#community) document.

For discussions around issues/bugs and features, you can use the GitHub [issues](https://github.com/containers/ramalama-stack/issues) and [PRs](https://github.com/containers/ramalama-stack/pulls) tracking system.

## Code of Conduct

As contributors and maintainers of the projects under the [Containers](https://github.com/containers) repository, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities to any of the projects under the containers umbrella. The full code of conduct guidelines can be found [here](https://github.com/containers/common/blob/main/CODE-OF-CONDUCT.md).

### Bot Interactions

ramalama-stack uses [sourcery.ai](https://sourcery.ai/) for AI code reviews.

You can read their docs [here](https://docs.sourcery.ai/Code-Review/#interacting-with-sourcery) on how to interact with the bot.
README.md (23 changes)

@@ -17,8 +17,8 @@ This will install Llama Stack and RamaLama as well if they are not installed alr
 > [!WARNING]
 > The following workaround is currently needed to run this provider - see https://github.com/containers/ramalama-stack/issues/53 for more details
 > ```bash
-> curl --create-dirs --output ~/.llama/providers.d/remote/inference/ramalama.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.1.3/src/ramalama_stack/providers.d/remote/inference/ramalama.yaml
-> curl --create-dirs --output ~/.llama/distributions/ramalama/ramalama-run.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.1.3/src/ramalama_stack/ramalama-run.yaml
+> curl --create-dirs --output ~/.llama/providers.d/remote/inference/ramalama.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.2.1/src/ramalama_stack/providers.d/remote/inference/ramalama.yaml
+> curl --create-dirs --output ~/.llama/distributions/ramalama/ramalama-run.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.2.1/src/ramalama_stack/ramalama-run.yaml
 > ```

 1. First you will need a RamaLama server running - see [the RamaLama project](https://github.com/containers/ramalama) docs for more information.

@@ -34,7 +34,7 @@ This will install Llama Stack and RamaLama as well if they are not installed alr
 > --net=host \
 > --env RAMALAMA_URL=http://0.0.0.0:8080 \
 > --env INFERENCE_MODEL=$INFERENCE_MODEL \
-> ramalama/llama-stack
+> quay.io/ramalama/llama-stack
 > ```

 This will start a Llama Stack server which will use port 8321 by default. You can test this works by configuring the Llama Stack Client to run against this server and

@@ -61,3 +61,20 @@ and Schrödinger's cat?" The librarian replied, "It rings a bell, but I'm not
     ]
 )
 ```
+
+## Llama Stack User Interface
+
+Llama Stack includes an experimental user-interface, check it out
+[here](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distribution/ui).
+
+To deploy the UI, run this:
+
+```bash
+podman run -d --rm --network=container:ramalama --name=streamlit quay.io/redhat-et/streamlit_client:0.1.0
+```
+
+> [!NOTE]
+> If running on MacOS (not Linux), `--network=host` doesn't work. You'll need to publish additional ports `8321:8321` and `8501:8501` with the ramalama serve command,
+> then run with `network=container:ramalama`.
+>
+> If running on Linux use `--network=host` or `-p 8501:8501` instead. The streamlit container will be able to access the ramalama endpoint with either.
@@ -10,8 +10,42 @@ readme = "README.md"
 license = "Apache-2.0"
 license-files = ["LICENSE"]
 keywords = ["ramalama", "llama", "AI"]
-requires-python = ">=3.10"
-dynamic = ["dependencies", "optional-dependencies", "version"]
+requires-python = ">=3.11"
+dynamic = ["version"]
+dependencies = [
+    "aiohttp>=3.12.2",
+    "aiosqlite>=0.21.0",
+    "autoevals>=0.0.129",
+    "blobfile>=3.0.0",
+    "chardet>=3.0.0",
+    "datasets>=3.6.0",
+    "fastapi>=0.115.12",
+    "httpx>=0.28.1",
+    "llama-stack==0.2.9",
+    "mcp>=1.9.2",
+    "numpy>=2.2.6",
+    "openai>=1.82.0",
+    "opentelemetry-exporter-otlp-proto-http>=1.33.1",
+    "opentelemetry-sdk>=1.33.1",
+    "peft>=0.15.2",
+    "psutil>=7.0.0",
+    "pydantic>=2.11.5",
+    "pymilvus>=2.5.10",
+    "ramalama==0.9.0",
+    "requests>=2.32.3",
+    "sentence-transformers>=3.0.0",
+    "six>=1.17.0",
+    "sqlalchemy>=2.0.41",
+    "torch>=2.7.0",
+    "trl>=0.18.1",
+    "urllib3>=2.4.0",
+    "uvicorn>=0.34.2",
+]
+
+[dependency-groups]
+dev = [
+    "pre-commit>=3.0.4,<4.0",
+]

 [project.urls]
 homepage = "https://ramalama.ai"

@@ -30,8 +64,5 @@ include-package-data = true
 [tool.setuptools.package-data]
 "ramalama_stack" = ["providers.d/**/*", "ramalama-run.yaml"]

-[tool.setuptools.dynamic]
-dependencies = { file = ["requirements.txt"] }
-
 [tool.ruff]
 extend-exclude = ["*.ipynb"]
@@ -1,5 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-
--r requirements.txt
-
-pre-commit>=3.0.4,<4.0
requirements.txt (502 changes)
@ -1,18 +1,484 @@
|
||||||
ramalama>=0.8.2
|
# This file was autogenerated by uv via the following command:
|
||||||
llama-stack>=0.2.5
|
# uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
|
||||||
urllib3
|
accelerate==1.7.0
|
||||||
faiss-cpu
|
# via
|
||||||
autoevals
|
# peft
|
||||||
six
|
# trl
|
||||||
pydantic
|
aiohappyeyeballs==2.6.1
|
||||||
aiohttp
|
# via aiohttp
|
||||||
aiosqlite
|
aiohttp==3.12.7
|
||||||
datasets
|
# via
|
||||||
fastapi
|
# fsspec
|
||||||
httpx
|
# llama-stack
|
||||||
numpy
|
# ramalama-stack
|
||||||
openai
|
aiosignal==1.3.2
|
||||||
opentelemetry-exporter-otlp-proto-http
|
# via aiohttp
|
||||||
opentelemetry-sdk
|
aiosqlite==0.21.0
|
||||||
requests
|
# via ramalama-stack
|
||||||
uvicorn
|
annotated-types==0.7.0
|
||||||
|
# via pydantic
|
||||||
|
anyio==4.9.0
|
||||||
|
# via
|
||||||
|
# httpx
|
||||||
|
# llama-stack-client
|
||||||
|
# mcp
|
||||||
|
# openai
|
||||||
|
# sse-starlette
|
||||||
|
# starlette
|
||||||
|
argcomplete==3.6.2
|
||||||
|
# via ramalama
|
||||||
|
attrs==25.3.0
|
||||||
|
# via
|
||||||
|
# aiohttp
|
||||||
|
# jsonschema
|
||||||
|
# referencing
|
||||||
|
autoevals==0.0.129
|
||||||
|
# via ramalama-stack
|
||||||
|
blobfile==3.0.0
|
||||||
|
# via ramalama-stack
|
||||||
|
braintrust-core==0.0.59
|
||||||
|
# via autoevals
|
||||||
|
certifi==2025.4.26
|
||||||
|
# via
|
||||||
|
# httpcore
|
||||||
|
# httpx
|
||||||
|
# requests
|
||||||
|
chardet==5.2.0
|
||||||
|
# via ramalama-stack
|
||||||
|
charset-normalizer==3.4.2
|
||||||
|
# via requests
|
||||||
|
chevron==0.14.0
|
||||||
|
# via autoevals
|
||||||
|
click==8.2.1
|
||||||
|
# via
|
||||||
|
# llama-stack-client
|
||||||
|
# uvicorn
|
||||||
|
colorama==0.4.6 ; sys_platform == 'win32'
|
||||||
|
# via
|
||||||
|
# click
|
||||||
|
# tqdm
|
||||||
|
datasets==3.6.0
|
||||||
|
# via
|
||||||
|
# ramalama-stack
|
||||||
|
# trl
|
||||||
|
deprecated==1.2.18
|
||||||
|
# via
|
||||||
|
# opentelemetry-api
|
||||||
|
# opentelemetry-exporter-otlp-proto-http
|
||||||
|
# opentelemetry-semantic-conventions
|
||||||
|
dill==0.3.8
|
||||||
|
# via
|
||||||
|
# datasets
|
||||||
|
# multiprocess
|
||||||
|
distro==1.9.0
|
||||||
|
# via
|
||||||
|
# llama-stack-client
|
||||||
|
# openai
|
||||||
|
ecdsa==0.19.1
|
||||||
|
# via python-jose
|
||||||
|
fastapi==0.115.12
|
||||||
|
# via ramalama-stack
|
||||||
|
filelock==3.18.0
|
||||||
|
# via
|
||||||
|
# blobfile
|
||||||
|
# datasets
|
||||||
|
# huggingface-hub
|
||||||
|
# torch
|
||||||
|
# transformers
|
||||||
|
fire==0.7.0
|
||||||
|
# via llama-stack
|
||||||
|
frozenlist==1.6.0
|
||||||
|
# via
|
||||||
|
# aiohttp
|
||||||
|
# aiosignal
|
||||||
|
fsspec==2025.3.0
|
||||||
|
# via
|
||||||
|
# datasets
|
||||||
|
# huggingface-hub
|
||||||
|
# torch
|
||||||
|
googleapis-common-protos==1.70.0
|
||||||
|
# via opentelemetry-exporter-otlp-proto-http
|
||||||
|
greenlet==3.2.2 ; (python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')
|
||||||
|
# via sqlalchemy
|
||||||
|
grpcio==1.67.1
|
||||||
|
# via pymilvus
|
||||||
|
h11==0.16.0
|
||||||
|
# via
|
||||||
|
# httpcore
|
||||||
|
# llama-stack
|
||||||
|
# uvicorn
|
||||||
|
hf-xet==1.1.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
|
||||||
|
# via huggingface-hub
|
||||||
|
httpcore==1.0.9
|
||||||
|
# via httpx
|
||||||
|
httpx==0.28.1
|
||||||
|
# via
|
||||||
|
# llama-stack
|
||||||
|
# llama-stack-client
|
||||||
|
# mcp
|
||||||
|
# openai
|
||||||
|
# ramalama-stack
|
||||||
|
httpx-sse==0.4.0
|
||||||
|
# via mcp
|
||||||
|
huggingface-hub==0.32.4
|
||||||
|
# via
|
||||||
|
# accelerate
|
||||||
|
# datasets
|
||||||
|
# llama-stack
|
||||||
|
# peft
|
||||||
|
# sentence-transformers
|
||||||
|
# tokenizers
|
||||||
|
# transformers
|
||||||
|
idna==3.10
|
||||||
|
# via
|
||||||
|
# anyio
|
||||||
|
# httpx
|
||||||
|
# requests
|
||||||
|
# yarl
|
||||||
|
importlib-metadata==8.6.1
|
||||||
|
# via opentelemetry-api
|
||||||
|
jinja2==3.1.6
|
||||||
|
# via
|
||||||
|
# llama-stack
|
||||||
|
# torch
|
||||||
|
jiter==0.10.0
|
||||||
|
# via openai
|
||||||
|
joblib==1.5.1
|
||||||
|
# via scikit-learn
|
||||||
|
jsonschema==4.24.0
|
||||||
|
# via
|
||||||
|
# autoevals
|
||||||
|
# llama-stack
|
||||||
|
jsonschema-specifications==2025.4.1
|
||||||
|
# via jsonschema
|
||||||
|
llama-stack==0.2.9
|
||||||
|
# via ramalama-stack
|
||||||
|
llama-stack-client==0.2.9
|
||||||
|
# via llama-stack
|
||||||
|
lxml==5.4.0
|
||||||
|
# via blobfile
|
||||||
|
markdown-it-py==3.0.0
|
||||||
|
# via rich
|
||||||
|
markupsafe==3.0.2
|
||||||
|
# via jinja2
|
||||||
|
mcp==1.9.2
|
||||||
|
# via ramalama-stack
|
||||||
|
mdurl==0.1.2
|
||||||
|
# via markdown-it-py
|
||||||
|
milvus-lite==2.4.12 ; sys_platform != 'win32'
|
||||||
|
# via pymilvus
|
||||||
|
mpmath==1.3.0
|
||||||
|
# via sympy
|
||||||
|
multidict==6.4.4
|
||||||
|
# via
|
||||||
|
# aiohttp
|
||||||
|
# yarl
|
||||||
|
multiprocess==0.70.16
|
||||||
|
# via datasets
|
||||||
|
networkx==3.5
|
||||||
|
# via torch
|
||||||
|
numpy==2.2.6
|
||||||
|
# via
|
||||||
|
# accelerate
|
||||||
|
# datasets
|
||||||
|
# pandas
|
||||||
|
# peft
|
||||||
|
# ramalama-stack
|
||||||
|
# scikit-learn
|
||||||
|
# scipy
|
||||||
|
# transformers
|
||||||
|
nvidia-cublas-cu12==12.6.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via
|
||||||
|
# nvidia-cudnn-cu12
|
||||||
|
# nvidia-cusolver-cu12
|
||||||
|
# torch
|
||||||
|
nvidia-cuda-cupti-cu12==12.6.80 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-cuda-nvrtc-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-cuda-runtime-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-cudnn-cu12==9.5.1.17 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-cufft-cu12==11.3.0.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-cufile-cu12==1.11.1.6 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-curand-cu12==10.3.7.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-cusolver-cu12==11.7.1.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-cusparse-cu12==12.5.4.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via
|
||||||
|
# nvidia-cusolver-cu12
|
||||||
|
# torch
|
||||||
|
nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via
|
||||||
|
# nvidia-cufft-cu12
|
||||||
|
# nvidia-cusolver-cu12
|
||||||
|
# nvidia-cusparse-cu12
|
||||||
|
# torch
|
||||||
|
nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
openai==1.84.0
|
||||||
|
# via
|
||||||
|
# llama-stack
|
||||||
|
# ramalama-stack
|
||||||
|
opentelemetry-api==1.33.1
|
||||||
|
# via
|
||||||
|
# opentelemetry-exporter-otlp-proto-http
|
||||||
|
# opentelemetry-sdk
|
||||||
|
# opentelemetry-semantic-conventions
|
||||||
|
opentelemetry-exporter-otlp-proto-common==1.33.1
|
||||||
|
# via opentelemetry-exporter-otlp-proto-http
|
||||||
|
opentelemetry-exporter-otlp-proto-http==1.33.1
|
||||||
|
# via ramalama-stack
|
||||||
|
opentelemetry-proto==1.33.1
|
||||||
|
# via
|
||||||
|
# opentelemetry-exporter-otlp-proto-common
|
||||||
|
# opentelemetry-exporter-otlp-proto-http
|
||||||
|
opentelemetry-sdk==1.33.1
|
||||||
|
# via
|
||||||
|
# opentelemetry-exporter-otlp-proto-http
|
||||||
|
# ramalama-stack
|
||||||
|
opentelemetry-semantic-conventions==0.54b1
|
||||||
|
# via opentelemetry-sdk
|
||||||
|
packaging==25.0
|
||||||
|
# via
|
||||||
|
# accelerate
|
||||||
|
# datasets
|
||||||
|
# huggingface-hub
|
||||||
|
# peft
|
||||||
|
# transformers
|
||||||
|
pandas==2.2.3
|
||||||
|
# via
|
||||||
|
# datasets
|
||||||
|
# llama-stack-client
|
||||||
|
# pymilvus
|
||||||
|
peft==0.15.2
|
||||||
|
# via ramalama-stack
|
||||||
|
pillow==11.2.1
|
||||||
|
# via
|
||||||
|
# llama-stack
|
||||||
|
# sentence-transformers
|
||||||
|
polyleven==0.9.0
|
||||||
|
# via autoevals
|
||||||
|
prompt-toolkit==3.0.51
|
||||||
|
# via
|
||||||
|
# llama-stack
|
||||||
|
# llama-stack-client
|
||||||
|
propcache==0.3.1
|
||||||
|
# via
|
||||||
|
# aiohttp
|
||||||
|
# yarl
|
||||||
|
protobuf==5.29.5
|
||||||
|
# via
|
||||||
|
# googleapis-common-protos
|
||||||
|
# opentelemetry-proto
|
||||||
|
# pymilvus
|
||||||
|
psutil==7.0.0
|
||||||
|
# via
|
||||||
|
# accelerate
|
||||||
|
# peft
|
||||||
|
# ramalama-stack
|
||||||
|
pyaml==25.5.0
|
||||||
|
# via llama-stack-client
|
||||||
|
pyarrow==20.0.0
|
||||||
|
# via datasets
|
||||||
|
pyasn1==0.6.1
|
||||||
|
# via
|
||||||
|
# python-jose
|
||||||
|
# rsa
|
||||||
|
pycryptodomex==3.23.0
|
||||||
|
# via blobfile
|
||||||
|
pydantic==2.11.5
|
||||||
|
# via
|
||||||
|
# fastapi
|
||||||
|
# llama-stack
|
||||||
|
# llama-stack-client
|
||||||
|
# mcp
|
||||||
|
# openai
|
||||||
|
# pydantic-settings
|
||||||
|
# ramalama-stack
|
||||||
|
pydantic-core==2.33.2
|
||||||
|
# via pydantic
|
||||||
|
pydantic-settings==2.9.1
|
||||||
|
# via mcp
|
||||||
|
pygments==2.19.1
|
||||||
|
# via rich
|
||||||
|
pymilvus==2.5.10
|
||||||
|
# via ramalama-stack
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
# via pandas
|
||||||
|
python-dotenv==1.1.0
|
||||||
|
# via
|
||||||
|
# llama-stack
|
||||||
|
# pydantic-settings
|
||||||
|
# pymilvus
|
||||||
|
python-jose==3.5.0
|
||||||
|
# via llama-stack
|
||||||
|
python-multipart==0.0.20
|
||||||
|
# via mcp
|
||||||
|
pytz==2025.2
|
||||||
|
# via pandas
|
||||||
|
pyyaml==6.0.2
|
||||||
|
# via
|
||||||
|
# accelerate
|
||||||
|
# autoevals
|
||||||
|
# datasets
|
||||||
|
# huggingface-hub
|
||||||
|
# peft
|
||||||
|
# pyaml
|
||||||
|
# transformers
|
||||||
|
ramalama==0.9.0
|
||||||
|
# via ramalama-stack
|
||||||
|
referencing==0.36.2
|
||||||
|
# via
|
||||||
|
# jsonschema
|
||||||
|
# jsonschema-specifications
|
||||||
|
regex==2024.11.6
|
||||||
|
# via
|
||||||
|
# tiktoken
|
||||||
|
# transformers
|
||||||
|
requests==2.32.3
|
||||||
|
# via
|
||||||
|
# datasets
|
||||||
|
# huggingface-hub
|
||||||
|
# llama-stack
|
||||||
|
# opentelemetry-exporter-otlp-proto-http
|
||||||
|
# ramalama-stack
|
||||||
|
# tiktoken
|
||||||
|
# transformers
|
||||||
|
rich==14.0.0
|
||||||
|
# via
|
||||||
|
# llama-stack
|
||||||
|
# llama-stack-client
|
||||||
|
rpds-py==0.25.1
|
||||||
|
# via
|
||||||
|
# jsonschema
|
||||||
|
# referencing
|
||||||
|
rsa==4.9.1
|
||||||
|
# via python-jose
|
||||||
|
safetensors==0.5.3
|
||||||
|
# via
|
||||||
|
# accelerate
|
||||||
|
# peft
|
||||||
|
# transformers
|
||||||
|
scikit-learn==1.7.0
|
||||||
|
# via sentence-transformers
|
||||||
|
scipy==1.15.3
|
||||||
|
# via
|
||||||
|
# scikit-learn
|
||||||
|
# sentence-transformers
|
||||||
|
sentence-transformers==4.1.0
|
||||||
|
# via ramalama-stack
|
||||||
|
setuptools==80.9.0
|
||||||
|
# via
|
||||||
|
# llama-stack
|
||||||
|
# pymilvus
|
||||||
|
# torch
|
||||||
|
# triton
|
||||||
|
six==1.17.0
|
||||||
|
# via
|
||||||
|
# ecdsa
|
||||||
|
# python-dateutil
|
||||||
|
# ramalama-stack
|
||||||
|
sniffio==1.3.1
|
||||||
|
# via
|
||||||
|
# anyio
|
||||||
|
# llama-stack-client
|
||||||
|
# openai
|
||||||
|
sqlalchemy==2.0.41
|
||||||
|
# via ramalama-stack
|
||||||
|
sse-starlette==2.3.6
|
||||||
|
# via mcp
|
||||||
|
starlette==0.46.2
|
||||||
|
# via
|
||||||
|
# fastapi
|
||||||
|
# llama-stack
|
||||||
|
# mcp
|
||||||
|
sympy==1.14.0
|
||||||
|
# via torch
|
||||||
|
termcolor==3.1.0
|
||||||
|
# via
|
||||||
|
# fire
|
||||||
|
# llama-stack
|
||||||
|
# llama-stack-client
|
||||||
|
threadpoolctl==3.6.0
|
||||||
|
# via scikit-learn
|
||||||
|
tiktoken==0.9.0
|
||||||
|
# via llama-stack
|
||||||
|
tokenizers==0.21.1
|
||||||
|
# via transformers
|
||||||
|
torch==2.7.0
|
||||||
|
# via
|
||||||
|
# accelerate
|
||||||
|
# peft
|
||||||
|
# ramalama-stack
|
||||||
|
# sentence-transformers
|
||||||
|
tqdm==4.67.1
|
||||||
|
# via
|
||||||
|
# datasets
|
||||||
|
# huggingface-hub
|
||||||
|
# llama-stack-client
|
||||||
|
# milvus-lite
|
||||||
|
# openai
|
||||||
|
# peft
|
||||||
|
# sentence-transformers
|
||||||
|
# transformers
|
||||||
|
transformers==4.52.4
|
||||||
|
# via
|
||||||
|
# peft
|
||||||
|
# sentence-transformers
|
||||||
|
# trl
|
||||||
|
triton==3.3.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||||
|
# via torch
|
||||||
|
trl==0.18.1
|
||||||
|
# via ramalama-stack
|
||||||
|
typing-extensions==4.14.0
|
||||||
|
# via
|
||||||
|
# aiosqlite
|
||||||
|
# anyio
|
||||||
|
# fastapi
|
||||||
|
# huggingface-hub
|
||||||
|
# llama-stack-client
|
||||||
|
# openai
|
||||||
|
# opentelemetry-sdk
|
||||||
|
# pydantic
|
||||||
|
# pydantic-core
|
||||||
|
# referencing
|
||||||
|
# sentence-transformers
|
||||||
|
# sqlalchemy
|
||||||
|
# torch
|
||||||
|
# typing-inspection
|
||||||
|
typing-inspection==0.4.1
|
||||||
|
# via
|
||||||
|
# pydantic
|
||||||
|
# pydantic-settings
|
||||||
|
tzdata==2025.2
|
||||||
|
# via pandas
|
||||||
|
ujson==5.10.0
|
||||||
|
# via pymilvus
|
||||||
|
urllib3==2.4.0
|
||||||
|
# via
|
||||||
|
# blobfile
|
||||||
|
# ramalama-stack
|
||||||
|
# requests
|
||||||
|
uvicorn==0.34.3
|
||||||
|
# via
|
||||||
|
# mcp
|
||||||
|
# ramalama-stack
|
||||||
|
wcwidth==0.2.13
|
||||||
|
# via prompt-toolkit
|
||||||
|
wrapt==1.17.2
|
||||||
|
# via deprecated
|
||||||
|
xxhash==3.5.0
|
||||||
|
# via datasets
|
||||||
|
yarl==1.20.0
|
||||||
|
# via aiohttp
|
||||||
|
zipp==3.22.0
|
||||||
|
# via importlib-metadata
|
||||||
|
|
|
@@ -11,7 +11,7 @@ def get_provider_spec() -> ProviderSpec:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="ramalama",
-            pip_packages=["ramalama>=0.8.2", "faiss-cpu"],
+            pip_packages=["ramalama>=0.8.5", "pymilvus"],
             config_class="config.RamalamaImplConfig",
             module="ramalama_stack",
         ),
@@ -1,6 +1,6 @@
 adapter:
   adapter_type: ramalama
-  pip_packages: ["ramalama>=0.8.2", "faiss-cpu"]
+  pip_packages: ["ramalama>=0.8.5", "pymilvus"]
   config_class: ramalama_stack.config.RamalamaImplConfig
   module: ramalama_stack
 api_dependencies: []
@@ -5,6 +5,7 @@ apis:
 - datasetio
 - eval
 - inference
+- post_training
 - safety
 - scoring
 - telemetry
@@ -20,13 +21,10 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
+  - provider_id: milvus
+    provider_type: inline::milvus
     config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:distributions/ramalama}/faiss_store.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/milvus_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -40,13 +38,16 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/responses_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:llamastack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -81,6 +82,13 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  post_training:
+  - provider_id: huggingface
+    provider_type: inline::huggingface
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
@@ -95,14 +103,29 @@ providers:
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/inference_store.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ramalama
   model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []
@@ -113,6 +136,8 @@ tool_groups:
   provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
 server:
   port: 8321
 external_providers_dir: ${env.EXTERNAL_PROVIDERS_DIR:~/.llama/providers.d}

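Every `${env.VAR:default}` reference in the run config above can be overridden at launch time; the defaults are what ship with the distribution. A short sketch of starting the stack against this config with a few of those variables set explicitly (the model tag is a placeholder, and the run command mirrors the one used by the test utilities later in this diff):

```bash
# Placeholder model tag -- substitute whatever model RamaLama is serving.
export INFERENCE_MODEL="ollama://smollm:135m"
# Optional overrides; when unset, the defaults embedded in the config apply.
export SQLITE_STORE_DIR="$HOME/.llama/distributions/ramalama"
export TELEMETRY_SINKS="console,sqlite"
export WOLFRAM_ALPHA_API_KEY=""   # leave empty if the wolfram-alpha tool is unused

uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv
```
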
@@ -191,7 +191,6 @@ class RamalamaInferenceAdapter(Inference, ModelsProtocolPrivate):
         )

     async def register_model(self, model: Model) -> Model:
-        model = await self.register_helper.register_model(model)
         res = await self.client.models.list()
         available_models = [m.id async for m in res]
         # Ramalama handles paths on MacOS and Linux differently

@@ -3,8 +3,11 @@
 main() {
     echo "===> starting 'test-container'..."
     start_and_wait_for_ramalama_server
+    test_ramalama_models
     test_ramalama_chat_completion
     start_and_wait_for_llama_stack_container
+    test_llama_stack_models
+    test_llama_stack_openai_models
     test_llama_stack_chat_completion
     test_llama_stack_openai_chat_completion
     echo "===> 'test-container' completed successfully!"

@@ -3,14 +3,19 @@
 main() {
     echo "===> starting 'test-external-providers'..."
     start_and_wait_for_ramalama_server
+    test_ramalama_models
     test_ramalama_chat_completion
     start_and_wait_for_llama_stack_server
+    test_llama_stack_models
+    test_llama_stack_openai_models
     test_llama_stack_chat_completion
     test_llama_stack_openai_chat_completion
     echo "===> 'test-external-providers' completed successfully!"
 }

 TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+# shellcheck disable=SC2153,SC2034
+INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')
 # shellcheck disable=SC1091
 source "$TEST_UTILS/utils.sh"
 main "$@"

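The new `INFERENCE_MODEL_NO_COLON` variable exists because model tags often contain colons, which would otherwise end up in the per-model log file names used by the utilities further down. A small illustration with a hypothetical tag:

```bash
INFERENCE_MODEL="ollama://smollm:135m"          # hypothetical example tag
INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')
echo "$INFERENCE_MODEL_NO_COLON"                # prints: ollama_//smollm_135m
echo "ramalama-$INFERENCE_MODEL_NO_COLON.log"   # per-model log file name
```
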
@@ -0,0 +1,166 @@
+import os
+import uuid
+from llama_stack_client import LlamaStackClient, RAGDocument
+
+
+def setup_client():
+    """Initialize Llama Stack client with configuration"""
+    base_url = "http://localhost:8321"
+
+    client = LlamaStackClient(base_url=base_url, api_key="none", timeout=10.0)
+
+    print(f"Connected to Llama Stack server at {base_url}")
+    return client
+
+
+def setup_inference_params():
+    """Configure inference parameters"""
+    model_id = os.getenv(
+        "INFERENCE_MODEL",
+        "bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf",
+    )
+
+    temperature = float(os.getenv("TEMPERATURE", 0.0))
+    if temperature > 0.0:
+        top_p = float(os.getenv("TOP_P", 0.95))
+        strategy = {"type": "top_p", "temperature": temperature, "top_p": top_p}
+    else:
+        strategy = {"type": "greedy"}
+
+    max_tokens = int(os.getenv("MAX_TOKENS", 4096))
+
+    sampling_params = {
+        "strategy": strategy,
+        "max_tokens": max_tokens,
+    }
+
+    stream_env = os.getenv("STREAM", "False")
+    stream = stream_env == "True"
+
+    print("Inference Parameters:")
+    print(f"\tModel: {model_id}")
+    print(f"\tSampling Parameters: {sampling_params}")
+    print(f"\tStream: {stream}")
+
+    return model_id, sampling_params, stream
+
+
+def setup_vector_db(client):
+    """Setup vector database for RAG"""
+    vector_db_id = f"test_vector_db_{uuid.uuid4().hex[:8]}"
+
+    # Find embedding model from available models
+    models = client.models.list()
+    embedding_model = None
+    for model in models:
+        if hasattr(model, "model_type") and model.model_type == "embedding":
+            embedding_model = model.identifier
+            break
+
+    if not embedding_model:
+        raise Exception("No embedding model found")
+
+    print(f"Using embedding model: {embedding_model}")
+
+    # Register vector database
+    client.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model=embedding_model,
+        embedding_dimension=int(os.getenv("VDB_EMBEDDING_DIMENSION", 384)),
+        provider_id=os.getenv("VDB_PROVIDER", "milvus"),
+    )
+
+    # Ingest simple test documents instead of external URLs
+    test_content = [
+        "RamaLama Stack is an external provider for Llama Stack that allows for the use of RamaLama for inference.",
+        "Podman is a container management tool that provides a Docker-compatible command line interface without requiring a daemon.",
+        "Podman can run containers rootlessly and provides robust security isolation.",
+    ]
+
+    documents = [
+        RAGDocument(
+            document_id=f"test_doc_{i}",
+            content=content,
+            mime_type="text/plain",
+            metadata={"source": f"test_document_{i}"},
+        )
+        for i, content in enumerate(test_content)
+    ]
+
+    print(f"Ingesting {len(documents)} test documents into vector database...")
+    client.tool_runtime.rag_tool.insert(
+        documents=documents,
+        vector_db_id=vector_db_id,
+        chunk_size_in_tokens=int(os.getenv("VECTOR_DB_CHUNK_SIZE", 128)),
+    )
+
+    print(f"Vector database '{vector_db_id}' setup complete")
+    return vector_db_id
+
+
+def run_rag_query(client, model_id, sampling_params, stream, vector_db_id, query):
+    """Execute RAG query and return response"""
+    print(f"\nUser> {query}")
+
+    rag_response = client.tool_runtime.rag_tool.query(
+        content=query, vector_db_ids=[vector_db_id]
+    )
+
+    messages = [{"role": "system", "content": "You are a helpful assistant."}]
+
+    prompt_context = rag_response.content
+    extended_prompt = f"Please answer the given query using the context below.\n\nCONTEXT:\n{prompt_context}\n\nQUERY:\n{query}"
+    messages.append({"role": "user", "content": extended_prompt})
+
+    response = client.inference.chat_completion(
+        messages=messages,
+        model_id=model_id,
+        sampling_params=sampling_params,
+        stream=stream,
+    )
+
+    print("inference> ", end="")
+    if stream:
+        for chunk in response:
+            if hasattr(chunk, "event") and hasattr(chunk.event, "delta"):
+                if hasattr(chunk.event.delta, "text"):
+                    print(chunk.event.delta.text, end="")
+        print()
+    else:
+        print(response.completion_message.content)
+
+
+def main():
+    """Main function to run RAG test"""
+    print("=== Llama Stack RAG Test ===")
+
+    try:
+        client = setup_client()
+        model_id, sampling_params, stream = setup_inference_params()
+
+        vector_db_id = setup_vector_db(client)
+
+        queries = [
+            "What is RamaLama Stack?",
+            "What is Podman?",
+            "Can Podman run in rootless mode?",
+        ]
+
+        print("\n=== Running RAG Queries ===")
+        for query in queries:
+            run_rag_query(
+                client, model_id, sampling_params, stream, vector_db_id, query
+            )
+            print()
+
+        print("=== RAG Test Complete ===")
+
+    except Exception as e:
+        print(f"Error: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    exit(main())

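The RAG script is driven entirely by environment variables, so it can also be run on its own against an already-running stack. A sketch of a standalone invocation, assuming both servers are up (the variable names come straight from the script; the values shown are placeholders):

```bash
export INFERENCE_MODEL="ollama://smollm:135m"   # placeholder tag
export VDB_PROVIDER="milvus"                    # matches the vector_io provider in the run config
export STREAM="False"                           # set to "True" for streamed responses
uv run python tests/test-rag.py
```
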
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+function test_rag_functionality {
+    echo "===> test_rag_functionality: start"
+
+    if uv run python tests/test-rag.py; then
+        echo "===> test_rag_functionality: pass"
+        return 0
+    else
+        echo "===> test_rag_functionality: fail"
+        echo "RAG test script output above shows the failure details"
+        return 1
+    fi
+}
+
+main() {
+    echo "===> starting 'test-rag'..."
+
+    # Check if services are already running (from previous tests)
+    if curl -s http://localhost:8321/v1/health >/dev/null 2>&1 && curl -s http://localhost:8080/health >/dev/null 2>&1; then
+        echo "Using existing RamaLama and Llama Stack servers"
+    else
+        echo "Starting fresh servers for RAG test"
+        start_and_wait_for_ramalama_server
+        start_and_wait_for_llama_stack_server
+    fi
+
+    if test_rag_functionality; then
+        echo "===> 'test-rag' completed successfully!"
+    else
+        echo "===> 'test-rag' failed!"
+        exit 1
+    fi
+}
+
+TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+# shellcheck disable=SC1091
+source "$TEST_UTILS/utils.sh"
+main "$@"
+exit 0

@@ -0,0 +1,77 @@
+#!/bin/bash
+
+function start_and_wait_for_streamlit_ui_linux {
+    echo "Starting Streamlit UI for Linux..."
+
+    podman run -d --rm --network=host --name=streamlit-ui quay.io/redhat-et/streamlit_client:0.1.0
+
+    echo "Waiting for Streamlit UI to be ready..."
+    for i in {1..30}; do
+        echo "Attempt $i to connect to Streamlit UI..."
+        if curl -s http://localhost:8501 >/dev/null 2>&1; then
+            echo "Streamlit UI is up and responding on port 8501!"
+            return 0
+        fi
+        if [ "$i" -eq 30 ]; then
+            echo "Streamlit UI failed to start or respond"
+            echo "Container logs:"
+            podman logs streamlit-ui
+            return 1
+        fi
+        sleep 2
+    done
+}
+
+function test_streamlit_ui_linux {
+    echo "===> test_streamlit_ui_linux: start"
+
+    if start_and_wait_for_streamlit_ui_linux; then
+        # Test that the UI is accessible and returns HTML content
+        resp=$(curl -sS http://localhost:8501)
+        if echo "$resp" | grep -q -i "streamlit\|html"; then
+            echo "===> test_streamlit_ui_linux: pass"
+            return 0
+        else
+            echo "===> test_streamlit_ui_linux: fail - UI not serving expected content"
+            echo "Response: $resp"
+            return 1
+        fi
+    else
+        echo "===> test_streamlit_ui_linux: fail - UI failed to start"
+        return 1
+    fi
+}
+
+function cleanup_streamlit_ui {
+    echo "Cleaning up Streamlit UI container..."
+    podman rm -f streamlit-ui >/dev/null 2>&1 || true
+}
+
+main() {
+    echo "===> starting 'test-ui-linux'..."
+
+    # Only run on Linux
+    # Need a fix to published ports in ramalama to run on MacOS
+    if [[ "$OSTYPE" != "linux-gnu"* ]]; then
+        echo "This test is only for Linux systems. Current OS: $OSTYPE"
+        echo "===> 'test-ui-linux' skipped!"
+        exit 0
+    fi
+
+    trap cleanup_streamlit_ui EXIT
+
+    start_and_wait_for_ramalama_server
+    start_and_wait_for_llama_stack_server
+
+    test_streamlit_ui_linux
+
+    cleanup_streamlit_ui
+
+    echo "===> 'test-ui-linux' completed successfully!"
+}
+
+TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+# shellcheck disable=SC1091
+source "$TEST_UTILS/utils.sh"
+main "$@"
+exit 0

@@ -1,8 +1,8 @@
 #!/bin/bash

 function start_and_wait_for_ramalama_server {
-    # Start ramalama serve in background with logging to 'ramalama.log'
-    nohup uv run ramalama serve "$INFERENCE_MODEL" > ramalama.log 2>&1 &
+    # Start ramalama serve in background with logging to 'ramalama-$INFERENCE_MODEL_NO_COLON.log'
+    nohup uv run ramalama serve "$INFERENCE_MODEL" > "ramalama-$INFERENCE_MODEL_NO_COLON.log" 2>&1 &
     RAMALAMA_PID=$!
     echo "Started RamaLama with PID: $RAMALAMA_PID"

@@ -18,7 +18,7 @@ function start_and_wait_for_ramalama_server
         if [ "$i" -eq 60 ]; then
             echo "RamaLama server failed to start or respond"
             echo "RamaLama logs:"
-            cat ramalama.log
+            cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
             exit 1
         fi
         sleep 1
@@ -26,8 +26,8 @@ function start_and_wait_for_ramalama_server
 }

 function start_and_wait_for_llama_stack_server {
-    # Start llama stack run with logging to 'lls.log'
-    LLAMA_STACK_LOG_FILE=lls.log nohup uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv &
+    # Start llama stack run with logging to 'lls-$INFERENCE_MODEL_NO_COLON.log'
+    LLAMA_STACK_LOG_FILE="lls-$INFERENCE_MODEL_NO_COLON.log" nohup uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv &
     LLS_PID=$!
     echo "Started Llama Stack server with PID: $LLS_PID"

@@ -38,13 +38,13 @@ function start_and_wait_for_llama_stack_server
         resp=$(curl -s http://localhost:8321/v1/health)
         if [ "$resp" == '{"status":"OK"}' ]; then
             echo "Llama Stack server is up!"
-            if grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml" lls.log; then
+            if grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml" "lls-$INFERENCE_MODEL_NO_COLON.log"; then
                 echo "Llama Stack server is using RamaLama provider"
                 return
             else
                 echo "Llama Stack server is not using RamaLama provider"
                 echo "Server logs:"
-                cat lls.log
+                cat "lls-$INFERENCE_MODEL_NO_COLON.log"
                 exit 1
             fi
         fi
@@ -52,20 +52,19 @@ function start_and_wait_for_llama_stack_server
     done
     echo "Llama Stack server failed to start"
     echo "Server logs:"
-    cat lls.log
+    cat "lls-$INFERENCE_MODEL_NO_COLON.log"
     exit 1
 }

 function start_and_wait_for_llama_stack_container {
-    # Start llama stack run with logging to 'lls.log'
+    # Start llama stack run
     podman run \
         -d \
         --net=host \
         --env INFERENCE_MODEL="$INFERENCE_MODEL" \
         --env RAMALAMA_URL=http://0.0.0.0:8080 \
-        --env LLAMA_STACK_LOG_FILE=lls.log \
         --name llama-stack \
-        ramalama/llama-stack
+        quay.io/ramalama/llama-stack:latest
     LLS_PID=$!
     echo "Started Llama Stack container with PID: $LLS_PID"

@@ -76,24 +75,39 @@ function start_and_wait_for_llama_stack_container
         resp=$(curl -s http://localhost:8321/v1/health)
         if [ "$resp" == '{"status":"OK"}' ]; then
             echo "Llama Stack server is up!"
-            if podman exec llama-stack cat lls.log | grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml"; then
+            if podman logs llama-stack | grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml"; then
                 echo "Llama Stack server is using RamaLama provider"
                 return
             else
                 echo "Llama Stack server is not using RamaLama provider"
-                echo "Server logs:"
-                podman exec llama-stack cat lls.log
+                echo "Container logs:"
+                podman logs llama-stack
                 exit 1
             fi
         fi
         sleep 1
     done
     echo "Llama Stack server failed to start"
-    echo "Server logs:"
-    podman exec llama-stack cat lls.log
+    echo "Container logs:"
+    podman logs llama-stack
     exit 1
 }
+
+function test_ramalama_models {
+    echo "===> test_ramalama_models: start"
+    # shellcheck disable=SC2016
+    resp=$(curl -sS http://localhost:8080/v1/models)
+    if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
+        echo "===> test_ramalama_models: pass"
+        return
+    else
+        echo "===> test_ramalama_models: fail"
+        echo "RamaLama logs:"
+        cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
+        exit 1
+    fi
+}

 function test_ramalama_chat_completion {
     echo "===> test_ramalama_chat_completion: start"
     # shellcheck disable=SC2016
@@ -106,7 +120,36 @@ function test_ramalama_chat_completion
     else
         echo "===> test_ramalama_chat_completion: fail"
         echo "RamaLama logs:"
-        cat ramalama.log
+        cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
+        exit 1
+    fi
+}
+
+function test_llama_stack_models {
+    echo "===> test_llama_stack_models: start"
+    nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
+    if nohup uv run llama-stack-client models list | grep -q "$INFERENCE_MODEL"; then
+        echo "===> test_llama_stack_models: pass"
+        return
+    else
+        echo "===> test_llama_stack_models: fail"
+        echo "Server logs:"
+        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
+        exit 1
+    fi
+}
+
+function test_llama_stack_openai_models {
+    echo "===> test_llama_stack_openai_models: start"
+    # shellcheck disable=SC2016
+    resp=$(curl -sS http://localhost:8321/v1/openai/v1/models)
+    if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
+        echo "===> test_llama_stack_openai_models: pass"
+        return
+    else
+        echo "===> test_llama_stack_openai_models: fail"
+        echo "Server logs:"
+        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
         exit 1
     fi
 }
@@ -120,7 +163,7 @@ function test_llama_stack_chat_completion
     else
         echo "===> test_llama_stack_chat_completion: fail"
         echo "Server logs:"
-        cat lls.log
+        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
         exit 1
     fi
 }
@@ -137,7 +180,7 @@ function test_llama_stack_openai_chat_completion
     else
         echo "===> test_llama_stack_openai_chat_completion: fail"
         echo "Server logs:"
-        cat lls.log
+        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
         exit 1
     fi
 }
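The new `test_llama_stack_openai_models` check hits the stack's OpenAI-compatible endpoint directly, which is also handy for manual verification. A sketch of doing that by hand, assuming the usual OpenAI-style `data[].id` response shape (that shape is an assumption here, not something shown in this diff):

```bash
# List models through the OpenAI-compatible surface of the running stack.
curl -sS http://localhost:8321/v1/openai/v1/models | jq '.data[].id'
# Grep works too, and is all the test itself relies on:
curl -sS http://localhost:8321/v1/openai/v1/models | grep -q "$INFERENCE_MODEL" && echo "model registered"
```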