Compare commits
No commits in common. "main" and "v0.1.3" have entirely different histories.
.github/workflows/pre-commit.yml

@@ -2,21 +2,8 @@ name: Pre-commit
 
 on:
   pull_request:
-    branches:
-      - main
   push:
-    branches:
-      - main
+    branches: [main]
-
-env:
-  LC_ALL: en_US.UTF-8
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-  contents: read
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}

@@ -24,43 +11,27 @@ concurrency:
 
 jobs:
   pre-commit:
-    name: pre-commit
     runs-on: ubuntu-latest
 
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
         with:
           egress-policy: audit
 
-      - name: Checkout containers/ramalama-stack
+      - name: Checkout code
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          # https://github.com/actions/checkout/issues/249
-          fetch-depth: 0
 
       - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
         with:
-          python-version: "3.11"
+          python-version: '3.11'
           cache: pip
           cache-dependency-path: |
-            **/requirements*.txt
             .pre-commit-config.yaml
 
-      - name: Run pre-commit
-        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
-        env:
-          SKIP: no-commit-to-branch
-          RUFF_OUTPUT_FORMAT: github
+      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
 
       - name: Verify if there are any diff files after pre-commit
-        run: git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
-
-      - name: Verify if there are any new files after pre-commit
         run: |
-          unstaged_files=$(git ls-files --others --exclude-standard)
-          if [ -n "$unstaged_files" ]; then
-            echo "There are uncommitted new files, run pre-commit locally and commit again"
-            echo "$unstaged_files"
-            exit 1
-          fi
+          git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
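Both "verify" steps in this workflow fail with the message "run pre-commit locally and commit again", so the local loop is worth spelling out. A minimal sketch, assuming the uv-based development setup described in CONTRIBUTING.md below (pre-commit comes from the repo's dev dependency group):

```bash
# Run the same hooks locally before pushing.
uv sync                        # installs dev dependencies, including pre-commit
source .venv/bin/activate
pre-commit run --all-files     # ruff, uv-lock/uv-export, shellcheck, etc. from .pre-commit-config.yaml
git status --porcelain         # any output here is what the CI "verify" steps would reject
git add -u && git commit -s    # commit the fixes with a DCO sign-off
```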
.github/workflows/pypi.yml

@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
-name: Build and publish PyPI package
+name: Build, test, and upload PyPI package
 
 on:
   push:

@@ -37,18 +37,16 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
         with:
           egress-policy: audit
 
-      - name: Checkout containers/ramalama-stack
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
-          # https://github.com/actions/checkout/issues/249
+          # for setuptools-scm
           fetch-depth: 0
 
-      - name: Build and inspect python package
-        uses: hynek/build-and-inspect-python-package@c52c3a4710070b50470d903818a7b25115dcd076 # v2.13.0
+      - uses: hynek/build-and-inspect-python-package@b5076c307dc91924a82ad150cdd1533b444d3310 # v2.12.0
 
       - name: Run 'test-build.sh'
         run: $GITHUB_WORKSPACE/tests/test-build.sh

@@ -58,6 +56,7 @@ jobs:
   # - a PR is merged into main branch
   publish-test-pypi:
     name: Publish packages to test.pypi.org
+    # environment: publish-test-pypi
     if: |
       github.repository_owner == 'containers' && (
         github.event.action == 'published' ||

@@ -69,9 +68,10 @@ jobs:
       id-token: write
     runs-on: ubuntu-latest
     needs: build-package
 
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
         with:
           egress-policy: audit
 

@@ -91,6 +91,7 @@ jobs:
   # - a new GitHub release is published
   publish-pypi:
     name: Publish release to pypi.org
+    # environment: publish-pypi
     if: |
       github.repository_owner == 'containers' && github.event.action == 'published'
     permissions:

@@ -98,11 +99,13 @@ jobs:
       id-token: write
       # allow gh release upload
       contents: write
 
     runs-on: ubuntu-latest
     needs: build-package
 
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
         with:
           egress-policy: audit
 
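The build job above delegates to hynek/build-and-inspect-python-package and to tests/test-build.sh (not shown in this compare). A rough local stand-in, assuming only the setuptools/setuptools-scm backend declared in pyproject.toml:

```bash
# Hedged sketch: build and sanity-check the sdist/wheel locally.
python -m pip install build twine
python -m build                # writes dist/*.tar.gz and dist/*.whl
python -m twine check dist/*   # metadata check before any upload
```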
.github/workflows/test-container.yml (79 lines; present only on main)

@@ -1,79 +0,0 @@
name: Test Container

on:
  workflow_dispatch:
    inputs:
      inference_model:
        description: Model to download and inference via RamaLama
        required: false
        default: llama3.2:3b
  schedule:
    - cron: '0 11 * * *' # Runs at 11AM UTC every morning

env:
  LC_ALL: en_US.UTF-8

defaults:
  run:
    shell: bash

permissions:
  contents: read

jobs:
  test-lls-integration:
    name: test-container
    runs-on: ubuntu-latest
    env:
      INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b' }}
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
        with:
          egress-policy: audit

      - name: Checkout containers/ramalama-stack
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # https://github.com/actions/checkout/issues/249
          fetch-depth: 0

      - name: Install uv
        uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
        with:
          python-version: "3.11"

      - name: Set Up Environment and Install Dependencies
        run: |
          # install podman
          sudo apt-get -y install podman

          # install packaged version of ramalama
          uv venv
          uv pip install ramalama

      - name: Cache Ramalama store
        id: ramalama-store-cache
        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        with:
          path: ~/.local/share/ramalama
          key: ramalama-store-${{ env.INFERENCE_MODEL }}

      - name: Download model to serve with Ramalama
        if: ${{ steps.ramalama-store-cache.outputs.cache-hit != 'true' }}
        run: uv run ramalama pull ${{ env.INFERENCE_MODEL }}

      - name: Run 'test-container.sh'
        run: $GITHUB_WORKSPACE/tests/test-container.sh

      - name: Run 'test-ui-linux.sh'
        run: $GITHUB_WORKSPACE/tests/test-ui-linux.sh

      - name: Upload logs
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        if: always()
        with:
          name: logs-test-container
          retention-days: 5
          path: |
            **/*.log
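The container job drives tests/test-container.sh, whose contents are not part of this compare. A hand-run approximation using only commands that appear in this repo's README (the RAMALAMA_URL port is the README's assumption, not something verified here):

```bash
# Hedged sketch: exercise the published container image manually.
ramalama pull llama3.2:3b      # same default model the workflow caches
ramalama serve llama3.2:3b &   # RamaLama inference server
podman run \
  --net=host \
  --env RAMALAMA_URL=http://0.0.0.0:8080 \
  --env INFERENCE_MODEL=llama3.2:3b \
  quay.io/ramalama/llama-stack # Llama Stack server, port 8321 by default
```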
.github/workflows/test-external-providers.yml

@@ -2,78 +2,42 @@ name: Test External Providers
 
 on:
   workflow_dispatch:
+    inputs:
+      inference_model:
+        description: Model to download and inference via RamaLama
+        required: false
+        default: llama3.2:3b-instruct-fp16
   push:
-    branches:
-      - main
-    paths:
-      - 'src/ramalama_stack/**'
-      - 'tests/**'
-      - '.github/workflows/test-external-providers.yml'
-      - pyproject.toml
-      - requirements.txt
-      - uv.lock
+    branches: [ main ]
   pull_request:
-    branches:
-      - main
-    paths:
-      - 'src/ramalama_stack/**'
-      - 'tests/**'
-      - '.github/workflows/test-external-providers.yml'
-      - pyproject.toml
-      - requirements.txt
-      - uv.lock
-
-env:
-  LC_ALL: en_US.UTF-8
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-  contents: read
+    branches: [ main ]
 
 jobs:
   test-external-providers:
-    name: test-external-providers
     runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        inference_model:
-          - 'llama3.2:3b'
-          - 'granite3.2:2b'
     env:
-      INFERENCE_MODEL: ${{ matrix.inference_model }}
+      INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b-instruct-fp16' }}
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
         with:
           egress-policy: audit
 
-      - name: Set INFERENCE_MODEL_NO_COLON for logging artifacts
-        run: echo "INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')" >> $GITHUB_ENV
-
-      - name: Checkout containers/ramalama-stack
+      - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          # https://github.com/actions/checkout/issues/249
-          fetch-depth: 0
 
       - name: Install uv
-        uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
+        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
         with:
-          python-version: "3.11"
+          python-version: "3.10"
 
       - name: Set Up Environment and Install Dependencies
         run: |
           uv sync
+          uv pip install -e .
-          # temporary hack for file writing that should be done by the pip setup script
-          # https://github.com/containers/ramalama-stack/issues/53
           mkdir -p ~/.llama/distributions/ramalama/
-          cp -r $GITHUB_WORKSPACE/src/ramalama_stack/providers.d/ ~/.llama/
-          cp $GITHUB_WORKSPACE/src/ramalama_stack/ramalama-run.yaml ~/.llama/distributions/ramalama/ramalama-run.yaml
+          cp -r src/ramalama_stack/providers.d/ ~/.llama/
+          cp src/ramalama_stack/ramalama-run.yaml ~/.llama/distributions/ramalama/ramalama-run.yaml
 
       - name: Run 'test-build.sh'
         run: $GITHUB_WORKSPACE/tests/test-build.sh

@@ -92,14 +56,11 @@ jobs:
       - name: Run 'test-external-providers.sh'
         run: $GITHUB_WORKSPACE/tests/test-external-providers.sh
 
-      - name: Run 'test-rag.sh'
-        run: $GITHUB_WORKSPACE/tests/test-rag.sh
-
       - name: Upload logs
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         if: always()
         with:
-          name: logs-test-external-providers-${{ env.INFERENCE_MODEL_NO_COLON }}
+          name: logs
           retention-days: 5
           path: |
             **/*.log
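On the main side this job is plain shell, so it can be reproduced outside CI by running the same commands from a checkout of containers/ramalama-stack (a sketch; the model name is the first matrix entry):

```bash
export INFERENCE_MODEL='llama3.2:3b'
uv sync
# temporary hack tracked in https://github.com/containers/ramalama-stack/issues/53
mkdir -p ~/.llama/distributions/ramalama/
cp -r src/ramalama_stack/providers.d/ ~/.llama/
cp src/ramalama_stack/ramalama-run.yaml ~/.llama/distributions/ramalama/ramalama-run.yaml
./tests/test-build.sh
./tests/test-external-providers.sh
```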
.github/workflows/test-lls-integration.yml (86 lines; present only on main)

@@ -1,86 +0,0 @@
name: Test LLS Integration

on:
  workflow_dispatch:
    inputs:
      inference_model:
        description: Model to download and inference via RamaLama
        required: false
        default: llama3.2:3b
  schedule:
    - cron: '0 11 * * *' # Runs at 11AM UTC every morning

env:
  LC_ALL: en_US.UTF-8

defaults:
  run:
    shell: bash

permissions:
  contents: read

jobs:
  test-lls-integration:
    name: test-lls-integration
    runs-on: ubuntu-latest
    env:
      INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b' }}
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
        with:
          egress-policy: audit

      - name: Checkout containers/ramalama-stack
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # https://github.com/actions/checkout/issues/249
          fetch-depth: 0

      - name: Install uv
        uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
        with:
          python-version: "3.11"

      - name: Set Up Environment and Install Dependencies
        run: |
          uv venv

          # install packaged version of ramalama-stack
          uv pip install ramalama-stack

          # update llama-stack version to main branch
          uv pip install git+https://github.com/meta-llama/llama-stack.git@main

          # temporary hack for file writing that should be done by the pip setup script
          # https://github.com/containers/ramalama-stack/issues/53
          mkdir -p ~/.llama/distributions/ramalama/
          cp -r $GITHUB_WORKSPACE/src/ramalama_stack/providers.d/ ~/.llama/
          cp $GITHUB_WORKSPACE/src/ramalama_stack/ramalama-run.yaml ~/.llama/distributions/ramalama/ramalama-run.yaml

      - name: Run 'test-build.sh'
        run: $GITHUB_WORKSPACE/tests/test-build.sh

      - name: Cache Ramalama store
        id: ramalama-store-cache
        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        with:
          path: ~/.local/share/ramalama
          key: ramalama-store-${{ env.INFERENCE_MODEL }}

      - name: Download model to serve with Ramalama
        if: ${{ steps.ramalama-store-cache.outputs.cache-hit != 'true' }}
        run: uv run ramalama pull ${{ env.INFERENCE_MODEL }}

      - name: Run 'test-external-providers.sh'
        run: $GITHUB_WORKSPACE/tests/test-external-providers.sh

      - name: Upload logs
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        if: always()
        with:
          name: logs-test-lls-integration
          retention-days: 5
          path: |
            **/*.log
.gitignore

@@ -176,4 +176,3 @@ cython_debug/
 # Anything additional
 distributions/
 src/ramalama_stack/_version.py
-.python-version
.pre-commit-config.yaml

@@ -16,14 +16,14 @@ repos:
       - id: check-shebang-scripts-are-executable
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.12
+    rev: v0.11.7
     hooks:
       - id: ruff
         args: [ --fix ]
       - id: ruff-format
 
   - repo: https://github.com/astral-sh/uv-pre-commit
-    rev: 0.7.9
+    rev: 0.6.17
     hooks:
       - id: uv-lock
       - id: uv-export

@@ -31,8 +31,6 @@ repos:
           "--frozen",
           "--no-hashes",
           "--no-emit-project",
-          "--no-default-groups",
-          "--output-file=requirements.txt"
         ]
 
   - repo: https://github.com/koalaman/shellcheck-precommit
.python-version (present only in v0.1.3)

@@ -0,0 +1 @@
+3.10

CONTRIBUTING.md (316 lines; present only on main)
@@ -1,316 +0,0 @@
# Contributing to ramalama-stack

We'd love to have you join the community!
Below summarizes the processes that we follow.

## Topics

* [Reporting Issues](#reporting-issues)
* [Working On Issues](#working-on-issues)
* [Contributing To ramalama-stack](#contributing-to-ramalama-stack-1)
* [Submitting Pull Requests](#submitting-pull-requests)
* [Communications](#communications)
* [Code of Conduct](#code-of-conduct)

## Reporting Issues

Before reporting an issue, check our backlog of [open issues](https://github.com/containers/ramalama-stack/issues) to see if someone else has already reported it.
If so, feel free to add your scenario, or additional information, to the discussion.
Or simply "subscribe" to it to be notified when it is updated.
Please do not add comments like "+1" or "I have this issue as well" without adding any new information.
Instead, please add a thumbs-up emoji to the original report.

Note: Older closed issues/PRs are automatically locked.
If you have a similar problem please open a new issue instead of commenting.

If you find a new issue with the project we'd love to hear about it!
The most important aspect of a bug report is that it includes enough information for us to reproduce it.
To make this easier, there are three types of issue templates you can use.

* If you have a bug to report, please use the *Bug Report* template.
* If you have an idea to propose, please use the *Feature Request* template.
* If your issue is something else, please use the default *Blank issue* template.

Please include as much detail as possible, including all requested fields in the template.
Not having all requested information makes it much harder to find and fix issues.
A reproducer is the best thing you can include.
Reproducers make finding and fixing issues much easier for maintainers.
The easier it is for us to reproduce a bug, the faster it'll be fixed!

Please don't include any private/sensitive information in your issue!
Security issues should NOT be reported via Github and should instead be reported via the process described [here](https://github.com/containers/common/blob/main/SECURITY.md).

## Working On Issues

Once you have decided to contribute to ramalama-stack by working on an issue, check our backlog of [open issues](https://github.com/containers/ramalama-stack/issues) looking for any that are unassigned.
If you want to work on a specific issue that is already assigned but does not appear to be actively being worked on, please ping the assignee in the issue and ask if you can take over.
If they do not respond after several days, you can notify a maintainer to have the issue reassigned.
When working on an issue, please assign it to yourself.
If you lack permissions to do so, you can ping the `@containers/ramalama-stack-maintainers` group to have a maintainer set you as assignee.

## Contributing To ramalama-stack

This section describes how to make a contribution to ramalama-stack.

### Prepare your environment

The minimum version of Python required to use ramalama-stack is Python 3.11.

### Fork and clone ramalama-stack

First, you need to fork this project on GitHub.
Then clone your fork locally:

```shell
$ git clone git@github.com:<you>/ramalama-stack
$ cd ./ramalama-stack/
```

### Install required tools

We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments.
You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).

You can install the dependencies by running:

```bash
cd ramalama-stack
uv sync
source .venv/bin/activate
```

> [!NOTE]
> You can use a specific version of Python with `uv` by adding the `--python <version>` flag (e.g. `--python 3.11`).
> Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`.
> For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/).

### Adding dependencies

Please add dependencies using the [uv-documented approach](https://docs.astral.sh/uv/concepts/projects/dependencies/#adding-dependencies).

This should update both the `pyproject.toml` and the `uv.lock` file.

The `requirements.txt` file should be updated as well by `pre-commit` - you can also do this manually via `uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt`.

## Testing

ramalama-stack provides a small suite of tests in the `test/` directory.
Most pull requests should be accompanied by test changes covering the changes in the PR.
Pull requests without tests will receive additional scrutiny from maintainers and may be blocked from merging unless tests are added.
Maintainers will decide if tests are not necessary during review.

### Types of Tests

There are several types of tests run by ramalama-stack's upstream CI.

* Pre-commit checks
* Functional testing
* Integration testing
* PyPI build and upload testing

## Documentation

Make sure to update the documentation if needed.
ramalama-stack is documented via its [README](https://github.com/containers/ramalama-stack/blob/main/docs/README.md) and files in the `docs/` directory.

## Submitting Pull Requests

No Pull Request (PR) is too small!
Typos, additional comments in the code, new test cases, bug fixes, new features, more documentation, ... it's all welcome!

While bug fixes can first be identified via an "issue" in Github, that is not required.
It's ok to just open up a PR with the fix, but make sure you include the same information you would have included in an issue - like how to reproduce it.

PRs for new features should include some background on what use cases the new code is trying to address.
When possible and when it makes sense, try to break up larger PRs into smaller ones - it's easier to review smaller code changes.
But only if those smaller ones make sense as stand-alone PRs.

Regardless of the type of PR, all PRs should include:

* Well-documented code changes, both through comments in the code itself and high-quality commit messages.
* Additional tests. Ideally, they should fail w/o your code change applied.
* Documentation updates to reflect the changes made in the pull request.

Squash your commits into logical pieces of work that might want to be reviewed separately from the rest of the PRs.
Squashing down to just one commit is also acceptable since in the end the entire PR will be reviewed anyway.
When in doubt, squash.

When your PR fixes an issue, please note that by including `Fixes: #00000` in the commit description.
More details on this are below, in the "Describe your changes in Commit Messages" section.

The ramalama-stack repo follows a one-ack policy for merges.
PRs will be approved and merged by a repo owner.
Two reviews are required for a pull request to merge, including sourcery.ai.

### Describe your Changes in Commit Messages

Describe your problem.
Whether your patch is a one-line bug fix or 5000 lines of a new feature, there must be an underlying problem that motivated you to do this work.
Convince the reviewer that there is a problem worth fixing and that it makes sense for them to read past the first paragraph.

Describe user-visible impact.
Straight up crashes and lockups are pretty convincing, but not all bugs are that blatant.
Even if the problem was spotted during code review, describe the impact you think it can have on users.
Keep in mind that the majority of users run packages provided by distributions, so include anything that could help route your change downstream.

Quantify optimizations and trade-offs.
If you claim improvements in performance, memory consumption, stack footprint, or binary size, include numbers that back them up.
But also describe non-obvious costs.
Optimizations usually aren't free but trade-offs between CPU, memory, and readability; or, when it comes to heuristics, between different workloads.
Describe the expected downsides of your optimization so that the reviewer can weigh costs against benefits.

Once the problem is established, describe what you are actually doing about it in technical detail.
It's important to describe the change in plain English for the reviewer to verify that the code is behaving as you intend it to.

Solve only one problem per patch.
If your description starts to get long, that's a sign that you probably need to split up your patch.

If the patch fixes a logged bug entry, refer to that bug entry by number and URL.
If the patch follows from a mailing list discussion, give a URL to the mailing list archive.
Please format these lines as `Fixes:` followed by the URL or, for Github bugs, the bug number preceded by a #.
For example:

```
Fixes: #00000
Fixes: https://github.com/containers/ramalama-stack/issues/00000
Fixes: https://issues.redhat.com/browse/RHEL-00000
Fixes: RHEL-00000
```

However, try to make your explanation understandable without external resources.
In addition to giving a URL to a mailing list archive or bug, summarize the relevant points of the discussion that led to the patch as submitted.

If you want to refer to a specific commit, don't just refer to the SHA-1 ID of the commit.
Please also include the one-line summary of the commit, to make it easier for reviewers to know what it is about. If the commit was merged in GitHub, referring to a GitHub PR number is also a good option, as that will retain all discussion from development, and makes including a summary less critical.
Examples:

```
Commit f641c2d9384e ("fix bug in rm -fa parallel deletes") [...]
PR #00000
```

When referring to a commit by SHA, you should also be sure to use at least the first twelve characters of the SHA-1 ID.
The ramalama-stack repository holds a lot of objects, making collisions with shorter IDs a real possibility.
Bear in mind that, even if there is no collision with your six-character ID now, that condition may change five years from now.

The following git config settings can be used to add a pretty format for outputting the above style in the git log or git show commands:

```
[core]
        abbrev = 12
[pretty]
        fixes = Fixes: %h (\"%s\")
```

### Sign your PRs

The sign-off is a line at the end of the explanation for the patch.
Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch.
The rules are simple: if you can certify the below (from [developercertificate.org](https://developercertificate.org/)):

```
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.

Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
    have the right to submit it under the open source license
    indicated in the file; or

(b) The contribution is based upon previous work that, to the best
    of my knowledge, is covered under an appropriate open source
    license and I have the right under that license to submit that
    work with modifications, whether created in whole or in part
    by me, under the same open source license (unless I am
    permitted to submit under a different license), as indicated
    in the file; or

(c) The contribution was provided directly to me by some other
    person who certified (a), (b) or (c) and I have not modified
    it.

(d) I understand and agree that this project and the contribution
    are public and that a record of the contribution (including all
    personal information I submit with it, including my sign-off) is
    maintained indefinitely and may be redistributed consistent with
    this project or the open source license(s) involved.
```

Then you just add a line to every git commit message:

    Signed-off-by: Joe Smith <joe.smith@email.com>

Use your real name (sorry, no pseudonyms or anonymous contributions).

If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`.

### Continuous Integration

All pull requests automatically run ramalama-stack's test suite.

There is always additional complexity added by automation, and so it sometimes can fail for any number of reasons.
This includes post-merge testing on all branches, where you may occasionally see [red bars on the status graph](https://github.com/containers/ramalama-stack/blob/main/docs/ci.md).

Most notably, the tests will occasionally flake.
If you see a single test on your PR has failed, and you do not believe it is caused by your changes, you can rerun the tests.
If you lack permissions to rerun the tests, please ping the maintainers using the `@containers/ramalama-stack-maintainers` group and request that the failing test be rerun.

If you see multiple test failures, you may wish to check the status graph mentioned above.
When the graph shows mostly green bars on the right, it's a good indication the main branch is currently stable.
Alternating red/green bars is indicative of a testing "flake", and should be examined (anybody can do this):

* *One or a small handful of tests, on a single task (i.e. specific distro/version), where all others ran successfully:* Frequently the cause is networking or a brief external service outage. The failed tasks may simply be re-run by pressing the corresponding button on the task details page.

* *Multiple tasks failing*: Logically this should be due to some shared/common element. If that element is identifiable as a networking or external service (e.g. packaging repository outage), a re-run should be attempted.

* *All tasks are failing*: If a common element is **not** identifiable as temporary (i.e. container registry outage), please seek assistance via [the methods below](#communications) as this may be early indication of a more serious problem.

In the (hopefully) rare case there are multiple, contiguous red bars, this is a ***very bad*** sign. It means additional merges are occurring despite an uncorrected or persistently faulty condition. This risks additional bugs being introduced and further complication of necessary corrective measures. Most likely people are aware and working on this, but it doesn't hurt [to confirm and/or try and help if possible](#communications).

## Communications

If you need help, you can contact the maintainers using the channels mentioned in RamaLama's [communications](https://github.com/containers/ramalama/blob/main/README.md#community) document.

For discussions around issues/bugs and features, you can use the GitHub [issues](https://github.com/containers/ramalama-stack/issues) and [PRs](https://github.com/containers/ramalama-stack/pulls) tracking system.

## Code of Conduct

As contributors and maintainers of the projects under the [Containers](https://github.com/containers) repository, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities to any of the projects under the containers umbrella. The full code of conduct guidelines can be found [here](https://github.com/containers/common/blob/main/CODE-OF-CONDUCT.md).

### Bot Interactions

ramalama-stack uses [sourcery.ai](https://sourcery.ai/) for AI code reviews.

You can read their docs [here](https://docs.sourcery.ai/Code-Review/#interacting-with-sourcery) on how to interact with the bot.
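The "Adding dependencies" section of the removed CONTRIBUTING.md above gives the uv export command in prose; a minimal sketch of the whole flow it describes, where "some-package" is a placeholder rather than a real dependency of this repo:

```bash
uv add some-package            # updates pyproject.toml and uv.lock ("some-package" is hypothetical)
uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
git commit -s -am "Add some-package dependency"   # pre-commit's uv-export hook would also regenerate requirements.txt
```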
README.md (79 lines changed)

@@ -1,80 +1,7 @@
 # ramalama-stack
 
 [](https://pypi.org/project/ramalama-stack/)
-[](https://pypi.org/project/ramalama-stack/)
 [](https://github.com/containers/ramalama-stack/blob/main/LICENSE)
+
+
+
-
-An external provider for [Llama Stack](https://github.com/meta-llama/llama-stack) allowing for the use of [RamaLama](https://ramalama.ai/) for inference.
-
-## Installing
-
-You can install `ramalama-stack` from PyPI via `pip install ramalama-stack`
-
-This will install Llama Stack and RamaLama as well if they are not installed already.
-
-## Usage
-
-> [!WARNING]
-> The following workaround is currently needed to run this provider - see https://github.com/containers/ramalama-stack/issues/53 for more details
-> ```bash
-> curl --create-dirs --output ~/.llama/providers.d/remote/inference/ramalama.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.2.1/src/ramalama_stack/providers.d/remote/inference/ramalama.yaml
-> curl --create-dirs --output ~/.llama/distributions/ramalama/ramalama-run.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.2.1/src/ramalama_stack/ramalama-run.yaml
-> ```
-
-1. First you will need a RamaLama server running - see [the RamaLama project](https://github.com/containers/ramalama) docs for more information.
-
-2. Ensure you set your `INFERENCE_MODEL` environment variable to the name of the model you have running via RamaLama.
-
-3. You can then run the RamaLama external provider via `llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml`
-
-> [!NOTE]
-> You can also run the RamaLama external provider inside of a container via [Podman](https://podman.io/)
-> ```bash
-> podman run \
->   --net=host \
->   --env RAMALAMA_URL=http://0.0.0.0:8080 \
->   --env INFERENCE_MODEL=$INFERENCE_MODEL \
->   quay.io/ramalama/llama-stack
-> ```
-
-This will start a Llama Stack server which will use port 8321 by default. You can test this works by configuring the Llama Stack Client to run against this server and sending a test request.
-- If your client is running on the same machine as the server, you can run `llama-stack-client configure --endpoint http://0.0.0.0:8321 --api-key none`
-- If your client is running on a different machine, you can run `llama-stack-client configure --endpoint http://<hostname>:8321 --api-key none`
-- The client should give you a message similar to `Done! You can now use the Llama Stack Client CLI with endpoint <endpoint>`
-- You can then test the server by running `llama-stack-client inference chat-completion --message "tell me a joke"` which should return something like
-
-```bash
-ChatCompletionResponse(
-    completion_message=CompletionMessage(
-        content='A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs
-and Schrödinger\'s cat?" The librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."',
-        role='assistant',
-        stop_reason='end_of_turn',
-        tool_calls=[]
-    ),
-    logprobs=None,
-    metrics=[
-        Metric(metric='prompt_tokens', value=14.0, unit=None),
-        Metric(metric='completion_tokens', value=63.0, unit=None),
-        Metric(metric='total_tokens', value=77.0, unit=None)
-    ]
-)
-```
-
-## Llama Stack User Interface
-
-Llama Stack includes an experimental user-interface, check it out [here](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distribution/ui).
-
-To deploy the UI, run this:
-
-```bash
-podman run -d --rm --network=container:ramalama --name=streamlit quay.io/redhat-et/streamlit_client:0.1.0
-```
-
-> [!NOTE]
-> If running on MacOS (not Linux), `--network=host` doesn't work. You'll need to publish additional ports `8321:8321` and `8501:8501` with the ramalama serve command, then run with `network=container:ramalama`.
->
-> If running on Linux use `--network=host` or `-p 8501:8501` instead. The streamlit container will be able to access the ramalama endpoint with either.
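The main-branch README above spreads its usage steps across a warning block, a numbered list, and client examples; stitched together, a hedged end-to-end sketch (the model name is an example, and the curl workaround from the README's warning is assumed to have been run already):

```bash
pip install ramalama-stack                # also pulls in llama-stack and ramalama if missing
ramalama serve llama3.2:3b &              # step 1: a running RamaLama server
export INFERENCE_MODEL=llama3.2:3b        # step 2: the model being served
llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml   # step 3: start the provider
llama-stack-client configure --endpoint http://0.0.0.0:8321 --api-key none
llama-stack-client inference chat-completion --message "tell me a joke"
```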
docs/ci.md (9 lines; present only on main)

@@ -1,9 +0,0 @@
# ramalama-stack CI

| Job | Description | Status |
| --- | ----------- | ------ |
| [Pre-commit](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/pre-commit.yml) | Runs pre-commit checks |  |
| [Test External Providers](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-external-providers.yml) | Tests the current `ramalama-stack` branch against the latest released versions of `ramalama` and `llama-stack` |  |
| [Test LLS Integration](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-lls-integration.yml) | Tests the latest released versions of `ramalama` and `ramalama-stack` against the current `llama-stack` main branch |  |
| [Test Container](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-container.yml) | Tests the latest tagged container image of `ramalama/llama-stack` run via Podman |  |
| [Build and publish PyPI package](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/pypi.yml) | Builds, tests, and publishes `ramalama-stack` package |  |
pyproject.toml

@@ -4,48 +4,13 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ramalama-stack"
-description = "An external provider for Llama Stack allowing for the use of RamaLama for inference."
-authors = [{ name = "The RamaLama Stack Authors" }]
+description = "Llama Stack Provider for Ramalama Inference"
 readme = "README.md"
 license = "Apache-2.0"
 license-files = ["LICENSE"]
 keywords = ["ramalama", "llama", "AI"]
-requires-python = ">=3.11"
-dynamic = ["version"]
-dependencies = [
-    "aiohttp>=3.12.2",
-    "aiosqlite>=0.21.0",
-    "autoevals>=0.0.129",
-    "blobfile>=3.0.0",
-    "chardet>=3.0.0",
-    "datasets>=3.6.0",
-    "fastapi>=0.115.12",
-    "httpx>=0.28.1",
-    "llama-stack==0.2.9",
-    "mcp>=1.9.2",
-    "numpy>=2.2.6",
-    "openai>=1.82.0",
-    "opentelemetry-exporter-otlp-proto-http>=1.33.1",
-    "opentelemetry-sdk>=1.33.1",
-    "peft>=0.15.2",
-    "psutil>=7.0.0",
-    "pydantic>=2.11.5",
-    "pymilvus>=2.5.10",
-    "ramalama==0.9.0",
-    "requests>=2.32.3",
-    "sentence-transformers>=3.0.0",
-    "six>=1.17.0",
-    "sqlalchemy>=2.0.41",
-    "torch>=2.7.0",
-    "trl>=0.18.1",
-    "urllib3>=2.4.0",
-    "uvicorn>=0.34.2",
-]
-
-[dependency-groups]
-dev = [
-    "pre-commit>=3.0.4,<4.0",
-]
+requires-python = ">=3.10"
+dynamic = ["dependencies", "optional-dependencies", "version"]
 
 [project.urls]
 homepage = "https://ramalama.ai"

@@ -64,5 +29,8 @@ include-package-data = true
 [tool.setuptools.package-data]
 "ramalama_stack" = ["providers.d/**/*", "ramalama-run.yaml"]
 
+[tool.setuptools.dynamic]
+dependencies = { file = ["requirements.txt"] }
+
 [tool.ruff]
 extend-exclude = ["*.ipynb"]
A new pip requirements file added on the v0.1.3 side:

@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
+-r requirements.txt
+
+pre-commit>=3.0.4,<4.0
requirements.txt (502 lines changed)

@@ -1,484 +1,18 @@
+ramalama>=0.8.1
+llama-stack>=0.2.3
+urllib3
+faiss-cpu
+autoevals
+six
+pydantic
+aiohttp
+aiosqlite
+datasets
+fastapi
+httpx
+numpy
+openai
+opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk
+requests
+uvicorn
-# This file was autogenerated by uv via the following command:
-#    uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
-accelerate==1.7.0
-    # via
-    #   peft
-    #   trl
-aiohappyeyeballs==2.6.1
-    # via aiohttp
-aiohttp==3.12.7
-    # via
-    #   fsspec
-    #   llama-stack
-    #   ramalama-stack
-aiosignal==1.3.2
-    # via aiohttp
-aiosqlite==0.21.0
-    # via ramalama-stack
-annotated-types==0.7.0
-    # via pydantic
-anyio==4.9.0
-    # via
-    #   httpx
-    #   llama-stack-client
-    #   mcp
-    #   openai
-    #   sse-starlette
-    #   starlette
-argcomplete==3.6.2
-    # via ramalama
-attrs==25.3.0
-    # via
-    #   aiohttp
-    #   jsonschema
-    #   referencing
-autoevals==0.0.129
-    # via ramalama-stack
-blobfile==3.0.0
-    # via ramalama-stack
-braintrust-core==0.0.59
-    # via autoevals
-certifi==2025.4.26
-    # via
-    #   httpcore
-    #   httpx
-    #   requests
-chardet==5.2.0
-    # via ramalama-stack
-charset-normalizer==3.4.2
-    # via requests
-chevron==0.14.0
-    # via autoevals
-click==8.2.1
-    # via
-    #   llama-stack-client
-    #   uvicorn
-colorama==0.4.6 ; sys_platform == 'win32'
-    # via
-    #   click
-    #   tqdm
-datasets==3.6.0
-    # via
-    #   ramalama-stack
-    #   trl
-deprecated==1.2.18
-    # via
-    #   opentelemetry-api
-    #   opentelemetry-exporter-otlp-proto-http
-    #   opentelemetry-semantic-conventions
-dill==0.3.8
-    # via
-    #   datasets
-    #   multiprocess
-distro==1.9.0
-    # via
-    #   llama-stack-client
-    #   openai
-ecdsa==0.19.1
-    # via python-jose
-fastapi==0.115.12
-    # via ramalama-stack
-filelock==3.18.0
-    # via
-    #   blobfile
-    #   datasets
-    #   huggingface-hub
-    #   torch
-    #   transformers
-fire==0.7.0
-    # via llama-stack
-frozenlist==1.6.0
-    # via
-    #   aiohttp
-    #   aiosignal
-fsspec==2025.3.0
-    # via
-    #   datasets
-    #   huggingface-hub
-    #   torch
-googleapis-common-protos==1.70.0
-    # via opentelemetry-exporter-otlp-proto-http
-greenlet==3.2.2 ; (python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')
-    # via sqlalchemy
-grpcio==1.67.1
-    # via pymilvus
-h11==0.16.0
-    # via
-    #   httpcore
-    #   llama-stack
-    #   uvicorn
-hf-xet==1.1.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
-    # via huggingface-hub
-httpcore==1.0.9
-    # via httpx
-httpx==0.28.1
-    # via
-    #   llama-stack
-    #   llama-stack-client
-    #   mcp
-    #   openai
-    #   ramalama-stack
-httpx-sse==0.4.0
-    # via mcp
-huggingface-hub==0.32.4
-    # via
-    #   accelerate
-    #   datasets
-    #   llama-stack
-    #   peft
-    #   sentence-transformers
-    #   tokenizers
-    #   transformers
-idna==3.10
-    # via
-    #   anyio
-    #   httpx
-    #   requests
-    #   yarl
-importlib-metadata==8.6.1
-    # via opentelemetry-api
-jinja2==3.1.6
-    # via
-    #   llama-stack
-    #   torch
-jiter==0.10.0
-    # via openai
-joblib==1.5.1
-    # via scikit-learn
-jsonschema==4.24.0
-    # via
-    #   autoevals
-    #   llama-stack
-jsonschema-specifications==2025.4.1
-    # via jsonschema
-llama-stack==0.2.9
-    # via ramalama-stack
-llama-stack-client==0.2.9
-    # via llama-stack
-lxml==5.4.0
-    # via blobfile
-markdown-it-py==3.0.0
-    # via rich
-markupsafe==3.0.2
-    # via jinja2
-mcp==1.9.2
-    # via ramalama-stack
-mdurl==0.1.2
-    # via markdown-it-py
-milvus-lite==2.4.12 ; sys_platform != 'win32'
-    # via pymilvus
-mpmath==1.3.0
-    # via sympy
-multidict==6.4.4
-    # via
-    #   aiohttp
-    #   yarl
-multiprocess==0.70.16
-    # via datasets
-networkx==3.5
-    # via torch
-numpy==2.2.6
-    # via
-    #   accelerate
-    #   datasets
-    #   pandas
-    #   peft
-    #   ramalama-stack
-    #   scikit-learn
-    #   scipy
-    #   transformers
-nvidia-cublas-cu12==12.6.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via
-    #   nvidia-cudnn-cu12
-    #   nvidia-cusolver-cu12
-    #   torch
-nvidia-cuda-cupti-cu12==12.6.80 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cuda-nvrtc-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cuda-runtime-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cudnn-cu12==9.5.1.17 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cufft-cu12==11.3.0.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cufile-cu12==1.11.1.6 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-curand-cu12==10.3.7.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cusolver-cu12==11.7.1.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cusparse-cu12==12.5.4.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via
-    #   nvidia-cusolver-cu12
-    #   torch
-nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via
-    #   nvidia-cufft-cu12
-    #   nvidia-cusolver-cu12
-    #   nvidia-cusparse-cu12
-    #   torch
-nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-openai==1.84.0
-    # via
-    #   llama-stack
-    #   ramalama-stack
-opentelemetry-api==1.33.1
-    # via
-    #   opentelemetry-exporter-otlp-proto-http
-    #   opentelemetry-sdk
-    #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp-proto-common==1.33.1
-    # via opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-http==1.33.1
-    # via ramalama-stack
-opentelemetry-proto==1.33.1
-    # via
-    #   opentelemetry-exporter-otlp-proto-common
-    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.33.1
-    # via
-    #   opentelemetry-exporter-otlp-proto-http
-    #   ramalama-stack
-opentelemetry-semantic-conventions==0.54b1
-    # via opentelemetry-sdk
-packaging==25.0
-    # via
-    #   accelerate
-    #   datasets
-    #   huggingface-hub
-    #   peft
-    #   transformers
-pandas==2.2.3
-    # via
-    #   datasets
-    #   llama-stack-client
-    #   pymilvus
-peft==0.15.2
-    # via ramalama-stack
-pillow==11.2.1
-    # via
-    #   llama-stack
-    #   sentence-transformers
-polyleven==0.9.0
-    # via autoevals
-prompt-toolkit==3.0.51
-    # via
-    #   llama-stack
-    #   llama-stack-client
-propcache==0.3.1
-    # via
-    #   aiohttp
-    #   yarl
-protobuf==5.29.5
-    # via
-    #   googleapis-common-protos
-    #   opentelemetry-proto
-    #   pymilvus
-psutil==7.0.0
-    # via
-    #   accelerate
-    #   peft
-    #   ramalama-stack
-pyaml==25.5.0
-    # via llama-stack-client
-pyarrow==20.0.0
-    # via datasets
-pyasn1==0.6.1
-    # via
-    #   python-jose
-    #   rsa
-pycryptodomex==3.23.0
-    # via blobfile
-pydantic==2.11.5
-    # via
-    #   fastapi
-    #   llama-stack
-    #   llama-stack-client
-    #   mcp
-    #   openai
-    #   pydantic-settings
-    #   ramalama-stack
-pydantic-core==2.33.2
-    # via pydantic
-pydantic-settings==2.9.1
-    # via mcp
-pygments==2.19.1
-    # via rich
-pymilvus==2.5.10
-    # via ramalama-stack
-python-dateutil==2.9.0.post0
-    # via pandas
-python-dotenv==1.1.0
-    # via
-    #   llama-stack
-    #   pydantic-settings
-    #   pymilvus
-python-jose==3.5.0
-    # via llama-stack
-python-multipart==0.0.20
-    # via mcp
-pytz==2025.2
-    # via pandas
-pyyaml==6.0.2
-    # via
-    #   accelerate
-    #   autoevals
-    #   datasets
-    #   huggingface-hub
-    #   peft
-    #   pyaml
-    #   transformers
-ramalama==0.9.0
-    # via ramalama-stack
-referencing==0.36.2
-    # via
-    #   jsonschema
-    #   jsonschema-specifications
-regex==2024.11.6
-    # via
-    #   tiktoken
-    #   transformers
-requests==2.32.3
-    # via
-    #   datasets
-    #   huggingface-hub
-    #   llama-stack
-    #   opentelemetry-exporter-otlp-proto-http
-    #   ramalama-stack
-    #   tiktoken
-    #   transformers
-rich==14.0.0
-    # via
-    #   llama-stack
-    #   llama-stack-client
-rpds-py==0.25.1
-    # via
-    #   jsonschema
-    #   referencing
-rsa==4.9.1
-    # via python-jose
-safetensors==0.5.3
-    # via
-    #   accelerate
-    #   peft
-    #   transformers
-scikit-learn==1.7.0
-    # via sentence-transformers
-scipy==1.15.3
-    # via
-    #   scikit-learn
-    #   sentence-transformers
-sentence-transformers==4.1.0
-    # via ramalama-stack
-setuptools==80.9.0
-    # via
-    #   llama-stack
-    #   pymilvus
-    #   torch
-    #   triton
-six==1.17.0
-    # via
-    #   ecdsa
-    #   python-dateutil
-    #   ramalama-stack
-sniffio==1.3.1
-    # via
-    #   anyio
-    #   llama-stack-client
-    #   openai
-sqlalchemy==2.0.41
-    # via ramalama-stack
-sse-starlette==2.3.6
-    # via mcp
-starlette==0.46.2
-    # via
-    #   fastapi
-    #   llama-stack
-    #   mcp
-sympy==1.14.0
-    # via torch
-termcolor==3.1.0
-    # via
-    #   fire
-    #   llama-stack
-    #   llama-stack-client
-threadpoolctl==3.6.0
-    # via scikit-learn
-tiktoken==0.9.0
-    # via llama-stack
-tokenizers==0.21.1
-    # via transformers
-torch==2.7.0
-    # via
-    #   accelerate
-    #   peft
-    #   ramalama-stack
-    #   sentence-transformers
-tqdm==4.67.1
-    # via
-    #   datasets
-    #   huggingface-hub
-    #   llama-stack-client
-    #   milvus-lite
-    #   openai
-    #   peft
-    #   sentence-transformers
-    #   transformers
-transformers==4.52.4
-    # via
-    #   peft
-    #   sentence-transformers
-    #   trl
-triton==3.3.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-trl==0.18.1
-    # via ramalama-stack
-typing-extensions==4.14.0
-    # via
-    #   aiosqlite
-    #   anyio
-    #   fastapi
-    #   huggingface-hub
-    #   llama-stack-client
-    #   openai
-    #   opentelemetry-sdk
-    #   pydantic
-    #   pydantic-core
-    #   referencing
-    #   sentence-transformers
-    #   sqlalchemy
-    #   torch
-    #   typing-inspection
|
|
||||||
typing-inspection==0.4.1
|
|
||||||
# via
|
|
||||||
# pydantic
|
|
||||||
# pydantic-settings
|
|
||||||
tzdata==2025.2
|
|
||||||
# via pandas
|
|
||||||
ujson==5.10.0
|
|
||||||
# via pymilvus
|
|
||||||
urllib3==2.4.0
|
|
||||||
# via
|
|
||||||
# blobfile
|
|
||||||
# ramalama-stack
|
|
||||||
# requests
|
|
||||||
uvicorn==0.34.3
|
|
||||||
# via
|
|
||||||
# mcp
|
|
||||||
# ramalama-stack
|
|
||||||
wcwidth==0.2.13
|
|
||||||
# via prompt-toolkit
|
|
||||||
wrapt==1.17.2
|
|
||||||
# via deprecated
|
|
||||||
xxhash==3.5.0
|
|
||||||
# via datasets
|
|
||||||
yarl==1.20.0
|
|
||||||
# via aiohttp
|
|
||||||
zipp==3.22.0
|
|
||||||
# via importlib-metadata
|
|
||||||
|
|
|
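These pinned versions read like the output of a pip-compile-style lock step. A hedged sketch of regenerating such a file, assuming uv is the tool in use; the input and output file names here are placeholders, and the repository's actual lock workflow may differ:

    # Resolve the project's dependencies into a fully pinned list with "# via" annotations
    uv pip compile pyproject.toml -o requirements.txt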
@@ -11,8 +11,8 @@ def get_provider_spec() -> ProviderSpec:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="ramalama",
-            pip_packages=["ramalama>=0.8.5", "pymilvus"],
+            pip_packages=["ramalama>=0.8.1", "faiss-cpu"],
             config_class="config.RamalamaImplConfig",
-            module="ramalama_stack",
+            module="ramalama_adapter",
         ),
     )
@@ -1,6 +1,6 @@
 adapter:
   adapter_type: ramalama
-  pip_packages: ["ramalama>=0.8.5", "pymilvus"]
+  pip_packages: ["ramalama>=0.8.1", "faiss-cpu"]
   config_class: ramalama_stack.config.RamalamaImplConfig
   module: ramalama_stack
 api_dependencies: []
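This adapter definition is what Llama Stack discovers through its external providers directory (the run config below defaults it to ~/.llama/providers.d). A minimal sketch of wiring it up by hand, assuming the spec is saved as ramalama.yaml; the exact install layout may differ from what the package ships:

    # Place the remote inference provider spec where the server looks for it
    mkdir -p ~/.llama/providers.d/remote/inference
    cp ramalama.yaml ~/.llama/providers.d/remote/inference/ramalama.yaml
    # The CI helpers later grep the server log for
    # "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml"
    # to confirm the provider was actually loaded.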
@@ -5,7 +5,6 @@ apis:
 - datasetio
 - eval
 - inference
-- post_training
 - safety
 - scoring
 - telemetry
@@ -21,10 +20,13 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
-  - provider_id: milvus
-    provider_type: inline::milvus
+  - provider_id: faiss
+    provider_type: inline::faiss
     config:
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/milvus_store.db
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:distributions/ramalama}/faiss_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -38,16 +40,13 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/agents_store.db
-      responses_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/responses_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:llamastack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama}/trace_store.db
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama/trace_store.db}
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -82,13 +81,6 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
-  post_training:
-  - provider_id: huggingface
-    provider_type: inline::huggingface
-    config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
@@ -100,32 +92,20 @@ providers:
     config:
       api_key: ${env.TAVILY_SEARCH_API_KEY:}
       max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-  - provider_id: wolfram-alpha
-    provider_type: remote::wolfram-alpha
-    config:
-      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/registry.db
-inference_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/inference_store.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ramalama
   model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
-  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []
@@ -136,8 +116,8 @@ tool_groups:
   provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
-- toolgroup_id: builtin::wolfram_alpha
-  provider_id: wolfram-alpha
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
 server:
   port: 8321
 external_providers_dir: ${env.EXTERNAL_PROVIDERS_DIR:~/.llama/providers.d}
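The run config is parameterized entirely through environment variables (INFERENCE_MODEL, SQLITE_STORE_DIR, EXTERNAL_PROVIDERS_DIR, OTEL_SERVICE_NAME, TELEMETRY_SINKS, and so on). A minimal launch sketch that mirrors what the CI helpers further down actually do; the model reference is a placeholder:

    # Serve a model with RamaLama (listens on :8080), then start Llama Stack against this config
    export INFERENCE_MODEL="<model reference served by RamaLama>"   # placeholder
    nohup uv run ramalama serve "$INFERENCE_MODEL" > ramalama.log 2>&1 &
    uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv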
@@ -191,6 +191,7 @@ class RamalamaInferenceAdapter(Inference, ModelsProtocolPrivate):
         )

     async def register_model(self, model: Model) -> Model:
+        model = await self.register_helper.register_model(model)
         res = await self.client.models.list()
         available_models = [m.id async for m in res]
         # Ramalama handles paths on MacOS and Linux differently
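The added register_helper call normalizes the model through the registry helper before the adapter compares it against what the RamaLama server reports. A quick way to inspect both sides of that comparison by hand, using only endpoints exercised elsewhere in these tests:

    # Models as reported by the RamaLama server itself
    curl -sS http://localhost:8080/v1/models
    # Models as exposed through Llama Stack's OpenAI-compatible surface
    curl -sS http://localhost:8321/v1/openai/v1/models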
@@ -1,20 +0,0 @@
#!/bin/bash

main() {
    echo "===> starting 'test-container'..."
    start_and_wait_for_ramalama_server
    test_ramalama_models
    test_ramalama_chat_completion
    start_and_wait_for_llama_stack_container
    test_llama_stack_models
    test_llama_stack_openai_models
    test_llama_stack_chat_completion
    test_llama_stack_openai_chat_completion
    echo "===> 'test-container' completed successfully!"
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"
main "$@"
exit 0
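start_and_wait_for_llama_stack_container (defined in tests/utils.sh further down) drives the containerized variant of this flow. A minimal manual equivalent, reusing the exact image and environment from that helper; RamaLama must already be serving on :8080 for the health check to pass:

    podman run -d --net=host \
        --env INFERENCE_MODEL="$INFERENCE_MODEL" \
        --env RAMALAMA_URL=http://0.0.0.0:8080 \
        --name llama-stack \
        quay.io/ramalama/llama-stack:latest
    # The helpers poll this endpoint until it returns {"status":"OK"}
    curl -s http://localhost:8321/v1/health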
@@ -1,22 +1,118 @@
 #!/bin/bash

+function start_and_wait_for_ramalama_server {
+    # Start ramalama serve in background with logging to 'ramalama.log'
+    nohup uv run ramalama serve "$INFERENCE_MODEL" > ramalama.log 2>&1 &
+    RAMALAMA_PID=$!
+    echo "Started RamaLama with PID: $RAMALAMA_PID"
+
+    # Wait for ramalama to be ready by doing a health check
+    echo "Waiting for RamaLama server..."
+    for i in {1..60}; do
+        echo "Attempt $i to connect to RamaLama..."
+        resp=$(curl -s http://localhost:8080/health)
+        if [ "$resp" == '{"status":"ok"}' ]; then
+            echo "RamaLama server is up and responding!"
+            break
+        fi
+        if [ "$i" -eq 60 ]; then
+            echo "RamaLama server failed to start or respond"
+            echo "RamaLama logs:"
+            cat ramalama.log
+            exit 1
+        fi
+        sleep 1
+    done
+}
+
+function start_and_wait_for_llama_stack_server {
+    # Start llama stack run with logging to 'lls.log'
+    LLAMA_STACK_LOG_FILE=lls.log nohup uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv &
+    LLS_PID=$!
+    echo "Started Llama Stack with PID: $LLS_PID"
+
+    # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
+    echo "Waiting for Llama Stack server..."
+    for i in {1..60}; do
+        echo "Attempt $i to connect to Llama Stack..."
+        resp=$(curl -s http://localhost:8321/v1/health)
+        if [ "$resp" == '{"status":"OK"}' ]; then
+            echo "Llama Stack server is up!"
+            if grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml" lls.log; then
+                echo "Llama Stack server is using RamaLama provider"
+                return
+            else
+                echo "Llama Stack server is not using RamaLama provider"
+                echo "Server logs:"
+                cat lls.log
+                exit 1
+            fi
+        fi
+        sleep 1
+    done
+    echo "Llama Stack server failed to start"
+    echo "Server logs:"
+    cat lls.log
+    exit 1
+}
+
+function test_ramalama_chat_completion {
+    echo "===> test_ramalama_chat_completion: start"
+    # shellcheck disable=SC2016
+    resp=$(curl -sS -X POST http://localhost:8080/v1/chat/completions \
+        -H "Content-Type: application/json" \
+        -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}")
+    if echo "$resp" | grep -q "choices"; then
+        echo "===> test_ramalama_chat_completion: pass"
+        return
+    else
+        echo "===> test_ramalama_chat_completion: fail"
+        echo "RamaLama logs:"
+        cat ramalama.log
+        exit 1
+    fi
+}
+
+function test_llama_stack_chat_completion {
+    echo "===> test_llama_stack_chat_completion: start"
+    nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
+    if nohup uv run llama-stack-client inference chat-completion --message "tell me a joke" | grep -q "completion_message"; then
+        echo "===> test_llama_stack_chat_completion: pass"
+        return
+    else
+        echo "===> test_llama_stack_chat_completion: fail"
+        echo "Server logs:"
+        cat lls.log
+        exit 1
+    fi
+}
+
+function test_llama_stack_openai_chat_completion {
+    echo "===> test_llama_stack_openai_chat_completion: start"
+    # shellcheck disable=SC2016
+    resp=$(curl -sS -X POST http://localhost:8321/v1/openai/v1/chat/completions \
+        -H "Content-Type: application/json" \
+        -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}")
+    if echo "$resp" | grep -q "choices"; then
+        echo "===> test_llama_stack_openai_chat_completion: pass"
+        return
+    else
+        echo "===> test_llama_stack_openai_chat_completion: fail"
+        echo "Server logs:"
+        cat lls.log
+        exit 1
+    fi
+}
+
 main() {
     echo "===> starting 'test-external-providers'..."
     start_and_wait_for_ramalama_server
-    test_ramalama_models
     test_ramalama_chat_completion
     start_and_wait_for_llama_stack_server
-    test_llama_stack_models
-    test_llama_stack_openai_models
     test_llama_stack_chat_completion
     test_llama_stack_openai_chat_completion
     echo "===> 'test-external-providers' completed successfully!"
 }

-TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
-# shellcheck disable=SC2153,SC2034
-INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')
-# shellcheck disable=SC1091
-source "$TEST_UTILS/utils.sh"
 main "$@"
 exit 0
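A sketch of invoking this test locally; the script path is an assumption (the repository keeps its other test assets under tests/), and the model reference is a placeholder:

    # Requires uv, ramalama, and llama-stack available in the environment
    export INFERENCE_MODEL="<model reference>"   # placeholder
    bash tests/test-external-providers.sh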
@@ -1,166 +0,0 @@
import os
import uuid
from llama_stack_client import LlamaStackClient, RAGDocument


def setup_client():
    """Initialize Llama Stack client with configuration"""
    base_url = "http://localhost:8321"

    client = LlamaStackClient(base_url=base_url, api_key="none", timeout=10.0)

    print(f"Connected to Llama Stack server at {base_url}")
    return client


def setup_inference_params():
    """Configure inference parameters"""
    model_id = os.getenv(
        "INFERENCE_MODEL",
        "bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf",
    )

    temperature = float(os.getenv("TEMPERATURE", 0.0))
    if temperature > 0.0:
        top_p = float(os.getenv("TOP_P", 0.95))
        strategy = {"type": "top_p", "temperature": temperature, "top_p": top_p}
    else:
        strategy = {"type": "greedy"}

    max_tokens = int(os.getenv("MAX_TOKENS", 4096))

    sampling_params = {
        "strategy": strategy,
        "max_tokens": max_tokens,
    }

    stream_env = os.getenv("STREAM", "False")
    stream = stream_env == "True"

    print("Inference Parameters:")
    print(f"\tModel: {model_id}")
    print(f"\tSampling Parameters: {sampling_params}")
    print(f"\tStream: {stream}")

    return model_id, sampling_params, stream


def setup_vector_db(client):
    """Setup vector database for RAG"""
    vector_db_id = f"test_vector_db_{uuid.uuid4().hex[:8]}"

    # Find embedding model from available models
    models = client.models.list()
    embedding_model = None
    for model in models:
        if hasattr(model, "model_type") and model.model_type == "embedding":
            embedding_model = model.identifier
            break

    if not embedding_model:
        raise Exception("No embedding model found")

    print(f"Using embedding model: {embedding_model}")

    # Register vector database
    client.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model=embedding_model,
        embedding_dimension=int(os.getenv("VDB_EMBEDDING_DIMENSION", 384)),
        provider_id=os.getenv("VDB_PROVIDER", "milvus"),
    )

    # Ingest simple test documents instead of external URLs
    test_content = [
        "RamaLama Stack is an external provider for Llama Stack that allows for the use of RamaLama for inference.",
        "Podman is a container management tool that provides a Docker-compatible command line interface without requiring a daemon.",
        "Podman can run containers rootlessly and provides robust security isolation.",
    ]

    documents = [
        RAGDocument(
            document_id=f"test_doc_{i}",
            content=content,
            mime_type="text/plain",
            metadata={"source": f"test_document_{i}"},
        )
        for i, content in enumerate(test_content)
    ]

    print(f"Ingesting {len(documents)} test documents into vector database...")
    client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=int(os.getenv("VECTOR_DB_CHUNK_SIZE", 128)),
    )

    print(f"Vector database '{vector_db_id}' setup complete")
    return vector_db_id


def run_rag_query(client, model_id, sampling_params, stream, vector_db_id, query):
    """Execute RAG query and return response"""
    print(f"\nUser> {query}")

    rag_response = client.tool_runtime.rag_tool.query(
        content=query, vector_db_ids=[vector_db_id]
    )

    messages = [{"role": "system", "content": "You are a helpful assistant."}]

    prompt_context = rag_response.content
    extended_prompt = f"Please answer the given query using the context below.\n\nCONTEXT:\n{prompt_context}\n\nQUERY:\n{query}"
    messages.append({"role": "user", "content": extended_prompt})

    response = client.inference.chat_completion(
        messages=messages,
        model_id=model_id,
        sampling_params=sampling_params,
        stream=stream,
    )

    print("inference> ", end="")
    if stream:
        for chunk in response:
            if hasattr(chunk, "event") and hasattr(chunk.event, "delta"):
                if hasattr(chunk.event.delta, "text"):
                    print(chunk.event.delta.text, end="")
        print()
    else:
        print(response.completion_message.content)


def main():
    """Main function to run RAG test"""
    print("=== Llama Stack RAG Test ===")

    try:
        client = setup_client()
        model_id, sampling_params, stream = setup_inference_params()

        vector_db_id = setup_vector_db(client)

        queries = [
            "What is RamaLama Stack?",
            "What is Podman?",
            "Can Podman run in rootless mode?",
        ]

        print("\n=== Running RAG Queries ===")
        for query in queries:
            run_rag_query(
                client, model_id, sampling_params, stream, vector_db_id, query
            )
            print()

        print("=== RAG Test Complete ===")

    except Exception as e:
        print(f"Error: {e}")
        return 1

    return 0


if __name__ == "__main__":
    exit(main())
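The RAG test reads all of its knobs from the environment (INFERENCE_MODEL, TEMPERATURE, TOP_P, MAX_TOKENS, STREAM, VDB_PROVIDER, VDB_EMBEDDING_DIMENSION, VECTOR_DB_CHUNK_SIZE). A minimal run against an already-running stack, matching how tests/test-rag.sh invokes it; the overrides shown are illustrative:

    # Stream responses and force the Milvus vector_io provider from the run config
    STREAM=True VDB_PROVIDER=milvus uv run python tests/test-rag.py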
@@ -1,40 +0,0 @@
#!/bin/bash

function test_rag_functionality {
    echo "===> test_rag_functionality: start"

    if uv run python tests/test-rag.py; then
        echo "===> test_rag_functionality: pass"
        return 0
    else
        echo "===> test_rag_functionality: fail"
        echo "RAG test script output above shows the failure details"
        return 1
    fi
}

main() {
    echo "===> starting 'test-rag'..."

    # Check if services are already running (from previous tests)
    if curl -s http://localhost:8321/v1/health >/dev/null 2>&1 && curl -s http://localhost:8080/health >/dev/null 2>&1; then
        echo "Using existing RamaLama and Llama Stack servers"
    else
        echo "Starting fresh servers for RAG test"
        start_and_wait_for_ramalama_server
        start_and_wait_for_llama_stack_server
    fi

    if test_rag_functionality; then
        echo "===> 'test-rag' completed successfully!"
    else
        echo "===> 'test-rag' failed!"
        exit 1
    fi
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"
main "$@"
exit 0
@@ -1,77 +0,0 @@
#!/bin/bash

function start_and_wait_for_streamlit_ui_linux {
    echo "Starting Streamlit UI for Linux..."

    podman run -d --rm --network=host --name=streamlit-ui quay.io/redhat-et/streamlit_client:0.1.0

    echo "Waiting for Streamlit UI to be ready..."
    for i in {1..30}; do
        echo "Attempt $i to connect to Streamlit UI..."
        if curl -s http://localhost:8501 >/dev/null 2>&1; then
            echo "Streamlit UI is up and responding on port 8501!"
            return 0
        fi
        if [ "$i" -eq 30 ]; then
            echo "Streamlit UI failed to start or respond"
            echo "Container logs:"
            podman logs streamlit-ui
            return 1
        fi
        sleep 2
    done
}

function test_streamlit_ui_linux {
    echo "===> test_streamlit_ui_linux: start"

    if start_and_wait_for_streamlit_ui_linux; then
        # Test that the UI is accessible and returns HTML content
        resp=$(curl -sS http://localhost:8501)
        if echo "$resp" | grep -q -i "streamlit\|html"; then
            echo "===> test_streamlit_ui_linux: pass"
            return 0
        else
            echo "===> test_streamlit_ui_linux: fail - UI not serving expected content"
            echo "Response: $resp"
            return 1
        fi
    else
        echo "===> test_streamlit_ui_linux: fail - UI failed to start"
        return 1
    fi
}

function cleanup_streamlit_ui {
    echo "Cleaning up Streamlit UI container..."
    podman rm -f streamlit-ui >/dev/null 2>&1 || true
}

main() {
    echo "===> starting 'test-ui-linux'..."

    # Only run on Linux
    # Need a fix to published ports in ramalama to run on MacOS
    if [[ "$OSTYPE" != "linux-gnu"* ]]; then
        echo "This test is only for Linux systems. Current OS: $OSTYPE"
        echo "===> 'test-ui-linux' skipped!"
        exit 0
    fi

    trap cleanup_streamlit_ui EXIT

    start_and_wait_for_ramalama_server
    start_and_wait_for_llama_stack_server

    test_streamlit_ui_linux

    cleanup_streamlit_ui

    echo "===> 'test-ui-linux' completed successfully!"
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"
main "$@"
exit 0
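For poking at the UI outside of CI, the same container can be run directly against a local stack; this mirrors the helper above rather than adding anything new:

    podman run -d --rm --network=host --name=streamlit-ui quay.io/redhat-et/streamlit_client:0.1.0
    # then browse to http://localhost:8501 (the test only greps the page for "streamlit" or "html")
    podman rm -f streamlit-ui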
tests/utils.sh
@@ -1,186 +0,0 @@
#!/bin/bash

function start_and_wait_for_ramalama_server {
    # Start ramalama serve in background with logging to 'ramalama-$INFERENCE_MODEL_NO_COLON.log'
    nohup uv run ramalama serve "$INFERENCE_MODEL" > "ramalama-$INFERENCE_MODEL_NO_COLON.log" 2>&1 &
    RAMALAMA_PID=$!
    echo "Started RamaLama with PID: $RAMALAMA_PID"

    # Wait for ramalama to be ready by doing a health check
    echo "Waiting for RamaLama server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to RamaLama..."
        resp=$(curl -s http://localhost:8080/health)
        if [ "$resp" == '{"status":"ok"}' ]; then
            echo "RamaLama server is up and responding!"
            break
        fi
        if [ "$i" -eq 60 ]; then
            echo "RamaLama server failed to start or respond"
            echo "RamaLama logs:"
            cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
            exit 1
        fi
        sleep 1
    done
}

function start_and_wait_for_llama_stack_server {
    # Start llama stack run with logging to 'lls-$INFERENCE_MODEL_NO_COLON.log'
    LLAMA_STACK_LOG_FILE="lls-$INFERENCE_MODEL_NO_COLON.log" nohup uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv &
    LLS_PID=$!
    echo "Started Llama Stack server with PID: $LLS_PID"

    # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
    echo "Waiting for Llama Stack server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to Llama Stack..."
        resp=$(curl -s http://localhost:8321/v1/health)
        if [ "$resp" == '{"status":"OK"}' ]; then
            echo "Llama Stack server is up!"
            if grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml" "lls-$INFERENCE_MODEL_NO_COLON.log"; then
                echo "Llama Stack server is using RamaLama provider"
                return
            else
                echo "Llama Stack server is not using RamaLama provider"
                echo "Server logs:"
                cat "lls-$INFERENCE_MODEL_NO_COLON.log"
                exit 1
            fi
        fi
        sleep 1
    done
    echo "Llama Stack server failed to start"
    echo "Server logs:"
    cat "lls-$INFERENCE_MODEL_NO_COLON.log"
    exit 1
}

function start_and_wait_for_llama_stack_container {
    # Start llama stack run
    podman run \
        -d \
        --net=host \
        --env INFERENCE_MODEL="$INFERENCE_MODEL" \
        --env RAMALAMA_URL=http://0.0.0.0:8080 \
        --name llama-stack \
        quay.io/ramalama/llama-stack:latest
    LLS_PID=$!
    echo "Started Llama Stack container with PID: $LLS_PID"

    # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
    echo "Waiting for Llama Stack server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to Llama Stack..."
        resp=$(curl -s http://localhost:8321/v1/health)
        if [ "$resp" == '{"status":"OK"}' ]; then
            echo "Llama Stack server is up!"
            if podman logs llama-stack | grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml"; then
                echo "Llama Stack server is using RamaLama provider"
                return
            else
                echo "Llama Stack server is not using RamaLama provider"
                echo "Container logs:"
                podman logs llama-stack
                exit 1
            fi
        fi
        sleep 1
    done
    echo "Llama Stack server failed to start"
    echo "Container logs:"
    podman logs llama-stack
    exit 1
}

function test_ramalama_models {
    echo "===> test_ramalama_models: start"
    # shellcheck disable=SC2016
    resp=$(curl -sS http://localhost:8080/v1/models)
    if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
        echo "===> test_ramalama_models: pass"
        return
    else
        echo "===> test_ramalama_models: fail"
        echo "RamaLama logs:"
        cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
        exit 1
    fi
}

function test_ramalama_chat_completion {
    echo "===> test_ramalama_chat_completion: start"
    # shellcheck disable=SC2016
    resp=$(curl -sS -X POST http://localhost:8080/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}")
    if echo "$resp" | grep -q "choices"; then
        echo "===> test_ramalama_chat_completion: pass"
        return
    else
        echo "===> test_ramalama_chat_completion: fail"
        echo "RamaLama logs:"
        cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
        exit 1
    fi
}

function test_llama_stack_models {
    echo "===> test_llama_stack_models: start"
    nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
    if nohup uv run llama-stack-client models list | grep -q "$INFERENCE_MODEL"; then
        echo "===> test_llama_stack_models: pass"
        return
    else
        echo "===> test_llama_stack_models: fail"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
        exit 1
    fi
}

function test_llama_stack_openai_models {
    echo "===> test_llama_stack_openai_models: start"
    # shellcheck disable=SC2016
    resp=$(curl -sS http://localhost:8321/v1/openai/v1/models)
    if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
        echo "===> test_llama_stack_openai_models: pass"
        return
    else
        echo "===> test_llama_stack_openai_models: fail"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
        exit 1
    fi
}

function test_llama_stack_chat_completion {
    echo "===> test_llama_stack_chat_completion: start"
    nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
    if nohup uv run llama-stack-client inference chat-completion --message "tell me a joke" | grep -q "completion_message"; then
        echo "===> test_llama_stack_chat_completion: pass"
        return
    else
        echo "===> test_llama_stack_chat_completion: fail"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
        exit 1
    fi
}

function test_llama_stack_openai_chat_completion {
    echo "===> test_llama_stack_openai_chat_completion: start"
    # shellcheck disable=SC2016
    resp=$(curl -sS -X POST http://localhost:8321/v1/openai/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}")
    if echo "$resp" | grep -q "choices"; then
        echo "===> test_llama_stack_openai_chat_completion: pass"
        return
    else
        echo "===> test_llama_stack_openai_chat_completion: fail"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
        exit 1
    fi
}