Compare commits


38 Commits
v0.1.1 ... main

Author SHA1 Message Date
Nathan Weinberg d09a8cf3ef
chore: update README for 0.2.1 release (#91)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-06-16 09:53:14 -04:00
dependabot[bot] a347f05b4c
chore(deps): Bump hynek/build-and-inspect-python-package (#90)
Bumps [hynek/build-and-inspect-python-package](https://github.com/hynek/build-and-inspect-python-package) from 2.12.0 to 2.13.0.
- [Release notes](https://github.com/hynek/build-and-inspect-python-package/releases)
- [Changelog](https://github.com/hynek/build-and-inspect-python-package/blob/main/CHANGELOG.md)
- [Commits](b5076c307d...c52c3a4710)

---
updated-dependencies:
- dependency-name: hynek/build-and-inspect-python-package
  dependency-version: 2.13.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-06-16 09:40:14 -04:00
Sally O'Malley aab4fc9f7d
Update run config (#85)
* add config for vectordb/RAG

Signed-off-by: sallyom <somalley@redhat.com>

* add Llama Stack UI to README.md

Signed-off-by: sallyom <somalley@redhat.com>

* add test for RAG and llamastack-ui

Signed-off-by: sallyom <somalley@redhat.com>

---------

Signed-off-by: sallyom <somalley@redhat.com>
2025-06-16 09:36:15 -04:00
dependabot[bot] 88e1e574c3
chore(deps): Bump step-security/harden-runner from 2.12.0 to 2.12.1 (#84)
Bumps [step-security/harden-runner](https://github.com/step-security/harden-runner) from 2.12.0 to 2.12.1.
- [Release notes](https://github.com/step-security/harden-runner/releases)
- [Commits](0634a2670c...002fdce3c6)

---
updated-dependencies:
- dependency-name: step-security/harden-runner
  dependency-version: 2.12.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-06-12 11:53:37 -04:00
Nathan Weinberg e067155c4c
deps: freeze 'ramalama' and 'llama-stack' deps (#83)
* deps: freeze 'ramalama' and 'llama-stack' deps

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* docs: update README for 0.2.0 release

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

---------

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-06-05 14:51:17 -04:00
Nathan Weinberg d701060d6e
Revamp dependency system (#81)
* ci: update pre-commit config and CI actions

Updated some pre-commit revs as well as the config
for uv-export, which now writes to 'requirements.txt'
based on 'pyproject.toml' - motivation is this is much
more aligned with 'uv' conventions and mimics what
upstream llama-stack is doing

Also updated the pre-commit CI action to what llama-stack
is currently doing, as this was originally based on that
same action and we likely want to keep them aligned

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* refactor: modify how dependency system works

this commit centralizes the dependency and python version
information in 'pyproject.toml' allowing us to remove
'.python-version' and 'requirements-dev.txt'

'requirements.txt' is now a uv-managed file and 'uv.lock'
is updated to align with the new system

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* docs: update CONTRIBUTING.md with details about using 'uv'

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* ci: run 'test-external-providers' when dependency-related files are modified

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* deps: add 'blobfile' dependency

this was previously included in Llama Stack, but
was removed in 809e7650a7338a6c5b11e58965b20a983b99055a

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* deps: add 'mcp' dependency

this is needed for the 'meta-reference' provider
for the 'agents' API

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* fix: update 'ramalama-run.yaml' to align with newer LLS interface

Llama Stack has made some changes to how the run.yaml files need to
be defined

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* deps: add 'sqlalchemy' dependency

needed for revamped 'ramalama-run.yaml' file

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* deps: add 'psutil' dependency

this is needed for the huggingface provider used
for the post-training API

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* deps: add 'torch' dependency

this is needed for the huggingface provider used
for the post-training API

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* deps: add 'peft' dependency

this is needed for the huggingface provider used
for the post-training API

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* deps: add 'trl' dependency

this is needed for the huggingface provider used
for the post-training API

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

---------

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-06-05 13:18:15 -04:00
Brian M 6d59b1b408
replaced faiss with milvus for RAG testing (#76)
Signed-off-by: Brian <bmahabir@bu.edu>
2025-06-03 09:57:43 -04:00
Nathan Weinberg c1044db093
tests: add testing for model listing (#75)
* tests: add testing for model listing

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* ci: replace 'llama3.2:3b-instruct-fp16' with 'llama3.2:3b'

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

---------

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-28 15:37:11 -04:00
Nathan Weinberg 29de6525cc chore: update README curl tags to 0.1.5
also remove outdated config from test-external-providers

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-28 14:10:44 -04:00
Nathan Weinberg 2a3fad344e
feat: allow use of non-Llama models (#74)
* feat: allow use of non-llama models

Co-authored-by: Charlie Doern <cdoern@redhat.com>
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* ci: add granite testing to 'test-external-providers' job

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

---------

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
Co-authored-by: Charlie Doern <cdoern@redhat.com>
2025-05-28 14:08:05 -04:00
Nathan Weinberg 187ea3db43
deps: bump min python version to 3.11 (#72)
bump ramalama to 0.8.5 and lls to 0.2.7

update README curl and podman commands

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-28 11:54:06 -04:00
dependabot[bot] 1f460aa32a
chore(deps): Bump astral-sh/setup-uv from 6.0.1 to 6.1.0 (#71)
Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 6.0.1 to 6.1.0.
- [Release notes](https://github.com/astral-sh/setup-uv/releases)
- [Commits](6b9c6063ab...f0ec1fc3b3)

---
updated-dependencies:
- dependency-name: astral-sh/setup-uv
  dependency-version: 6.1.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-27 10:40:19 -04:00
Nathan Weinberg 49957a64f1
deps: bump ramalama to 0.8.3 and lls to 0.2.6 (#68)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-12 22:55:49 -04:00
Nathan Weinberg 7a82c46c96
ci(fix): different approach to lls.log for container (#65)
exec'ing into the container won't work if it exits
tried writing to a mounted lls.log file but lls couldn't handle it
using podman logs in the meantime while I make a patch for logging upstream

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-07 13:18:50 -04:00
Nathan Weinberg 58aa4d00ce
docs: add CONTRIBUTING.md (#64)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-07 13:17:47 -04:00
Nathan Weinberg 184287e210
deps: bump ramalama to 0.8.2 and lls to 0.2.5 (#60)
* deps: bump ramalama to 0.8.2 and lls to 0.2.5

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

* fix: remove troublesome sections from ramalama-run.yaml

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>

---------

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-07 10:05:43 -04:00
Nathan Weinberg 2847af917b ci(hotfix): fix podman command
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-06 16:47:05 -04:00
Nathan Weinberg 575d8b52c7 ci(hotfix): fix log reading
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-06 16:42:39 -04:00
Nathan Weinberg fb3bad4ce7 ci(hotfix): checkout repo for test script access
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-06 16:20:39 -04:00
Nathan Weinberg 6045d3928e ci(hotfix): missing 'sudo' for apt-get command
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-06 16:16:58 -04:00
Nathan Weinberg 380b5eb56c
ci: add job to test ramalama llama-stack container (#63)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-06 16:13:42 -04:00
Nathan Weinberg 3876094820
docs: add directions for running ramalama-stack container (#62)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-06 14:16:49 -04:00
Nathan Weinberg 7984600608
docs: populate README.md and create docs/ci.md (#58)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-05 23:50:22 -04:00
Charlie Doern bc4a340609
fix: adjust module (#59)
in get_provider_spec, if we want this to actually work we need the module to be ramalama_stack
this method isn't currently used which is why we never caught this but it should be

Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-05-05 11:43:10 -04:00
Nathan Weinberg 0af9f38bb2
ci: standardize hack for file writing (#57)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-01 23:03:43 -04:00
Nathan Weinberg 0790a15c64
ci: standardize actions around common schema (#55)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-01 22:44:45 -04:00
Nathan Weinberg 7638e36e2a
chore: bump llama-stack floor to 0.2.4 (#54)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-01 14:20:40 -04:00
Nathan Weinberg 72e498d024
ci(fix): typo in lls integration job installation step (#52)
also standardize python-based jobs to use 3.10

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-01 11:55:17 -04:00
Nathan Weinberg 50076ebcf4
ci: add new nightly integration job (#51)
we want to test the currently-released ramalama-stack
against the llama-stack main branch on a nightly basis

this will allow us to do any course-correcting needed
to keep the provider working with llama-stack prior to
them publishing a release

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-01 11:29:58 -04:00
Nathan Weinberg a426a3a184
build: add custom file writing to pypi installation (#37)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-05-01 10:23:29 -04:00
dependabot[bot] a206e2ed77
chore(deps): Bump astral-sh/setup-uv from 6.0.0 to 6.0.1 (#50)
Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 6.0.0 to 6.0.1.
- [Release notes](https://github.com/astral-sh/setup-uv/releases)
- [Commits](c7f87aa956...6b9c6063ab)

---
updated-dependencies:
- dependency-name: astral-sh/setup-uv
  dependency-version: 6.0.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-30 08:29:02 -04:00
Nathan Weinberg ca5c3a6938
tests: add llama stack openai chat completion test (#49)
* tests: add llama stack openai chat completion test

Signed-off-by: Nathan Weinberg <nathan2@stwmd.net>

* try a different method

Signed-off-by: Nathan Weinberg <nathan2@stwmd.net>

---------

Signed-off-by: Nathan Weinberg <nathan2@stwmd.net>
2025-04-30 00:25:36 -04:00
Nathan Weinberg 55c018a56b
chore: bump ramalama version to 0.8.1 (#46)
also bump several pre-commit dep versions

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-04-29 15:55:35 -04:00
Nathan Weinberg 849dfc203b
deps: reduce dependency duplication (#44)
use requirements.txt for dependencies outside of uv.lock
update pyproject.toml to use requirements.txt

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-04-29 14:52:55 -04:00
Nathan Weinberg ea26cd8a5d
chore: bump ramalama floor to 0.8.0 (#40)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-04-29 13:52:35 -04:00
Nathan Weinberg 6922879a08
ci: allow 'test-external-providers' to run on demand (#41)
also allow the usage of other models

'llama3.2:3b-instruct-fp16' will continue to be the default

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-04-29 12:06:33 -04:00
Nathan Weinberg 2d01fdc5af
ci: add pre commit hook for linting GitHub Actions (#42)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-04-29 12:01:18 -04:00
Nathan Weinberg 0412605d6e
fix: minor bug with README badges (#39)
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-04-28 15:48:00 -04:00
28 changed files with 3955 additions and 1731 deletions

View File

@ -2,8 +2,21 @@ name: Pre-commit
on:
pull_request:
branches:
- main
push:
branches: [main]
branches:
- main
env:
LC_ALL: en_US.UTF-8
defaults:
run:
shell: bash
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@ -11,27 +24,43 @@ concurrency:
jobs:
pre-commit:
name: pre-commit
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit
- name: Checkout code
- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.11'
python-version: "3.11"
cache: pip
cache-dependency-path: |
**/requirements*.txt
.pre-commit-config.yaml
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
- name: Run pre-commit
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
env:
SKIP: no-commit-to-branch
RUFF_OUTPUT_FORMAT: github
- name: Verify if there are any diff files after pre-commit
run: git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
- name: Verify if there are any new files after pre-commit
run: |
git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
unstaged_files=$(git ls-files --others --exclude-standard)
if [ -n "$unstaged_files" ]; then
echo "There are uncommitted new files, run pre-commit locally and commit again"
echo "$unstaged_files"
exit 1
fi

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
name: Build, test, and upload PyPI package
name: Build and publish PyPI package
on:
push:
@ -37,23 +37,27 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# for setuptools-scm
# https://github.com/actions/checkout/issues/249
fetch-depth: 0
- uses: hynek/build-and-inspect-python-package@b5076c307dc91924a82ad150cdd1533b444d3310 # v2.12.0
- name: Build and inspect python package
uses: hynek/build-and-inspect-python-package@c52c3a4710070b50470d903818a7b25115dcd076 # v2.13.0
- name: Run 'test-build.sh'
run: $GITHUB_WORKSPACE/tests/test-build.sh
# push to Test PyPI on
# - a new GitHub release is published
# - a PR is merged into main branch
publish-test-pypi:
name: Publish packages to test.pypi.org
# environment: publish-test-pypi
if: |
github.repository_owner == 'containers' && (
github.event.action == 'published' ||
@ -65,10 +69,9 @@ jobs:
id-token: write
runs-on: ubuntu-latest
needs: build-package
steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit
@ -88,7 +91,6 @@ jobs:
# - a new GitHub release is published
publish-pypi:
name: Publish release to pypi.org
# environment: publish-pypi
if: |
github.repository_owner == 'containers' && github.event.action == 'published'
permissions:
@ -96,13 +98,11 @@ jobs:
id-token: write
# allow gh release upload
contents: write
runs-on: ubuntu-latest
needs: build-package
steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit

79
.github/workflows/test-container.yml vendored Normal file
View File

@ -0,0 +1,79 @@
name: Test Container
on:
workflow_dispatch:
inputs:
inference_model:
description: Model to download and inference via RamaLama
required: false
default: llama3.2:3b
schedule:
- cron: '0 11 * * *' # Runs at 11AM UTC every morning
env:
LC_ALL: en_US.UTF-8
defaults:
run:
shell: bash
permissions:
contents: read
jobs:
test-lls-integration:
name: test-container
runs-on: ubuntu-latest
env:
INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b' }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit
- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0
- name: Install uv
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
with:
python-version: "3.11"
- name: Set Up Environment and Install Dependencies
run: |
# install podman
sudo apt-get -y install podman
# install packaged version of ramalama
uv venv
uv pip install ramalama
- name: Cache Ramalama store
id: ramalama-store-cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: ~/.local/share/ramalama
key: ramalama-store-${{ env.INFERENCE_MODEL }}
- name: Download model to serve with Ramalama
if: ${{ steps.ramalama-store-cache.outputs.cache-hit != 'true' }}
run: uv run ramalama pull ${{ env.INFERENCE_MODEL }}
- name: Run 'test-container.sh'
run: $GITHUB_WORKSPACE/tests/test-container.sh
- name: Run 'test-ui-linux.sh'
run: $GITHUB_WORKSPACE/tests/test-ui-linux.sh
- name: Upload logs
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: always()
with:
name: logs-test-container
retention-days: 5
path: |
**/*.log

View File

@ -1,34 +1,82 @@
name: Test External Providers
on:
workflow_dispatch:
push:
branches: [ main ]
branches:
- main
paths:
- 'src/ramalama_stack/**'
- 'tests/**'
- '.github/workflows/test-external-providers.yml'
- pyproject.toml
- requirements.txt
- uv.lock
pull_request:
branches: [ main ]
branches:
- main
paths:
- 'src/ramalama_stack/**'
- 'tests/**'
- '.github/workflows/test-external-providers.yml'
- pyproject.toml
- requirements.txt
- uv.lock
env:
LC_ALL: en_US.UTF-8
defaults:
run:
shell: bash
permissions:
contents: read
jobs:
test-external-providers:
name: test-external-providers
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
inference_model:
- 'llama3.2:3b'
- 'granite3.2:2b'
env:
INFERENCE_MODEL: llama3.2:3b-instruct-fp16
INFERENCE_MODEL: ${{ matrix.inference_model }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit
- name: Checkout repository
- name: Set INFERENCE_MODEL_NO_COLON for logging artifacts
run: echo "INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')" >> $GITHUB_ENV
- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0
- name: Install uv
uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
with:
python-version: "3.10"
python-version: "3.11"
- name: Set Up Environment and Install Dependencies
run: |
uv sync
uv pip install -e .
# temporary hack for file writing that should be done by the pip setup script
# https://github.com/containers/ramalama-stack/issues/53
mkdir -p ~/.llama/distributions/ramalama/
cp -r $GITHUB_WORKSPACE/src/ramalama_stack/providers.d/ ~/.llama/
cp $GITHUB_WORKSPACE/src/ramalama_stack/ramalama-run.yaml ~/.llama/distributions/ramalama/ramalama-run.yaml
- name: Run 'test-build.sh'
run: $GITHUB_WORKSPACE/tests/test-build.sh
- name: Cache Ramalama store
id: ramalama-store-cache
@ -44,11 +92,14 @@ jobs:
- name: Run 'test-external-providers.sh'
run: $GITHUB_WORKSPACE/tests/test-external-providers.sh
- name: Run 'test-rag.sh'
run: $GITHUB_WORKSPACE/tests/test-rag.sh
- name: Upload logs
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: always()
with:
name: logs
name: logs-test-external-providers-${{ env.INFERENCE_MODEL_NO_COLON }}
retention-days: 5
path: |
**/*.log

View File

@ -0,0 +1,86 @@
name: Test LLS Integration
on:
workflow_dispatch:
inputs:
inference_model:
description: Model to download and inference via RamaLama
required: false
default: llama3.2:3b
schedule:
- cron: '0 11 * * *' # Runs at 11AM UTC every morning
env:
LC_ALL: en_US.UTF-8
defaults:
run:
shell: bash
permissions:
contents: read
jobs:
test-lls-integration:
name: test-lls-integration
runs-on: ubuntu-latest
env:
INFERENCE_MODEL: ${{ inputs.inference_model || 'llama3.2:3b' }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1
with:
egress-policy: audit
- name: Checkout containers/ramalama-stack
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0
- name: Install uv
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
with:
python-version: "3.11"
- name: Set Up Environment and Install Dependencies
run: |
uv venv
# install packaged version of ramalama-stack
uv pip install ramalama-stack
# update llama-stack version to main branch
uv pip install git+https://github.com/meta-llama/llama-stack.git@main
# temporary hack for file writing that should be done by the pip setup script
# https://github.com/containers/ramalama-stack/issues/53
mkdir -p ~/.llama/distributions/ramalama/
cp -r $GITHUB_WORKSPACE/src/ramalama_stack/providers.d/ ~/.llama/
cp $GITHUB_WORKSPACE/src/ramalama_stack/ramalama-run.yaml ~/.llama/distributions/ramalama/ramalama-run.yaml
- name: Run 'test-build.sh'
run: $GITHUB_WORKSPACE/tests/test-build.sh
- name: Cache Ramalama store
id: ramalama-store-cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: ~/.local/share/ramalama
key: ramalama-store-${{ env.INFERENCE_MODEL }}
- name: Download model to serve with Ramalama
if: ${{ steps.ramalama-store-cache.outputs.cache-hit != 'true' }}
run: uv run ramalama pull ${{ env.INFERENCE_MODEL }}
- name: Run 'test-external-providers.sh'
run: $GITHUB_WORKSPACE/tests/test-external-providers.sh
- name: Upload logs
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: always()
with:
name: logs-test-lls-integration
retention-days: 5
path: |
**/*.log

1
.gitignore vendored
View File

@ -176,3 +176,4 @@ cython_debug/
# Anything additional
distributions/
src/ramalama_stack/_version.py
.python-version

View File

@ -16,14 +16,14 @@ repos:
- id: check-shebang-scripts-are-executable
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.4
rev: v0.11.12
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.6.3
rev: 0.7.9
hooks:
- id: uv-lock
- id: uv-export
@ -31,9 +31,20 @@ repos:
"--frozen",
"--no-hashes",
"--no-emit-project",
"--no-default-groups",
"--output-file=requirements.txt"
]
- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.10.0
hooks:
- id: shellcheck
- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
hooks:
- id: actionlint
args: [
"-shellcheck=",
"-pyflakes="
]

View File

@ -1 +0,0 @@
3.10

316
CONTRIBUTING.md Normal file
View File

@ -0,0 +1,316 @@
# Contributing to ramalama-stack
We'd love to have you join the community!
The following summarizes the processes that we follow.
## Topics
* [Reporting Issues](#reporting-issues)
* [Working On Issues](#working-on-issues)
* [Contributing To ramalama-stack](#contributing-to-ramalama-stack-1)
* [Submitting Pull Requests](#submitting-pull-requests)
* [Communications](#communications)
* [Code of Conduct](#code-of-conduct)
## Reporting Issues
Before reporting an issue, check our backlog of [open issues](https://github.com/containers/ramalama-stack/issues) to see if someone else has already reported it.
If so, feel free to add your scenario, or additional information, to the discussion.
Or simply "subscribe" to it to be notified when it is updated.
Please do not add comments like "+1" or "I have this issue as well" without adding any new information.
Instead, please add a thumbs-up emoji to the original report.
Note: Older closed issues/PRs are automatically locked.
If you have a similar problem please open a new issue instead of commenting.
If you find a new issue with the project we'd love to hear about it!
The most important aspect of a bug report is that it includes enough information for us to reproduce it.
To make this easier, there are three types of issue templates you can use.
* If you have a bug to report, please use the *Bug Report* template.
* If you have an idea to propose, please use the *Feature Request* template.
* If your issue is something else, please use the default *Blank issue* template.
Please include as much detail as possible, including all requested fields in the template.
Not having all requested information makes it much harder to find and fix issues.
A reproducer is the best thing you can include.
Reproducers make finding and fixing issues much easier for maintainers.
The easier it is for us to reproduce a bug, the faster it'll be fixed!
Please don't include any private/sensitive information in your issue!
Security issues should NOT be reported via Github and should instead be reported via the process described [here](https://github.com/containers/common/blob/main/SECURITY.md).
## Working On Issues
Once you have decided to contribute to ramalama-stack by working on an issue, check our backlog of [open issues](https://github.com/containers/ramalama-stack/issues) looking for any that are unassigned.
If you want to work on a specific issue that is already assigned but does not appear to be actively being worked on, please ping the assignee in the issue and ask if you can take over.
If they do not respond after several days, you can notify a maintainer to have the issue reassigned.
When working on an issue, please assign it to yourself.
If you lack permissions to do so, you can ping the `@containers/ramalama-stack-maintainers` group to have a maintainer set you as assignee.
## Contributing To ramalama-stack
This section describes how to make a contribution to ramalama-stack.
### Prepare your environment
The minimum version of Python required to use ramalama-stack is Python 3.11.
### Fork and clone ramalama-stack
First, you need to fork this project on GitHub.
Then clone your fork locally:
```shell
$ git clone git@github.com:<you>/ramalama-stack
$ cd ./ramalama-stack/
```
### Install required tools
We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments.
You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).
You can install the dependencies by running:
```bash
cd ramalama-stack
uv sync
source .venv/bin/activate
```
> [!NOTE]
> You can use a specific version of Python with `uv` by adding the `--python <version>` flag (e.g. `--python 3.11`)
> Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`.
> For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/).
### Adding dependencies
Please add dependencies using the [uv-documented approach](https://docs.astral.sh/uv/concepts/projects/dependencies/#adding-dependencies).
This should update both the `pyproject.toml` and the `uv.lock` file.
The `requirements.txt` file should be updated as well by `pre-commit` - you can also do this manually via `uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt`.
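For example, the end-to-end flow might look like the following sketch (`some-package` is a placeholder, not a real dependency of this project):
```bash
# Add a dependency; uv updates pyproject.toml and uv.lock
uv add some-package
# Regenerate requirements.txt (the pre-commit uv-export hook does the same)
uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
# Make sure the environment still resolves and the hooks pass before committing
uv sync
pre-commit run --all-files
```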
## Testing
ramalama-stack provides a small suite of tests in the `tests/` directory.
Most pull requests should be accompanied by test changes covering the changes in the PR.
Pull requests without tests will receive additional scrutiny from maintainers and may be blocked from merging unless tests are added.
Maintainers will decide if tests are not necessary during review.
### Types of Tests
There are several types of tests run by ramalama-stack's upstream CI.
* Pre-commit checks
* Functional testing
* Integration testing
* PyPI build and upload testing
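The functional and integration checks are shell scripts under `tests/` that the CI workflows invoke directly (`tests/test-build.sh`, `tests/test-external-providers.sh`, `tests/test-rag.sh`). A rough sketch for running them locally, assuming a RamaLama server is already serving the model named in `INFERENCE_MODEL` and the workaround files from the README are in place:
```bash
# Activate the uv-managed environment (see "Install required tools" above)
source .venv/bin/activate
# Model the test scripts expect RamaLama to be serving
export INFERENCE_MODEL=llama3.2:3b
# Functional checks, as run by the CI workflows
./tests/test-build.sh
./tests/test-external-providers.sh
./tests/test-rag.sh
```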
## Documentation
Make sure to update the documentation if needed.
ramalama-stack is documented via its [README](https://github.com/containers/ramalama-stack/blob/main/docs/README.md) and files in the `docs/` directory.
## Submitting Pull Requests
No Pull Request (PR) is too small!
Typos, additional comments in the code, new test cases, bug fixes, new features, more documentation, ... it's all welcome!
While bug fixes can first be identified via an "issue" in Github, that is not required.
It's ok to just open up a PR with the fix, but make sure you include the same information you would have included in an issue - like how to reproduce it.
PRs for new features should include some background on what use cases the new code is trying to address.
When possible and when it makes sense, try to break up larger PRs into smaller ones - it's easier to review smaller code changes.
But only if those smaller ones make sense as stand-alone PRs.
Regardless of the type of PR, all PRs should include:
* Well-documented code changes, both through comments in the code itself and high-quality commit messages.
* Additional tests. Ideally, they should fail w/o your code change applied.
* Documentation updates to reflect the changes made in the pull request.
Squash your commits into logical pieces of work that might want to be reviewed separately from the rest of the PRs.
Squashing down to just one commit is also acceptable since in the end the entire PR will be reviewed anyway.
When in doubt, squash.
When your PR fixes an issue, please note that by including `Fixes: #00000` in the commit description.
More details on this are below, in the "Describe your changes in Commit Messages" section.
The ramalama-stack repo follows a one-ack policy for merges.
PRs will be approved and merged by a repo owner.
Two reviews are required for a pull request to merge, including the sourcery.ai review.
### Describe your Changes in Commit Messages
Describe your problem.
Whether your patch is a one-line bug fix or 5000 lines of a new feature, there must be an underlying problem that motivated you to do this work.
Convince the reviewer that there is a problem worth fixing and that it makes sense for them to read past the first paragraph.
Describe user-visible impact.
Straight up crashes and lockups are pretty convincing, but not all bugs are that blatant.
Even if the problem was spotted during code review, describe the impact you think it can have on users.
Keep in mind that the majority of users run packages provided by distributions, so include anything that could help route your change downstream.
Quantify optimizations and trade-offs.
If you claim improvements in performance, memory consumption, stack footprint, or binary size, include
numbers that back them up.
But also describe non-obvious costs.
Optimizations usually aren't free but trade-offs between CPU, memory, and readability; or, when it comes to heuristics, between different workloads.
Describe the expected downsides of your optimization so that the reviewer can weigh costs against
benefits.
Once the problem is established, describe what you are actually doing about it in technical detail.
It's important to describe the change in plain English for the reviewer to verify that the code is behaving as you intend it to.
Solve only one problem per patch.
If your description starts to get long, that's a sign that you probably need to split up your patch.
If the patch fixes a logged bug entry, refer to that bug entry by number and URL.
If the patch follows from a mailing list discussion, give a URL to the mailing list archive.
Please format these lines as `Fixes:` followed by the URL or, for Github bugs, the bug number preceded by a #.
For example:
```
Fixes: #00000
Fixes: https://github.com/containers/ramalama-stack/issues/00000
Fixes: https://issues.redhat.com/browse/RHEL-00000
Fixes: RHEL-00000
```
However, try to make your explanation understandable without external resources.
In addition to giving a URL to a mailing list archive or bug, summarize the relevant points of the discussion that led to the patch as submitted.
If you want to refer to a specific commit, don't just refer to the SHA-1 ID of the commit.
Please also include the one-line summary of the commit, to make it easier for reviewers to know what it is about. If the commit was merged in GitHub, referring to a GitHub PR number is also a good option, as that will retain all discussion from development, and makes including a summary less critical.
Examples:
```
Commit f641c2d9384e ("fix bug in rm -fa parallel deletes") [...]
PR #00000
```
When referring to a commit by SHA, you should also be sure to use at least the first twelve characters of the SHA-1 ID.
The ramalama-stack repository holds a lot of objects, making collisions with shorter IDs a real possibility.
Bear in mind that, even if there is no collision with your six-character ID now, that condition may change five years from now.
The following git config settings can be used to add a pretty format for outputting the above style in the git log or git show commands:
```
[core]
abbrev = 12
[pretty]
fixes = Fixes: %h (\"%s\")
```
### Sign your PRs
The sign-off is a line at the end of the explanation for the patch.
Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch.
The rules are simple: if you can certify the below (from [developercertificate.org](https://developercertificate.org/)):
```
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
```
Then you just add a line to every git commit message:
Signed-off-by: Joe Smith <joe.smith@email.com>
Use your real name (sorry, no pseudonyms or anonymous contributions).
If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`.
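For example, a one-time git configuration plus the `-s` flag produces the sign-off automatically:
```bash
# One-time setup (use your real name and email)
git config user.name "Joe Smith"
git config user.email "joe.smith@email.com"
# -s appends the Signed-off-by trailer to the commit message
git commit -s -m "fix: describe the problem this commit solves"
```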
### Continuous Integration
All pull requests automatically run ramalama-stack's test suite.
Automation always adds complexity, so the suite can occasionally fail for any number of reasons.
This includes post-merge testing on all branches, where you may occasionally see [red bars on the status graph](https://github.com/containers/ramalama-stack/blob/main/docs/ci.md).
Most notably, the tests will occasionally flake.
If you see a single test on your PR has failed, and you do not believe it is caused by your changes, you can rerun the tests.
If you lack permissions to rerun the tests, please ping the maintainers using the `@containers/ramalama-stack-maintainers` group and request that the failing test be rerun.
If you see multiple test failures, you may wish to check the status graph mentioned above.
When the graph shows mostly green bars on the right, it's a good indication the main branch is currently stable.
Alternating red/green bars is indicative of a testing "flake", and should be examined (anybody can do this):
* *One or a small handful of tests, on a single task, (i.e. specific distro/version)
where all others ran successfully:* Frequently the cause is networking or a brief
external service outage. The failed tasks may simply be re-run by pressing the
corresponding button on the task details page.
* *Multiple tasks failing*: Logically this should be due to some shared/common element.
If that element is identifiable as a networking or external service (e.g. packaging
repository outage), a re-run should be attempted.
* *All tasks are failing*: If a common element is **not** identifiable as
temporary (i.e. container registry outage), please seek assistance via
[the methods below](#communications) as this may be early indication of
a more serious problem.
In the (hopefully) rare case there are multiple, contiguous red bars, this is
a ***very bad*** sign. It means additional merges are occurring despite an uncorrected
or persistently faulty condition. This risks additional bugs being introduced
and further complication of necessary corrective measures. Most likely people
are aware and working on this, but it doesn't hurt [to confirm and/or try and help
if possible](#communications).
## Communications
If you need help, you can contact the maintainers using the channels mentioned in RamaLama's [communications](https://github.com/containers/ramalama/blob/main/README.md#community) document.
For discussions around issues/bugs and features, you can use the GitHub
[issues](https://github.com/containers/ramalama-stack/issues)
and
[PRs](https://github.com/containers/ramalama-stack/pulls)
tracking system.
## Code of Conduct
As contributors and maintainers of the projects under the [Containers](https://github.com/containers) repository,
and in the interest of fostering an open and welcoming community, we pledge to
respect all people who contribute through reporting issues, posting feature
requests, updating documentation, submitting pull requests or patches, and other
activities to any of the projects under the containers umbrella. The full code of conduct guidelines can be
found [here](https://github.com/containers/common/blob/main/CODE-OF-CONDUCT.md).
### Bot Interactions
ramalama-stack uses [sourcery.ai](https://sourcery.ai/) for AI code reviews.
You can read their docs [here](https://docs.sourcery.ai/Code-Review/#interacting-with-sourcery) on how to interact with the bot.

2
MANIFEST.in Normal file
View File

@ -0,0 +1,2 @@
recursive-include ramalama_stack/providers.d *
include ramalama_stack/ramalama-run.yaml

View File

@ -1,7 +1,80 @@
# ramalama-stack
[![PyPI version](https://img.shields.io/pypi/v/ramalama-stack.svg)](https://pypi.org/project/ramalama-stack/)
[![License](https://img.shields.io/pypi/l/ramalama-stack.svg)](https://github.com/containers/ramalama-stack/blob/main/LICENSE)
![Pre-Commit](https://github.com/containers/ramalama-stack/actions/workflows/pre-commit.yml/badge.svg?branch=main)
![Test External Providers](https://github.com/containers/ramalama-stack/actions/workflows/test-external-providers.yml/badge.svg?branch=main)
![PyPI](https://github.com/containers/ramalama-stack/actions/workflows/pypi.yml/badge.svg?branch=main)
[![PyPI version](https://img.shields.io/pypi/v/ramalama_stack.svg)](https://pypi.org/project/ramalama-stack/)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/ramalama-stack)](https://pypi.org/project/ramalama-stack/)
[![License](https://img.shields.io/pypi/l/ramalama_stack.svg)](https://github.com/containers/ramalama-stack/blob/main/LICENSE)
An external provider for [Llama Stack](https://github.com/meta-llama/llama-stack) allowing for the use of [RamaLama](https://ramalama.ai/) for inference.
## Installing
You can install `ramalama-stack` from PyPI via `pip install ramalama-stack`.
This will install Llama Stack and RamaLama as well if they are not installed already.
## Usage
> [!WARNING]
> The following workaround is currently needed to run this provider - see https://github.com/containers/ramalama-stack/issues/53 for more details
> ```bash
> curl --create-dirs --output ~/.llama/providers.d/remote/inference/ramalama.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.2.1/src/ramalama_stack/providers.d/remote/inference/ramalama.yaml
> curl --create-dirs --output ~/.llama/distributions/ramalama/ramalama-run.yaml https://raw.githubusercontent.com/containers/ramalama-stack/refs/tags/v0.2.1/src/ramalama_stack/ramalama-run.yaml
> ```
1. First you will need a RamaLama server running - see [the RamaLama project](https://github.com/containers/ramalama) docs for more information.
2. Ensure you set your `INFERENCE_MODEL` environment variable to the name of the model you have running via RamaLama.
3. You can then run the RamaLama external provider via `llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml`, as sketched below.
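Putting those three steps together, a minimal sketch might look like this (the model name is only an example; use whatever model you have pulled with RamaLama):
```bash
# 1. Serve a model with RamaLama in one terminal
ramalama serve llama3.2:3b
# 2. In another terminal, tell the provider which model is being served
export INFERENCE_MODEL=llama3.2:3b
# 3. Start the Llama Stack server with the RamaLama run config
llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml
```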
> [!NOTE]
> You can also run the RamaLama external provider inside of a container via [Podman](https://podman.io/)
> ```bash
> podman run \
> --net=host \
> --env RAMALAMA_URL=http://0.0.0.0:8080 \
> --env INFERENCE_MODEL=$INFERENCE_MODEL \
> quay.io/ramalama/llama-stack
> ```
This will start a Llama Stack server, which uses port 8321 by default. You can verify that it works by configuring the Llama Stack Client to run against this server and
sending a test request.
- If your client is running on the same machine as the server, you can run `llama-stack-client configure --endpoint http://0.0.0.0:8321 --api-key none`
- If your client is running on a different machine, you can run `llama-stack-client configure --endpoint http://<hostname>:8321 --api-key none`
- The client should give you a message similar to `Done! You can now use the Llama Stack Client CLI with endpoint <endpoint>`
- You can then test the server by running `llama-stack-client inference chat-completion --message "tell me a joke"`, which should return something like:
```bash
ChatCompletionResponse(
completion_message=CompletionMessage(
content='A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs
and Schrödinger\'s cat?" The librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."',
role='assistant',
stop_reason='end_of_turn',
tool_calls=[]
),
logprobs=None,
metrics=[
Metric(metric='prompt_tokens', value=14.0, unit=None),
Metric(metric='completion_tokens', value=63.0, unit=None),
Metric(metric='total_tokens', value=77.0, unit=None)
]
)
```
## Llama Stack User Interface
Llama Stack includes an experimental user interface; check it out
[here](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distribution/ui).
To deploy the UI, run this:
```bash
podman run -d --rm --network=container:ramalama --name=streamlit quay.io/redhat-et/streamlit_client:0.1.0
```
> [!NOTE]
> If running on macOS (not Linux), `--network=host` doesn't work. You'll need to publish the additional ports `8321:8321` and `8501:8501` with the `ramalama serve` command,
> then run with `--network=container:ramalama`.
>
> If running on Linux, use `--network=host` or `-p 8501:8501` instead. The streamlit container will be able to access the ramalama endpoint with either.
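For instance, on Linux the UI container from above can simply share the host network (a sketch based on the note; the image tag is the one shown earlier):
```bash
# Linux only: host networking lets the UI reach the Llama Stack endpoint on port 8321
podman run -d --rm --network=host --name=streamlit quay.io/redhat-et/streamlit_client:0.1.0
```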

9
docs/ci.md Normal file
View File

@ -0,0 +1,9 @@
# ramalama-stack CI
| Job | Description | Status |
| --- | ----------- | ------ |
| [Pre-commit](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/pre-commit.yml) | Runs pre-commit checks | ![Pre-commit](https://github.com/containers/ramalama-stack/actions/workflows/pre-commit.yml/badge.svg?branch=main) |
| [Test External Providers](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-external-providers.yml) | Tests the current `ramalama-stack` branch against the latest released versions of `ramalama` and `llama-stack` | ![Test External Providers](https://github.com/containers/ramalama-stack/actions/workflows/test-external-providers.yml/badge.svg?branch=main) |
| [Test LLS Integration](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-lls-integration.yml) | Tests the latest released versions of `ramalama` and `ramalama-stack` against the current `llama-stack` main branch | ![Test LLS Integration](https://github.com/containers/ramalama-stack/actions/workflows/test-lls-integration.yml/badge.svg?branch=main) |
| [Test Container](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/test-container.yml) | Tests the latest tagged container image of `ramalama/llama-stack` run via Podman | ![Test Container](https://github.com/containers/ramalama-stack/actions/workflows/test-container.yml/badge.svg?branch=main) |
| [Build and publish PyPI package](https://github.com/containers/ramalama-stack/blob/main/.github/workflows/pypi.yml) | Builds, tests, and publishes `ramalama-stack` package | ![Build and publish PyPI package](https://github.com/containers/ramalama-stack/actions/workflows/pypi.yml/badge.svg?branch=main) |

View File

@ -4,32 +4,47 @@ build-backend = "setuptools.build_meta"
[project]
name = "ramalama-stack"
version = "0.1.1"
description = "Llama Stack Provider for Ramalama Inference"
description = "An external provider for Llama Stack allowing for the use of RamaLama for inference."
authors = [{ name = "The RamaLama Stack Authors" }]
readme = "README.md"
license = "Apache-2.0"
license-files = ["LICENSE"]
keywords = ["ramalama", "llama", "AI"]
requires-python = ">=3.10"
requires-python = ">=3.11"
dynamic = ["version"]
dependencies = [
"llama-stack>=0.2.3",
"ramalama>=0.7.5",
"urllib3",
"faiss-cpu",
"autoevals",
"six",
"pydantic",
"aiohttp",
"aiosqlite",
"datasets",
"fastapi",
"httpx",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"requests",
"uvicorn",
"aiohttp>=3.12.2",
"aiosqlite>=0.21.0",
"autoevals>=0.0.129",
"blobfile>=3.0.0",
"chardet>=3.0.0",
"datasets>=3.6.0",
"fastapi>=0.115.12",
"httpx>=0.28.1",
"llama-stack==0.2.9",
"mcp>=1.9.2",
"numpy>=2.2.6",
"openai>=1.82.0",
"opentelemetry-exporter-otlp-proto-http>=1.33.1",
"opentelemetry-sdk>=1.33.1",
"peft>=0.15.2",
"psutil>=7.0.0",
"pydantic>=2.11.5",
"pymilvus>=2.5.10",
"ramalama==0.9.0",
"requests>=2.32.3",
"sentence-transformers>=3.0.0",
"six>=1.17.0",
"sqlalchemy>=2.0.41",
"torch>=2.7.0",
"trl>=0.18.1",
"urllib3>=2.4.0",
"uvicorn>=0.34.2",
]
[dependency-groups]
dev = [
"pre-commit>=3.0.4,<4.0",
]
[project.urls]
@ -44,9 +59,10 @@ local_scheme = "no-local-version"
[tool.setuptools]
package-dir = { "" = "src" }
include-package-data = true
[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }
[tool.setuptools.package-data]
"ramalama_stack" = ["providers.d/**/*", "ramalama-run.yaml"]
[tool.ruff]
extend-exclude = ["*.ipynb"]

View File

@ -1,5 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
-r requirements.txt
pre-commit>=3.0.4,<4.0

View File

@ -1,3 +1,484 @@
ramalama>=0.7.5
llama-stack>=0.2.3
llama-stack-client>=0.2.2
# This file was autogenerated by uv via the following command:
# uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
accelerate==1.7.0
# via
# peft
# trl
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.12.7
# via
# fsspec
# llama-stack
# ramalama-stack
aiosignal==1.3.2
# via aiohttp
aiosqlite==0.21.0
# via ramalama-stack
annotated-types==0.7.0
# via pydantic
anyio==4.9.0
# via
# httpx
# llama-stack-client
# mcp
# openai
# sse-starlette
# starlette
argcomplete==3.6.2
# via ramalama
attrs==25.3.0
# via
# aiohttp
# jsonschema
# referencing
autoevals==0.0.129
# via ramalama-stack
blobfile==3.0.0
# via ramalama-stack
braintrust-core==0.0.59
# via autoevals
certifi==2025.4.26
# via
# httpcore
# httpx
# requests
chardet==5.2.0
# via ramalama-stack
charset-normalizer==3.4.2
# via requests
chevron==0.14.0
# via autoevals
click==8.2.1
# via
# llama-stack-client
# uvicorn
colorama==0.4.6 ; sys_platform == 'win32'
# via
# click
# tqdm
datasets==3.6.0
# via
# ramalama-stack
# trl
deprecated==1.2.18
# via
# opentelemetry-api
# opentelemetry-exporter-otlp-proto-http
# opentelemetry-semantic-conventions
dill==0.3.8
# via
# datasets
# multiprocess
distro==1.9.0
# via
# llama-stack-client
# openai
ecdsa==0.19.1
# via python-jose
fastapi==0.115.12
# via ramalama-stack
filelock==3.18.0
# via
# blobfile
# datasets
# huggingface-hub
# torch
# transformers
fire==0.7.0
# via llama-stack
frozenlist==1.6.0
# via
# aiohttp
# aiosignal
fsspec==2025.3.0
# via
# datasets
# huggingface-hub
# torch
googleapis-common-protos==1.70.0
# via opentelemetry-exporter-otlp-proto-http
greenlet==3.2.2 ; (python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')
# via sqlalchemy
grpcio==1.67.1
# via pymilvus
h11==0.16.0
# via
# httpcore
# llama-stack
# uvicorn
hf-xet==1.1.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
# llama-stack
# llama-stack-client
# mcp
# openai
# ramalama-stack
httpx-sse==0.4.0
# via mcp
huggingface-hub==0.32.4
# via
# accelerate
# datasets
# llama-stack
# peft
# sentence-transformers
# tokenizers
# transformers
idna==3.10
# via
# anyio
# httpx
# requests
# yarl
importlib-metadata==8.6.1
# via opentelemetry-api
jinja2==3.1.6
# via
# llama-stack
# torch
jiter==0.10.0
# via openai
joblib==1.5.1
# via scikit-learn
jsonschema==4.24.0
# via
# autoevals
# llama-stack
jsonschema-specifications==2025.4.1
# via jsonschema
llama-stack==0.2.9
# via ramalama-stack
llama-stack-client==0.2.9
# via llama-stack
lxml==5.4.0
# via blobfile
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via jinja2
mcp==1.9.2
# via ramalama-stack
mdurl==0.1.2
# via markdown-it-py
milvus-lite==2.4.12 ; sys_platform != 'win32'
# via pymilvus
mpmath==1.3.0
# via sympy
multidict==6.4.4
# via
# aiohttp
# yarl
multiprocess==0.70.16
# via datasets
networkx==3.5
# via torch
numpy==2.2.6
# via
# accelerate
# datasets
# pandas
# peft
# ramalama-stack
# scikit-learn
# scipy
# transformers
nvidia-cublas-cu12==12.6.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
nvidia-cuda-cupti-cu12==12.6.80 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cuda-nvrtc-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cuda-runtime-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cudnn-cu12==9.5.1.17 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cufft-cu12==11.3.0.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cufile-cu12==1.11.1.6 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-curand-cu12==10.3.7.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cusolver-cu12==11.7.1.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cusparse-cu12==12.5.4.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cusolver-cu12
# torch
nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cufft-cu12
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
# torch
nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
openai==1.84.0
# via
# llama-stack
# ramalama-stack
opentelemetry-api==1.33.1
# via
# opentelemetry-exporter-otlp-proto-http
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.33.1
# via opentelemetry-exporter-otlp-proto-http
opentelemetry-exporter-otlp-proto-http==1.33.1
# via ramalama-stack
opentelemetry-proto==1.33.1
# via
# opentelemetry-exporter-otlp-proto-common
# opentelemetry-exporter-otlp-proto-http
opentelemetry-sdk==1.33.1
# via
# opentelemetry-exporter-otlp-proto-http
# ramalama-stack
opentelemetry-semantic-conventions==0.54b1
# via opentelemetry-sdk
packaging==25.0
# via
# accelerate
# datasets
# huggingface-hub
# peft
# transformers
pandas==2.2.3
# via
# datasets
# llama-stack-client
# pymilvus
peft==0.15.2
# via ramalama-stack
pillow==11.2.1
# via
# llama-stack
# sentence-transformers
polyleven==0.9.0
# via autoevals
prompt-toolkit==3.0.51
# via
# llama-stack
# llama-stack-client
propcache==0.3.1
# via
# aiohttp
# yarl
protobuf==5.29.5
# via
# googleapis-common-protos
# opentelemetry-proto
# pymilvus
psutil==7.0.0
# via
# accelerate
# peft
# ramalama-stack
pyaml==25.5.0
# via llama-stack-client
pyarrow==20.0.0
# via datasets
pyasn1==0.6.1
# via
# python-jose
# rsa
pycryptodomex==3.23.0
# via blobfile
pydantic==2.11.5
# via
# fastapi
# llama-stack
# llama-stack-client
# mcp
# openai
# pydantic-settings
# ramalama-stack
pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.9.1
# via mcp
pygments==2.19.1
# via rich
pymilvus==2.5.10
# via ramalama-stack
python-dateutil==2.9.0.post0
# via pandas
python-dotenv==1.1.0
# via
# llama-stack
# pydantic-settings
# pymilvus
python-jose==3.5.0
# via llama-stack
python-multipart==0.0.20
# via mcp
pytz==2025.2
# via pandas
pyyaml==6.0.2
# via
# accelerate
# autoevals
# datasets
# huggingface-hub
# peft
# pyaml
# transformers
ramalama==0.9.0
# via ramalama-stack
referencing==0.36.2
# via
# jsonschema
# jsonschema-specifications
regex==2024.11.6
# via
# tiktoken
# transformers
requests==2.32.3
# via
# datasets
# huggingface-hub
# llama-stack
# opentelemetry-exporter-otlp-proto-http
# ramalama-stack
# tiktoken
# transformers
rich==14.0.0
# via
# llama-stack
# llama-stack-client
rpds-py==0.25.1
# via
# jsonschema
# referencing
rsa==4.9.1
# via python-jose
safetensors==0.5.3
# via
# accelerate
# peft
# transformers
scikit-learn==1.7.0
# via sentence-transformers
scipy==1.15.3
# via
# scikit-learn
# sentence-transformers
sentence-transformers==4.1.0
# via ramalama-stack
setuptools==80.9.0
# via
# llama-stack
# pymilvus
# torch
# triton
six==1.17.0
# via
# ecdsa
# python-dateutil
# ramalama-stack
sniffio==1.3.1
# via
# anyio
# llama-stack-client
# openai
sqlalchemy==2.0.41
# via ramalama-stack
sse-starlette==2.3.6
# via mcp
starlette==0.46.2
# via
# fastapi
# llama-stack
# mcp
sympy==1.14.0
# via torch
termcolor==3.1.0
# via
# fire
# llama-stack
# llama-stack-client
threadpoolctl==3.6.0
# via scikit-learn
tiktoken==0.9.0
# via llama-stack
tokenizers==0.21.1
# via transformers
torch==2.7.0
# via
# accelerate
# peft
# ramalama-stack
# sentence-transformers
tqdm==4.67.1
# via
# datasets
# huggingface-hub
# llama-stack-client
# milvus-lite
# openai
# peft
# sentence-transformers
# transformers
transformers==4.52.4
# via
# peft
# sentence-transformers
# trl
triton==3.3.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
trl==0.18.1
# via ramalama-stack
typing-extensions==4.14.0
# via
# aiosqlite
# anyio
# fastapi
# huggingface-hub
# llama-stack-client
# openai
# opentelemetry-sdk
# pydantic
# pydantic-core
# referencing
# sentence-transformers
# sqlalchemy
# torch
# typing-inspection
typing-inspection==0.4.1
# via
# pydantic
# pydantic-settings
tzdata==2025.2
# via pandas
ujson==5.10.0
# via pymilvus
urllib3==2.4.0
# via
# blobfile
# ramalama-stack
# requests
uvicorn==0.34.3
# via
# mcp
# ramalama-stack
wcwidth==0.2.13
# via prompt-toolkit
wrapt==1.17.2
# via deprecated
xxhash==3.5.0
# via datasets
yarl==1.20.0
# via aiohttp
zipp==3.22.0
# via importlib-metadata

setup.py (new file, +37 lines)

@@ -0,0 +1,37 @@
from setuptools import setup
from setuptools.command.install import install

import os
import shutil


class CustomInstallCommand(install):
    def run(self):
        # Run the standard install
        super().run()

        # Write 'providers.d' to '~/.llama/providers.d'
        # This allows users to see the remote providers
        providers_dir = os.path.join(self.install_lib, "ramalama_stack", "providers.d")
        target_dir_1 = os.path.expanduser("~/.llama/providers.d")
        try:
            os.makedirs(target_dir_1, exist_ok=True)
            shutil.copytree(providers_dir, target_dir_1, dirs_exist_ok=True)
            print(f"Copied {providers_dir} to {target_dir_1}")
        except Exception as error:
            print(f"Failed to copy {providers_dir} to {target_dir_1}. Error: {error}")
            raise

        # Write 'ramalama-run.yaml' to '~/.llama/distributions/ramalama'
        # This allows users to run the stack
        run_yaml = os.path.join(self.install_lib, "ramalama_stack", "ramalama-run.yaml")
        target_dir_2 = os.path.expanduser("~/.llama/distributions/ramalama")
        try:
            os.makedirs(target_dir_2, exist_ok=True)
            shutil.copy(run_yaml, target_dir_2)
            print(f"Copied {run_yaml} to {target_dir_2}")
        except Exception as error:
            print(f"Failed to copy {run_yaml} to {target_dir_2}. Error: {error}")
            raise


setup(cmdclass={"install": CustomInstallCommand})
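After installation, a quick way to confirm these copies landed where the stack expects them is a minimal sketch like the following (it mirrors what tests/test-build.sh asserts and is illustrative only, not part of the repository):

from pathlib import Path

# Paths that setup.py's CustomInstallCommand is expected to populate
providers_dir = Path.home() / ".llama" / "providers.d"
run_yaml = Path.home() / ".llama" / "distributions" / "ramalama" / "ramalama-run.yaml"

for target in (providers_dir, run_yaml):
    status = "found" if target.exists() else "missing"
    print(f"{target}: {status}")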


@@ -11,8 +11,8 @@ def get_provider_spec() -> ProviderSpec:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="ramalama",
-            pip_packages=["ramalama>=0.7.5", "faiss-cpu"],
+            pip_packages=["ramalama>=0.8.5", "pymilvus"],
             config_class="config.RamalamaImplConfig",
-            module="ramalama_adapter",
+            module="ramalama_stack",
         ),
     )


@@ -1,6 +1,6 @@
 adapter:
   adapter_type: ramalama
-  pip_packages: ["ramalama>=0.7.5", "faiss-cpu"]
+  pip_packages: ["ramalama>=0.8.5", "pymilvus"]
   config_class: ramalama_stack.config.RamalamaImplConfig
   module: ramalama_stack
 api_dependencies: []


@@ -5,6 +5,7 @@ apis:
 - datasetio
 - eval
 - inference
+- post_training
 - safety
 - scoring
 - telemetry
@@ -20,13 +21,10 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
+  - provider_id: milvus
+    provider_type: inline::milvus
     config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:distributions/ramalama}/faiss_store.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/milvus_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -40,13 +38,16 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/responses_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:llamastack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -81,6 +82,13 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  post_training:
+  - provider_id: huggingface
+    provider_type: inline::huggingface
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
@@ -92,20 +100,32 @@ providers:
     config:
       api_key: ${env.TAVILY_SEARCH_API_KEY:}
       max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/inference_store.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ramalama
   model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []
@@ -116,8 +136,8 @@ tool_groups:
   provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
 server:
   port: 8321
-external_providers_dir: ./providers.d
+external_providers_dir: ${env.EXTERNAL_PROVIDERS_DIR:~/.llama/providers.d}
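With this run config, the models: section registers both the ${env.INFERENCE_MODEL} LLM (served by the ramalama provider) and the all-MiniLM-L6-v2 embedding model. A minimal Python sketch to confirm both registrations from a client, assuming the server is already listening on port 8321 as configured above (the snippet is illustrative and not part of the change):

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321", api_key="none")

# List every registered model and its type; expect one 'llm' and one 'embedding' entry
for model in client.models.list():
    print(model.identifier, getattr(model, "model_type", "unknown"))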


@@ -191,7 +191,6 @@ class RamalamaInferenceAdapter(Inference, ModelsProtocolPrivate):
        )

    async def register_model(self, model: Model) -> Model:
        model = await self.register_helper.register_model(model)
        res = await self.client.models.list()
        available_models = [m.id async for m in res]
        # Ramalama handles paths on MacOS and Linux differently

tests/test-build.sh (new executable file, +37 lines)

@@ -0,0 +1,37 @@
#!/bin/bash

function test_file_writes {
    PROVIDER_DIR="$HOME/.llama/providers.d"
    RUN_YAML="$HOME/.llama/distributions/ramalama/ramalama-run.yaml"

    # check for PROVIDER_DIR
    if [ -d "$PROVIDER_DIR" ]; then
        echo "$PROVIDER_DIR found"
    else
        echo "$PROVIDER_DIR not found"
        echo "===> test_file_writes: fail"
        exit 1
    fi

    # check for RUN_YAML
    if [ -f "$RUN_YAML" ]; then
        echo "$RUN_YAML found"
    else
        echo "$RUN_YAML not found"
        echo "===> test_file_writes: fail"
        exit 1
    fi

    # return if all checks are successful
    echo "===> test_file_writes: pass"
    return
}

main() {
    echo "===> starting 'test-build'..."
    test_file_writes
    echo "===> 'test-build' completed successfully!"
}

main "$@"
exit 0

tests/test-container.sh (new executable file, +20 lines)

@@ -0,0 +1,20 @@
#!/bin/bash

main() {
    echo "===> starting 'test-container'..."
    start_and_wait_for_ramalama_server
    test_ramalama_models
    test_ramalama_chat_completion
    start_and_wait_for_llama_stack_container
    test_llama_stack_models
    test_llama_stack_openai_models
    test_llama_stack_chat_completion
    test_llama_stack_openai_chat_completion
    echo "===> 'test-container' completed successfully!"
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"

main "$@"
exit 0
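The container test reuses the readiness convention shared by the shell helpers in tests/utils.sh: RamaLama answers {"status":"ok"} on http://localhost:8080/health and Llama Stack answers {"status":"OK"} on http://localhost:8321/v1/health. A minimal Python polling sketch of that convention, using the requests package already present in requirements.txt (an assumed helper, not part of the test suite):

import time

import requests

def wait_for(url: str, expected_status: str, attempts: int = 60) -> bool:
    """Poll a health endpoint until it reports the expected status value."""
    for _ in range(attempts):
        try:
            if requests.get(url, timeout=2).json().get("status") == expected_status:
                return True
        except requests.RequestException:
            pass
        time.sleep(1)
    return False

assert wait_for("http://localhost:8080/health", "ok")
assert wait_for("http://localhost:8321/v1/health", "OK")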


@@ -1,96 +1,22 @@
#!/bin/bash

function start_and_wait_for_ramalama_server {
    # Start ramalama serve in background with logging to 'ramalama.log'
    nohup uv run ramalama serve "$INFERENCE_MODEL" > ramalama.log 2>&1 &
    RAMALAMA_PID=$!
    echo "Started RamaLama with PID: $RAMALAMA_PID"

    # Wait for ramalama to be ready by doing a health check
    echo "Waiting for RamaLama server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to RamaLama..."
        resp=$(curl -s http://localhost:8080/health)
        if [ "$resp" == '{"status":"ok"}' ]; then
            echo "RamaLama server is up and responding!"
            break
        fi
        if [ "$i" -eq 60 ]; then
            echo "RamaLama server failed to start or respond"
            echo "RamaLama logs:"
            cat ramalama.log
            exit 1
        fi
        sleep 1
    done
}

function start_and_wait_for_llama_stack_server {
    # Start llama stack run with logging to 'lls.log'
    LLAMA_STACK_LOG_FILE=lls.log nohup uv run llama stack run run.yaml --image-type venv &
    LLS_PID=$!
    echo "Started Llama Stack with PID: $LLS_PID"

    # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
    echo "Waiting for Llama Stack server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to Llama Stack..."
        resp=$(curl -s http://localhost:8321/v1/health)
        if [ "$resp" == '{"status":"OK"}' ]; then
            echo "Llama Stack server is up!"
            if grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml" lls.log; then
                echo "Llama Stack server is using RamaLama provider"
                return
            else
                echo "Llama Stack server is not using RamaLama provider"
                echo "Server logs:"
                cat lls.log
                exit 1
            fi
        fi
        sleep 1
    done
    echo "Llama Stack server failed to start"
    echo "Server logs:"
    cat lls.log
    exit 1
}

function test_ramalama_chat_completion {
    echo "===> test_ramalama_chat_completion: start"
    # shellcheck disable=SC2016
    resp=$(curl -s -X POST http://localhost:8080/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d '{"messages": [{"role": "user", "content": "Hello"}], "model": "$INFERENCE_MODEL"}')
    if echo "$resp" | grep -q "choices"; then
        echo "===> test_ramalama_chat_completion: pass"
        return
    else
        echo "===> test_ramalama_chat_completion: fail"
        exit 1
    fi
}

function test_llama_stack_chat_completion {
    echo "===> test_llama_stack_chat_completion: start"
    nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
    if nohup uv run llama-stack-client inference chat-completion --message "tell me a joke" | grep -q "completion_message"; then
        echo "===> test_llama_stack_chat_completion: pass"
        return
    else
        echo "===> test_llama_stack_chat_completion: fail"
        exit 1
    fi
}

main() {
    echo "===> starting 'test-external-providers'..."
    start_and_wait_for_ramalama_server
    test_ramalama_models
    test_ramalama_chat_completion
    start_and_wait_for_llama_stack_server
    test_llama_stack_models
    test_llama_stack_openai_models
    test_llama_stack_chat_completion
    test_llama_stack_openai_chat_completion
    echo "===> 'test-external-providers' completed successfully!"
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC2153,SC2034
INFERENCE_MODEL_NO_COLON=$(echo "$INFERENCE_MODEL" | tr ':' '_')
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"

main "$@"
exit 0

tests/test-rag.py (new file, +166 lines)

@@ -0,0 +1,166 @@
import os
import uuid

from llama_stack_client import LlamaStackClient, RAGDocument


def setup_client():
    """Initialize Llama Stack client with configuration"""
    base_url = "http://localhost:8321"
    client = LlamaStackClient(base_url=base_url, api_key="none", timeout=10.0)
    print(f"Connected to Llama Stack server at {base_url}")
    return client


def setup_inference_params():
    """Configure inference parameters"""
    model_id = os.getenv(
        "INFERENCE_MODEL",
        "bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf",
    )

    temperature = float(os.getenv("TEMPERATURE", 0.0))
    if temperature > 0.0:
        top_p = float(os.getenv("TOP_P", 0.95))
        strategy = {"type": "top_p", "temperature": temperature, "top_p": top_p}
    else:
        strategy = {"type": "greedy"}

    max_tokens = int(os.getenv("MAX_TOKENS", 4096))

    sampling_params = {
        "strategy": strategy,
        "max_tokens": max_tokens,
    }

    stream_env = os.getenv("STREAM", "False")
    stream = stream_env == "True"

    print("Inference Parameters:")
    print(f"\tModel: {model_id}")
    print(f"\tSampling Parameters: {sampling_params}")
    print(f"\tStream: {stream}")

    return model_id, sampling_params, stream


def setup_vector_db(client):
    """Setup vector database for RAG"""
    vector_db_id = f"test_vector_db_{uuid.uuid4().hex[:8]}"

    # Find embedding model from available models
    models = client.models.list()
    embedding_model = None
    for model in models:
        if hasattr(model, "model_type") and model.model_type == "embedding":
            embedding_model = model.identifier
            break

    if not embedding_model:
        raise Exception("No embedding model found")

    print(f"Using embedding model: {embedding_model}")

    # Register vector database
    client.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model=embedding_model,
        embedding_dimension=int(os.getenv("VDB_EMBEDDING_DIMENSION", 384)),
        provider_id=os.getenv("VDB_PROVIDER", "milvus"),
    )

    # Ingest simple test documents instead of external URLs
    test_content = [
        "RamaLama Stack is an external provider for Llama Stack that allows for the use of RamaLama for inference.",
        "Podman is a container management tool that provides a Docker-compatible command line interface without requiring a daemon.",
        "Podman can run containers rootlessly and provides robust security isolation.",
    ]

    documents = [
        RAGDocument(
            document_id=f"test_doc_{i}",
            content=content,
            mime_type="text/plain",
            metadata={"source": f"test_document_{i}"},
        )
        for i, content in enumerate(test_content)
    ]

    print(f"Ingesting {len(documents)} test documents into vector database...")
    client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=int(os.getenv("VECTOR_DB_CHUNK_SIZE", 128)),
    )

    print(f"Vector database '{vector_db_id}' setup complete")
    return vector_db_id


def run_rag_query(client, model_id, sampling_params, stream, vector_db_id, query):
    """Execute RAG query and return response"""
    print(f"\nUser> {query}")

    rag_response = client.tool_runtime.rag_tool.query(
        content=query, vector_db_ids=[vector_db_id]
    )

    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    prompt_context = rag_response.content
    extended_prompt = f"Please answer the given query using the context below.\n\nCONTEXT:\n{prompt_context}\n\nQUERY:\n{query}"
    messages.append({"role": "user", "content": extended_prompt})

    response = client.inference.chat_completion(
        messages=messages,
        model_id=model_id,
        sampling_params=sampling_params,
        stream=stream,
    )

    print("inference> ", end="")
    if stream:
        for chunk in response:
            if hasattr(chunk, "event") and hasattr(chunk.event, "delta"):
                if hasattr(chunk.event.delta, "text"):
                    print(chunk.event.delta.text, end="")
        print()
    else:
        print(response.completion_message.content)


def main():
    """Main function to run RAG test"""
    print("=== Llama Stack RAG Test ===")

    try:
        client = setup_client()
        model_id, sampling_params, stream = setup_inference_params()
        vector_db_id = setup_vector_db(client)

        queries = [
            "What is RamaLama Stack?",
            "What is Podman?",
            "Can Podman run in rootless mode?",
        ]

        print("\n=== Running RAG Queries ===")
        for query in queries:
            run_rag_query(
                client, model_id, sampling_params, stream, vector_db_id, query
            )
            print()

        print("=== RAG Test Complete ===")

    except Exception as e:
        print(f"Error: {e}")
        return 1

    return 0


if __name__ == "__main__":
    exit(main())
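The script is driven entirely by environment variables (INFERENCE_MODEL, TEMPERATURE, TOP_P, MAX_TOKENS, STREAM, VDB_PROVIDER, VDB_EMBEDDING_DIMENSION, VECTOR_DB_CHUNK_SIZE). A small sketch of how one might run it with non-default sampling from Python rather than the shell (an assumed invocation, not part of the repository):

import os
import subprocess
import sys

# Enable top_p sampling and streaming output for a single run of the RAG test
env = dict(os.environ, TEMPERATURE="0.7", TOP_P="0.9", STREAM="True")
subprocess.run([sys.executable, "tests/test-rag.py"], env=env, check=True)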

tests/test-rag.sh (new executable file, +40 lines)

@@ -0,0 +1,40 @@
#!/bin/bash

function test_rag_functionality {
    echo "===> test_rag_functionality: start"
    if uv run python tests/test-rag.py; then
        echo "===> test_rag_functionality: pass"
        return 0
    else
        echo "===> test_rag_functionality: fail"
        echo "RAG test script output above shows the failure details"
        return 1
    fi
}

main() {
    echo "===> starting 'test-rag'..."

    # Check if services are already running (from previous tests)
    if curl -s http://localhost:8321/v1/health >/dev/null 2>&1 && curl -s http://localhost:8080/health >/dev/null 2>&1; then
        echo "Using existing RamaLama and Llama Stack servers"
    else
        echo "Starting fresh servers for RAG test"
        start_and_wait_for_ramalama_server
        start_and_wait_for_llama_stack_server
    fi

    if test_rag_functionality; then
        echo "===> 'test-rag' completed successfully!"
    else
        echo "===> 'test-rag' failed!"
        exit 1
    fi
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"

main "$@"
exit 0

tests/test-ui-linux.sh (new executable file, +77 lines)

@@ -0,0 +1,77 @@
#!/bin/bash

function start_and_wait_for_streamlit_ui_linux {
    echo "Starting Streamlit UI for Linux..."
    podman run -d --rm --network=host --name=streamlit-ui quay.io/redhat-et/streamlit_client:0.1.0

    echo "Waiting for Streamlit UI to be ready..."
    for i in {1..30}; do
        echo "Attempt $i to connect to Streamlit UI..."
        if curl -s http://localhost:8501 >/dev/null 2>&1; then
            echo "Streamlit UI is up and responding on port 8501!"
            return 0
        fi
        if [ "$i" -eq 30 ]; then
            echo "Streamlit UI failed to start or respond"
            echo "Container logs:"
            podman logs streamlit-ui
            return 1
        fi
        sleep 2
    done
}

function test_streamlit_ui_linux {
    echo "===> test_streamlit_ui_linux: start"
    if start_and_wait_for_streamlit_ui_linux; then
        # Test that the UI is accessible and returns HTML content
        resp=$(curl -sS http://localhost:8501)
        if echo "$resp" | grep -q -i "streamlit\|html"; then
            echo "===> test_streamlit_ui_linux: pass"
            return 0
        else
            echo "===> test_streamlit_ui_linux: fail - UI not serving expected content"
            echo "Response: $resp"
            return 1
        fi
    else
        echo "===> test_streamlit_ui_linux: fail - UI failed to start"
        return 1
    fi
}

function cleanup_streamlit_ui {
    echo "Cleaning up Streamlit UI container..."
    podman rm -f streamlit-ui >/dev/null 2>&1 || true
}

main() {
    echo "===> starting 'test-ui-linux'..."

    # Only run on Linux
    # Need a fix to published ports in ramalama to run on MacOS
    if [[ "$OSTYPE" != "linux-gnu"* ]]; then
        echo "This test is only for Linux systems. Current OS: $OSTYPE"
        echo "===> 'test-ui-linux' skipped!"
        exit 0
    fi

    trap cleanup_streamlit_ui EXIT

    start_and_wait_for_ramalama_server
    start_and_wait_for_llama_stack_server
    test_streamlit_ui_linux
    cleanup_streamlit_ui

    echo "===> 'test-ui-linux' completed successfully!"
}

TEST_UTILS=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# shellcheck disable=SC1091
source "$TEST_UTILS/utils.sh"

main "$@"
exit 0

tests/utils.sh (new executable file, +186 lines)

@@ -0,0 +1,186 @@
#!/bin/bash

function start_and_wait_for_ramalama_server {
    # Start ramalama serve in background with logging to 'ramalama-$INFERENCE_MODEL_NO_COLON.log'
    nohup uv run ramalama serve "$INFERENCE_MODEL" > "ramalama-$INFERENCE_MODEL_NO_COLON.log" 2>&1 &
    RAMALAMA_PID=$!
    echo "Started RamaLama with PID: $RAMALAMA_PID"

    # Wait for ramalama to be ready by doing a health check
    echo "Waiting for RamaLama server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to RamaLama..."
        resp=$(curl -s http://localhost:8080/health)
        if [ "$resp" == '{"status":"ok"}' ]; then
            echo "RamaLama server is up and responding!"
            break
        fi
        if [ "$i" -eq 60 ]; then
            echo "RamaLama server failed to start or respond"
            echo "RamaLama logs:"
            cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
            exit 1
        fi
        sleep 1
    done
}

function start_and_wait_for_llama_stack_server {
    # Start llama stack run with logging to 'lls-$INFERENCE_MODEL_NO_COLON.log'
    LLAMA_STACK_LOG_FILE="lls-$INFERENCE_MODEL_NO_COLON.log" nohup uv run llama stack run ~/.llama/distributions/ramalama/ramalama-run.yaml --image-type venv &
    LLS_PID=$!
    echo "Started Llama Stack server with PID: $LLS_PID"

    # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
    echo "Waiting for Llama Stack server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to Llama Stack..."
        resp=$(curl -s http://localhost:8321/v1/health)
        if [ "$resp" == '{"status":"OK"}' ]; then
            echo "Llama Stack server is up!"
            if grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml" "lls-$INFERENCE_MODEL_NO_COLON.log"; then
                echo "Llama Stack server is using RamaLama provider"
                return
            else
                echo "Llama Stack server is not using RamaLama provider"
                echo "Server logs:"
                cat "lls-$INFERENCE_MODEL_NO_COLON.log"
                exit 1
            fi
        fi
        sleep 1
    done
    echo "Llama Stack server failed to start"
    echo "Server logs:"
    cat "lls-$INFERENCE_MODEL_NO_COLON.log"
    exit 1
}

function start_and_wait_for_llama_stack_container {
    # Start llama stack run
    podman run \
        -d \
        --net=host \
        --env INFERENCE_MODEL="$INFERENCE_MODEL" \
        --env RAMALAMA_URL=http://0.0.0.0:8080 \
        --name llama-stack \
        quay.io/ramalama/llama-stack:latest
    LLS_PID=$!
    echo "Started Llama Stack container with PID: $LLS_PID"

    # Wait for llama stack to be ready by doing a health check, then test for the ramalama provider
    echo "Waiting for Llama Stack server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to Llama Stack..."
        resp=$(curl -s http://localhost:8321/v1/health)
        if [ "$resp" == '{"status":"OK"}' ]; then
            echo "Llama Stack server is up!"
            if podman logs llama-stack | grep -q -e "remote::ramalama from .*providers.d/remote/inference/ramalama.yaml"; then
                echo "Llama Stack server is using RamaLama provider"
                return
            else
                echo "Llama Stack server is not using RamaLama provider"
                echo "Container logs:"
                podman logs llama-stack
                exit 1
            fi
        fi
        sleep 1
    done
    echo "Llama Stack server failed to start"
    echo "Container logs:"
    podman logs llama-stack
    exit 1
}

function test_ramalama_models {
    echo "===> test_ramalama_models: start"
    # shellcheck disable=SC2016
    resp=$(curl -sS http://localhost:8080/v1/models)
    if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
        echo "===> test_ramalama_models: pass"
        return
    else
        echo "===> test_ramalama_models: fail"
        echo "RamaLama logs:"
        cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
        exit 1
    fi
}

function test_ramalama_chat_completion {
    echo "===> test_ramalama_chat_completion: start"
    # shellcheck disable=SC2016
    resp=$(curl -sS -X POST http://localhost:8080/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}")
    if echo "$resp" | grep -q "choices"; then
        echo "===> test_ramalama_chat_completion: pass"
        return
    else
        echo "===> test_ramalama_chat_completion: fail"
        echo "RamaLama logs:"
        cat "ramalama-$INFERENCE_MODEL_NO_COLON.log"
        exit 1
    fi
}

function test_llama_stack_models {
    echo "===> test_llama_stack_models: start"
    nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
    if nohup uv run llama-stack-client models list | grep -q "$INFERENCE_MODEL"; then
        echo "===> test_llama_stack_models: pass"
        return
    else
        echo "===> test_llama_stack_models: fail"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
        exit 1
    fi
}

function test_llama_stack_openai_models {
    echo "===> test_llama_stack_openai_models: start"
    # shellcheck disable=SC2016
    resp=$(curl -sS http://localhost:8321/v1/openai/v1/models)
    if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
        echo "===> test_llama_stack_openai_models: pass"
        return
    else
        echo "===> test_llama_stack_openai_models: fail"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
        exit 1
    fi
}

function test_llama_stack_chat_completion {
    echo "===> test_llama_stack_chat_completion: start"
    nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
    if nohup uv run llama-stack-client inference chat-completion --message "tell me a joke" | grep -q "completion_message"; then
        echo "===> test_llama_stack_chat_completion: pass"
        return
    else
        echo "===> test_llama_stack_chat_completion: fail"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
        exit 1
    fi
}

function test_llama_stack_openai_chat_completion {
    echo "===> test_llama_stack_openai_chat_completion: start"
    # shellcheck disable=SC2016
    resp=$(curl -sS -X POST http://localhost:8321/v1/openai/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"model\": \"$INFERENCE_MODEL\"}")
    if echo "$resp" | grep -q "choices"; then
        echo "===> test_llama_stack_openai_chat_completion: pass"
        return
    else
        echo "===> test_llama_stack_openai_chat_completion: fail"
        echo "Server logs:"
        cat "lls-$INFERENCE_MODEL_NO_COLON.log" || podman logs llama-stack
        exit 1
    fi
}
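test_llama_stack_openai_chat_completion exercises the OpenAI-compatible endpoint the stack exposes under /v1/openai/v1. For reference, a hedged Python equivalent using the openai client already pinned in requirements.txt (assumes the server is up on port 8321 and INFERENCE_MODEL is set; illustrative only, not part of the test suite):

import os

from openai import OpenAI

# Point the OpenAI client at Llama Stack's OpenAI-compatible base URL
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

resp = client.chat.completions.create(
    model=os.environ["INFERENCE_MODEL"],
    messages=[{"role": "user", "content": "Hello"}],
)
print(resp.choices[0].message.content)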

uv.lock (3694 lines changed): file diff suppressed because it is too large.