From f446b3075a657be72a4f533a7c7a3d0da3147923 Mon Sep 17 00:00:00 2001 From: Taylor Dolezal Date: Wed, 6 Mar 2024 17:24:18 -0800 Subject: [PATCH] Add instructions and scripts Signed-off-by: Taylor Dolezal --- README.md | 64 +++++++++++++++++++++++++++++++++++++- kubernetes/deployment.yaml | 1 + shutdown.sh | 25 +++++++++++++++ startup.sh | 19 +++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-) create mode 100755 shutdown.sh create mode 100755 startup.sh diff --git a/README.md b/README.md index fd1e5e6..722e1a1 100644 --- a/README.md +++ b/README.md @@ -1 +1,63 @@ -# LLMs In Action +# LLMs in Action: A Cloud Native Story + +## Prerequisites + +- [Docker](https://docs.docker.com/install/) + - Docker is a platform for developers and sysadmins to develop, ship, and run applications. Docker enables you to separate your applications from your infrastructure so you can deliver software quickly. +- [Ollama](https://ollama.com/) + - Ollama is a Language Model as a Service (LMaaS) that provides a RESTful API for interacting with large language models. It's a great way to get started with LLMs without having to worry about the infrastructure. +- [kind](https://kind.sigs.k8s.io/) + - kind is "Kubernetes in Docker," used by the Kubernetes project to help test features and run integration tests. It turns out it's a handy way for anyone to spin up a cluster quickly. Big thank you to @bentheelder for developing it 👏🏼 👏🏼 +- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) + - kubectl is how you interact with k8s using the command line. 
This allows you to become a k8s whisperer :-) + +With Ollama installed on your machine, you will need to pull the LLaVA model by running + +```sh +ollama pull llava +``` + +You can verify that the model is installed by running + +```sh +ollama list + +NAME ID SIZE MODIFIED +llava:latest 8dd30f6b0cb1 4.7 GB 17 seconds ago +``` + +## Startup + +We have crafted a few scripts to make this demo run as quickly as possible on your machine once you've installed the prerequisites. + +The startup script will: + +- Create a kind cluster +- Apply the Kubernetes manifests we need for our demo +- Use port-forwarding to help us access our service in the browser so we can take photos and describe them with LLaVA (Large Language and Vision Assistant) + +```sh +./startup.sh +``` + +To access the service, open your browser and navigate to [http://localhost:8501](http://localhost:8501) + +## Shutdown + +To shut down the demo, run the following command, which will: + +- Remove the Kubernetes manifests +- Remove the port-forwarding +- Delete the kind cluster + +```sh +./shutdown.sh +``` + +## Operating System Information + +This demo has been tested on the following operating systems and will work if you have the prerequisites installed. + +- macOS +- Linux +- Windows diff --git a/kubernetes/deployment.yaml b/kubernetes/deployment.yaml index 65351b8..ac749d3 100644 --- a/kubernetes/deployment.yaml +++ b/kubernetes/deployment.yaml @@ -17,6 +17,7 @@ spec: containers: - name: keynote image: onlydole/llm-in-action + imagePullPolicy: IfNotPresent ports: - containerPort: 8501 env: diff --git a/shutdown.sh b/shutdown.sh new file mode 100755 index 0000000..a67e00e --- /dev/null +++ b/shutdown.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +echo "Deleting the Kubernetes manifest for this demo..." 
+kubectl delete -f kubernetes/
+
+# Match only the port-forward that startup.sh launched, so unrelated kubectl port-forward sessions survive.
+pids=$(pgrep -f "kubectl port-forward svc/keynote")
+
+if [[ -z "$pids" ]]; then
+  echo "No kubectl port-forward processes found."
+else
+  # pgrep prints one PID per line; $pids stays unquoted on purpose so it splits into separate PIDs.
+  for pid in $pids; do
+    echo "Terminating process with PID: $pid"
+    # Branch on kill's own exit status rather than reading $? on a later line.
+    if kill "$pid"; then
+      echo "Successfully terminated process with PID: $pid"
+    else
+      echo "Failed to terminate process with PID: $pid. You may need to run the script as root or use 'sudo'." >&2
+    fi
+  done
+fi
+
+echo "Delete the kind cluster to clean up our machine..."
+kind delete cluster -n llm
diff --git a/startup.sh b/startup.sh
new file mode 100755
index 0000000..e31562e
--- /dev/null
+++ b/startup.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Start the demo: create the kind cluster, apply the manifests, wait for the
+# keynote pod to run, then forward localhost:8501 to the keynote service.
+# Run from the repository root: ./startup.sh
+# Optional env: WAIT_TIMEOUT_SECONDS (default 300) bounds the wait for the pod.
+
+set -euo pipefail
+
+readonly WAIT_TIMEOUT_SECONDS="${WAIT_TIMEOUT_SECONDS:-300}"
+
+# Print a message to stderr and abort the script.
+die() {
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+echo "Create the kind cluster..."
+# Abort on failure so the manifests below are never applied to whatever
+# cluster the current kubectl context happens to point at.
+kind create cluster --config cluster.yaml || die "Failed to create the kind cluster."
+
+echo "Applying the Kubernetes manifests..."
+kubectl apply -f kubernetes/deployment.yaml
+kubectl apply -f kubernetes/service.yaml
+
+# Wait until at least one pod whose name contains "keynote" is Running.
+# One awk replaces grep | awk and copes with several matching pods; a failed
+# kubectl call simply counts as "not ready yet". SECONDS is bash's elapsed
+# runtime, so the deadline caps the wait instead of looping forever.
+deadline=$((SECONDS + WAIT_TIMEOUT_SECONDS))
+until kubectl get pods --no-headers \
+  | awk '/keynote/ && $3 == "Running" { ok = 1 } END { exit !ok }'; do
+  if ((SECONDS >= deadline)); then
+    die "Keynote pod is not Running after ${WAIT_TIMEOUT_SECONDS}s; inspect it with 'kubectl get pods'."
+  fi
+  echo "Keynote demo is starting, make some noise..."
+  sleep 10
+done
+
+echo "Forwarding the Keynote pod..."
+kubectl port-forward svc/keynote 8501:8501 &
+forward_pid=$!
+
+# Give the forward a moment so an immediate failure (for example port 8501
+# already in use) is reported instead of announcing a demo that is not there.
+sleep 1
+kill -0 "$forward_pid" 2>/dev/null || die "kubectl port-forward exited early; is port 8501 already in use?"
+
+echo "Keynote demo is running, it's time to present! Open your browser to http://localhost:8501"