From 0598f0762a905fd87d19891e3f084cd1a43559b8 Mon Sep 17 00:00:00 2001 From: Jayesh Mahajan Date: Tue, 3 Jun 2025 20:48:38 -0400 Subject: [PATCH] AI Example model serving tensorflow (#563) * Create AI Example model serving tensorflow * ai/model-serving-tensorflow service.yaml * ai/model-serving-tensorflow ingress.yaml * ai/model-serving-tensorflow pv.yaml * ai/model-serving-tensorflow pvc.yaml * Create Readme.md * Rename Readme.md to README.md * Update with structure format for README.md * Correct link for serving in ai/model-serving-tensorflow/README.md Co-authored-by: Janet Kuo * Fix kubectl README.md * Update README.md * Update as per comments README.md * Update tensorflow/serving:2.19.0 deployment.yaml * remove hostname ai/model-serving-tensorflow/ingress.yaml --------- Co-authored-by: Janet Kuo --- ai/model-serving-tensorflow/README.md | 132 ++++++++++++++++++++ ai/model-serving-tensorflow/deployment.yaml | 34 +++++ ai/model-serving-tensorflow/ingress.yaml | 17 +++ ai/model-serving-tensorflow/pv.yaml | 12 ++ ai/model-serving-tensorflow/pvc.yaml | 11 ++ ai/model-serving-tensorflow/service.yaml | 15 +++ 6 files changed, 221 insertions(+) create mode 100644 ai/model-serving-tensorflow/README.md create mode 100644 ai/model-serving-tensorflow/deployment.yaml create mode 100644 ai/model-serving-tensorflow/ingress.yaml create mode 100644 ai/model-serving-tensorflow/pv.yaml create mode 100644 ai/model-serving-tensorflow/pvc.yaml create mode 100644 ai/model-serving-tensorflow/service.yaml diff --git a/ai/model-serving-tensorflow/README.md b/ai/model-serving-tensorflow/README.md new file mode 100644 index 00000000..3cb50dbd --- /dev/null +++ b/ai/model-serving-tensorflow/README.md @@ -0,0 +1,132 @@ +# TensorFlow Model Serving on Kubernetes + +## 1 Purpose / What You'll Learn + +This example demonstrates how to deploy a TensorFlow model for inference using [TensorFlow Serving](https://www.tensorflow.org/serving) on Kubernetes. 
You'll learn how to: + +- Set up TensorFlow Serving with a pre-trained model +- Use a PersistentVolume to mount your model directory +- Expose the inference endpoint using a Kubernetes `Service` and `Ingress` +- Send a sample prediction request to the model + +--- + +## 📚 Table of Contents + +- [Prerequisites](#prerequisites) +- [Quick Start / TL;DR](#quick-start--tldr) +- [Detailed Steps & Explanation](#detailed-steps--explanation) +- [Verification / Seeing it Work](#verification--seeing-it-work) +- [Configuration Customization](#configuration-customization) +- [Cleanup](#cleanup) +- [Further Reading / Next Steps](#further-reading--next-steps) + +--- + +## ⚙️ Prerequisites + +- Kubernetes cluster (tested with v1.29+) +- `kubectl` configured +- Optional: `ingress-nginx` for external access +- x86-based machine (for running TensorFlow Serving image) +- Local hostPath support (for demo) or a cloud-based PVC + +--- + +## ⚡ Quick Start / TL;DR + +```bash + +# Apply manifests +kubectl apply -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/pv.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/pvc.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/deployment.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/service.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/ingress.yaml # Optional +``` + +--- + +## Detailed Steps & Explanation + +### 1. PersistentVolume & PVC Setup + +> ⚠️ Note: For local testing, `hostPath` is used to mount `/mnt/models/my_model`. In production, replace this with a cloud-native storage backend (e.g., AWS EBS, GCP PD, or NFS). 
+ + Model folder structure: +``` +/mnt/models/my_model/ +└── 1/ + ├── saved_model.pb + └── variables/ +``` + +--- + +### 2. Expose the Service + +- A `ClusterIP` service exposes gRPC (8500) and REST (8501). +- An optional `Ingress` exposes `/tf/v1/models/my_model:predict` to external clients. + +The `Ingress` defines no `host` rule, so it matches requests for any hostname; optionally add a `host` value in `ingress.yaml` to restrict it to your domain. + +--- + +## Verification / Seeing it Work + +If using ingress: + +```bash +curl -X POST http://<INGRESS_HOST>/tf/v1/models/my_model:predict \ + -H "Content-Type: application/json" \ + -d '{ "instances": [[1.0, 2.0, 5.0]] }' +``` + +Expected output: + +```json +{ + "predictions": [...] +} +``` + +To verify the pod is running: + +```bash +kubectl get pods +kubectl wait --for=condition=Available deployment/tf-serving --timeout=300s +kubectl logs deployment/tf-serving +``` + +--- + +## 🛠️ Configuration Customization + +- Update `model_name` and `model_base_path` in the deployment +- Replace `hostPath` with `PersistentVolumeClaim` bound to cloud storage +- Modify resource requests/limits for TensorFlow container + +--- + +## 🧹 Cleanup + +```bash +kubectl delete -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/ingress.yaml # Optional +kubectl delete -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/service.yaml +kubectl delete -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/deployment.yaml +kubectl delete -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/pvc.yaml +kubectl delete -f https://raw.githubusercontent.com/kubernetes/examples/refs/heads/master/ai/model-serving-tensorflow/pv.yaml + +``` + +--- + +## Further Reading / Next Steps + +- [TensorFlow Serving](https://www.tensorflow.org/tfx/serving) +- [TF Serving REST API Reference](https://www.tensorflow.org/tfx/serving/api_rest) +- 
[Kubernetes Ingress Controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) +- [Persistent Volumes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) + + diff --git a/ai/model-serving-tensorflow/deployment.yaml b/ai/model-serving-tensorflow/deployment.yaml new file mode 100644 index 00000000..5b2358d4 --- /dev/null +++ b/ai/model-serving-tensorflow/deployment.yaml @@ -0,0 +1,34 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tf-serving + labels: + app: tf-serving +spec: + replicas: 1 + selector: + matchLabels: + app: tf-serving + template: + metadata: + labels: + app: tf-serving + spec: + containers: + - name: tensorflow-serving + image: tensorflow/serving:2.19.0 + args: + - "--model_name=my_model" + - "--port=8500" + - "--rest_api_port=8501" + - "--model_base_path=/models/my_model" + ports: + - containerPort: 8500 # gRPC + - containerPort: 8501 # REST + volumeMounts: + - name: model-volume + mountPath: /models/my_model + volumes: + - name: model-volume + persistentVolumeClaim: + claimName: my-model-pvc diff --git a/ai/model-serving-tensorflow/ingress.yaml b/ai/model-serving-tensorflow/ingress.yaml new file mode 100644 index 00000000..72b392da --- /dev/null +++ b/ai/model-serving-tensorflow/ingress.yaml @@ -0,0 +1,17 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: tf-serving-ingress + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /$2 +spec: + rules: + - http: + paths: + - path: /tf(/|$)(.*) + pathType: Prefix + backend: + service: + name: tf-serving + port: + number: 8501 diff --git a/ai/model-serving-tensorflow/pv.yaml b/ai/model-serving-tensorflow/pv.yaml new file mode 100644 index 00000000..3fdfa6af --- /dev/null +++ b/ai/model-serving-tensorflow/pv.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: my-model-pv +spec: + capacity: + storage: 1Gi + accessModes: + - ReadOnlyMany + persistentVolumeReclaimPolicy: Retain + hostPath: + path: 
/mnt/models/my_model diff --git a/ai/model-serving-tensorflow/pvc.yaml b/ai/model-serving-tensorflow/pvc.yaml new file mode 100644 index 00000000..4dacae33 --- /dev/null +++ b/ai/model-serving-tensorflow/pvc.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-model-pvc +spec: + accessModes: + - ReadOnlyMany + resources: + requests: + storage: 1Gi + volumeName: my-model-pv diff --git a/ai/model-serving-tensorflow/service.yaml b/ai/model-serving-tensorflow/service.yaml new file mode 100644 index 00000000..00a6e102 --- /dev/null +++ b/ai/model-serving-tensorflow/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: tf-serving +spec: + selector: + app: tf-serving + ports: + - name: grpc + port: 8500 + targetPort: 8500 + - name: rest + port: 8501 + targetPort: 8501 + type: ClusterIP