mirror of https://github.com/kubeflow/examples.git
				
				
				
			Update serving in mnist example; use 0.4 and add testing. (#469)
* Add the TFServing component * Create TFServing components. * The model.py code doesn't appear to be exporting a model in saved model format; it was missing a call to export. * I'm not sure how this ever worked. * It also looks like there is a bug in the code in that it's using the cnn input fn even if the model is the linear one. I'm going to leave that as is for now. * Create a namespace for each test run; delete the namespace on teardown * We need to copy the GCP service account key to the new namespace. * Add a shell script to do that.
This commit is contained in:
		
							parent
							
								
									ef108dbbcc
								
							
						
					
					
						commit
						2494fdf8c5
					
				|  | @ -473,7 +473,74 @@ kubectl port-forward ${PODNAME} 6006:6006 | |||
| 
 | ||||
| Tensorboard can now be accessed at [http://127.0.0.1:6006](http://127.0.0.1:6006). | ||||
| 
 | ||||
| ## Using Tensorflow serving | ||||
| ## Serving the model | ||||
| 
 | ||||
| The model code will export the model in saved model format which is suitable for serving with TensorFlow serving. | ||||
| 
 | ||||
| To serve the model follow the instructions below. The instructions vary slightly based on where you are storing your | ||||
| model (e.g. GCS, S3, PVC). Depending on the storage system we provide different ksonnet components as a convenience | ||||
| for setting relevant environment variables. | ||||
| 
 | ||||
| 
 | ||||
| ### GCS | ||||
| 
 | ||||
| Here we show how to serve the model when it is stored on GCS. This assumes that when you trained the model you set `exportDir` to a GCS | ||||
| URI; if not you can always copy it to GCS using `gsutil`. | ||||
| 
 | ||||
| Check that a model was exported | ||||
| 
 | ||||
| ``` | ||||
| gsutil ls -r ${EXPORT_DIR} | ||||
| 
 | ||||
| ``` | ||||
| 
 | ||||
| The output should look something like | ||||
| 
 | ||||
| ``` | ||||
| gs://${EXPORT_DIR}/1547100373/saved_model.pb | ||||
| gs://${EXPORT_DIR}/1547100373/variables/: | ||||
| gs://${EXPORT_DIR}/1547100373/variables/ | ||||
| gs://${EXPORT_DIR}/1547100373/variables/variables.data-00000-of-00001 | ||||
| gs://${EXPORT_DIR}/1547100373/variables/variables.index | ||||
| ``` | ||||
| 
 | ||||
| The number `1547100373` is a version number auto-generated by TensorFlow; it will vary on each run but should be monotonically increasing if you save a model to the same location as a previous run. | ||||
| 
 | ||||
| 
 | ||||
| Set your model path | ||||
| 
 | ||||
| ``` | ||||
| ks param set ${ENV} mnist-deploy-gcp modelBasePath ${EXPORT_DIR} | ||||
| 
 | ||||
| ``` | ||||
| 
 | ||||
| Deploy it | ||||
| 
 | ||||
| ``` | ||||
| ks apply ${ENV} -c mnist-deploy-gcp | ||||
| ``` | ||||
| 
 | ||||
| You can check the deployment by running | ||||
| 
 | ||||
| ``` | ||||
| kubectl describe deployments mnist-deploy-gcp | ||||
| ``` | ||||
| 
 | ||||
| ### S3 | ||||
| 
 | ||||
| TODO: Add instructions | ||||
| 
 | ||||
| ### PVC | ||||
| 
 | ||||
| TODO: Add instructions | ||||
| 
 | ||||
| ### Create the K8s service | ||||
| 
 | ||||
| Next we need to create a K8s service to route traffic to our model | ||||
| 
 | ||||
| ``` | ||||
| ks apply ${ENV} -c mnist-service | ||||
| ``` | ||||
| 
 | ||||
| By default the workflow deploys our model via Tensorflow Serving. Included in this example is a client that can query your model and provide results: | ||||
| 
 | ||||
|  |  | |||
|  | @ -2,14 +2,28 @@ apiVersion: 0.3.0 | |||
| environments: | ||||
|   jlewi: | ||||
|     destination: | ||||
|       namespace: kubeflow | ||||
|       namespace: jlewi | ||||
|       server: https://35.196.210.94 | ||||
|     k8sVersion: v1.11.5 | ||||
|     path: jlewi | ||||
|   test-env-d5e3: | ||||
|     destination: | ||||
|       namespace: jlewi | ||||
|       server: https://35.196.210.94 | ||||
|     k8sVersion: v1.11.5 | ||||
|     path: test-env-d5e3 | ||||
| kind: ksonnet.io/app | ||||
| libraries: | ||||
|   kubeflow/tf-serving: | ||||
|     name: tf-serving | ||||
|     registry: kubeflow | ||||
|     version: fed535eaa276220e4edf59530c0629f4375a40a9 | ||||
| name: ks_app | ||||
| registries: | ||||
|   incubator: | ||||
|     protocol: github | ||||
|     uri: github.com/ksonnet/parts/tree/master/incubator | ||||
|   kubeflow: | ||||
|     protocol: github | ||||
|     uri: github.com/kubeflow/kubeflow/tree/v0.4-branch/kubeflow | ||||
| version: 0.0.1 | ||||
|  |  | |||
|  | @ -0,0 +1,39 @@ | |||
| local env = std.extVar("__ksonnet/environments"); | ||||
| local params = std.extVar("__ksonnet/params").components["mnist-deploy-aws"]; | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| local deployment = k.apps.v1beta1.deployment; | ||||
| local container = deployment.mixin.spec.template.spec.containersType; | ||||
| 
 | ||||
| local util = import "kubeflow/tf-serving/util.libsonnet"; | ||||
| local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet"; | ||||
| 
 | ||||
| local base = tfserving.new(env, params); | ||||
| local tfDeployment = base.tfDeployment + | ||||
|                      deployment.mapContainers( | ||||
|                        function(c) { | ||||
|                          result:: | ||||
|                            c + container.withEnvMixin( | ||||
|                              if util.toBool(params.s3Enable) then ( | ||||
|                                [ | ||||
|                                  { | ||||
|                                    name: "AWS_ACCESS_KEY_ID", | ||||
|                                    valueFrom: { secretKeyRef: { name: params.s3SecretName, key: params.s3SecretAccesskeyidKeyName } }, | ||||
|                                  }, | ||||
|                                  { | ||||
|                                    name: "AWS_SECRET_ACCESS_KEY", | ||||
|                                    valueFrom: { secretKeyRef: { name: params.s3SecretName, key: params.s3SecretSecretaccesskeyKeyName } }, | ||||
|                                  }, | ||||
|                                  { name: "AWS_REGION", value: params.s3AwsRegion }, | ||||
|                                  { name: "S3_USE_HTTPS", value: std.toString(params.s3UseHttps) }, | ||||
|                                  { name: "S3_VERIFY_SSL", value: std.toString(params.s3VerifySsl) }, | ||||
|                                  { name: "S3_ENDPOINT", value: params.s3Endpoint }, | ||||
|                                ] | ||||
|                              ) else [], | ||||
|                            ), | ||||
|                        }.result, | ||||
|                      ); | ||||
| util.list([ | ||||
|   tfDeployment, | ||||
|   base.tfservingConfig, | ||||
| ],) | ||||
|  | @ -0,0 +1,47 @@ | |||
| local env = std.extVar("__ksonnet/environments"); | ||||
| local params = std.extVar("__ksonnet/params").components["mnist-deploy-gcp"]; | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| local deployment = k.apps.v1beta1.deployment; | ||||
| local container = deployment.mixin.spec.template.spec.containersType; | ||||
| 
 | ||||
| local util = import "kubeflow/tf-serving/util.libsonnet"; | ||||
| local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet"; | ||||
| 
 | ||||
| local base = tfserving.new(env, params); | ||||
| local tfDeployment = base.tfDeployment + | ||||
|                      deployment.mixin.spec.template.spec.withVolumesMixin( | ||||
|                        if params.gcpCredentialSecretName != "null" then ( | ||||
|                          [{ | ||||
|                            name: "gcp-credentials", | ||||
|                            secret: { | ||||
|                              secretName: params.gcpCredentialSecretName, | ||||
|                            }, | ||||
|                          }] | ||||
|                        ) else [], | ||||
|                      ) + | ||||
|                      deployment.mapContainers( | ||||
|                        function(c) { | ||||
|                          result:: | ||||
|                            c + container.withEnvMixin( | ||||
|                              if params.gcpCredentialSecretName != "null" then ( | ||||
|                                [{ | ||||
|                                  name: "GOOGLE_APPLICATION_CREDENTIALS", | ||||
|                                  value: "/secret/gcp-credentials/user-gcp-sa.json", | ||||
|                                }] | ||||
|                              ) else [], | ||||
|                            ) + | ||||
|                            container.withVolumeMountsMixin( | ||||
|                              if params.gcpCredentialSecretName != "null" then ( | ||||
|                                [{ | ||||
|                                  name: "gcp-credentials", | ||||
|                                  mountPath: "/secret/gcp-credentials", | ||||
|                                }] | ||||
|                              ) else [], | ||||
|                            ), | ||||
|                        }.result, | ||||
|                      ); | ||||
| util.list([ | ||||
|   tfDeployment, | ||||
|   base.tfservingConfig, | ||||
| ],) | ||||
|  | @ -0,0 +1,8 @@ | |||
| local env = std.extVar("__ksonnet/environments"); | ||||
| local params = std.extVar("__ksonnet/params").components["mnist-service"]; | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| local tfservingService = import "kubeflow/tf-serving/tf-serving-service-template.libsonnet"; | ||||
| local util = import "kubeflow/tf-serving/util.libsonnet"; | ||||
| 
 | ||||
| tfservingService.new(env, params).all | ||||
|  | @ -4,15 +4,58 @@ | |||
|     train: { | ||||
|       batchSize: 100, | ||||
|       envVariables: 'GOOGLE_APPLICATION_CREDENTIALS=/var/secrets/user-gcp-sa.json', | ||||
|       exportDir: 'gs://kubeflow-ci_temp/mnist-jlewi', | ||||
|       image: 'gcr.io/kubeflow-examples/mnist/model:v20190108-v0.2-137-g38daafa-dirty-911944', | ||||
|       exportDir: 'gs://kubeflow-ci_temp/mnist-jlewi/export', | ||||
|       image: 'gcr.io/kubeflow-examples/mnist/model:v20190111-v0.2-148-g313770f', | ||||
|       learningRate: '0.01', | ||||
|       modelDir: 'gs://kubeflow-ci_temp/mnist-jlewi', | ||||
|       name: 'mnist-train', | ||||
|       numPs: 1, | ||||
|       numWorkers: 2, | ||||
|       numPs: 0, | ||||
|       numWorkers: 0, | ||||
|       secret: '', | ||||
|       trainSteps: 200, | ||||
|     }, | ||||
|     "mnist-deploy-gcp": { | ||||
|       defaultCpuImage: 'tensorflow/serving:1.11.1', | ||||
|       defaultGpuImage: 'tensorflow/serving:1.11.1-gpu', | ||||
|       deployHttpProxy: 'false', | ||||
|       enablePrometheus: 'true', | ||||
|       gcpCredentialSecretName: 'user-gcp-sa', | ||||
|       httpProxyImage: '', | ||||
|       injectIstio: 'false', | ||||
|       modelBasePath: 'gs://kubeflow-examples-data/mnist', | ||||
|       modelName: 'mnist', | ||||
|       name: 'mnist-deploy-gcp', | ||||
|       numGpus: '0', | ||||
|       versionName: 'v1', | ||||
|     }, | ||||
|     "mnist-deploy-aws": { | ||||
|       defaultCpuImage: 'tensorflow/serving:1.11.1', | ||||
|       defaultGpuImage: 'tensorflow/serving:1.11.1-gpu', | ||||
|       deployHttpProxy: 'false', | ||||
|       enablePrometheus: 'true', | ||||
|       httpProxyImage: 'gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723', | ||||
|       injectIstio: 'false', | ||||
|       modelBasePath: 's3://kubeflow-examples-data/mnist', | ||||
|       modelName: 'null', | ||||
|       name: 'mnist-deploy-aws', | ||||
|       numGpus: '0', | ||||
|       s3AwsRegion: 'us-west-1', | ||||
|       s3Enable: 'false', | ||||
|       s3Endpoint: 's3.us-west-1.amazonaws.com', | ||||
|       s3SecretAccesskeyidKeyName: 'AWS_ACCESS_KEY_ID', | ||||
|       s3SecretName: 'null', | ||||
|       s3SecretSecretaccesskeyKeyName: 'AWS_SECRET_ACCESS_KEY', | ||||
|       s3UseHttps: 'true', | ||||
|       s3VerifySsl: 'true', | ||||
|       versionName: 'v1', | ||||
|     }, | ||||
|     "mnist-service": { | ||||
|       enablePrometheus: 'true', | ||||
|       injectIstio: 'false', | ||||
|       modelName: 'null', | ||||
|       name: 'mnist-service', | ||||
|       serviceType: 'ClusterIP', | ||||
|       trafficRule: 'v1:100', | ||||
|     }, | ||||
|   }, | ||||
| } | ||||
|  | @ -43,8 +43,9 @@ local trainEnv = [ | |||
|   }, | ||||
| ]; | ||||
| 
 | ||||
| local secretName = std.split(params.secret, "=")[0]; | ||||
| local secretMountPath = std.split(params.secret, "=")[1]; | ||||
| local secretPieces = std.split(params.secret, "="); | ||||
| local secretName = if std.length(secretPieces) > 0 then secretPieces[0] else ""; | ||||
| local secretMountPath = if std.length(secretPieces) > 1 then secretPieces[1] else ""; | ||||
| 
 | ||||
| local replicaSpec = { | ||||
|   containers: [ | ||||
|  |  | |||
|  | @ -8,6 +8,23 @@ local envParams = params + { | |||
|     train+: { | ||||
|       name: 'mnist-train-dist', | ||||
|       secret: 'user-gcp-sa=/var/secrets', | ||||
|       numSteps: 10, | ||||
|       image: 'gcr.io/kubeflow-examples/mnist/model:v20190111-v0.2-146-g0bbff62-dirty-12f353', | ||||
|       numWorkers: 2, | ||||
|       numPs: 1, | ||||
|     }, | ||||
|     "deploy-gcp"+: { | ||||
|       modelBasePath: 'gs://kubeflow-ci_temp/mnist-jlewi/export', | ||||
|     }, | ||||
|     "mnist-deploy-gcp"+: { | ||||
|       modelBasePath: 'gs://kubeflow-ci_temp/mnist-jlewi/export', | ||||
|       name: 'jlewi-deploy-test', | ||||
|       namespace: 'jlewi', | ||||
|     }, | ||||
|     "mnist-service"+: { | ||||
|       name: 'jlewi-deploy-test', | ||||
|       namespace: 'jlewi', | ||||
|       modelBasePath: 'gs://kubeflow-ci_temp/mnist-jlewi/export', | ||||
|     }, | ||||
|   }, | ||||
| }; | ||||
|  |  | |||
|  | @ -0,0 +1,2 @@ | |||
| { | ||||
| } | ||||
|  | @ -0,0 +1,9 @@ | |||
| local base = import "base.libsonnet"; | ||||
| // uncomment if you reference ksonnet-lib | ||||
| // local k = import "k.libsonnet"; | ||||
| // local deployment = k.apps.v1beta2.deployment; | ||||
| 
 | ||||
| base + { | ||||
|   // Insert user-specified overrides here. For example, if a component is named "nginx-deployment", you might have something like: | ||||
|   // "nginx-deployment"+: deployment.mixin.metadata.withLabels({foo: "bar"}) | ||||
| } | ||||
|  | @ -0,0 +1,28 @@ | |||
| local params = std.extVar('__ksonnet/params'); | ||||
| local globals = import 'globals.libsonnet'; | ||||
| local envParams = params + { | ||||
|   components+: { | ||||
|     train+: { | ||||
|       name: 'jlewi-deploy-test', | ||||
|       namespace: 'jlewi', | ||||
|       modelBasePath: 'gs://kubeflow-ci_temp/mnist-jlewi/export', | ||||
|     }, | ||||
|     "mnist-deploy-gcp"+: { | ||||
|       name: 'jlewi-deploy-test', | ||||
|       namespace: 'jlewi', | ||||
|       modelBasePath: 'gs://kubeflow-ci_temp/mnist-jlewi/export', | ||||
|     }, | ||||
|     "mnist-service"+: { | ||||
|       name: 'jlewi-deploy-test', | ||||
|       namespace: 'jlewi', | ||||
|       modelBasePath: 'gs://kubeflow-ci_temp/mnist-jlewi/export', | ||||
|     }, | ||||
|   }, | ||||
| }; | ||||
| 
 | ||||
| { | ||||
|   components: { | ||||
|     [x]: envParams.components[x] + globals | ||||
|     for x in std.objectFields(envParams.components) | ||||
|   }, | ||||
| } | ||||
							
								
								
									
										73
									
								
								mnist/ks_app/vendor/kubeflow/tf-serving@fed535eaa276220e4edf59530c0629f4375a40a9/README.md
								
								
								
									vendored
								
								
									Normal file
								
							
							
						
						
									
										73
									
								
								mnist/ks_app/vendor/kubeflow/tf-serving@fed535eaa276220e4edf59530c0629f4375a40a9/README.md
								
								
								
									vendored
								
								
									Normal file
								
							|  | @ -0,0 +1,73 @@ | |||
| <!-- START doctoc generated TOC please keep comment here to allow auto update --> | ||||
| <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE --> | ||||
| **Table of Contents**  *generated with [DocToc](https://github.com/thlorenz/doctoc)* | ||||
| 
 | ||||
| - [tf-serving](#tf-serving) | ||||
|   - [Quickstart](#quickstart) | ||||
|   - [Using the library](#using-the-library) | ||||
|     - [io.ksonnet.pkg.tf-serving](#ioksonnetpkgtf-serving) | ||||
|       - [Example](#example) | ||||
|       - [Parameters](#parameters) | ||||
| 
 | ||||
| <!-- END doctoc generated TOC please keep comment here to allow auto update --> | ||||
| 
 | ||||
| # tf-serving | ||||
| 
 | ||||
| > TensorFlow serving is a server for TensorFlow models. | ||||
| 
 | ||||
| 
 | ||||
| * [Quickstart](#quickstart) | ||||
| * [Using the library](#using-the-library) | ||||
|   * [io.ksonnet.pkg.tf-serving](#ioksonnetpkgtf-serving) | ||||
| 
 | ||||
| ## Quickstart | ||||
| 
 | ||||
| *The following commands use the `io.ksonnet.pkg.tf-serving` prototype to generate Kubernetes YAML for tf-serving, and then deploy it to your Kubernetes cluster.* | ||||
| 
 | ||||
| First, create a cluster and install the ksonnet CLI (see root-level [README.md][rootReadme]). | ||||
| 
 | ||||
| If you haven't yet created a [ksonnet application](linkToSomewhere), do so using `ks init <app-name>`. | ||||
| 
 | ||||
| Finally, in the ksonnet application directory, run the following: | ||||
| 
 | ||||
| ```shell | ||||
| # Expand prototype as a Jsonnet file, place in a file in the | ||||
| # `components/` directory. (YAML and JSON are also available.) | ||||
| $ ks prototype use io.ksonnet.pkg.tf-serving tf-serving \ | ||||
|   --name tf-serving \ | ||||
|   --namespace default | ||||
| 
 | ||||
| # Apply to server. | ||||
| $ ks apply -f tf-serving.jsonnet | ||||
| ``` | ||||
| 
 | ||||
| ## Using the library | ||||
| 
 | ||||
| The library files for tf-serving define a set of relevant *parts* (_e.g._, deployments, services, secrets, and so on) that can be combined to configure tf-serving for a wide variety of scenarios. For example, a database like Redis may need a secret to hold the user password, or it may have no password if it's acting as a cache. | ||||
| 
 | ||||
| This library provides a set of pre-fabricated "flavors" (or "distributions") of tf-serving, each of which is configured for a different use case. These are captured as ksonnet *prototypes*, which allow users to interactively customize these distributions for their specific needs. | ||||
| 
 | ||||
| These prototypes, as well as how to use them, are enumerated below. | ||||
| 
 | ||||
| ### io.ksonnet.pkg.tf-serving | ||||
| 
 | ||||
| TensorFlow serving | ||||
| #### Example | ||||
| 
 | ||||
| ```shell | ||||
| # Expand prototype as a Jsonnet file, place in a file in the | ||||
| # `components/` directory. (YAML and JSON are also available.) | ||||
| $ ks prototype use io.ksonnet.pkg.tf-serving tf-serving \ | ||||
|   --name YOUR_NAME_HERE \ | ||||
|   --model_path YOUR_MODEL_PATH_HERE | ||||
| ``` | ||||
| 
 | ||||
| #### Parameters | ||||
| 
 | ||||
| The available options to pass to the prototype are: | ||||
| 
 | ||||
| * `--name=<name>`: Name to give to each of the components [string] | ||||
| * `--model_path=<model_path>`: Path to the model. This can be a GCS path. [string] | ||||
| 
 | ||||
| 
 | ||||
| [rootReadme]: https://github.com/ksonnet/mixins | ||||
							
								
								
									
										35
									
								
								mnist/ks_app/vendor/kubeflow/tf-serving@fed535eaa276220e4edf59530c0629f4375a40a9/parts.yaml
								
								
								
									vendored
								
								
									Normal file
								
							
							
						
						
									
										35
									
								
								mnist/ks_app/vendor/kubeflow/tf-serving@fed535eaa276220e4edf59530c0629f4375a40a9/parts.yaml
								
								
								
									vendored
								
								
									Normal file
								
							|  | @ -0,0 +1,35 @@ | |||
| { | ||||
|    "name": "tf-serving", | ||||
|    "apiVersion": "0.0.1", | ||||
|    "kind": "ksonnet.io/parts", | ||||
|    "description": "TensorFlow serving is a server for TensorFlow models.\n", | ||||
|    "author": "kubeflow team <kubeflow-team@google.com>", | ||||
|    "contributors": [ | ||||
|       { | ||||
|          "name": "Jeremy Lewi", | ||||
|          "email": "jlewi@google.com" | ||||
|       } | ||||
|    ], | ||||
|    "repository": { | ||||
|       "type": "git", | ||||
|       "url": "https://github.com/kubeflow/kubeflow" | ||||
|    }, | ||||
|    "bugs": { | ||||
|       "url": "https://github.com/kubeflow/kubeflow/issues" | ||||
|    }, | ||||
|    "keywords": [ | ||||
|       "kubeflow", | ||||
|       "tensorflow", | ||||
|       "database" | ||||
|    ], | ||||
|    "quickStart": { | ||||
|       "prototype": "io.ksonnet.pkg.tf-serving", | ||||
|       "componentName": "tf-serving", | ||||
|       "flags": { | ||||
|          "name": "tf-serving", | ||||
|          "namespace": "default" | ||||
|       }, | ||||
|       "comment": "Run TensorFlow Serving" | ||||
|    }, | ||||
|    "license": "Apache 2.0" | ||||
| } | ||||
|  | @ -0,0 +1,23 @@ | |||
| // @apiVersion 0.1 | ||||
| // @name io.ksonnet.pkg.tf-serving | ||||
| // @description TensorFlow serving | ||||
| // @shortDescription A TensorFlow serving deployment | ||||
| // @param name string Name to give to each of the components | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| 
 | ||||
| // ksonnet appears to require name be a parameter of the prototype which is why we handle it differently. | ||||
| local name = import "param://name"; | ||||
| 
 | ||||
| // updatedParams includes the namespace from env by default. | ||||
| local updatedParams = params + env; | ||||
| 
 | ||||
| local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet"; | ||||
| local tfServing = tfServingBase { | ||||
|   // Override parameters with user supplied parameters. | ||||
|   params+: updatedParams { | ||||
|     name: name, | ||||
|   }, | ||||
| }; | ||||
| 
 | ||||
| std.prune(k.core.v1.list.new(tfServing.components)) | ||||
|  | @ -0,0 +1,61 @@ | |||
| // @apiVersion 0.1 | ||||
| // @name io.ksonnet.pkg.tf-serving-deployment-aws | ||||
| // @description TensorFlow serving | ||||
| // @shortDescription A TensorFlow serving deployment | ||||
| // @param name string Name to give to each of the components | ||||
| // @optionalParam numGpus string 0 Number of gpus to use | ||||
| // @optionalParam deployHttpProxy string false Whether to deploy http proxy | ||||
| // @optionalParam injectIstio string false Whether to inject istio sidecar; should be true or false. | ||||
| // @optionalParam enablePrometheus string true Whether to enable prometheus endpoint (requires TF 1.11) | ||||
| // @optionalParam modelBasePath string s3://kubeflow-examples-data/mnist The model path | ||||
| // @optionalParam modelName string null The model name | ||||
| // @optionalParam versionName string v1 The version name | ||||
| // @optionalParam defaultCpuImage string tensorflow/serving:1.11.1 The default model server image (cpu) | ||||
| // @optionalParam defaultGpuImage string tensorflow/serving:1.11.1-gpu The default model server image (gpu) | ||||
| // @optionalParam httpProxyImage string gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723 Http proxy image | ||||
| // @optionalParam s3Enable string false Whether to enable S3 | ||||
| // Following parameters are needed only if s3Enable is true | ||||
| // @optionalParam s3SecretName string null Name of the k8s secrets containing S3 credentials | ||||
| // @optionalParam s3SecretAccesskeyidKeyName string AWS_ACCESS_KEY_ID Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID | ||||
| // @optionalParam s3SecretSecretaccesskeyKeyName string AWS_SECRET_ACCESS_KEY Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY | ||||
| // @optionalParam s3AwsRegion string us-west-1 S3 region | ||||
| // @optionalParam s3UseHttps string true Whether or not to use https | ||||
| // @optionalParam s3VerifySsl string true Whether or not to verify https certificates for S3 connections | ||||
| // @optionalParam s3Endpoint string s3.us-west-1.amazonaws.com URL for your s3-compatible endpoint | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| local deployment = k.apps.v1beta1.deployment; | ||||
| local container = deployment.mixin.spec.template.spec.containersType; | ||||
| 
 | ||||
| local util = import "kubeflow/tf-serving/util.libsonnet"; | ||||
| local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet"; | ||||
| 
 | ||||
| local base = tfserving.new(env, params); | ||||
| local tfDeployment = base.tfDeployment + | ||||
|                      deployment.mapContainers( | ||||
|                        function(c) { | ||||
|                          result:: | ||||
|                            c + container.withEnvMixin( | ||||
|                              if util.toBool(params.s3Enable) then ( | ||||
|                                [ | ||||
|                                  { | ||||
|                                    name: "AWS_ACCESS_KEY_ID", | ||||
|                                    valueFrom: { secretKeyRef: { name: params.s3SecretName, key: params.s3SecretAccesskeyidKeyName } }, | ||||
|                                  }, | ||||
|                                  { | ||||
|                                    name: "AWS_SECRET_ACCESS_KEY", | ||||
|                                    valueFrom: { secretKeyRef: { name: params.s3SecretName, key: params.s3SecretSecretaccesskeyKeyName } }, | ||||
|                                  }, | ||||
|                                  { name: "AWS_REGION", value: params.s3AwsRegion }, | ||||
|                                  { name: "S3_USE_HTTPS", value: std.toString(params.s3UseHttps) }, | ||||
|                                  { name: "S3_VERIFY_SSL", value: std.toString(params.s3VerifySsl) }, | ||||
|                                  { name: "S3_ENDPOINT", value: params.s3Endpoint }, | ||||
|                                ] | ||||
|                              ) else [], | ||||
|                            ), | ||||
|                        }.result, | ||||
|                      ); | ||||
| util.list([ | ||||
|   tfDeployment, | ||||
|   base.tfservingConfig, | ||||
| ],) | ||||
|  | @ -0,0 +1,61 @@ | |||
| // @apiVersion 0.1 | ||||
| // @name io.ksonnet.pkg.tf-serving-deployment-gcp | ||||
| // @description TensorFlow serving | ||||
| // @shortDescription A TensorFlow serving deployment | ||||
| // @param name string Name to give to each of the components | ||||
| // @optionalParam numGpus string 0 Number of gpus to use | ||||
| // @optionalParam deployHttpProxy string false Whether to deploy http proxy | ||||
| // @optionalParam modelBasePath string gs://kubeflow-examples-data/mnist The model path | ||||
| // @optionalParam modelName string null The model name | ||||
| // @optionalParam versionName string v1 The version name | ||||
| // @optionalParam defaultCpuImage string tensorflow/serving:1.11.1 The default model server image (cpu) | ||||
| // @optionalParam defaultGpuImage string tensorflow/serving:1.11.1-gpu The default model server image (gpu) | ||||
| // @optionalParam httpProxyImage string gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723 Http proxy image | ||||
| // @optionalParam gcpCredentialSecretName string null If not empty, insert the secret credential | ||||
| // @optionalParam injectIstio string false Whether to inject istio sidecar; should be true or false. | ||||
| // @optionalParam enablePrometheus string true Whether to enable prometheus endpoint (requires TF 1.11) | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| local deployment = k.apps.v1beta1.deployment; | ||||
| local container = deployment.mixin.spec.template.spec.containersType; | ||||
| 
 | ||||
| local util = import "kubeflow/tf-serving/util.libsonnet"; | ||||
| local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet"; | ||||
| 
 | ||||
| local base = tfserving.new(env, params); | ||||
| local tfDeployment = base.tfDeployment + | ||||
|                      deployment.mixin.spec.template.spec.withVolumesMixin( | ||||
|                        if params.gcpCredentialSecretName != "null" then ( | ||||
|                          [{ | ||||
|                            name: "gcp-credentials", | ||||
|                            secret: { | ||||
|                              secretName: params.gcpCredentialSecretName, | ||||
|                            }, | ||||
|                          }] | ||||
|                        ) else [], | ||||
|                      ) + | ||||
|                      deployment.mapContainers( | ||||
|                        function(c) { | ||||
|                          result:: | ||||
|                            c + container.withEnvMixin( | ||||
|                              if params.gcpCredentialSecretName != "null" then ( | ||||
|                                [{ | ||||
|                                  name: "GOOGLE_APPLICATION_CREDENTIALS", | ||||
|                                  value: "/secret/gcp-credentials/user-gcp-sa.json", | ||||
|                                }] | ||||
|                              ) else [], | ||||
|                            ) + | ||||
|                            container.withVolumeMountsMixin( | ||||
|                              if params.gcpCredentialSecretName != "null" then ( | ||||
|                                [{ | ||||
|                                  name: "gcp-credentials", | ||||
|                                  mountPath: "/secret/gcp-credentials", | ||||
|                                }] | ||||
|                              ) else [], | ||||
|                            ), | ||||
|                        }.result, | ||||
|                      ); | ||||
| util.list([ | ||||
|   tfDeployment, | ||||
|   base.tfservingConfig, | ||||
| ],) | ||||
|  | @ -0,0 +1,16 @@ | |||
| // @apiVersion 0.1 | ||||
| // @name io.ksonnet.pkg.tf-serving-service | ||||
| // @description TensorFlow serving | ||||
| // @shortDescription A TensorFlow serving model | ||||
| // @param name string Name to give to each of the components | ||||
| // @optionalParam serviceType string ClusterIP The k8s service type for tf serving. | ||||
| // @optionalParam modelName string null The model name | ||||
| // @optionalParam trafficRule string v1:100 The traffic rule, in the format of version:percentage,version:percentage,.. | ||||
| // @optionalParam injectIstio string false Whether to inject istio sidecar; should be true or false. | ||||
| // @optionalParam enablePrometheus string true Whether to enable prometheus endpoint (requires TF 1.11) | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| local tfservingService = import "kubeflow/tf-serving/tf-serving-service-template.libsonnet"; | ||||
| local util = import "kubeflow/tf-serving/util.libsonnet"; | ||||
| 
 | ||||
| tfservingService.new(env, params).all | ||||
|  | @ -0,0 +1,230 @@ | |||
| // @apiVersion 0.1 | ||||
| // @name io.ksonnet.pkg.tf-serving-request-log | ||||
| // @description tf-serving with request logging | ||||
| // @shortDescription tf-serving with request logging | ||||
| // @param name string Name to give to each of the components | ||||
| // @param gcpProject string The gcp project for Bigquery dataset | ||||
| // @param dataset string The Bigquery dataset | ||||
| // @param table string The Bigquery table | ||||
| // @optionalParam modelBasePath string gs://kubeflow-examples-data/mnist The model path | ||||
| // @optionalParam modelName string mnist The model name | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| 
 | ||||
| local namespace = "kubeflow"; | ||||
| local appName = import "param://name"; | ||||
| local image = "gcr.io/kubeflow-images-public/tf-model-server-cpu:v20180327-995786ec"; | ||||
| local httpProxyImage = "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723"; | ||||
| local loggingImage = "gcr.io/kubeflow-images-public/tf-model-server-request-logger:v20180723"; | ||||
| 
 | ||||
| local gcpSecretName = "user-gcp-sa"; | ||||
| 
 | ||||
| local service = { | ||||
|   apiVersion: "v1", | ||||
|   kind: "Service", | ||||
|   metadata: { | ||||
|     labels: { | ||||
|       app: appName, | ||||
|     }, | ||||
|     name: appName, | ||||
|     namespace: namespace, | ||||
|   }, | ||||
|   spec: { | ||||
|     ports: [ | ||||
|       { | ||||
|         name: "grpc-tf-serving", | ||||
|         port: 9000, | ||||
|         targetPort: 9000, | ||||
|       }, | ||||
|       { | ||||
|         name: "http-tf-serving-proxy", | ||||
|         port: 8000, | ||||
|         targetPort: 8000, | ||||
|       }, | ||||
|     ], | ||||
|     selector: { | ||||
|       app: appName, | ||||
|     }, | ||||
|     type: "ClusterIP", | ||||
|   }, | ||||
| }; | ||||
| 
 | ||||
| local configMap = { | ||||
|   apiVersion: "v1", | ||||
|   kind: "ConfigMap", | ||||
|   metadata: { | ||||
|     name: appName + "fluentd-config", | ||||
|     namespace: namespace, | ||||
|   }, | ||||
|   data: { | ||||
|     "fluent.conf": std.format(||| | ||||
|       <source> | ||||
|         @type tail | ||||
|         path /tmp/logs/request.log | ||||
|         pos_file /tmp/logs/request.log.pos | ||||
|         <parse> | ||||
|           @type json | ||||
|         </parse> | ||||
|         tag dummy | ||||
|       </source> | ||||
|       <match dummy> | ||||
|         @type bigquery_insert | ||||
|         auth_method application_default | ||||
|         project %s | ||||
|         dataset %s | ||||
|         table %s | ||||
|         fetch_schema true | ||||
|       </match> | ||||
|     |||, [params.gcpProject, params.dataset, params.table]), | ||||
|   }, | ||||
| }; | ||||
| 
 | ||||
| // Deployment that runs three containers in a single pod: | ||||
| //   * model-server: tensorflow_model_server serving gRPC on port 9000, | ||||
| //   * http-proxy:   HTTP front end on port 8000 that talks to the model server | ||||
| //                   over --rpc_port=9000 and is started with --log_request=true, | ||||
| //   * logging:      sidecar that mounts the request logs, the GCP credentials | ||||
| //                   secret and the fluentd configuration. | ||||
| // The http-proxy and logging containers share the "request-logs" emptyDir, | ||||
| // mounted at /tmp/logs in both. | ||||
| local deployment = { | ||||
|   apiVersion: "extensions/v1beta1", | ||||
|   kind: "Deployment", | ||||
|   metadata: { | ||||
|     labels: { | ||||
|       app: appName, | ||||
|     }, | ||||
|     name: appName, | ||||
|     namespace: namespace, | ||||
|   }, | ||||
|   spec: { | ||||
|     template: { | ||||
|       metadata: { | ||||
|         labels: { | ||||
|           app: appName, | ||||
|         }, | ||||
|       }, | ||||
|       spec: { | ||||
|         containers: [ | ||||
|           // ModelServer | ||||
|           { | ||||
|             args: [ | ||||
|               "/usr/bin/tensorflow_model_server", | ||||
|               "--port=9000", | ||||
|               "--model_name=" + params.modelName, | ||||
|               "--model_base_path=" + params.modelBasePath, | ||||
|             ], | ||||
|             image: image, | ||||
|             imagePullPolicy: "IfNotPresent", | ||||
|             name: "model-server", | ||||
|             ports: [ | ||||
|               { | ||||
|                 containerPort: 9000, | ||||
|               }, | ||||
|             ], | ||||
|             resources: { | ||||
|               limits: { | ||||
|                 cpu: "4", | ||||
|                 memory: "4Gi", | ||||
|               }, | ||||
|               requests: { | ||||
|                 cpu: "1", | ||||
|                 memory: "1Gi", | ||||
|               }, | ||||
|             }, | ||||
|           }, | ||||
|           // Http proxy | ||||
|           { | ||||
|             name: "http-proxy", | ||||
|             image: httpProxyImage, | ||||
|             imagePullPolicy: "Always", | ||||
|             command: [ | ||||
|               "python", | ||||
|               "/usr/src/app/server.py", | ||||
|               "--port=8000", | ||||
|               "--rpc_port=9000", | ||||
|               "--rpc_timeout=10.0", | ||||
|               "--log_request=true", | ||||
|             ], | ||||
|             env: [], | ||||
|             ports: [ | ||||
|               { | ||||
|                 containerPort: 8000, | ||||
|               }, | ||||
|             ], | ||||
|             resources: { | ||||
|               requests: { | ||||
|                 memory: "1Gi", | ||||
|                 cpu: "1", | ||||
|               }, | ||||
|               limits: { | ||||
|                 memory: "4Gi", | ||||
|                 cpu: "4", | ||||
|               }, | ||||
|             }, | ||||
|             // NOTE(review): fsGroup is normally a pod-level securityContext field; | ||||
|             // confirm it has the intended effect when set on a container. | ||||
|             securityContext: { | ||||
|               runAsUser: 1000, | ||||
|               fsGroup: 1000, | ||||
|             }, | ||||
|             volumeMounts: [ | ||||
|               { | ||||
|                 name: "request-logs", | ||||
|                 mountPath: "/tmp/logs", | ||||
|               }, | ||||
|             ], | ||||
|           }, | ||||
|           // TODO(lunkai): use admission controller to inject. | ||||
|           // Logging container. | ||||
|           { | ||||
|             name: "logging", | ||||
|             image: loggingImage, | ||||
|             imagePullPolicy: "Always", | ||||
|             env: [ | ||||
|               // Points at the key file inside the "gcp-credentials" mount below. | ||||
|               { name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json" }, | ||||
|             ], | ||||
|             resources: { | ||||
|               requests: { | ||||
|                 memory: "250Mi", | ||||
|                 cpu: "0.25", | ||||
|               }, | ||||
|               limits: { | ||||
|                 memory: "500Mi", | ||||
|                 cpu: "0.5", | ||||
|               }, | ||||
|             }, | ||||
|             volumeMounts: [ | ||||
|               { | ||||
|                 name: "request-logs", | ||||
|                 mountPath: "/tmp/logs", | ||||
|               }, | ||||
|               { | ||||
|                 name: "gcp-credentials", | ||||
|                 mountPath: "/secret/gcp-credentials", | ||||
|               }, | ||||
|               { | ||||
|                 name: "fluentd-config-volume", | ||||
|                 mountPath: "/fluentd/etc/custom", | ||||
|               }, | ||||
|             ], | ||||
|           }, | ||||
|         ], | ||||
|         volumes: [ | ||||
|           { | ||||
|             name: "gcp-credentials", | ||||
|             secret: { | ||||
|               secretName: gcpSecretName, | ||||
|             }, | ||||
|           }, | ||||
|           { | ||||
|             name: "request-logs", | ||||
|             emptyDir: {}, | ||||
|           }, | ||||
|           { | ||||
|             // Must match the name of the ConfigMap defined in this file, which is | ||||
|             // appName + "fluentd-config"; a bare "fluentd-config" does not refer to | ||||
|             // any ConfigMap created by this component. | ||||
|             configMap: { | ||||
|               name: appName + "fluentd-config", | ||||
|             }, | ||||
|             name: "fluentd-config-volume", | ||||
|           }, | ||||
|         ], | ||||
|       }, | ||||
|     }, | ||||
|   }, | ||||
| }; | ||||
| 
 | ||||
| k.core.v1.list.new([ | ||||
|   service, | ||||
|   deployment, | ||||
|   configMap, | ||||
| ]) | ||||
|  | @ -0,0 +1,112 @@ | |||
| local tfservingService = import "kubeflow/tf-serving/tf-serving-service-template.libsonnet"; | ||||
| local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet"; | ||||
| 
 | ||||
| local params = { | ||||
|   name: "m", | ||||
|   serviceType: "ClusterIP", | ||||
|   modelName: "mnist", | ||||
|   trafficRule: "v1:100", | ||||
|   injectIstio: false, | ||||
| }; | ||||
| 
 | ||||
| local istioParams = params { | ||||
|   injectIstio: true, | ||||
| }; | ||||
| 
 | ||||
| local env = { | ||||
|   namespace: "kubeflow", | ||||
| }; | ||||
| 
 | ||||
| local deploymentParam = { | ||||
|   name: "m", | ||||
|   modelName: "mnist", | ||||
|   versionName: "v1", | ||||
|   modelBasePath: "gs://abc", | ||||
|   numGpus: 0, | ||||
|   defaultCpuImage: "gcr.io/abc", | ||||
|   defaultGpuImage: "gcr.io/abc", | ||||
|   injectIstio: false, | ||||
|   enablePrometheus: true, | ||||
| }; | ||||
| 
 | ||||
| local gpuParam1 = { | ||||
|   name: "m", | ||||
|   modelName: "mnist", | ||||
|   versionName: "v1", | ||||
|   modelBasePath: "gs://abc", | ||||
|   numGpus: 1, | ||||
|   defaultCpuImage: "gcr.io/abc", | ||||
|   defaultGpuImage: "gcr.io/abc", | ||||
|   injectIstio: false, | ||||
|   enablePrometheus: true, | ||||
| }; | ||||
| 
 | ||||
| local gpuParamString0 = { | ||||
|   name: "m", | ||||
|   modelName: "mnist", | ||||
|   versionName: "v1", | ||||
|   modelBasePath: "gs://abc", | ||||
|   numGpus: "0", | ||||
|   defaultCpuImage: "gcr.io/abc", | ||||
|   defaultGpuImage: "gcr.io/abc", | ||||
|   injectIstio: false, | ||||
|   enablePrometheus: true, | ||||
| }; | ||||
| 
 | ||||
| local gpuParamString1 = { | ||||
|   name: "m", | ||||
|   modelName: "mnist", | ||||
|   versionName: "v1", | ||||
|   modelBasePath: "gs://abc", | ||||
|   numGpus: "1", | ||||
|   defaultCpuImage: "gcr.io/abc", | ||||
|   defaultGpuImage: "gcr.io/abc", | ||||
|   injectIstio: false, | ||||
|   enablePrometheus: true, | ||||
| }; | ||||
| 
 | ||||
| local serviceInstance = tfservingService.new(env, params); | ||||
| local istioServiceInstance = tfservingService.new(env, istioParams); | ||||
| 
 | ||||
| local deploymentInstance = tfserving.new(env, deploymentParam); | ||||
| 
 | ||||
| local gpuInstance = tfserving.new(env, gpuParam1); | ||||
| local gpuString0Instance = tfserving.new(env, gpuParamString0); | ||||
| local gpuString1Instance = tfserving.new(env, gpuParamString1); | ||||
| 
 | ||||
| // Every check in this chain either evaluates to true or aborts evaluation with | ||||
| // an error, so the file only evaluates to true when all expectations hold. | ||||
| // This one should only have tfService | ||||
| std.assertEqual( | ||||
|   std.length(serviceInstance.all.items), | ||||
|   1, | ||||
| ) && | ||||
| 
 | ||||
| // This one should have tfService, virtualService, and DestinationRule | ||||
| std.assertEqual( | ||||
|   std.length(istioServiceInstance.all.items), | ||||
|   3 | ||||
| ) && | ||||
| 
 | ||||
| // With enablePrometheus set, the fifth model-server argument must be the | ||||
| // monitoring config flag. The check is wrapped in std.assertEqual so that a | ||||
| // mismatch raises an error instead of silently short-circuiting the && chain | ||||
| // to a plain `false` and skipping the remaining assertions. | ||||
| std.assertEqual( | ||||
|   std.startsWith( | ||||
|     deploymentInstance.tfDeployment.spec.template.spec.containers[0].args[4], | ||||
|     "--monitoring_config_file" | ||||
|   ), | ||||
|   true | ||||
| ) && | ||||
| 
 | ||||
| // numGpus: 0 (number) -> no GPU limit is added. | ||||
| std.assertEqual( | ||||
|   deploymentInstance.tfDeployment.spec.template.spec.containers[0].resources.limits, | ||||
|   { cpu: "4", memory: "4Gi" } | ||||
| ) && | ||||
| 
 | ||||
| // numGpus: 1 (number) -> a "nvidia.com/gpu" limit of 1 is added. | ||||
| std.assertEqual( | ||||
|   gpuInstance.tfDeployment.spec.template.spec.containers[0].resources.limits, | ||||
|   { cpu: "4", memory: "4Gi", "nvidia.com/gpu": 1 } | ||||
| ) && | ||||
| 
 | ||||
| // numGpus: "0" (string) -> treated the same as the number 0. | ||||
| std.assertEqual( | ||||
|   gpuString0Instance.tfDeployment.spec.template.spec.containers[0].resources.limits, | ||||
|   { cpu: "4", memory: "4Gi" } | ||||
| ) && | ||||
| 
 | ||||
| // numGpus: "1" (string) -> parsed to the number 1 before being used as the limit. | ||||
| std.assertEqual( | ||||
|   gpuString1Instance.tfDeployment.spec.template.spec.containers[0].resources.limits, | ||||
|   { cpu: "4", memory: "4Gi", "nvidia.com/gpu": 1 } | ||||
| ) | ||||
|  | @ -0,0 +1,147 @@ | |||
| { | ||||
|   local k = import "k.libsonnet", | ||||
|   local util = import "kubeflow/tf-serving/util.libsonnet", | ||||
|   new(_env, _params):: { | ||||
|     local params = _params + _env, | ||||
|     local namespace = params.namespace, | ||||
|     local name = params.name, | ||||
|     local modelName = | ||||
|       if params.modelName == "null" then | ||||
|         params.name | ||||
|       else | ||||
|         params.modelName, | ||||
| 
 | ||||
|     local tfService = { | ||||
|       apiVersion: "v1", | ||||
|       kind: "Service", | ||||
|       metadata: { | ||||
|         labels: { | ||||
|           app: modelName, | ||||
|         }, | ||||
|         name: name, | ||||
|         namespace: namespace, | ||||
|         annotations: { | ||||
|           "getambassador.io/config": | ||||
|             std.join("\n", [ | ||||
|               "---", | ||||
|               "apiVersion: ambassador/v0", | ||||
|               "kind:  Mapping", | ||||
|               "name: tfserving-predict-mapping-" + modelName, | ||||
|               "prefix: /tfserving/models/" + modelName, | ||||
|               "rewrite: /v1/models/" + modelName + ":predict", | ||||
|               "method: POST", | ||||
|               "service: " + name + "." + namespace + ":8500", | ||||
|               "---", | ||||
|               "apiVersion: ambassador/v0", | ||||
|               "kind:  Mapping", | ||||
|               "name: tfserving-predict-mapping-" + modelName + "-get", | ||||
|               "prefix: /tfserving/models/" + modelName, | ||||
|               "rewrite: /v1/models/" + modelName, | ||||
|               "method: GET", | ||||
|               "service: " + name + "." + namespace + ":8500", | ||||
|             ]), | ||||
|         } + if util.toBool(params.enablePrometheus) then { | ||||
|           "prometheus.io/scrape": "true", | ||||
|           "prometheus.io/path": "/monitoring/prometheus/metrics", | ||||
|           "prometheus.io/port": "8500", | ||||
|         } else {},  //annotations | ||||
|       }, | ||||
|       spec: { | ||||
|         ports: [ | ||||
|           { | ||||
|             name: "grpc-tf-serving", | ||||
|             port: 9000, | ||||
|             targetPort: 9000, | ||||
|           }, | ||||
|           { | ||||
|             name: "http-tf-serving", | ||||
|             port: 8500, | ||||
|             targetPort: 8500, | ||||
|           }, | ||||
|         ], | ||||
|         selector: { | ||||
|           app: modelName, | ||||
|         }, | ||||
|         type: params.serviceType, | ||||
|       }, | ||||
|     },  // tfService | ||||
|     tfService:: tfService, | ||||
| 
 | ||||
|     local versionWeights = std.split(params.trafficRule, ","), | ||||
|     local virtualService = { | ||||
|       apiVersion: "networking.istio.io/v1alpha3", | ||||
|       kind: "VirtualService", | ||||
|       metadata: { | ||||
|         name: name, | ||||
|         namespace: namespace, | ||||
|       }, | ||||
|       spec: { | ||||
|         hosts: [ | ||||
|           "*", | ||||
|         ], | ||||
|         gateways: [ | ||||
|           "kubeflow-gateway", | ||||
|         ], | ||||
|         http: [ | ||||
|           { | ||||
|             match: [ | ||||
|               { | ||||
|                 uri: { | ||||
|                   prefix: "/istio/tfserving/models/" + modelName, | ||||
|                 }, | ||||
|                 method: { | ||||
|                   exact: "POST", | ||||
|                 }, | ||||
|               }, | ||||
|             ], | ||||
|             rewrite: { | ||||
|               uri: "/v1/models/" + modelName + ":predict", | ||||
|             }, | ||||
|             route: [ | ||||
|               { | ||||
|                 destination: { | ||||
|                   host: name, | ||||
|                   port: { | ||||
|                     number: 8500, | ||||
|                   }, | ||||
|                   subset: std.split(versionWeight, ":")[0], | ||||
|                 }, | ||||
|                 weight: std.parseInt(std.split(versionWeight, ":")[1]), | ||||
|               } | ||||
|               for versionWeight in versionWeights | ||||
|             ], | ||||
|           }, | ||||
|         ], | ||||
|       }, | ||||
|     }, | ||||
|     virtualService:: virtualService, | ||||
| 
 | ||||
|     local destinationRule = { | ||||
|       apiVersion: "networking.istio.io/v1alpha3", | ||||
|       kind: "DestinationRule", | ||||
|       metadata: { | ||||
|         name: name, | ||||
|         namespace: namespace, | ||||
|       }, | ||||
|       spec: { | ||||
|         host: name, | ||||
|         subsets: [ | ||||
|           { | ||||
|             name: std.split(versionWeight, ":")[0], | ||||
|             labels: { | ||||
|               version: std.split(versionWeight, ":")[0], | ||||
|             }, | ||||
|           } | ||||
|           for versionWeight in versionWeights | ||||
|         ], | ||||
|       }, | ||||
|     }, | ||||
|     destinationRule:: destinationRule, | ||||
|     all:: util.list([ | ||||
|       tfService, | ||||
|     ] + if util.toBool(params.injectIstio) then [ | ||||
|       virtualService, | ||||
|       destinationRule, | ||||
|     ] else []), | ||||
|   },  // new | ||||
| } | ||||
|  | @ -0,0 +1,137 @@ | |||
| { | ||||
|   local k = import "k.libsonnet", | ||||
|   local util = import "kubeflow/tf-serving/util.libsonnet", | ||||
|   new(_env, _params):: { | ||||
|     local params = _params + _env, | ||||
|     local namespace = params.namespace, | ||||
|     local name = params.name, | ||||
|     local modelName = | ||||
|       if params.modelName == "null" then | ||||
|         params.name | ||||
|       else | ||||
|         params.modelName, | ||||
|     local versionName = params.versionName, | ||||
|     local numGpus = | ||||
|       if std.type(params.numGpus) == "string" then | ||||
|         std.parseInt(params.numGpus) | ||||
|       else | ||||
|         params.numGpus, | ||||
|     local modelServerImage = | ||||
|       if numGpus == 0 then | ||||
|         params.defaultCpuImage | ||||
|       else | ||||
|         params.defaultGpuImage, | ||||
| 
 | ||||
|     // Optional features. | ||||
|     // TODO(lunkai): Add request logging | ||||
| 
 | ||||
|     local modelServerContainer = { | ||||
|       command: [ | ||||
|         "/usr/bin/tensorflow_model_server", | ||||
|       ], | ||||
|       args: [ | ||||
|         "--port=9000", | ||||
|         "--rest_api_port=8500", | ||||
|         "--model_name=" + modelName, | ||||
|         "--model_base_path=" + params.modelBasePath, | ||||
|       ] + if util.toBool(params.enablePrometheus) then [ | ||||
|         "--monitoring_config_file=/var/config/monitoring_config.txt", | ||||
|       ] else [], | ||||
|       image: modelServerImage, | ||||
|       imagePullPolicy: "IfNotPresent", | ||||
|       name: modelName, | ||||
|       ports: [ | ||||
|         { | ||||
|           containerPort: 9000, | ||||
|         }, | ||||
|         { | ||||
|           containerPort: 8500, | ||||
|         }, | ||||
|       ], | ||||
|       env: [], | ||||
|       resources: { | ||||
|         limits: { | ||||
|           cpu: "4", | ||||
|           memory: "4Gi", | ||||
|         } + if numGpus != 0 then { | ||||
|           "nvidia.com/gpu": numGpus, | ||||
|         } else {}, | ||||
|         requests: { | ||||
|           cpu: "1", | ||||
|           memory: "1Gi", | ||||
|         }, | ||||
|       }, | ||||
|       volumeMounts: [ | ||||
|         { | ||||
|           mountPath: "/var/config/", | ||||
|           name: "config-volume", | ||||
|         }, | ||||
|       ], | ||||
|       // TCP liveness probe on gRPC port | ||||
|       livenessProbe: { | ||||
|         tcpSocket: { | ||||
|           port: 9000, | ||||
|         }, | ||||
|         initialDelaySeconds: 30, | ||||
|         periodSeconds: 30, | ||||
|       }, | ||||
|     },  // modelServerContainer | ||||
| 
 | ||||
|     local tfDeployment = { | ||||
|       apiVersion: "extensions/v1beta1", | ||||
|       kind: "Deployment", | ||||
|       metadata: { | ||||
|         labels: { | ||||
|           app: modelName, | ||||
|         }, | ||||
|         name: name, | ||||
|         namespace: namespace, | ||||
|       }, | ||||
|       spec: { | ||||
|         template: { | ||||
|           metadata: { | ||||
|             labels: { | ||||
|               app: modelName, | ||||
|               version: versionName, | ||||
|             }, | ||||
|             annotations: { | ||||
|               "sidecar.istio.io/inject": if util.toBool(params.injectIstio) then "true", | ||||
|             }, | ||||
|           }, | ||||
|           spec: { | ||||
|             containers: [ | ||||
|               modelServerContainer, | ||||
|             ], | ||||
|             volumes: [ | ||||
|               { | ||||
|                 configMap: { | ||||
|                   name: name + "-config", | ||||
|                 }, | ||||
|                 name: "config-volume", | ||||
|               }, | ||||
|             ], | ||||
|           }, | ||||
|         }, | ||||
|       }, | ||||
|     },  // tfDeployment | ||||
|     tfDeployment:: tfDeployment, | ||||
| 
 | ||||
|     local tfservingConfig = { | ||||
|       apiVersion: "v1", | ||||
|       kind: "ConfigMap", | ||||
|       metadata: { | ||||
|         name: name + "-config", | ||||
|         namespace: namespace, | ||||
|       }, | ||||
|       data: { | ||||
|         "monitoring_config.txt": std.join("\n", [ | ||||
|           "prometheus_config: {", | ||||
|           "  enable: true,", | ||||
|           '  path: "/monitoring/prometheus/metrics"', | ||||
|           "}", | ||||
|         ]), | ||||
|       }, | ||||
|     },  // tfservingConfig | ||||
|     tfservingConfig:: tfservingConfig, | ||||
|   },  // new | ||||
| } | ||||
|  | @ -0,0 +1,380 @@ | |||
| { | ||||
|   util:: import "kubeflow/tf-serving/util.libsonnet", | ||||
| 
 | ||||
|   // Parameters are intended to be late bound. | ||||
|   params:: { | ||||
|     name: null, | ||||
|     numGpus: 0, | ||||
|     labels: { | ||||
|       app: $.params.name, | ||||
|     }, | ||||
|     modelName: $.params.name, | ||||
|     modelPath: null, | ||||
|     modelStorageType: "storageType", | ||||
| 
 | ||||
|     version: "v1", | ||||
|     firstVersion: true, | ||||
| 
 | ||||
|     deployIstio: false, | ||||
| 
 | ||||
|     deployHttpProxy: false, | ||||
|     httpProxyImage: "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180606-9dfda4f2", | ||||
| 
 | ||||
|     serviceType: "ClusterIP", | ||||
| 
 | ||||
|     // If users want to override the image they can override defaultCpuImage and/or defaultGpuImage | ||||
|     // in which case the image used will still depend on whether GPUs are used or not. | ||||
|     // Users can also override modelServerImage in which case the user supplied value will always be used | ||||
|     // regardless of numGpus. | ||||
|     defaultCpuImage: "tensorflow/serving:1.11.1", | ||||
|     defaultGpuImage: "tensorflow/serving:1.11.1-gpu", | ||||
|     modelServerImage: if $.params.numGpus == 0 then | ||||
|       $.params.defaultCpuImage | ||||
|     else | ||||
|       $.params.defaultGpuImage, | ||||
| 
 | ||||
| 
 | ||||
|     // Whether or not to enable s3 parameters | ||||
|     s3Enable:: false, | ||||
| 
 | ||||
|     // Which storageType to use | ||||
|     storageType:: null, | ||||
|   }, | ||||
| 
 | ||||
|   // Parameters specific to GCP. | ||||
|   gcpParams:: { | ||||
|     gcpCredentialSecretName: "", | ||||
|   } + $.params, | ||||
| 
 | ||||
|   // Parameters that control S3 access. | ||||
|   // The defaults below are combined as `{ defaults } + $.params`, so any field that is | ||||
|   // also present in params (which the user can override) takes precedence over the | ||||
|   // S3 default declared here. | ||||
|   s3params:: { | ||||
|     // Name of the k8s secret containing S3 credentials. | ||||
|     s3SecretName: "", | ||||
|     // Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID. | ||||
|     s3SecretAccesskeyidKeyName: "AWS_ACCESS_KEY_ID", | ||||
| 
 | ||||
|     // Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY. | ||||
|     s3SecretSecretaccesskeyKeyName: "AWS_SECRET_ACCESS_KEY", | ||||
| 
 | ||||
|     // S3 region | ||||
|     s3AwsRegion: "us-west-1", | ||||
| 
 | ||||
|     // TODO(jlewi): We should use util.toBool to automatically convert to actual boolean values. | ||||
|     // The use of strings is left over from when they were prototype parameters which only supports string type. | ||||
| 
 | ||||
|     // Whether or not to use https for S3 connections | ||||
|     s3UseHttps: "true", | ||||
| 
 | ||||
|     // Whether or not to verify https certificates for S3 connections | ||||
|     s3VerifySsl: "true", | ||||
| 
 | ||||
|     // URL for your s3-compatible endpoint. | ||||
|     // The previous default ended in a stray "," inside the string, which made the | ||||
|     // endpoint value invalid; the comma has been removed. | ||||
|     // NOTE(review): the default carries an "http://" scheme while s3UseHttps defaults | ||||
|     // to "true" -- confirm which form the consumer of S3_ENDPOINT expects. | ||||
|     s3Endpoint: "http://s3.us-west-1.amazonaws.com", | ||||
|   } + $.params, | ||||
| 
 | ||||
| 
 | ||||
|   components:: { | ||||
| 
 | ||||
|     all:: [ | ||||
|             // Default routing rule for the first version of model. | ||||
|             if $.util.toBool($.params.deployIstio) && $.util.toBool($.params.firstVersion) then | ||||
|               $.parts.defaultRouteRule, | ||||
|           ] + | ||||
|           // TODO(jlewi): It would be better to structure s3 as a mixin. | ||||
|           // As an example it would be great to allow S3 and GCS parameters | ||||
|           // to be enabled simultaneously. This should be doable because | ||||
|           // each entails adding a set of environment variables and volumes | ||||
|           // to the containers. These volumes/environment variables shouldn't | ||||
|           // overlap so there's no reason we shouldn't be able to just add | ||||
|           // both modifications to the base container. | ||||
|           // I think we want to restructure things as mixins so they can just | ||||
|           // be added. | ||||
|           if $.params.s3Enable then | ||||
|             [ | ||||
|               $.s3parts.tfService, | ||||
|               $.s3parts.tfDeployment, | ||||
|             ] | ||||
|           else if $.params.storageType == "gcp" then | ||||
|             [ | ||||
|               $.gcpParts.tfService, | ||||
|               $.gcpParts.tfDeployment, | ||||
|             ] | ||||
|           else | ||||
|             [ | ||||
|               $.parts.tfService, | ||||
|               $.parts.tfDeployment, | ||||
|             ], | ||||
|   }.all, | ||||
| 
 | ||||
|   parts:: { | ||||
|     // We define the containers one level beneath parts because combined with jsonnet late binding | ||||
|     // this makes it easy for users to override specific bits of the container. | ||||
|     tfServingContainerBase:: { | ||||
|       name: $.params.name, | ||||
|       image: $.params.modelServerImage, | ||||
|       imagePullPolicy: "IfNotPresent", | ||||
|       command: [ | ||||
|         "/usr/bin/tensorflow_model_server", | ||||
|       ], | ||||
|       args: [ | ||||
|         "--port=9000", | ||||
|         "--model_name=" + $.params.modelName, | ||||
|         "--model_base_path=" + $.params.modelPath, | ||||
|       ], | ||||
|       ports: [ | ||||
|         { | ||||
|           containerPort: 9000, | ||||
|         }, | ||||
|       ], | ||||
|       // TODO(jlewi): We should add readiness and liveness probes. I think the blocker is that | ||||
|       // model-server doesn't have something we can use out of the box. | ||||
|       resources: { | ||||
|         requests: { | ||||
|           memory: "1Gi", | ||||
|           cpu: "1", | ||||
|         }, | ||||
|         limits: { | ||||
|           memory: "4Gi", | ||||
|           cpu: "4", | ||||
|         }, | ||||
|       }, | ||||
|       // The user and group should be defined in the Docker image. | ||||
|       // Per best practices we don't run as the root user. | ||||
|       securityContext: { | ||||
|         runAsUser: 1000, | ||||
|         fsGroup: 1000, | ||||
|       }, | ||||
|       volumeMounts+: if $.params.modelStorageType == "nfs" then [{ | ||||
|         name: "nfs", | ||||
|         mountPath: "/mnt", | ||||
|       }] | ||||
|       else [], | ||||
|     },  // tfServingContainer | ||||
| 
 | ||||
|     tfServingContainer+: $.parts.tfServingContainerBase + | ||||
|                          if $.params.numGpus > 0 then | ||||
|                            { | ||||
|                              resources+: { | ||||
|                                limits+: { | ||||
|                                  "nvidia.com/gpu": $.params.numGpus, | ||||
|                                }, | ||||
|                              }, | ||||
|                            } | ||||
|                          else {}, | ||||
| 
 | ||||
|     tfServingMetadata+: { | ||||
|       labels: $.params.labels { version: $.params.version }, | ||||
|       annotations: { | ||||
|         "sidecar.istio.io/inject": if $.util.toBool($.params.deployIstio) then "true", | ||||
|       }, | ||||
|     }, | ||||
| 
 | ||||
|     httpProxyContainer:: { | ||||
|       name: $.params.name + "-http-proxy", | ||||
|       image: $.params.httpProxyImage, | ||||
|       imagePullPolicy: "IfNotPresent", | ||||
|       command: [ | ||||
|         "python", | ||||
|         "/usr/src/app/server.py", | ||||
|         "--port=8000", | ||||
|         "--rpc_port=9000", | ||||
|         "--rpc_timeout=10.0", | ||||
|       ], | ||||
|       env: [], | ||||
|       ports: [ | ||||
|         { | ||||
|           containerPort: 8000, | ||||
|         }, | ||||
|       ], | ||||
|       resources: { | ||||
|         requests: { | ||||
|           memory: "500Mi", | ||||
|           cpu: "0.5", | ||||
|         }, | ||||
|         limits: { | ||||
|           memory: "1Gi", | ||||
|           cpu: "1", | ||||
|         }, | ||||
|       }, | ||||
|       securityContext: { | ||||
|         runAsUser: 1000, | ||||
|         fsGroup: 1000, | ||||
|       }, | ||||
|     },  // httpProxyContainer | ||||
| 
 | ||||
| 
 | ||||
|     tfDeployment: { | ||||
|       apiVersion: "extensions/v1beta1", | ||||
|       kind: "Deployment", | ||||
|       metadata: { | ||||
|         name: $.params.name + "-" + $.params.version, | ||||
|         namespace: $.params.namespace, | ||||
|         labels: $.params.labels, | ||||
|       }, | ||||
|       spec: { | ||||
|         template: { | ||||
|           metadata: $.parts.tfServingMetadata, | ||||
|           spec: { | ||||
|             containers: [ | ||||
|               $.parts.tfServingContainer, | ||||
|               if $.util.toBool($.params.deployHttpProxy) then | ||||
|                 $.parts.httpProxyContainer, | ||||
|             ], | ||||
|             volumes+: if $.params.modelStorageType == "nfs" then | ||||
|               [{ | ||||
|                 name: "nfs", | ||||
|                 persistentVolumeClaim: { | ||||
|                   claimName: $.params.nfsPVC, | ||||
|                 }, | ||||
|               }] | ||||
|             else [], | ||||
|           }, | ||||
|         }, | ||||
|       }, | ||||
|     },  // tfDeployment | ||||
| 
 | ||||
|     tfService: { | ||||
|       apiVersion: "v1", | ||||
|       kind: "Service", | ||||
|       metadata: { | ||||
|         labels: $.params.labels, | ||||
|         name: $.params.name, | ||||
|         namespace: $.params.namespace, | ||||
|         annotations: { | ||||
|           "getambassador.io/config": | ||||
|             std.join("\n", [ | ||||
|               "---", | ||||
|               "apiVersion: ambassador/v0", | ||||
|               "kind:  Mapping", | ||||
|               "name: tfserving-mapping-" + $.params.name + "-get", | ||||
|               "prefix: /models/" + $.params.name + "/", | ||||
|               "rewrite: /", | ||||
|               "method: GET", | ||||
|               "service: " + $.params.name + "." + $.params.namespace + ":8000", | ||||
|               "---", | ||||
|               "apiVersion: ambassador/v0", | ||||
|               "kind:  Mapping", | ||||
|               "name: tfserving-mapping-" + $.params.name + "-post", | ||||
|               "prefix: /models/" + $.params.name + "/", | ||||
|               "rewrite: /model/" + $.params.name + ":predict", | ||||
|               "method: POST", | ||||
|               "service: " + $.params.name + "." + $.params.namespace + ":8000", | ||||
|             ]), | ||||
|         },  //annotations | ||||
|       }, | ||||
|       spec: { | ||||
|         ports: [ | ||||
|           { | ||||
|             name: "grpc-tf-serving", | ||||
|             port: 9000, | ||||
|             targetPort: 9000, | ||||
|           }, | ||||
|           { | ||||
|             name: "http-tf-serving-proxy", | ||||
|             port: 8000, | ||||
|             targetPort: 8000, | ||||
|           }, | ||||
|         ], | ||||
|         selector: $.params.labels, | ||||
|         type: $.params.serviceType, | ||||
|       }, | ||||
|     },  // tfService | ||||
| 
 | ||||
|     // RouteRule (config.istio.io/v1alpha2) named "<name>-default" in $.params.namespace. | ||||
|     // Its destination is $.params.name and its single route selects the labels | ||||
|     // { version: $.params.version }; precedence is 0. | ||||
|     defaultRouteRule: { | ||||
|       apiVersion: "config.istio.io/v1alpha2", | ||||
|       kind: "RouteRule", | ||||
|       metadata: { | ||||
|         name: $.params.name + "-default", | ||||
|         namespace: $.params.namespace, | ||||
|       }, | ||||
|       spec: { | ||||
|         destination: { | ||||
|           name: $.params.name, | ||||
|         }, | ||||
|         precedence: 0, | ||||
|         route: [ | ||||
|           { | ||||
|             labels: { version: $.params.version }, | ||||
|           }, | ||||
|         ], | ||||
|       }, | ||||
|     }, | ||||
| 
 | ||||
|   },  // parts | ||||
| 
 | ||||
|   // Parts specific to S3 | ||||
|   // Extends $.parts: the serving container gets the S3-related environment variables | ||||
|   // and the deployment is rebuilt around that container. | ||||
|   s3parts:: $.parts { | ||||
|     // Environment variables derived from $.s3params. The two AWS key variables are | ||||
|     // read from the secret $.s3params.s3SecretName via secretKeyRef; the rest are literal values. | ||||
|     s3Env:: [ | ||||
|       { name: "AWS_ACCESS_KEY_ID", valueFrom: { secretKeyRef: { name: $.s3params.s3SecretName, key: $.s3params.s3SecretAccesskeyidKeyName } } }, | ||||
|       { name: "AWS_SECRET_ACCESS_KEY", valueFrom: { secretKeyRef: { name: $.s3params.s3SecretName, key: $.s3params.s3SecretSecretaccesskeyKeyName } } }, | ||||
|       { name: "AWS_REGION", value: $.s3params.s3AwsRegion }, | ||||
|       { name: "S3_REGION", value: $.s3params.s3AwsRegion }, | ||||
|       { name: "S3_USE_HTTPS", value: $.s3params.s3UseHttps }, | ||||
|       { name: "S3_VERIFY_SSL", value: $.s3params.s3VerifySsl }, | ||||
|       { name: "S3_ENDPOINT", value: $.s3params.s3Endpoint }, | ||||
|     ], | ||||
| 
 | ||||
|     // Base serving container with the S3 environment appended to its env list. | ||||
|     tfServingContainer: $.parts.tfServingContainer { | ||||
|       env+: $.s3parts.s3Env, | ||||
|     }, | ||||
| 
 | ||||
|     // Base deployment with the pod template's containers replaced by the S3 variants. | ||||
|     // The nested fields use `field+:` so they are merged into the inherited | ||||
|     // spec/template (the same form gcpParts uses). Writing `spec: +{ ... }` instead | ||||
|     // defines a plain field whose value is unary plus applied to an object, which | ||||
|     // does not inherit from the base deployment. | ||||
|     tfDeployment: $.parts.tfDeployment { | ||||
|       spec+: { | ||||
|         template+: { | ||||
|           metadata: $.parts.tfServingMetadata, | ||||
|           spec+: { | ||||
|             containers: [ | ||||
|               $.s3parts.tfServingContainer, | ||||
|               // The HTTP proxy sidecar is only included when deployHttpProxy is truthy. | ||||
|               if $.util.toBool($.params.deployHttpProxy) then | ||||
|                 $.parts.httpProxyContainer, | ||||
|             ], | ||||
|           }, | ||||
|         }, | ||||
|       }, | ||||
|     },  // tfDeployment | ||||
|   },  // s3parts | ||||
| 
 | ||||
|   // Parts specific to GCP | ||||
|   // Extends $.parts. When $.gcpParams.gcpCredentialSecretName is non-empty, the secret is | ||||
|   // mounted into the serving container and GOOGLE_APPLICATION_CREDENTIALS points at it. | ||||
|   gcpParts:: $.parts { | ||||
|     // Extra environment for the serving container. The `if` has no `else`, so the | ||||
|     // element is null when no credential secret name is configured. | ||||
|     // NOTE(review): the file name user-gcp-sa.json is fixed here regardless of the | ||||
|     // secret name; presumably the secret must contain a key with that name - confirm. | ||||
|     gcpEnv:: [ | ||||
|       if $.gcpParams.gcpCredentialSecretName != "" then | ||||
|         { name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/user-gcp-sa.json" }, | ||||
|     ], | ||||
| 
 | ||||
|     // Base serving container plus the GCP environment and the credentials volume mount. | ||||
|     tfServingContainer: $.parts.tfServingContainer { | ||||
|       env+: $.gcpParts.gcpEnv, | ||||
|       volumeMounts+: [ | ||||
|         if $.gcpParams.gcpCredentialSecretName != "" then | ||||
|           { | ||||
|             name: "gcp-credentials", | ||||
|             mountPath: "/secret/gcp-credentials", | ||||
|           }, | ||||
|       ], | ||||
|     }, | ||||
| 
 | ||||
|     // Base deployment merged (`+:`) with the GCP container list and the secret-backed | ||||
|     // "gcp-credentials" volume that the mount above refers to. | ||||
|     tfDeployment: $.parts.tfDeployment { | ||||
|       spec+: { | ||||
|         template+: { | ||||
|           metadata: $.parts.tfServingMetadata, | ||||
|           spec+: { | ||||
|             containers: [ | ||||
|               $.gcpParts.tfServingContainer, | ||||
|               if $.util.toBool($.params.deployHttpProxy) then | ||||
|                 $.parts.httpProxyContainer, | ||||
|             ], | ||||
|             volumes: [ | ||||
|               if $.gcpParams.gcpCredentialSecretName != "" then | ||||
|                 { | ||||
|                   name: "gcp-credentials", | ||||
|                   secret: { | ||||
|                     secretName: $.gcpParams.gcpCredentialSecretName, | ||||
|                   }, | ||||
|                 }, | ||||
|             ], | ||||
|           }, | ||||
|         }, | ||||
|       }, | ||||
|     },  // tfDeployment | ||||
|   },  // gcpParts | ||||
| } | ||||
							
								
								
									
										21
									
								
								mnist/ks_app/vendor/kubeflow/tf-serving@fed535eaa276220e4edf59530c0629f4375a40a9/util.libsonnet
								
								
								
									vendored
								
								
									Normal file
								
							
							
						
						
									
										21
									
								
								mnist/ks_app/vendor/kubeflow/tf-serving@fed535eaa276220e4edf59530c0629f4375a40a9/util.libsonnet
								
								
								
									vendored
								
								
									Normal file
								
							|  | @ -0,0 +1,21 @@ | |||
| // Some useful routines. | ||||
| { | ||||
|   local k = import "k.libsonnet", | ||||
| 
 | ||||
|   // Convert non-boolean types like string,number to a boolean. | ||||
|   // This is primarily intended for dealing with parameters that should be booleans. | ||||
|   // | ||||
|   // Conversion rules: | ||||
|   //   boolean -> returned unchanged | ||||
|   //   string  -> true only if std.asciiUpper(x) equals "TRUE" (so "true" and "True" also match) | ||||
|   //   number  -> true for any value other than 0 | ||||
|   //   other   -> false | ||||
|   toBool:: function(x) { | ||||
|     // Hidden field holding the converted value; the function returns it via `.result` below. | ||||
|     result:: | ||||
|       if std.type(x) == "boolean" then | ||||
|         x | ||||
|       else if std.type(x) == "string" then | ||||
|         std.asciiUpper(x) == "TRUE" | ||||
|       else if std.type(x) == "number" then | ||||
|         x != 0 | ||||
|       else | ||||
|         false, | ||||
|   }.result, | ||||
| 
 | ||||
|   // Produce a list of manifests. obj must be an array | ||||
|   // Delegates to k.core.v1.list.new from the imported k.libsonnet. | ||||
|   list(obj):: k.core.v1.list.new(obj,), | ||||
| } | ||||
|  | @ -21,6 +21,7 @@ from __future__ import absolute_import | |||
| from __future__ import division | ||||
| from __future__ import print_function | ||||
| 
 | ||||
| import json | ||||
| import os | ||||
| import sys | ||||
| import numpy as np | ||||
|  | @ -126,6 +127,22 @@ def linear_serving_input_receiver_fn(): | |||
| def main(_): | ||||
|   tf.logging.set_verbosity(tf.logging.INFO) | ||||
| 
 | ||||
|   tf_config = os.environ.get('TF_CONFIG', '{}') | ||||
|   tf.logging.info("TF_CONFIG %s", tf_config) | ||||
|   tf_config_json = json.loads(tf_config) | ||||
|   cluster = tf_config_json.get('cluster') | ||||
|   job_name = tf_config_json.get('task', {}).get('type') | ||||
|   task_index = tf_config_json.get('task', {}).get('index') | ||||
|   tf.logging.info("cluster=%s job_name=%s task_index=%s", cluster, job_name, | ||||
|                   task_index) | ||||
| 
 | ||||
|   is_chief = False | ||||
|   if not job_name or job_name.lower() in ["chief", "master"]: | ||||
|     is_chief = True | ||||
|     tf.logging.info("Will export model") | ||||
|   else: | ||||
|     tf.logging.info("Will not export model") | ||||
| 
 | ||||
|   # Download and load MNIST dataset. | ||||
|   mnist = tf.contrib.learn.datasets.DATASETS['mnist'](TF_DATA_DIR) | ||||
|   train_input_fn = tf.estimator.inputs.numpy_input_fn( | ||||
|  | @ -151,6 +168,8 @@ def main(_): | |||
|     classifier = tf.estimator.LinearClassifier( | ||||
|         feature_columns=feature_columns, n_classes=N_DIGITS, | ||||
|         model_dir=TF_MODEL_DIR, config=training_config) | ||||
|     # TODO(jlewi): Should it be linear_serving_input_receiver_fn here? | ||||
|     serving_fn = cnn_serving_input_receiver_fn | ||||
|     export_final = tf.estimator.FinalExporter( | ||||
|         TF_EXPORT_DIR, serving_input_receiver_fn=cnn_serving_input_receiver_fn) | ||||
| 
 | ||||
|  | @ -158,6 +177,7 @@ def main(_): | |||
|     # Convolutional network | ||||
|     classifier = tf.estimator.Estimator( | ||||
|         model_fn=conv_model, model_dir=TF_MODEL_DIR, config=training_config) | ||||
|     serving_fn = cnn_serving_input_receiver_fn | ||||
|     export_final = tf.estimator.FinalExporter( | ||||
|         TF_EXPORT_DIR, serving_input_receiver_fn=cnn_serving_input_receiver_fn) | ||||
|   else: | ||||
|  | @ -171,7 +191,14 @@ def main(_): | |||
|                                       exporters=export_final, | ||||
|                                       throttle_secs=1, | ||||
|                                       start_delay_secs=1) | ||||
|   print("Train and evaluate") | ||||
|   tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec) | ||||
|   print("Training done") | ||||
| 
 | ||||
|   if is_chief: | ||||
|     print("Export saved model") | ||||
|     classifier.export_savedmodel(TF_EXPORT_DIR, serving_input_receiver_fn=serving_fn) | ||||
|     print("Done exporting the model") | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|   tf.app.run() | ||||
|  |  | |||
|  | @ -0,0 +1,76 @@ | |||
| """Test deploying the mnist model. | ||||
| 
 | ||||
| This file tests that we can deploy the model. | ||||
| 
 | ||||
| TODO(jlewi): Test that we can send predictions to the model. | ||||
| 
 | ||||
| It is an integration test as it depends on having access to | ||||
| a Kubeflow deployment to deploy on. It also depends on having a model. | ||||
| 
 | ||||
| Python Path Requirements: | ||||
|   kubeflow/testing/py - https://github.com/kubeflow/testing/tree/master/py | ||||
|      * Provides utilities for testing | ||||
| 
 | ||||
| Manually running the test | ||||
|  1. Configure your KUBECONFIG file to point to the desired cluster | ||||
|  2. Set --params=name=${NAME},namespace=${NAMESPACE} | ||||
|     * name should be the name for your job | ||||
|     * namespace should be the namespace to use | ||||
|  3. Use the modelBasePath parameter to specify the model to test. | ||||
|      --params=...,modelBasePath=${MODEL_BASE_PATH} | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| import logging | ||||
| import os | ||||
| 
 | ||||
| from kubernetes import client as k8s_client | ||||
| from py import test_runner | ||||
| 
 | ||||
| from kubeflow.testing import ks_util | ||||
| from kubeflow.testing import test_util | ||||
| from kubeflow.testing import util | ||||
| 
 | ||||
| class MnistDeployTest(test_util.TestCase): | ||||
|   """Integration test that applies the mnist serving components and waits for the deployment.""" | ||||
|   def __init__(self, args): | ||||
|     """Read the runtime parameters and locate the ksonnet app. | ||||
|  | ||||
|     Args: | ||||
|       args: Parsed command line arguments; this method reads args.app_dir and | ||||
|         args.params and passes args to test_runner.parse_runtime_params. | ||||
|     """ | ||||
|     namespace, name, env = test_runner.parse_runtime_params(args) | ||||
|     self.app_dir = args.app_dir | ||||
| 
 | ||||
|     # Fall back to the ks_app directory that sits next to this file's parent directory. | ||||
|     if not self.app_dir: | ||||
|       self.app_dir = os.path.join(os.path.dirname(__file__), "..", | ||||
|                                   "ks_app") | ||||
|       self.app_dir = os.path.abspath(self.app_dir) | ||||
|       logging.info("--app_dir not set defaulting to: %s", self.app_dir) | ||||
| 
 | ||||
|     self.env = env | ||||
|     self.namespace = namespace | ||||
|     self.params = args.params | ||||
|     self.ks_cmd = ks_util.get_ksonnet_cmd(self.app_dir) | ||||
|     # NOTE(review): self.name is used in test_serve but never assigned here; presumably | ||||
|     # the base class stores the name passed below - confirm. | ||||
|     super(MnistDeployTest, self).__init__(class_name="MnistDeployTest", | ||||
|                                           name=name) | ||||
| 
 | ||||
|   def test_serve(self): | ||||
|     """Apply the serving components with ksonnet and wait for the deployment.""" | ||||
|     # Each component is set up and applied once, in order; afterwards we wait | ||||
|     # (up to 4 minutes) for the deployment named self.name. | ||||
|     api_client = k8s_client.ApiClient() | ||||
| 
 | ||||
|     # Apply the components | ||||
|     for component in ["mnist-deploy-gcp", "mnist-service"]: | ||||
|       # Setup the ksonnet app | ||||
|       ks_util.setup_ks_app(self.app_dir, self.env, self.namespace, component, | ||||
|                            self.params) | ||||
| 
 | ||||
|       util.run([self.ks_cmd, "apply", self.env, "-c", component], | ||||
|                cwd=self.app_dir) | ||||
| 
 | ||||
|       # Logged once per applied component, including the service component. | ||||
|       logging.info("Created deployment %s in namespaces %s", self.name, self.namespace) | ||||
| 
 | ||||
|     util.wait_for_deployment(api_client, self.namespace, self.name, | ||||
|                              timeout_minutes=4) | ||||
| 
 | ||||
|     # We don't delete the resources. We depend on the namespace being | ||||
|     # garbage collected. | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|   test_runner.main(module=__name__) | ||||
|  | @ -0,0 +1,33 @@ | |||
| #!/bin/bash | ||||
| # | ||||
| # A simple script to copy a secret from one namespace to another. | ||||
| # | ||||
| # Usage | ||||
| # copy_secret <source namespace> <dest namespace> <secret name> | ||||
| # | ||||
| # Only the key "<secret name>.json" of the source secret is copied; the | ||||
| # destination secret is created with that single key. | ||||
| # | ||||
| # Exits non-zero if an argument is missing, if kubectl or base64 fails, or if | ||||
| # the source secret has no data under "<secret name>.json". | ||||
| set -e | ||||
| # Without pipefail a failing "kubectl get" would be hidden by the exit status of base64. | ||||
| set -o pipefail | ||||
| SOURCE=$1 | ||||
| DEST=$2 | ||||
| NAME=$3 | ||||
| 
 | ||||
| usage() { | ||||
| 	echo copy_secret "<source namespace> <dest namespace> <secret name>" | ||||
| } | ||||
| 
 | ||||
| # All three arguments are required. The expansions are quoted so that values | ||||
| # containing whitespace cannot break the test expression. | ||||
| if [ -z "${SOURCE}" ] || [ -z "${DEST}" ] || [ -z "${NAME}" ]; then | ||||
| 	usage | ||||
| 	exit 1 | ||||
| fi | ||||
| 
 | ||||
| echo getting secret | ||||
| # Read the secret named by the third argument (it used to be hard-coded to | ||||
| # user-gcp-sa, which only worked when that was also the requested name). | ||||
| SECRET=$(kubectl -n "${SOURCE}" get secrets "${NAME}" -o jsonpath="{.data.${NAME}\.json}" | base64 -d) | ||||
| 
 | ||||
| # kubectl prints nothing (and still succeeds) when the key is absent; do not | ||||
| # create an empty secret in that case. | ||||
| if [ -z "${SECRET}" ]; then | ||||
| 	echo "secret ${NAME} in namespace ${SOURCE} has no data for key ${NAME}.json" >&2 | ||||
| 	exit 1 | ||||
| fi | ||||
| 
 | ||||
| kubectl create -n "${DEST}" secret generic "${NAME}" --from-literal="${NAME}.json=${SECRET}" | ||||
|  | @ -25,6 +25,12 @@ local defaultParams = { | |||
|   // The bucket where the model should be written | ||||
|   // This needs to be writable by the GCP service account in the Kubeflow cluster (not the test cluster) | ||||
|   modelBucket: "kubeflow-ci_temp", | ||||
| 
 | ||||
|   // Whether to delete the namespace at the end. | ||||
|   // Leaving the namespace around can be useful for debugging. | ||||
|   // | ||||
|   // TODO(jlewi): We should consider running a cronjob to GC old namespaces. | ||||
|   deleteNamespace: false, | ||||
| }; | ||||
| 
 | ||||
| local params = defaultParams + overrides; | ||||
|  | @ -77,6 +83,9 @@ local modelDir = "gs://" + params.modelBucket + "/mnist/models/" + prowDict["BUI | |||
| // value of KUBECONFIG environment variable. This should be  a full path. | ||||
| local kubeConfig = testDir + "/.kube/kubeconfig"; | ||||
| 
 | ||||
| // Namespace where tests should run | ||||
| local testNamespace = "mnist-" + prowDict["BUILD_ID"]; | ||||
| 
 | ||||
| // Build template is a template for constructing Argo step templates. | ||||
| // | ||||
| // step_name: Name for the template | ||||
|  | @ -233,10 +242,48 @@ local dagTemplates = [ | |||
|         params.kfCluster, | ||||
|       ]] | ||||
|       ), | ||||
|       workingDir: srcDir + "/github_issue_summarization", | ||||
|     }, | ||||
|     dependencies: ["checkout"], | ||||
|   }, // get-kubeconfig | ||||
|   { | ||||
|     // Create the namespace | ||||
|     // TODO(jlewi): We should add some sort of retry. | ||||
|     template: buildTemplate { | ||||
|       name: "create-namespace", | ||||
|       command: util.buildCommand([ | ||||
|       [ | ||||
|         "echo", | ||||
|         "KUBECONFIG=", | ||||
|         "${KUBECONFIG}", | ||||
|       ], | ||||
|       [ | ||||
|         "gcloud", | ||||
|         "auth", | ||||
|         "activate-service-account", | ||||
|         "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}", | ||||
|       ], | ||||
|       [ | ||||
|         "kubectl", | ||||
|         "config" , | ||||
|         "current-context", | ||||
|       ], | ||||
|       [ | ||||
|         "kubectl", | ||||
|         "create", | ||||
|         "namespace", | ||||
|         testNamespace, | ||||
|       ], | ||||
|       # Copy the GCP secret from the kubeflow namespace to the test namespace | ||||
|       [ | ||||
|         srcDir + "/test/copy_secret.sh", | ||||
|         "kubeflow", | ||||
|         testNamespace, | ||||
|         "user-gcp-sa", | ||||
|       ]] | ||||
|       ), | ||||
|     }, | ||||
|     dependencies: ["get-kubeconfig"], | ||||
|   }, // create-namespace | ||||
|   { | ||||
|     // Run the python test for TFJob | ||||
|     template: buildTemplate { | ||||
|  | @ -247,7 +294,7 @@ local dagTemplates = [ | |||
|         "--artifacts_path=" + artifactsDir, | ||||
|         "--params=" + std.join(",", [ | ||||
|           "name=mnist-test-" + prowDict["BUILD_ID"],  | ||||
|           "namespace=kubeflow", | ||||
|           "namespace=" + testNamespace, | ||||
|           "numTrainSteps=10", | ||||
|           "batchSize=10", | ||||
|           "image=" + trainerImage, | ||||
|  | @ -260,8 +307,25 @@ local dagTemplates = [ | |||
|       ])], | ||||
|       workingDir: srcDir + "/mnist/testing", | ||||
|     }, | ||||
|     dependencies: ["build-images", "get-kubeconfig"], | ||||
|     dependencies: ["build-images", "create-namespace"], | ||||
|   },  // tfjob-test | ||||
|   { | ||||
|     // Run the python test that deploys the model (deploy_test.py) | ||||
|     template: buildTemplate { | ||||
|       name: "deploy-test", | ||||
|       command: [ | ||||
|         "python", | ||||
|         "deploy_test.py",         | ||||
|         "--params=" + std.join(",", [ | ||||
|           "name=mnist-test-" + prowDict["BUILD_ID"],  | ||||
|           "namespace=" + testNamespace,           | ||||
|           "modelBasePath=" + modelDir  + "/export", | ||||
|           "exportDir=" + modelDir, | ||||
|       ])], | ||||
|       workingDir: srcDir + "/mnist/testing", | ||||
|     }, | ||||
|     dependencies: ["tfjob-test"], | ||||
|   },  // deploy-test | ||||
|   // TODO(jlewi): We should add a non-distributed test that just uses the default values. | ||||
| ]; | ||||
| 
 | ||||
|  | @ -277,8 +341,35 @@ local dag = { | |||
| 
 | ||||
| // Define templates for the steps to be performed when the | ||||
| // test exits | ||||
| 
 | ||||
| // Exit-handler step that deletes the per-run test namespace. | ||||
| // Evaluates to an empty list unless params.deleteNamespace is true. | ||||
| local deleteTemplates = if params.deleteNamespace then | ||||
|  [ | ||||
|     { | ||||
|       // Delete the namespace | ||||
|       // TODO(jlewi): We should add some sort of retry. | ||||
|       template: buildTemplate { | ||||
|         name: "delete-namespace", | ||||
|         // Two commands are passed to util.buildCommand: activate the GCP service | ||||
|         // account, then delete testNamespace with kubectl. | ||||
|         command: util.buildCommand([ | ||||
|         [ | ||||
|           "gcloud", | ||||
|           "auth", | ||||
|           "activate-service-account", | ||||
|           "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}", | ||||
|         ], | ||||
|         [ | ||||
|           "kubectl", | ||||
|           "delete", | ||||
|           "namespace", | ||||
|           testNamespace, | ||||
|         ]] | ||||
|         ), | ||||
|       }, | ||||
|     }, // delete-namespace | ||||
|   ] else []; | ||||
| 
 | ||||
| local exitTemplates = | ||||
|   [ | ||||
|   deleteTemplates + | ||||
|   [   | ||||
|     { | ||||
|       // Copy artifacts to GCS for gubernator. | ||||
|       // TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink | ||||
|  | @ -294,7 +385,6 @@ local exitTemplates = | |||
|           "--bucket=" + bucket, | ||||
|         ], | ||||
|       },  // copy-artifacts, | ||||
| 
 | ||||
|     }, | ||||
|     { | ||||
|       // Delete the test directory in NFS. | ||||
|  | @ -314,7 +404,7 @@ local exitTemplates = | |||
|         	  }, | ||||
|           }, | ||||
|         },  // test-dir-delete | ||||
|       dependencies: ["copy-artifacts"], | ||||
|       dependencies: ["copy-artifacts"] + if params.deleteNamespace then ["delete-namespace"] else [], | ||||
|     }, | ||||
|   ]; | ||||
| 
 | ||||
|  |  | |||
|  | @ -14,8 +14,8 @@ local envParams = params + { | |||
|     }, | ||||
|     mnist+: { | ||||
|       namespace: 'kubeflow-test-infra', | ||||
|       name: 'jlewi-mnist-test-465-0109-050605', | ||||
|       prow_env: 'JOB_NAME=mnist-test,JOB_TYPE=presubmit,REPO_NAME=examples,REPO_OWNER=kubeflow,BUILD_NUMBER=0109-050605,BUILD_ID=0109-050605,PULL_NUMBER=465', | ||||
|       name: 'jlewi-mnist-test-469-0111-081531', | ||||
|       prow_env: 'JOB_NAME=mnist-test,JOB_TYPE=presubmit,REPO_NAME=examples,REPO_OWNER=kubeflow,BUILD_NUMBER=0111-081531,BUILD_ID=0111-081531,PULL_NUMBER=469', | ||||
|     }, | ||||
|   }, | ||||
| }; | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue