mirror of https://github.com/kubeflow/examples.git
				
				
				
			Modify K8s models to export the models; tensorboard manifests (#320)
* Modify K8s models to export the models; tensorboard manifests. * Use a K8s Job, not a TFJob, to export the model. * Start an experiments.libsonnet file to define groups of parameters for different experiments that should be reused. * Need to install tensorflow_hub in the Docker image because it is required by the t2t exporter. * Address review comments.
This commit is contained in:
		
							parent
							
								
									c6ff5dbef8
								
							
						
					
					
						commit
						2487194fbd
					
				|  | @ -6,7 +6,7 @@ RUN pip --no-cache-dir install oauth2client~=4.1.0 &&\ | |||
|     apt-get update && apt-get install -y jq git &&\ | ||||
|     rm -rf /var/lib/apt/lists/* | ||||
| 
 | ||||
| RUN pip --no-cache-dir install tensor2tensor~=1.10.0 | ||||
| RUN pip --no-cache-dir install tensor2tensor~=1.10.0 tensorflow-hub~=0.1.1 | ||||
| 
 | ||||
| ADD src/code_search /app/code_search | ||||
| ADD src             /src | ||||
|  |  | |||
|  | @ -0,0 +1,11 @@ | |||
| local experiments = import "experiments.libsonnet"; | ||||
| local exporter = import "export-model.libsonnet"; | ||||
| local k = import "k.libsonnet"; | ||||
| local env = std.extVar("__ksonnet/environments"); | ||||
| 
 | ||||
| // Parameters are layered left to right; fields on the right override those on the left: | ||||
| //   1. the defaults of the "t2t-code-search-exporter" component, | ||||
| //   2. the shared settings of the "demo-trainer-11-07-dist-sync-gpu" experiment, | ||||
| //   3. an explicit `name`, so the export job does not reuse the trainer name that the | ||||
| //      experiment entry carries. | ||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-exporter"] + | ||||
|                experiments["demo-trainer-11-07-dist-sync-gpu"] + { | ||||
|   name: "demo-export-11-07-dist-sync-gpu", | ||||
| }; | ||||
| 
 | ||||
| std.prune(k.core.v1.list.new([exporter.parts(params, env).job])) | ||||
|  | @ -46,7 +46,7 @@ local parts(name, logDir) = { | |||
|         "tb-job": name, | ||||
|       }, | ||||
|     }, | ||||
|   }, // service | ||||
|   },  // service | ||||
| 
 | ||||
|   deployment:: { | ||||
|     apiVersion: "apps/v1beta1", | ||||
|  | @ -107,10 +107,10 @@ local parts(name, logDir) = { | |||
|         }, | ||||
|       }, | ||||
|     }, | ||||
|   }, // deployment | ||||
|   },  // deployment | ||||
| 
 | ||||
|   items: [self.service, self.deployment], | ||||
| }; // parts | ||||
| };  // parts | ||||
| 
 | ||||
| local tbObjects = std.flattenArrays(std.map(function(f) parts(f, instances[f]).items, | ||||
|                                             std.objectFieldsAll(instances))); | ||||
|  |  | |||
|  | @ -7,14 +7,14 @@ local t2tJob = import "t2t-job.libsonnet"; | |||
| local env = std.extVar("__ksonnet/environments"); | ||||
| 
 | ||||
| // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | ||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] + { | ||||
| 	name: "demo-trainer-11-07-dist-sync-gpu", | ||||
| 	outputDir: "gs://code-search-demo/models/20181107-dist-sync-gpu", | ||||
| 	train_steps: 200000, | ||||
| 	eval_steps: 100, | ||||
| 	hparams_set: "transformer_base", | ||||
| 	numWorkerGpu: 1, | ||||
| 	numChief: 1, | ||||
| 	numWorker: 8, | ||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] { | ||||
|   name: "demo-trainer-11-07-dist-sync-gpu", | ||||
|   outputDir: "gs://code-search-demo/models/20181107-dist-sync-gpu", | ||||
|   train_steps: 200000, | ||||
|   eval_steps: 100, | ||||
|   hparams_set: "transformer_base", | ||||
|   numWorkerGpu: 1, | ||||
|   numChief: 1, | ||||
|   numWorker: 8, | ||||
| }; | ||||
| std.prune(k.core.v1.list.new([t2tJob.parts(params, env).jobDistSync])) | ||||
|  |  | |||
|  | @ -5,12 +5,12 @@ local t2tJob = import "t2t-job.libsonnet"; | |||
| 
 | ||||
| local env = std.extVar("__ksonnet/environments"); | ||||
| // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | ||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] + { | ||||
| 	name: "demo-trainer-11-05-single-gpu", | ||||
| 	outputDir: "gs://code-search-demo/models/20181105-dist-gpu", | ||||
| 	train_steps: 200000, | ||||
| 	eval_steps: 100, | ||||
| 	hparams_set: "transformer_base_single_gpu", | ||||
| 	numWorkerGpu: 1, | ||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] { | ||||
|   name: "demo-trainer-11-05-single-gpu", | ||||
|   outputDir: "gs://code-search-demo/models/20181105-dist-gpu", | ||||
|   train_steps: 200000, | ||||
|   eval_steps: 100, | ||||
|   hparams_set: "transformer_base_single_gpu", | ||||
|   numWorkerGpu: 1, | ||||
| }; | ||||
| std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job])) | ||||
|  |  | |||
|  | @ -5,9 +5,9 @@ local t2tJob = import "t2t-job.libsonnet"; | |||
| 
 | ||||
| local env = std.extVar("__ksonnet/environments"); | ||||
| // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | ||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] + { | ||||
| 	outputDir: "gs://code-search-demo/models/20181105-tinyparams", | ||||
| 	train_steps: 200000, | ||||
| 	eval_steps: 100,	 | ||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] { | ||||
|   outputDir: "gs://code-search-demo/models/20181105-tinyparams", | ||||
|   train_steps: 200000, | ||||
|   eval_steps: 100, | ||||
| }; | ||||
| std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job])) | ||||
|  |  | |||
|  | @ -0,0 +1,11 @@ | |||
| // Data for various experiments. | ||||
| // Paths are deliberately hard coded so they get versioned and checked into source control. | ||||
| // | ||||
| // Each top-level key is an experiment name. Its value is an object of parameters that a | ||||
| // component mixes into its own params, e.g. `componentParams + experiments["<name>"]`. | ||||
| { | ||||
|   "demo-trainer-11-07-dist-sync-gpu": { | ||||
|     name: "demo-trainer-11-07-dist-sync-gpu", | ||||
|     outputDir: "gs://code-search-demo/models/20181107-dist-sync-gpu", | ||||
|     train_steps: 200000, | ||||
|     eval_steps: 100, | ||||
|     hparams_set: "transformer_base", | ||||
|   }, | ||||
| } | ||||
|  | @ -0,0 +1,67 @@ | |||
| // Manifest library for exporting a trained model with t2t-exporter. | ||||
| { | ||||
|   // parts returns an object whose `job` field is a batch/v1 Job that runs t2t-exporter. | ||||
|   // | ||||
|   // params fields read here: name, image, problem, dataDir, outputDir, model, hparams_set. | ||||
|   // env fields read here: namespace. | ||||
|   parts(params, env):: { | ||||
|     job: { | ||||
|       apiVersion: "batch/v1", | ||||
|       kind: "Job", | ||||
|       metadata: { | ||||
|         name: params.name, | ||||
|         namespace: env.namespace, | ||||
|         labels: { | ||||
|           app: params.name, | ||||
|         }, | ||||
|       }, | ||||
|       spec: { | ||||
|         // Note: the batch/v1 JobSpec has no `replicas` field. A Job runs a single pod | ||||
|         // by default (parallelism and completions both default to 1). | ||||
|         template: { | ||||
|           metadata: { | ||||
|             labels: { | ||||
|               app: params.name, | ||||
|             }, | ||||
|           }, | ||||
|           spec: { | ||||
|             restartPolicy: "OnFailure", | ||||
|             containers: [ | ||||
|               { | ||||
|                 name: "exporter", | ||||
|                 image: params.image, | ||||
|                 command: [ | ||||
|                   "t2t-exporter", | ||||
|                   "--problem=" + params.problem, | ||||
|                   "--data_dir=" + params.dataDir, | ||||
|                   // TODO(kubeflow/examples#331): t2t-exporter should have flags --export and --export_dir | ||||
|                   // which allow us to control the location of the exported model. | ||||
|                   "--output_dir=" + params.outputDir, | ||||
|                   "--model=" + params.model, | ||||
|                   "--hparams_set=" + params.hparams_set, | ||||
|                   // Need to import the problems. | ||||
|                   "--t2t_usr_dir=/src/code_search/t2t", | ||||
|                 ], | ||||
|                 env: [ | ||||
|                   { | ||||
|                     // Points at the key file provided by the gcp-credentials volume mount below. | ||||
|                     name: "GOOGLE_APPLICATION_CREDENTIALS", | ||||
|                     value: "/secret/gcp-credentials/user-gcp-sa.json", | ||||
|                   }, | ||||
|                 ], | ||||
|                 workingDir: "/src", | ||||
|                 volumeMounts: [ | ||||
|                   { | ||||
|                     mountPath: "/secret/gcp-credentials", | ||||
|                     name: "gcp-credentials", | ||||
|                   }, | ||||
|                 ],  // volumeMounts | ||||
|               }, | ||||
|             ],  // containers | ||||
|             volumes: [ | ||||
|               { | ||||
|                 // Backed by the "user-gcp-sa" secret, which must exist in the job's namespace. | ||||
|                 name: "gcp-credentials", | ||||
|                 secret: { | ||||
|                   secretName: "user-gcp-sa", | ||||
|                 }, | ||||
|               }, | ||||
|             ], | ||||
|           },  // spec | ||||
|         }, | ||||
|       }, | ||||
|     }, | ||||
|   },  // parts | ||||
| } | ||||
|  | @ -8,7 +8,7 @@ | |||
|     // are not picked up by the individual components. | ||||
|     // Need to see if we can find a way to fix this. | ||||
| 
 | ||||
|     local imageTag = "v20181107-30bab1f-dirty-1ac751", | ||||
|     local imageTag = "v20181108-004b5ad-dirty-eba459", | ||||
|     "t2t-job": { | ||||
|       jobType: "trainer", | ||||
|       numChief: 0, | ||||
|  | @ -60,6 +60,7 @@ | |||
|       outputDir: $.components["t2t-code-search"].workingDir + "/output", | ||||
|       model: $.components["t2t-code-search"].model, | ||||
|       hparams_set: $.components["t2t-code-search"].hparams_set, | ||||
|       image: $.components["t2t-job"].image, | ||||
|     }, | ||||
|     "t2t-code-search-serving": { | ||||
|       name: "t2t-code-search", | ||||
|  |  | |||
|  | @ -1,7 +1,7 @@ | |||
| local exporter = import "export-model.libsonnet"; | ||||
| local k = import "k.libsonnet"; | ||||
| local t2tJob = import "t2t-job.libsonnet"; | ||||
| 
 | ||||
| local env = std.extVar("__ksonnet/environments"); | ||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-exporter"]; | ||||
| 
 | ||||
| std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job])) | ||||
| std.prune(k.core.v1.list.new([exporter.parts(params, env).job])) | ||||
|  |  | |||
|  | @ -1,19 +1,6 @@ | |||
| local baseParams = std.extVar("__ksonnet/params").components["t2t-job"]; | ||||
| 
 | ||||
| { | ||||
|   getExporterCmd(params):: | ||||
|     [ | ||||
|       // TODO(jlewi): Do we need to use the T2T entrypoint wrapper for the exporter? | ||||
|       // Why would we need to parse TF_CONFIG into command line flags? | ||||
|       "/usr/local/sbin/t2t-entrypoint", | ||||
|       "t2t-exporter", | ||||
|       "--problem=" + params.problem, | ||||
|       "--data_dir=" + params.dataDir, | ||||
|       "--output_dir=" + params.outputDir, | ||||
|       "--model=" + params.model, | ||||
|       "--hparams_set=" + params.hparams_set, | ||||
|     ], | ||||
| 
 | ||||
|   getTrainerCmd(params):: { | ||||
|     local trainer = [ | ||||
|       // t2t-entrypoint is a wrapper that parses TF_CONFIG | ||||
|  |  | |||
|  | @ -1,8 +1,8 @@ | |||
| { | ||||
| 	// Warning: Do not define a global "image" as that will end up overriding | ||||
| 	// the image parameter for all components. Define more specific names | ||||
| 	// e.g. "dataflowImage", "trainerCpuImage", "trainerGpuImage", | ||||
| 	workingDir: 'gs://code-search-demo/20181104', | ||||
|     dataDir: 'gs://code-search-demo/20181104/data', | ||||
|     project: 'code-search-demo', | ||||
|   // Warning: Do not define a global "image" as that will end up overriding | ||||
|   // the image parameter for all components. Define more specific names | ||||
|   // e.g. "dataflowImage", "trainerCpuImage", "trainerGpuImage", | ||||
|   workingDir: "gs://code-search-demo/20181104", | ||||
|   dataDir: "gs://code-search-demo/20181104/data", | ||||
|   project: "code-search-demo", | ||||
| } | ||||
|  | @ -2,7 +2,7 @@ local base = import "base.libsonnet"; | |||
| // uncomment if you reference ksonnet-lib | ||||
| // local k = import "k.libsonnet"; | ||||
| 
 | ||||
| base + { | ||||
| base { | ||||
|   // Insert user-specified overrides here. For example, if a component is named "nginx-deployment", you might have something like: | ||||
|   // "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"}) | ||||
| } | ||||
|  |  | |||
|  | @ -1,14 +1,14 @@ | |||
| local params = std.extVar('__ksonnet/params'); | ||||
| local globals = import 'globals.libsonnet'; | ||||
| local envParams = params + { | ||||
| local params = std.extVar("__ksonnet/params"); | ||||
| local globals = import "globals.libsonnet"; | ||||
| local envParams = params { | ||||
|   components+: { | ||||
|     "t2t-code-search"+: { | ||||
|     }, | ||||
|     "t2t-code-search-datagen"+: { | ||||
|       githubTable: '', | ||||
|       githubTable: "", | ||||
|     }, | ||||
|     "submit-preprocess-job"+: { | ||||
|       githubTable: '', | ||||
|       githubTable: "", | ||||
|     }, | ||||
|   }, | ||||
| }; | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue