mirror of https://github.com/kubeflow/examples.git
				
				
				
			Modify K8s models to export the models; tensorboard manifests (#320)
* Modify K8s models to export the models; tensorboard manifests
* Use a K8s job not a TFJob to export the model.
* Start an experiments.libsonnet file to define groups of parameters for different experiments that should be reused
* Need to install tensorflow_hub in the Docker image because it is required by t2t exporter.
* Address review comments.
This commit is contained in:
		
							parent
							
								
									c6ff5dbef8
								
							
						
					
					
						commit
						2487194fbd
					
				|  | @ -6,7 +6,7 @@ RUN pip --no-cache-dir install oauth2client~=4.1.0 &&\ | ||||||
|     apt-get update && apt-get install -y jq git &&\ |     apt-get update && apt-get install -y jq git &&\ | ||||||
|     rm -rf /var/lib/apt/lists/* |     rm -rf /var/lib/apt/lists/* | ||||||
| 
 | 
 | ||||||
| RUN pip --no-cache-dir install tensor2tensor~=1.10.0 | RUN pip --no-cache-dir install tensor2tensor~=1.10.0 tensorflow-hub~=0.1.1 | ||||||
| 
 | 
 | ||||||
| ADD src/code_search /app/code_search | ADD src/code_search /app/code_search | ||||||
| ADD src             /src | ADD src             /src | ||||||
|  |  | ||||||
|  | @ -0,0 +1,11 @@ | ||||||
|  | local experiments = import "experiments.libsonnet"; | ||||||
|  | local exporter = import "export-model.libsonnet"; | ||||||
|  | local k = import "k.libsonnet"; | ||||||
|  | local env = std.extVar("__ksonnet/environments"); | ||||||
|  | 
 | ||||||
|  | local params = std.extVar("__ksonnet/params").components["t2t-code-search-exporter"] + | ||||||
|  |                experiments["demo-trainer-11-07-dist-sync-gpu"] + { | ||||||
|  |   name: "demo-export-11-07-dist-sync-gpu", | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | std.prune(k.core.v1.list.new([exporter.parts(params, env).job])) | ||||||
|  | @ -13,7 +13,7 @@ local instances = { | ||||||
|   "demo-trainer-11-07-dist-sync-gpu": "gs://code-search-demo/models/20181107-dist-sync-gpu", |   "demo-trainer-11-07-dist-sync-gpu": "gs://code-search-demo/models/20181107-dist-sync-gpu", | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| local parts(name, logDir) = {   | local parts(name, logDir) = { | ||||||
|   service:: { |   service:: { | ||||||
|     apiVersion: "v1", |     apiVersion: "v1", | ||||||
|     kind: "Service", |     kind: "Service", | ||||||
|  | @ -46,7 +46,7 @@ local parts(name, logDir) = { | ||||||
|         "tb-job": name, |         "tb-job": name, | ||||||
|       }, |       }, | ||||||
|     }, |     }, | ||||||
|   }, // service |   },  // service | ||||||
| 
 | 
 | ||||||
|   deployment:: { |   deployment:: { | ||||||
|     apiVersion: "apps/v1beta1", |     apiVersion: "apps/v1beta1", | ||||||
|  | @ -107,10 +107,10 @@ local parts(name, logDir) = { | ||||||
|         }, |         }, | ||||||
|       }, |       }, | ||||||
|     }, |     }, | ||||||
|   }, // deployment |   },  // deployment | ||||||
| 
 | 
 | ||||||
|   items: [self.service, self.deployment], |   items: [self.service, self.deployment], | ||||||
| }; // parts | };  // parts | ||||||
| 
 | 
 | ||||||
| local tbObjects = std.flattenArrays(std.map(function(f) parts(f, instances[f]).items, | local tbObjects = std.flattenArrays(std.map(function(f) parts(f, instances[f]).items, | ||||||
|                                             std.objectFieldsAll(instances))); |                                             std.objectFieldsAll(instances))); | ||||||
|  |  | ||||||
|  | @ -7,14 +7,14 @@ local t2tJob = import "t2t-job.libsonnet"; | ||||||
| local env = std.extVar("__ksonnet/environments"); | local env = std.extVar("__ksonnet/environments"); | ||||||
| 
 | 
 | ||||||
| // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | ||||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] + { | local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] { | ||||||
| 	name: "demo-trainer-11-07-dist-sync-gpu", |   name: "demo-trainer-11-07-dist-sync-gpu", | ||||||
| 	outputDir: "gs://code-search-demo/models/20181107-dist-sync-gpu", |   outputDir: "gs://code-search-demo/models/20181107-dist-sync-gpu", | ||||||
| 	train_steps: 200000, |   train_steps: 200000, | ||||||
| 	eval_steps: 100, |   eval_steps: 100, | ||||||
| 	hparams_set: "transformer_base", |   hparams_set: "transformer_base", | ||||||
| 	numWorkerGpu: 1, |   numWorkerGpu: 1, | ||||||
| 	numChief: 1, |   numChief: 1, | ||||||
| 	numWorker: 8, |   numWorker: 8, | ||||||
| }; | }; | ||||||
| std.prune(k.core.v1.list.new([t2tJob.parts(params, env).jobDistSync])) | std.prune(k.core.v1.list.new([t2tJob.parts(params, env).jobDistSync])) | ||||||
|  |  | ||||||
|  | @ -5,12 +5,12 @@ local t2tJob = import "t2t-job.libsonnet"; | ||||||
| 
 | 
 | ||||||
| local env = std.extVar("__ksonnet/environments"); | local env = std.extVar("__ksonnet/environments"); | ||||||
| // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | ||||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] + { | local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] { | ||||||
| 	name: "demo-trainer-11-05-single-gpu", |   name: "demo-trainer-11-05-single-gpu", | ||||||
| 	outputDir: "gs://code-search-demo/models/20181105-dist-gpu", |   outputDir: "gs://code-search-demo/models/20181105-dist-gpu", | ||||||
| 	train_steps: 200000, |   train_steps: 200000, | ||||||
| 	eval_steps: 100, |   eval_steps: 100, | ||||||
| 	hparams_set: "transformer_base_single_gpu", |   hparams_set: "transformer_base_single_gpu", | ||||||
| 	numWorkerGpu: 1, |   numWorkerGpu: 1, | ||||||
| }; | }; | ||||||
| std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job])) | std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job])) | ||||||
|  |  | ||||||
|  | @ -5,9 +5,9 @@ local t2tJob = import "t2t-job.libsonnet"; | ||||||
| 
 | 
 | ||||||
| local env = std.extVar("__ksonnet/environments"); | local env = std.extVar("__ksonnet/environments"); | ||||||
| // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | // Note we are reusing the parameters for t2t-code-search-trainer and then explicitly overriding them. | ||||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] + { | local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"] { | ||||||
| 	outputDir: "gs://code-search-demo/models/20181105-tinyparams", |   outputDir: "gs://code-search-demo/models/20181105-tinyparams", | ||||||
| 	train_steps: 200000, |   train_steps: 200000, | ||||||
| 	eval_steps: 100,	 |   eval_steps: 100, | ||||||
| }; | }; | ||||||
| std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job])) | std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job])) | ||||||
|  |  | ||||||
|  | @ -0,0 +1,11 @@ | ||||||
|  | // Data for various experiments. | ||||||
|  | // Paths are deliberately hard coded so they get versioned and checked into source control. | ||||||
|  | { | ||||||
|  |   "demo-trainer-11-07-dist-sync-gpu": { | ||||||
|  |     name: "demo-trainer-11-07-dist-sync-gpu", | ||||||
|  |     outputDir: "gs://code-search-demo/models/20181107-dist-sync-gpu", | ||||||
|  |     train_steps: 200000, | ||||||
|  |     eval_steps: 100, | ||||||
|  |     hparams_set: "transformer_base", | ||||||
|  |   }, | ||||||
|  | } | ||||||
|  | @ -0,0 +1,67 @@ | ||||||
|  | { | ||||||
|  |   parts(params, env):: { | ||||||
|  |     job: { | ||||||
|  |       apiVersion: "batch/v1", | ||||||
|  |       kind: "Job", | ||||||
|  |       metadata: { | ||||||
|  |         name: params.name, | ||||||
|  |         namespace: env.namespace, | ||||||
|  |         labels: { | ||||||
|  |           app: params.name, | ||||||
|  |         }, | ||||||
|  |       }, | ||||||
|  |       spec: { | ||||||
|  |         replicas: 1, | ||||||
|  |         template: { | ||||||
|  |           metadata: { | ||||||
|  |             labels: { | ||||||
|  |               app: params.name, | ||||||
|  |             }, | ||||||
|  |           }, | ||||||
|  |           spec: { | ||||||
|  |             restartPolicy: "OnFailure", | ||||||
|  |             containers: [ | ||||||
|  |               { | ||||||
|  |                 name: "exporter", | ||||||
|  |                 image: params.image, | ||||||
|  |                 command: [ | ||||||
|  |                   "t2t-exporter", | ||||||
|  |                   "--problem=" + params.problem, | ||||||
|  |                   "--data_dir=" + params.dataDir, | ||||||
|  |                   // TODO(kubeflow/examples#331): t2t-exporter should have flags --export and --export_dir | ||||||
|  |                   // which allow us to control the location of the exported model. | ||||||
|  |                   "--output_dir=" + params.outputDir, | ||||||
|  |                   "--model=" + params.model, | ||||||
|  |                   "--hparams_set=" + params.hparams_set, | ||||||
|  |                   // Need to import the problems. | ||||||
|  |                   "--t2t_usr_dir=/src/code_search/t2t", | ||||||
|  |                 ], | ||||||
|  |                 env: [ | ||||||
|  |                   { | ||||||
|  |                     name: "GOOGLE_APPLICATION_CREDENTIALS", | ||||||
|  |                     value: "/secret/gcp-credentials/user-gcp-sa.json", | ||||||
|  |                   }, | ||||||
|  |                 ], | ||||||
|  |                 workingDir: "/src", | ||||||
|  |                 volumeMounts: [ | ||||||
|  |                   { | ||||||
|  |                     mountPath: "/secret/gcp-credentials", | ||||||
|  |                     name: "gcp-credentials", | ||||||
|  |                   }, | ||||||
|  |                 ],  //volumeMounts | ||||||
|  |               }, | ||||||
|  |             ],  // containers | ||||||
|  |             volumes: [ | ||||||
|  |               { | ||||||
|  |                 name: "gcp-credentials", | ||||||
|  |                 secret: { | ||||||
|  |                   secretName: "user-gcp-sa", | ||||||
|  |                 }, | ||||||
|  |               }, | ||||||
|  |             ], | ||||||
|  |           },  // spec | ||||||
|  |         }, | ||||||
|  |       }, | ||||||
|  |     }, | ||||||
|  |   },  // parts | ||||||
|  | } | ||||||
|  | @ -8,11 +8,11 @@ | ||||||
|     // are not picked up by the individual components. |     // are not picked up by the individual components. | ||||||
|     // Need to see if we can find a way to fix this. |     // Need to see if we can find a way to fix this. | ||||||
| 
 | 
 | ||||||
|     local imageTag = "v20181107-30bab1f-dirty-1ac751", |     local imageTag = "v20181108-004b5ad-dirty-eba459", | ||||||
|     "t2t-job": { |     "t2t-job": { | ||||||
|       jobType: "trainer", |       jobType: "trainer", | ||||||
|       numChief: 0, |       numChief: 0, | ||||||
|       numWorker: 1,       |       numWorker: 1, | ||||||
|       numPs: 0, |       numPs: 0, | ||||||
|       numWorkerGpu: 0, |       numWorkerGpu: 0, | ||||||
|       numPsGpu: 0, |       numPsGpu: 0, | ||||||
|  | @ -60,6 +60,7 @@ | ||||||
|       outputDir: $.components["t2t-code-search"].workingDir + "/output", |       outputDir: $.components["t2t-code-search"].workingDir + "/output", | ||||||
|       model: $.components["t2t-code-search"].model, |       model: $.components["t2t-code-search"].model, | ||||||
|       hparams_set: $.components["t2t-code-search"].hparams_set, |       hparams_set: $.components["t2t-code-search"].hparams_set, | ||||||
|  |       image: $.components["t2t-job"].image, | ||||||
|     }, |     }, | ||||||
|     "t2t-code-search-serving": { |     "t2t-code-search-serving": { | ||||||
|       name: "t2t-code-search", |       name: "t2t-code-search", | ||||||
|  |  | ||||||
|  | @ -1,7 +1,7 @@ | ||||||
|  | local exporter = import "export-model.libsonnet"; | ||||||
| local k = import "k.libsonnet"; | local k = import "k.libsonnet"; | ||||||
| local t2tJob = import "t2t-job.libsonnet"; |  | ||||||
| 
 | 
 | ||||||
| local env = std.extVar("__ksonnet/environments"); | local env = std.extVar("__ksonnet/environments"); | ||||||
| local params = std.extVar("__ksonnet/params").components["t2t-code-search-exporter"]; | local params = std.extVar("__ksonnet/params").components["t2t-code-search-exporter"]; | ||||||
| 
 | 
 | ||||||
| std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job])) | std.prune(k.core.v1.list.new([exporter.parts(params, env).job])) | ||||||
|  |  | ||||||
|  | @ -1,19 +1,6 @@ | ||||||
| local baseParams = std.extVar("__ksonnet/params").components["t2t-job"]; | local baseParams = std.extVar("__ksonnet/params").components["t2t-job"]; | ||||||
| 
 | 
 | ||||||
| { | { | ||||||
|   getExporterCmd(params):: |  | ||||||
|     [ |  | ||||||
|       // TODO(jlewi): Do we need to use the T2T entrypoint wrapper for the exporter? |  | ||||||
|       // Why would we need to parse TF_CONFIG into command line flags? |  | ||||||
|       "/usr/local/sbin/t2t-entrypoint", |  | ||||||
|       "t2t-exporter", |  | ||||||
|       "--problem=" + params.problem, |  | ||||||
|       "--data_dir=" + params.dataDir, |  | ||||||
|       "--output_dir=" + params.outputDir, |  | ||||||
|       "--model=" + params.model, |  | ||||||
|       "--hparams_set=" + params.hparams_set, |  | ||||||
|     ], |  | ||||||
| 
 |  | ||||||
|   getTrainerCmd(params):: { |   getTrainerCmd(params):: { | ||||||
|     local trainer = [ |     local trainer = [ | ||||||
|       // t2t-entrypoint is a wrapper that parses TF_CONFIG |       // t2t-entrypoint is a wrapper that parses TF_CONFIG | ||||||
|  | @ -32,7 +19,7 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"]; | ||||||
|     worker: trainer, |     worker: trainer, | ||||||
| 
 | 
 | ||||||
|     worker_dist: trainer + [ |     worker_dist: trainer + [ | ||||||
|       "--schedule=train",       |       "--schedule=train", | ||||||
|       "--ps_gpu=" + std.toString(params.numPsGpu), |       "--ps_gpu=" + std.toString(params.numPsGpu), | ||||||
|       "--worker_gpu=" + std.toString(params.numWorkerGpu), |       "--worker_gpu=" + std.toString(params.numWorkerGpu), | ||||||
|       "--worker_replicas=" + std.toString(params.numWorker), |       "--worker_replicas=" + std.toString(params.numWorker), | ||||||
|  |  | ||||||
|  | @ -1,8 +1,8 @@ | ||||||
| { | { | ||||||
| 	// Warning: Do not define a global "image" as that will end up overriding |   // Warning: Do not define a global "image" as that will end up overriding | ||||||
| 	// the image parameter for all components. Define more specific names |   // the image parameter for all components. Define more specific names | ||||||
| 	// e.g. "dataflowImage", "trainerCpuImage", "trainerGpuImage", |   // e.g. "dataflowImage", "trainerCpuImage", "trainerGpuImage", | ||||||
| 	workingDir: 'gs://code-search-demo/20181104', |   workingDir: "gs://code-search-demo/20181104", | ||||||
|     dataDir: 'gs://code-search-demo/20181104/data', |   dataDir: "gs://code-search-demo/20181104/data", | ||||||
|     project: 'code-search-demo', |   project: "code-search-demo", | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -2,7 +2,7 @@ local base = import "base.libsonnet"; | ||||||
| // uncomment if you reference ksonnet-lib | // uncomment if you reference ksonnet-lib | ||||||
| // local k = import "k.libsonnet"; | // local k = import "k.libsonnet"; | ||||||
| 
 | 
 | ||||||
| base + { | base { | ||||||
|   // Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n") |   // Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n") | ||||||
|   // "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"}) |   // "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"}) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1,14 +1,14 @@ | ||||||
| local params = std.extVar('__ksonnet/params'); | local params = std.extVar("__ksonnet/params"); | ||||||
| local globals = import 'globals.libsonnet'; | local globals = import "globals.libsonnet"; | ||||||
| local envParams = params + { | local envParams = params { | ||||||
|   components+: { |   components+: { | ||||||
|     "t2t-code-search"+: {       |     "t2t-code-search"+: { | ||||||
|     }, |     }, | ||||||
|     "t2t-code-search-datagen"+: {             |     "t2t-code-search-datagen"+: { | ||||||
|       githubTable: '', |       githubTable: "", | ||||||
|     }, |     }, | ||||||
|     "submit-preprocess-job"+: {       |     "submit-preprocess-job"+: { | ||||||
|       githubTable: '', |       githubTable: "", | ||||||
|     }, |     }, | ||||||
|   }, |   }, | ||||||
| }; | }; | ||||||
|  | @ -18,4 +18,4 @@ local envParams = params + { | ||||||
|     [x]: envParams.components[x] + globals |     [x]: envParams.components[x] + globals | ||||||
|     for x in std.objectFields(envParams.components) |     for x in std.objectFields(envParams.components) | ||||||
|   }, |   }, | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue