mirror of https://github.com/kubeflow/examples.git
				
				
				
			
		
			
				
	
	
		
			105 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			105 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
| # Copyright 2018 Google Inc. All Rights Reserved.
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License");
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     https://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| 
 | |
| 
 | |
| import argparse
 | |
| import os
 | |
| import time
 | |
| import logging
 | |
| import subprocess
 | |
| import requests
 | |
| 
 | |
| from tensorflow.python.lib.io import file_io  #pylint: disable=no-name-in-module
 | |
| 
 | |
| 
 | |
| def main():
 | |
|   parser = argparse.ArgumentParser(description='ML Trainer')
 | |
|   parser.add_argument(
 | |
|       '--model_name',
 | |
|       help='...',
 | |
|       required=True)
 | |
| 
 | |
|   parser.add_argument(
 | |
|       '--model_path',
 | |
|       help='...',
 | |
|       required=True)
 | |
| 
 | |
|   parser.add_argument('--cluster', type=str,
 | |
|                       help='GKE cluster set up for kubeflow. If set, zone must be provided. ' +
 | |
|                            'If not set, assuming this runs in a GKE container and current ' +
 | |
|                            'cluster is used.')
 | |
|   parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
 | |
|   parser.add_argument('--namespace', type=str, default='default')
 | |
|   args = parser.parse_args()
 | |
| 
 | |
|   # KUBEFLOW_NAMESPACE = 'kubeflow'
 | |
| 
 | |
|   # Make sure model dir exists before proceeding
 | |
|   retries = 0
 | |
|   sleeptime = 5
 | |
|   while retries < 20:
 | |
|     try:
 | |
|       model_dir = os.path.join(args.model_path, file_io.list_directory(args.model_path)[-1])
 | |
|       print("model subdir: %s" % model_dir)
 | |
|       break
 | |
|     except Exception as e:  #pylint: disable=broad-except
 | |
|       print(e)
 | |
|       print("Sleeping %s seconds to sync with GCS..." % sleeptime)
 | |
|       time.sleep(sleeptime)
 | |
|       retries += 1
 | |
|       sleeptime *= 2
 | |
|   if retries >= 20:
 | |
|     print("could not get model subdir from %s, exiting" % args.model_path)
 | |
|     exit(1)
 | |
| 
 | |
|   logging.getLogger().setLevel(logging.INFO)
 | |
|   args_dict = vars(args)
 | |
|   if args.cluster and args.zone:
 | |
|     cluster = args_dict.pop('cluster')  #pylint: disable=unused-variable
 | |
|     zone = args_dict.pop('zone')  #pylint: disable=unused-variable
 | |
|   else:
 | |
|     # Get cluster name and zone from metadata
 | |
|     metadata_server = "http://metadata/computeMetadata/v1/instance/"
 | |
|     metadata_flavor = {'Metadata-Flavor' : 'Google'}
 | |
|     cluster = requests.get(metadata_server + "attributes/cluster-name",
 | |
|                            headers=metadata_flavor).text
 | |
|     zone = requests.get(metadata_server + "zone",
 | |
|                         headers=metadata_flavor).text.split('/')[-1]
 | |
| 
 | |
|   # logging.info('Getting credentials for GKE cluster %s.' % cluster)
 | |
|   # subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', cluster,
 | |
|   #                  '--zone', zone])
 | |
| 
 | |
|   logging.info('Generating training template.')
 | |
| 
 | |
|   template_file = os.path.join(
 | |
|       os.path.dirname(os.path.realpath(__file__)), 'tf-serve-template.yaml')
 | |
|   target_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'tf-serve.yaml')
 | |
| 
 | |
|   with open(template_file, 'r') as f:
 | |
|     with open(target_file, "w") as target:
 | |
|       data = f.read()
 | |
|       changed = data.replace('MODEL_NAME', args.model_name)
 | |
|       changed1 = changed.replace('KUBEFLOW_NAMESPACE', args.namespace)
 | |
|       changed2 = changed1.replace('MODEL_PATH', args.model_path)
 | |
|       target.write(changed2)
 | |
| 
 | |
| 
 | |
|   logging.info('deploying model serving.')
 | |
|   subprocess.call(['kubectl', 'create', '-f', '/ml/tf-serve.yaml'])
 | |
| 
 | |
| 
 | |
| if __name__ == "__main__":
 | |
|   main()
 |