# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This launcher module serves as the entry point of the sample test image.
It decides which test to trigger based upon the arguments provided.
"""

import fire
import os
import papermill as pm
import re
import subprocess
import utils
import yamale
import yaml

from constants import PAPERMILL_ERR_MSG, BASE_DIR, TEST_DIR, SCHEMA_CONFIG, CONFIG_DIR, DEFAULT_CONFIG
from check_notebook_results import NoteBookChecker
from kfp.containers._gcs_helper import GCSHelper
from run_sample_test import PySampleChecker


class SampleTest(object):

  def __init__(self, test_name, results_gcs_dir, target_image_prefix='',
               namespace='kubeflow'):
    """Launch a KFP sample test provided its name.

    :param test_name: name of the corresponding sample test.
    :param results_gcs_dir: gs dir to store the test result.
    :param target_image_prefix: prefix of the docker image, default is empty.
    :param namespace: namespace for kfp, default is kubeflow.
    """
    self._test_name = test_name
    self._results_gcs_dir = results_gcs_dir
    # Capture the first segment after gs:// as the bucket name.
    self._bucket_name = results_gcs_dir.split('/')[2]
    self._target_image_prefix = target_image_prefix
    self._is_notebook = None
    self._namespace = namespace
    self._sample_test_result = 'junit_Sample%sOutput.xml' % self._test_name
    self._sample_test_output = self._results_gcs_dir
    self._work_dir = os.path.join(BASE_DIR, 'samples/core/', self._test_name)

  def _copy_result(self):
    """Copy the generated sample test result to GCS so that Prow can pick it up."""
    print('Copying the test results to GCS %s/' % self._results_gcs_dir)
    GCSHelper.upload_gcs_file(
        self._sample_test_result,
        os.path.join(self._results_gcs_dir, self._sample_test_result))

  def _compile(self):
    os.chdir(self._work_dir)
    print('Running the sample tests...')

    # Look for the entry point of the test.
    list_of_files = os.listdir('.')
    for file in list_of_files:
      m = re.match(self._test_name + r'\.[a-zA-Z]+', file)
      if m:
        file_name, ext_name = os.path.splitext(file)
        if self._is_notebook is not None:
          raise RuntimeError(
              'Multiple entry points found under sample: {}'.format(self._test_name))
        if ext_name == '.py':
          self._is_notebook = False
        if ext_name == '.ipynb':
          self._is_notebook = True

    if self._is_notebook is None:
      raise RuntimeError('No entry point found for sample: {}'.format(self._test_name))

    config_schema = yamale.make_schema(SCHEMA_CONFIG)
    # Retrieve the default config.
    try:
      with open(DEFAULT_CONFIG, 'r') as f:
        raw_args = yaml.safe_load(f)
      default_config = yamale.make_data(DEFAULT_CONFIG)
      yamale.validate(config_schema, default_config)  # If validation fails, a ValueError is raised.
    except yaml.YAMLError as yamlerr:
      raise RuntimeError('Illegal default config:{}'.format(yamlerr))
    except OSError as ose:
      raise FileNotFoundError('Default config not found:{}'.format(ose))
    else:
      self._run_pipeline = raw_args['run_pipeline']

    # For the presubmit check, do not do any image injection for now.
    # Notebook samples need to be papermilled first.
    if self._is_notebook:
      # Parse the necessary params from the sample's config.yaml.
      nb_params = {}
      try:
        config_file = os.path.join(CONFIG_DIR, '%s.config.yaml' % self._test_name)
        with open(config_file, 'r') as f:
          raw_args = yaml.safe_load(f)
        test_config = yamale.make_data(config_file)
        yamale.validate(config_schema, test_config)  # If validation fails, a ValueError is raised.
      except yaml.YAMLError as yamlerr:
        print('No legit yaml config file found, using default args:{}'.format(yamlerr))
      except OSError as ose:
        print('Config file with the same name not found, using default args:{}'.format(ose))
      else:
        if 'notebook_params' in raw_args.keys():
          nb_params.update(raw_args['notebook_params'])
          if 'output' in raw_args['notebook_params'].keys():
            # output is a special param that has to be specified dynamically.
            nb_params['output'] = self._sample_test_output
        if 'run_pipeline' in raw_args.keys():
          self._run_pipeline = raw_args['run_pipeline']

      if self._run_pipeline:
        nb_params['experiment_name'] = self._test_name + '-test'

      pm.execute_notebook(
          input_path='%s.ipynb' % self._test_name,
          output_path='%s.ipynb' % self._test_name,
          parameters=nb_params,
          prepare_only=True
      )
      # Convert the notebook to a python script.
      subprocess.call([
          'jupyter', 'nbconvert', '--to', 'python',
          '%s.ipynb' % self._test_name
      ])
    else:
      subprocess.call(['dsl-compile', '--py', '%s.py' % self._test_name,
                       '--output', '%s.yaml' % self._test_name])

  def _injection(self):
    """Inject images for pipeline components.

    This is only valid for the component test.
    """
    pass

  def run_test(self):
    self._compile()
    self._injection()
    if self._is_notebook:
      nbchecker = NoteBookChecker(testname=self._test_name,
                                  result=self._sample_test_result,
                                  run_pipeline=self._run_pipeline)
      nbchecker.run()
      os.chdir(TEST_DIR)
      nbchecker.check()
    else:
      os.chdir(TEST_DIR)
      pysample_checker = PySampleChecker(
          testname=self._test_name,
          input=os.path.join(self._work_dir, '%s.yaml' % self._test_name),
          output=self._sample_test_output,
          result=self._sample_test_result,
          namespace=self._namespace)
      pysample_checker.run()
      pysample_checker.check()

    self._copy_result()


class ComponentTest(SampleTest):
  """Launch a KFP sample test as a component test provided its name.

  Currently follows the same logic as the sample test for compatibility.
  Samples that need image injection include xgboost_training_cm.
  """

  def __init__(self, test_name, results_gcs_dir,
               dataproc_create_cluster_image,
               dataproc_delete_cluster_image,
               dataproc_analyze_image,
               dataproc_transform_image,
               dataproc_train_image,
               dataproc_predict_image,
               kubeflow_dnntrainer_image,
               kubeflow_deployer_image,
               local_confusionmatrix_image,
               local_roc_image,
               target_image_prefix='',
               namespace='kubeflow'):
    super().__init__(
        test_name=test_name,
        results_gcs_dir=results_gcs_dir,
        target_image_prefix=target_image_prefix,
        namespace=namespace
    )
    self._dataproc_create_cluster_image = dataproc_create_cluster_image
    self._dataproc_delete_cluster_image = dataproc_delete_cluster_image
    self._dataproc_analyze_image = dataproc_analyze_image
    self._dataproc_transform_image = dataproc_transform_image
    self._dataproc_train_image = dataproc_train_image
    self._dataproc_predict_image = dataproc_predict_image
    self._kubeflow_dnntrainer_image = kubeflow_dnntrainer_image
    self._kubeflow_deployer_image = kubeflow_deployer_image
    self._local_confusionmatrix_image = local_confusionmatrix_image
    self._local_roc_image = local_roc_image

  def _injection(self):
    """Sample-specific image injection into the compiled yaml file."""
    subs = {
        r'gcr\.io/ml-pipeline/ml-pipeline/ml-pipeline-local-confusion-matrix:\w+':
            self._local_confusionmatrix_image,
        r'gcr\.io/ml-pipeline/ml-pipeline/ml-pipeline-local-roc:\w+':
            self._local_roc_image
    }
    if self._test_name == 'xgboost_training_cm':
      subs.update({
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-create-cluster:\w+':
              self._dataproc_create_cluster_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-delete-cluster:\w+':
              self._dataproc_delete_cluster_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-analyze:\w+':
              self._dataproc_analyze_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-transform:\w+':
              self._dataproc_transform_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-train:\w+':
              self._dataproc_train_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-predict:\w+':
              self._dataproc_predict_image,
      })
    # Only the sample above needs the extra substitutions for now.
    utils.file_injection('%s.yaml' % self._test_name,
                         '%s.yaml.tmp' % self._test_name, subs)


def main():
  """Launches either a KFP sample test or a component test as a command entrypoint.

  Usage:
    python sample_test_launcher.py sample_test run_test arg1 arg2
        to launch a sample test, and
    python sample_test_launcher.py component_test run_test arg1 arg2
        to launch a component test.
  """
  fire.Fire({
      'sample_test': SampleTest,
      'component_test': ComponentTest
  })


if __name__ == '__main__':
  main()
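
# Illustrative invocations (a sketch only; the bucket name, build id, project,
# and image tags below are placeholders, not values defined in this repository):
#
#   python sample_test_launcher.py sample_test run_test \
#       --test-name xgboost_training_cm \
#       --results-gcs-dir gs://<results-bucket>/<build-id>
#
#   python sample_test_launcher.py component_test run_test \
#       --test-name xgboost_training_cm \
#       --results-gcs-dir gs://<results-bucket>/<build-id> \
#       --local-confusionmatrix-image gcr.io/<project>/local-confusion-matrix:<tag> \
#       --local-roc-image gcr.io/<project>/local-roc:<tag> \
#       <remaining --dataproc-*-image and --kubeflow-*-image flags>
#
# fire.Fire exposes SampleTest and ComponentTest under the 'sample_test' and
# 'component_test' commands; the flags correspond to the constructor parameters
# declared above, and 'run_test' selects the method to invoke.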