pipelines/test/sample-test/sample_test_launcher.py

# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This launcher module serves as the entry-point of the sample test image. It
decides which test to trigger based upon the arguments provided.
"""
import fire
import os
import papermill as pm
import re
import subprocess
import utils
import yamale
import yaml
from constants import PAPERMILL_ERR_MSG, BASE_DIR, TEST_DIR, SCHEMA_CONFIG, CONFIG_DIR, DEFAULT_CONFIG
from check_notebook_results import NoteBookChecker
from kfp.containers._gcs_helper import GCSHelper
from run_sample_test import PySampleChecker


class SampleTest(object):

  def __init__(self, test_name, results_gcs_dir, target_image_prefix='',
               namespace='kubeflow'):
    """Launch a KFP sample_test provided its name.

    :param test_name: name of the corresponding sample test.
    :param results_gcs_dir: gs dir to store test result.
    :param target_image_prefix: prefix of docker image, default is empty.
    :param namespace: namespace for kfp, default is kubeflow.
    """
    self._test_name = test_name
    self._results_gcs_dir = results_gcs_dir
    # Capture the first segment after gs:// as the bucket name.
    self._bucket_name = results_gcs_dir.split('/')[2]
    self._target_image_prefix = target_image_prefix
    self._is_notebook = None
    self._namespace = namespace
    self._sample_test_result = 'junit_Sample%sOutput.xml' % self._test_name
    self._sample_test_output = self._results_gcs_dir
    self._work_dir = os.path.join(BASE_DIR, 'samples/core/', self._test_name)

  def _copy_result(self):
    """Copy the generated sample test result to GCS so that Prow can pick it up."""
    print('Copying the test results to GCS %s/' % self._results_gcs_dir)
    GCSHelper.upload_gcs_file(
        self._sample_test_result,
        os.path.join(self._results_gcs_dir, self._sample_test_result))

  def _compile(self):
    os.chdir(self._work_dir)
    print('Run the sample tests...')

    # Looking for the entry point of the test.
    list_of_files = os.listdir('.')
    for file in list_of_files:
      m = re.match(self._test_name + r'\.[a-zA-Z]+', file)
      if m:
        file_name, ext_name = os.path.splitext(file)
        if self._is_notebook is not None:
          raise RuntimeError('Multiple entry points found under sample: {}'.format(self._test_name))
        if ext_name == '.py':
          self._is_notebook = False
        if ext_name == '.ipynb':
          self._is_notebook = True

    if self._is_notebook is None:
      raise RuntimeError('No entry point found for sample: {}'.format(self._test_name))
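
    # Load the default config and validate it against the shared yamale schema
    # before reading run parameters from it.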
    config_schema = yamale.make_schema(SCHEMA_CONFIG)
    # Retrieve default config
    try:
      with open(DEFAULT_CONFIG, 'r') as f:
        raw_args = yaml.safe_load(f)
      default_config = yamale.make_data(DEFAULT_CONFIG)
      yamale.validate(config_schema, default_config)  # If fails, a ValueError will be raised.
    except yaml.YAMLError as yamlerr:
      raise RuntimeError('Illegal default config: {}'.format(yamlerr))
    except OSError as ose:
      raise FileNotFoundError('Default config not found: {}'.format(ose))
    else:
      self._run_pipeline = raw_args['run_pipeline']

    # For presubmit check, do not do any image injection as for now.
    # Notebook samples need to be papermilled first.
    if self._is_notebook:
      # Parse necessary params from config.yaml
      nb_params = {}
      try:
        config_file = os.path.join(CONFIG_DIR, '%s.config.yaml' % self._test_name)
        with open(config_file, 'r') as f:
          raw_args = yaml.safe_load(f)
        test_config = yamale.make_data(config_file)
        yamale.validate(config_schema, test_config)  # If fails, a ValueError will be raised.
      except yaml.YAMLError as yamlerr:
        print('No valid yaml config file found, using default args: {}'.format(yamlerr))
      except OSError as ose:
        print('Config file with the same name not found, using default args: {}'.format(ose))
      else:
        if 'notebook_params' in raw_args.keys():
          nb_params.update(raw_args['notebook_params'])
          if 'output' in raw_args['notebook_params'].keys():  # output is a special param that has to be specified dynamically.
            nb_params['output'] = self._sample_test_output
        if 'run_pipeline' in raw_args.keys():
          self._run_pipeline = raw_args['run_pipeline']

      if self._run_pipeline:
        nb_params['experiment_name'] = self._test_name + '-test'
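
      # prepare_only=True makes papermill inject the parameters into the
      # notebook without executing any cells; the converted script is run later
      # by the notebook checker.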
      pm.execute_notebook(
          input_path='%s.ipynb' % self._test_name,
          output_path='%s.ipynb' % self._test_name,
          parameters=nb_params,
          prepare_only=True
      )
      # Convert to python script.
      subprocess.call([
          'jupyter', 'nbconvert', '--to', 'python', '%s.ipynb' % self._test_name
      ])
    else:
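      # dsl-compile (from the KFP SDK) compiles the Python DSL sample into a
      # YAML pipeline package, which is submitted by the checker below.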
      subprocess.call(['dsl-compile', '--py', '%s.py' % self._test_name,
                       '--output', '%s.yaml' % self._test_name])

  def _injection(self):
    """Inject images for pipeline components.

    This is only valid for component test.
    """
    pass

  def run_test(self):
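    """Compile the sample, inject images if applicable, run it, and upload the result."""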
    self._compile()
    self._injection()

    if self._is_notebook:
      nbchecker = NoteBookChecker(testname=self._test_name,
                                  result=self._sample_test_result,
                                  run_pipeline=self._run_pipeline)
      nbchecker.run()
      os.chdir(TEST_DIR)
      nbchecker.check()
    else:
      os.chdir(TEST_DIR)
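      # Submit the compiled pipeline YAML and verify the run; the outcome is
      # recorded in the junit XML result file named above.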
      pysample_checker = PySampleChecker(testname=self._test_name,
                                         input=os.path.join(
                                             self._work_dir,
                                             '%s.yaml' % self._test_name),
                                         output=self._sample_test_output,
                                         result=self._sample_test_result,
                                         namespace=self._namespace)
      pysample_checker.run()
      pysample_checker.check()

    self._copy_result()


class ComponentTest(SampleTest):
  """Launch a KFP sample test as a component test provided its name.

  Currently follows the same logic as the sample test for compatibility.
  Samples that need image injection include xgboost_training_cm.
  """

  def __init__(self, test_name, results_gcs_dir,
               dataproc_create_cluster_image,
               dataproc_delete_cluster_image,
               dataproc_analyze_image,
               dataproc_transform_image,
               dataproc_train_image,
               dataproc_predict_image,
               kubeflow_dnntrainer_image,
               kubeflow_deployer_image,
               local_confusionmatrix_image,
               local_roc_image,
               target_image_prefix='',
               namespace='kubeflow'):
    super().__init__(
        test_name=test_name,
        results_gcs_dir=results_gcs_dir,
        target_image_prefix=target_image_prefix,
        namespace=namespace
    )
    self._dataproc_create_cluster_image = dataproc_create_cluster_image
    self._dataproc_delete_cluster_image = dataproc_delete_cluster_image
    self._dataproc_analyze_image = dataproc_analyze_image
    self._dataproc_transform_image = dataproc_transform_image
    self._dataproc_train_image = dataproc_train_image
    self._dataproc_predict_image = dataproc_predict_image
    self._kubeflow_dnntrainer_image = kubeflow_dnntrainer_image
    self._kubeflow_deployer_image = kubeflow_deployer_image
    self._local_confusionmatrix_image = local_confusionmatrix_image
    self._local_roc_image = local_roc_image

  def _injection(self):
    """Sample-specific image injection into yaml file."""
    subs = {
        r'gcr\.io/ml-pipeline/ml-pipeline/ml-pipeline-local-confusion-matrix:\w+': self._local_confusionmatrix_image,
        r'gcr\.io/ml-pipeline/ml-pipeline/ml-pipeline-local-roc:\w+': self._local_roc_image
    }
    if self._test_name == 'xgboost_training_cm':
      subs.update({
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-create-cluster:\w+': self._dataproc_create_cluster_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-delete-cluster:\w+': self._dataproc_delete_cluster_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-analyze:\w+': self._dataproc_analyze_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-transform:\w+': self._dataproc_transform_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-train:\w+': self._dataproc_train_image,
          r'gcr\.io/ml-pipeline/ml-pipeline-dataproc-predict:\w+': self._dataproc_predict_image,
      })
    # Only the xgboost_training_cm sample needs the extra injection for now.
    utils.file_injection('%s.yaml' % self._test_name,
                         '%s.yaml.tmp' % self._test_name,
                         subs)


def main():
  """Launches either the KFP sample test or the component test as a command entrypoint.

  Usage:
    python sample_test_launcher.py sample_test run_test arg1 arg2 to launch a sample test, and
    python sample_test_launcher.py component_test run_test arg1 arg2 to launch a component test.
  """
  fire.Fire({
      'sample_test': SampleTest,
      'component_test': ComponentTest
  })


if __name__ == '__main__':
  main()