remove kubeflow training (#1902)

2019-08-21 09:20:39 -07:00 · 2019-08-21 09:20:39 -07:00 · 93e3121644
parent 2592307cce
commit 93e3121644
5 changed files with 2 additions and 186 deletions
--- a/samples/core/kubeflow_training_classification/README.md
+++ b/samples/core/kubeflow_training_classification/README.md
@@ -1,47 +0,0 @@
-## Overview
-
-The `kubeflow-training-classification.py` pipeline creates a TensorFlow model on structured data and image URLs (Google Cloud Storage). It works for both classification and regression.
-Everything runs inside the pipeline cluster (Kubeflow). The only possible dependency is Google Cloud DataFlow if you enable the "*cloud*" mode for 
-the preprocessing or prediction step.
-
-## The requirements
-
-By default, the preprocessing and prediction steps use the "*local*" mode and run inside the cluster. If you specify the value of "*preprocess_mode*" as "*cloud*", you must enable the
-[DataFlow API](https://cloud.google.com/endpoints/docs/openapi/enable-api) for the given GCP project so that the preprocessing step
-can use Cloud DataFlow. 
-
-Note: The trainer depends on Kubeflow API version v1alpha2.
-
-## Compiling the pipeline template
-
-Follow the guide to [building a pipeline](https://www.kubeflow.org/docs/guides/pipelines/build-pipeline/) to install the Kubeflow Pipelines SDK, then run the following command to compile the sample Python into a workflow specification. The specification takes the form of a YAML file compressed into a `.tar.gz` file.
-
-```bash
-dsl-compile --py kubeflow-training-classification.py --output kubeflow-training-classification.tar.gz
-```
-
-## Deploying the pipeline
-
-Open the Kubeflow pipelines UI. Create a new pipeline, and then upload the compiled specification (`.tar.gz` file) as a new pipeline template.
-
-The pipeline requires one argument:
-
-1. An output directory in a Google Cloud Storage bucket, of the form `gs://<BUCKET>/<PATH>`.
-
-## Components source
-
-Preprocessing:
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/tft/src), 
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/tft)
-
-Training:
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/kubeflow/launcher/src), 
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/kubeflow/launcher)
-
-Prediction:
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/predict/src), 
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/predict)
-
-Confusion Matrix:
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix/src), 
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix)
--- a/samples/core/kubeflow_training_classification/kubeflow_training_classification.py
+++ b/samples/core/kubeflow_training_classification/kubeflow_training_classification.py
@@ -1,91 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import kfp
-from kfp import components
-from kfp import dsl
-from kfp import gcp
-
-dataflow_tf_transform_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/dataflow/tft/component.yaml')
-kubeflow_tf_training_op  = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/kubeflow/dnntrainer/component.yaml')
-dataflow_tf_predict_op   = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/dataflow/predict/component.yaml')
-confusion_matrix_op      = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/local/confusion_matrix/component.yaml')
-
-@dsl.pipeline(
-    name='TF training and prediction pipeline',
-    description=''
-)
-def kubeflow_training(output, project,
-    evaluation='gs://ml-pipeline-playground/flower/eval100.csv',
-    train='gs://ml-pipeline-playground/flower/train200.csv',
-    schema='gs://ml-pipeline-playground/flower/schema.json',
-    learning_rate=0.1,
-    hidden_layer_size='100,50',
-    steps=2000,
-    target='label',
-    workers=0,
-    pss=0,
-    preprocess_mode='local',
-    predict_mode='local',
-):
-    output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'
-
-    # set the flag to use GPU trainer
-    use_gpu = False
-
-    preprocess = dataflow_tf_transform_op(
-        training_data_file_pattern=train,
-        evaluation_data_file_pattern=evaluation,
-        schema=schema,
-        gcp_project=project,
-        run_mode=preprocess_mode,
-        preprocessing_module='',
-        transformed_data_dir=output_template
-    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
-
-    training = kubeflow_tf_training_op(
-        transformed_data_dir=preprocess.output,
-        schema=schema,
-        learning_rate=learning_rate,
-        hidden_layer_size=hidden_layer_size,
-        steps=steps,
-        target=target,
-        preprocessing_module='',
-        training_output_dir=output_template
-    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
-
-    if use_gpu:
-        training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:0517114dc2b365a4a6d95424af6157ead774eff3',
-        training.set_gpu_limit(1)
-
-    prediction = dataflow_tf_predict_op(
-        data_file_pattern=evaluation,
-        schema=schema,
-        target_column=target,
-        model=training.output,
-        run_mode=predict_mode,
-        gcp_project=project,
-        predictions_dir=output_template
-    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
-
-    confusion_matrix = confusion_matrix_op(
-        predictions=prediction.output,
-        output_dir=output_template
-    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
-
-
-if __name__ == '__main__':
-    kfp.compiler.Compiler().compile(kubeflow_training, __file__ + '.zip')
--- a/test/sample-test/run_sample_test.py
+++ b/test/sample-test/run_sample_test.py
@@ -96,15 +96,6 @@ def main():
        'steps':
          '5'
    }
-  elif args.testname == 'kubeflow_training_classification':
-    params = {
-        'output': args.output,
-        'project': 'ml-pipeline-test',
-        'evaluation': 'gs://ml-pipeline-dataset/sample-test/flower/eval15.csv',
-        'train': 'gs://ml-pipeline-dataset/sample-test/flower/train30.csv',
-        'hidden-layer-size': '10,5',
-        'steps': '5'
-    }
  elif args.testname == 'xgboost_training_cm':
    params = {
        'output': args.output,
@@ -150,22 +141,7 @@ def main():

  ###### Validate the results for specific test cases ######
  #TODO: Add result check for tfx-cab-classification after launch.
-  if args.testname == 'kubeflow_training_classification':
-    cm_tar_path = './confusion_matrix.tar.gz'
-    utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path,
-                                'mlpipeline-ui-metadata')
-    with tarfile.open(cm_tar_path) as tar_handle:
-      file_handles = tar_handle.getmembers()
-      assert len(file_handles) == 1
-
-      with tar_handle.extractfile(file_handles[0]) as f:
-        cm_data = json.load(io.TextIOWrapper(f))
-        utils.add_junit_test(
-            test_cases, 'confusion matrix format',
-            (len(cm_data['outputs'][0]['schema']) == 3),
-            'the column number of the confusion matrix output is not equal to three'
-        )
-  elif args.testname == 'xgboost_training_cm':
+  if args.testname == 'xgboost_training_cm':
    cm_tar_path = './confusion_matrix.tar.gz'
    utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path,
                                'mlpipeline-ui-metadata')
--- a/test/sample-test/run_test.sh
+++ b/test/sample-test/run_test.sh
@@ -203,17 +203,6 @@ xgboost_training_cm_injection() {
  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-local-roc:\([a-zA-Z0-9_.-]\)\+|${LOCAL_ROC_IMAGE}|g" ${TEST_NAME}.yaml
 }

-################################################################################
-# Utility function to inject correct images to python files for
-# kubeflow_training_classification test.
-################################################################################
-kubeflow_training_classification_injection() {
-  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:\([a-zA-Z0-9_.-]\)\+|${DATAFLOW_TFT_IMAGE}|g" ${TEST_NAME}.py
-  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:\([a-zA-Z0-9_.-]\)\+|${KUBEFLOW_DNNTRAINER_IMAGE}|g" ${TEST_NAME}.py
-  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:\([a-zA-Z0-9_.-]\)\+|${DATAFLOW_PREDICT_IMAGE}|g" ${TEST_NAME}.py
-  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:\([a-zA-Z0-9_.-]\)\+|${LOCAL_CONFUSIONMATRIX_IMAGE}|g" ${TEST_NAME}.py
-}
-
 if [[ -z "$RESULTS_GCS_DIR" ]]; then
  usage
  exit 1
@@ -231,17 +220,7 @@ echo "Run the sample tests..."
 # Run the tests
 preparation ${TEST_NAME}

-if [[ "${TEST_NAME}" == "kubeflow_training_classification" ]]; then
-  #TODO(numerology): convert the sed commands to sed -e
-  # 's|gcr.io/ml-pipeline/|gcr.io/ml-pipeline-test/' and tag replacement. Also
-  # let the postsubmit tests refer to yaml files.
-  if [ -n "${DATAFLOW_TFT_IMAGE}" ];then
-    kubeflow_training_classification_injection
-  fi
-
-  dsl-compile --py "${TEST_NAME}.py" --output "${TEST_NAME}.yaml"
-  check_result ${TEST_NAME}
-elif [[ "${TEST_NAME}" == "tfx_cab_classification" ]]; then
+if [[ "${TEST_NAME}" == "tfx_cab_classification" ]]; then
  dsl-compile --py "${TEST_NAME}.py" --output "${TEST_NAME}.yaml"
  if [[ -n "${DATAFLOW_TFT_IMAGE}" ]]; then
    tfx_cab_classification_injection
--- a/test/sample_test.yaml
+++ b/test/sample_test.yaml
@@ -67,7 +67,6 @@ spec:
                - name: test-name
                  value: "{{item}}"
            withItems:
-              - kubeflow_training_classification
              - tfx_cab_classification
              - xgboost_training_cm
              - kubeflow_pipeline_using_TFX_OSS_components