remove kubeflow training (#1902)

This commit is contained in:
Ning 2019-08-21 09:20:39 -07:00 committed by GitHub
parent 2592307cce
commit 93e3121644
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 2 additions and 186 deletions

View File

@ -1,47 +0,0 @@
## Overview
The `kubeflow-training-classification.py` pipeline creates a TensorFlow model on structured data and image URLs (Google Cloud Storage). It works for both classification and regression.
Everything runs inside the pipeline cluster (Kubeflow). The only possible dependency is Google Cloud DataFlow if you enable the "*cloud*" mode for
the preprocessing or prediction step.
## The requirements
By default, the preprocessing and prediction steps use the "*local*" mode and run inside the cluster. If you set "*preprocess_mode*" or "*predict_mode*" to "*cloud*", you must enable the
[DataFlow API](https://cloud.google.com/endpoints/docs/openapi/enable-api) for the given GCP project so that the corresponding step
can use Cloud DataFlow.
Note: The trainer depends on Kubeflow API version v1alpha2.
## Compiling the pipeline template
Follow the guide to [building a pipeline](https://www.kubeflow.org/docs/guides/pipelines/build-pipeline/) to install the Kubeflow Pipelines SDK, then run the following command to compile the sample Python into a workflow specification. The specification takes the form of a YAML file compressed into a `.tar.gz` file.
```bash
dsl-compile --py kubeflow-training-classification.py --output kubeflow-training-classification.tar.gz
```
## Deploying the pipeline
Open the Kubeflow pipelines UI. Create a new pipeline, and then upload the compiled specification (`.tar.gz` file) as a new pipeline template.
The pipeline requires two arguments:
1. `output`: an output directory in a Google Cloud Storage bucket, of the form `gs://<BUCKET>/<PATH>`.
2. `project`: the GCP project that is passed to the preprocessing and prediction steps.
## Components source
Preprocessing:
[source code](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/tft/src),
[container](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/tft)
Training:
[source code](https://github.com/kubeflow/pipelines/tree/master/components/kubeflow/dnntrainer/src),
[container](https://github.com/kubeflow/pipelines/tree/master/components/kubeflow/dnntrainer)
Prediction:
[source code](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/predict/src),
[container](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/predict)
Confusion Matrix:
[source code](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix/src),
[container](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix)

View File

@ -1,91 +0,0 @@
#!/usr/bin/env python3
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import kfp
from kfp import components
from kfp import dsl
from kfp import gcp
# Reusable pipeline steps, each built from a component.yaml definition.
# All four URLs reference the same commit of kubeflow/pipelines
# (48dd338c8ab328084633c51704cda77db79ac8c2).
_load_component = components.load_component_from_url

# Preprocessing step.
dataflow_tf_transform_op = _load_component(
    'https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/dataflow/tft/component.yaml'
)
# Training step.
kubeflow_tf_training_op = _load_component(
    'https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/kubeflow/dnntrainer/component.yaml'
)
# Prediction step.
dataflow_tf_predict_op = _load_component(
    'https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/dataflow/predict/component.yaml'
)
# Confusion-matrix step.
confusion_matrix_op = _load_component(
    'https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/local/confusion_matrix/component.yaml'
)
# Pipeline definition: preprocess -> train -> predict -> confusion matrix.
#
# Parameters:
#   output            Base output location; every step writes under
#                     '<output>/{{workflow.uid}}/{{pod.name}}/data'.
#   project           Passed as `gcp_project` to the preprocessing and
#                     prediction steps.
#   evaluation/train  File patterns of the evaluation and training data.
#   schema            Schema file shared by preprocessing, training and
#                     prediction.
#   learning_rate, hidden_layer_size, steps, target
#                     Forwarded unchanged to the training step (`target` is
#                     also the prediction step's `target_column`).
#   workers, pss      Accepted as pipeline parameters but not forwarded to
#                     any step in this function.
#   preprocess_mode, predict_mode
#                     `run_mode` of the preprocessing / prediction step.
#
# NOTE(review): documented with comments rather than a docstring so that
# `__doc__` stays unset, in case the SDK reads it for pipeline metadata.
@dsl.pipeline(
    name='TF training and prediction pipeline',
    description=''
)
def kubeflow_training(output, project,
    evaluation='gs://ml-pipeline-playground/flower/eval100.csv',
    train='gs://ml-pipeline-playground/flower/train200.csv',
    schema='gs://ml-pipeline-playground/flower/schema.json',
    learning_rate=0.1,
    hidden_layer_size='100,50',
    steps=2000,
    target='label',
    workers=0,
    pss=0,
    preprocess_mode='local',
    predict_mode='local',
):
    # The {{...}} placeholders are left verbatim in the string; presumably the
    # workflow engine substitutes them per run and per pod -- TODO confirm.
    output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'

    # set the flag to use GPU trainer
    use_gpu = False

    # Every step below has the 'user-gcp-sa' GCP secret applied.
    preprocess = dataflow_tf_transform_op(
        training_data_file_pattern=train,
        evaluation_data_file_pattern=evaluation,
        schema=schema,
        gcp_project=project,
        run_mode=preprocess_mode,
        preprocessing_module='',
        transformed_data_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    training = kubeflow_tf_training_op(
        transformed_data_dir=preprocess.output,
        schema=schema,
        learning_rate=learning_rate,
        hidden_layer_size=hidden_layer_size,
        steps=steps,
        target=target,
        preprocessing_module='',
        training_output_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    if use_gpu:
        # Must be a plain string: a trailing comma here would turn the
        # assigned value into a 1-tuple instead of an image name.
        training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:0517114dc2b365a4a6d95424af6157ead774eff3'
        training.set_gpu_limit(1)

    prediction = dataflow_tf_predict_op(
        data_file_pattern=evaluation,
        schema=schema,
        target_column=target,
        model=training.output,
        run_mode=predict_mode,
        gcp_project=project,
        predictions_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    # Final step; its result is not consumed by anything in this function,
    # so it is not bound to a name.
    confusion_matrix_op(
        predictions=prediction.output,
        output_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
if __name__ == '__main__':
    # When run as a script, compile the pipeline into a package whose path is
    # this file's path with '.zip' appended.
    package_path = __file__ + '.zip'
    kfp.compiler.Compiler().compile(kubeflow_training, package_path)

View File

@ -96,15 +96,6 @@ def main():
'steps':
'5'
}
elif args.testname == 'kubeflow_training_classification':
params = {
'output': args.output,
'project': 'ml-pipeline-test',
'evaluation': 'gs://ml-pipeline-dataset/sample-test/flower/eval15.csv',
'train': 'gs://ml-pipeline-dataset/sample-test/flower/train30.csv',
'hidden-layer-size': '10,5',
'steps': '5'
}
elif args.testname == 'xgboost_training_cm':
params = {
'output': args.output,
@ -150,22 +141,7 @@ def main():
###### Validate the results for specific test cases ######
#TODO: Add result check for tfx-cab-classification after launch.
if args.testname == 'kubeflow_training_classification':
cm_tar_path = './confusion_matrix.tar.gz'
utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path,
'mlpipeline-ui-metadata')
with tarfile.open(cm_tar_path) as tar_handle:
file_handles = tar_handle.getmembers()
assert len(file_handles) == 1
with tar_handle.extractfile(file_handles[0]) as f:
cm_data = json.load(io.TextIOWrapper(f))
utils.add_junit_test(
test_cases, 'confusion matrix format',
(len(cm_data['outputs'][0]['schema']) == 3),
'the column number of the confusion matrix output is not equal to three'
)
elif args.testname == 'xgboost_training_cm':
if args.testname == 'xgboost_training_cm':
cm_tar_path = './confusion_matrix.tar.gz'
utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path,
'mlpipeline-ui-metadata')

View File

@ -203,17 +203,6 @@ xgboost_training_cm_injection() {
sed -i "s|gcr.io/ml-pipeline/ml-pipeline-local-roc:\([a-zA-Z0-9_.-]\)\+|${LOCAL_ROC_IMAGE}|g" ${TEST_NAME}.yaml
}
################################################################################
# Utility function to inject correct images to python files for
# kubeflow_training_classification test.
################################################################################
kubeflow_training_classification_injection() {
  # Edit ${TEST_NAME}.py in place: each listed gcr.io/ml-pipeline image
  # reference (with any tag) is replaced by the value of the matching
  # *_IMAGE variable. One sed run applies the four substitutions in order.
  sed -i \
    -e "s|gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:\([a-zA-Z0-9_.-]\)\+|${DATAFLOW_TFT_IMAGE}|g" \
    -e "s|gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:\([a-zA-Z0-9_.-]\)\+|${KUBEFLOW_DNNTRAINER_IMAGE}|g" \
    -e "s|gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:\([a-zA-Z0-9_.-]\)\+|${DATAFLOW_PREDICT_IMAGE}|g" \
    -e "s|gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:\([a-zA-Z0-9_.-]\)\+|${LOCAL_CONFUSIONMATRIX_IMAGE}|g" \
    ${TEST_NAME}.py
}
if [[ -z "$RESULTS_GCS_DIR" ]]; then
usage
exit 1
@ -231,17 +220,7 @@ echo "Run the sample tests..."
# Run the tests
preparation ${TEST_NAME}
if [[ "${TEST_NAME}" == "kubeflow_training_classification" ]]; then
#TODO(numerology): convert the sed commands to sed -e
# 's|gcr.io/ml-pipeline/|gcr.io/ml-pipeline-test/' and tag replacement. Also
# let the postsubmit tests refer to yaml files.
if [ -n "${DATAFLOW_TFT_IMAGE}" ];then
kubeflow_training_classification_injection
fi
dsl-compile --py "${TEST_NAME}.py" --output "${TEST_NAME}.yaml"
check_result ${TEST_NAME}
elif [[ "${TEST_NAME}" == "tfx_cab_classification" ]]; then
if [[ "${TEST_NAME}" == "tfx_cab_classification" ]]; then
dsl-compile --py "${TEST_NAME}.py" --output "${TEST_NAME}.yaml"
if [[ -n "${DATAFLOW_TFT_IMAGE}" ]]; then
tfx_cab_classification_injection

View File

@ -67,7 +67,6 @@ spec:
- name: test-name
value: "{{item}}"
withItems:
- kubeflow_training_classification
- tfx_cab_classification
- xgboost_training_cm
- kubeflow_pipeline_using_TFX_OSS_components