remove kubeflow training (#1902)
This commit is contained in:
parent
2592307cce
commit
93e3121644
|
|
@ -1,47 +0,0 @@
|
|||
## Overview
|
||||
|
||||
The `kubeflow-training-classification.py` pipeline creates a TensorFlow model on structured data and image URLs (Google Cloud Storage). It works for both classification and regression.
|
||||
Everything runs inside the pipeline cluster (Kubeflow). The only possible dependency is Google Cloud DataFlow if you enable the "*cloud*" mode for
|
||||
the preprocessing or prediction step.
|
||||
|
||||
## The requirements
|
||||
|
||||
By default, the preprocessing and prediction steps use the "*local*" mode and run inside the cluster. If you specify the value of "*preprocess_mode*" as "*cloud*", you must enable the
|
||||
[DataFlow API](https://cloud.google.com/endpoints/docs/openapi/enable-api) for the given GCP project so that the preprocessing step
|
||||
can use Cloud DataFlow.
|
||||
|
||||
Note: The trainer depends on Kubeflow API version v1alpha2.
|
||||
|
||||
## Compiling the pipeline template
|
||||
|
||||
Follow the guide to [building a pipeline](https://www.kubeflow.org/docs/guides/pipelines/build-pipeline/) to install the Kubeflow Pipelines SDK, then run the following command to compile the sample Python into a workflow specification. The specification takes the form of a YAML file compressed into a `.tar.gz` file.
|
||||
|
||||
```bash
|
||||
dsl-compile --py kubeflow-training-classification.py --output kubeflow-training-classification.tar.gz
|
||||
```
|
||||
|
||||
## Deploying the pipeline
|
||||
|
||||
Open the Kubeflow pipelines UI. Create a new pipeline, and then upload the compiled specification (`.tar.gz` file) as a new pipeline template.
|
||||
|
||||
The pipeline requires two arguments:
|
||||
|
||||
1. An output directory in a Google Cloud Storage bucket, of the form `gs://<BUCKET>/<PATH>`.
2. The GCP project ID, which is passed to the preprocessing and prediction steps.
|
||||
|
||||
## Components source
|
||||
|
||||
Preprocessing:
|
||||
[source code](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/tft/src),
|
||||
[container](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/tft)
|
||||
|
||||
Training:
|
||||
[source code](https://github.com/kubeflow/pipelines/tree/master/components/kubeflow/launcher/src),
|
||||
[container](https://github.com/kubeflow/pipelines/tree/master/components/kubeflow/launcher)
|
||||
|
||||
Prediction:
|
||||
[source code](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/predict/src),
|
||||
[container](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/predict)
|
||||
|
||||
Confusion Matrix:
|
||||
[source code](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix/src),
|
||||
[container](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix)
|
||||
|
|
@ -1,91 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright 2019 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import kfp
|
||||
from kfp import components
|
||||
from kfp import dsl
|
||||
from kfp import gcp
|
||||
|
||||
# Component factories used by the pipeline below. Each one is loaded from a
# component.yaml pinned to a fixed commit of the kubeflow/pipelines repository.
dataflow_tf_transform_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    '48dd338c8ab328084633c51704cda77db79ac8c2/'
    'components/dataflow/tft/component.yaml'
)
kubeflow_tf_training_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    '48dd338c8ab328084633c51704cda77db79ac8c2/'
    'components/kubeflow/dnntrainer/component.yaml'
)
dataflow_tf_predict_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    '48dd338c8ab328084633c51704cda77db79ac8c2/'
    'components/dataflow/predict/component.yaml'
)
confusion_matrix_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    '48dd338c8ab328084633c51704cda77db79ac8c2/'
    'components/local/confusion_matrix/component.yaml'
)
|
||||
|
||||
@dsl.pipeline(
    name='TF training and prediction pipeline',
    description=''
)
def kubeflow_training(output, project,
                      evaluation='gs://ml-pipeline-playground/flower/eval100.csv',
                      train='gs://ml-pipeline-playground/flower/train200.csv',
                      schema='gs://ml-pipeline-playground/flower/schema.json',
                      learning_rate=0.1,
                      hidden_layer_size='100,50',
                      steps=2000,
                      target='label',
                      workers=0,
                      pss=0,
                      preprocess_mode='local',
                      predict_mode='local',
                      ):
    # Pipeline definition: preprocess -> train -> predict -> confusion matrix.
    #
    # Parameters:
    #   output: base location; every step writes under
    #       '<output>/{{workflow.uid}}/{{pod.name}}/data'.
    #   project: passed as `gcp_project` to the preprocessing and prediction
    #       steps.
    #   evaluation, train, schema: input locations forwarded to the steps.
    #   learning_rate, hidden_layer_size, steps, target: forwarded to the
    #       training step (`target` is also the prediction target column).
    #   workers, pss: accepted for interface compatibility but not used by
    #       any step in this function.
    #   preprocess_mode, predict_mode: `run_mode` of the preprocessing and
    #       prediction steps respectively.
    #
    # A comment block is used instead of a docstring so that the pipeline
    # metadata the SDK may derive from `__doc__` is left unchanged.

    # The `{{...}}` placeholders are kept literal here; presumably they are
    # substituted by the workflow engine at run time (not visible in this file).
    output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'

    # set the flag to use GPU trainer
    use_gpu = False

    preprocess = dataflow_tf_transform_op(
        training_data_file_pattern=train,
        evaluation_data_file_pattern=evaluation,
        schema=schema,
        gcp_project=project,
        run_mode=preprocess_mode,
        preprocessing_module='',
        transformed_data_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    training = kubeflow_tf_training_op(
        transformed_data_dir=preprocess.output,
        schema=schema,
        learning_rate=learning_rate,
        hidden_layer_size=hidden_layer_size,
        steps=steps,
        target=target,
        preprocessing_module='',
        training_output_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    if use_gpu:
        # No trailing comma here: with one, the assignment would store a
        # 1-tuple rather than the image name string.
        training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:0517114dc2b365a4a6d95424af6157ead774eff3'
        training.set_gpu_limit(1)

    prediction = dataflow_tf_predict_op(
        data_file_pattern=evaluation,
        schema=schema,
        target_column=target,
        model=training.output,
        run_mode=predict_mode,
        gcp_project=project,
        predictions_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    # The confusion matrix step consumes the prediction output; its result is
    # bound to a name but not used further in this function.
    confusion_matrix = confusion_matrix_op(
        predictions=prediction.output,
        output_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # When run as a script, compile the pipeline function into a package whose
    # path is this file's path with '.zip' appended.
    package_path = __file__ + '.zip'
    kfp.compiler.Compiler().compile(kubeflow_training, package_path)
|
||||
|
|
@ -96,15 +96,6 @@ def main():
|
|||
'steps':
|
||||
'5'
|
||||
}
|
||||
elif args.testname == 'kubeflow_training_classification':
|
||||
params = {
|
||||
'output': args.output,
|
||||
'project': 'ml-pipeline-test',
|
||||
'evaluation': 'gs://ml-pipeline-dataset/sample-test/flower/eval15.csv',
|
||||
'train': 'gs://ml-pipeline-dataset/sample-test/flower/train30.csv',
|
||||
'hidden-layer-size': '10,5',
|
||||
'steps': '5'
|
||||
}
|
||||
elif args.testname == 'xgboost_training_cm':
|
||||
params = {
|
||||
'output': args.output,
|
||||
|
|
@ -150,22 +141,7 @@ def main():
|
|||
|
||||
###### Validate the results for specific test cases ######
|
||||
#TODO: Add result check for tfx-cab-classification after launch.
|
||||
if args.testname == 'kubeflow_training_classification':
|
||||
cm_tar_path = './confusion_matrix.tar.gz'
|
||||
utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path,
|
||||
'mlpipeline-ui-metadata')
|
||||
with tarfile.open(cm_tar_path) as tar_handle:
|
||||
file_handles = tar_handle.getmembers()
|
||||
assert len(file_handles) == 1
|
||||
|
||||
with tar_handle.extractfile(file_handles[0]) as f:
|
||||
cm_data = json.load(io.TextIOWrapper(f))
|
||||
utils.add_junit_test(
|
||||
test_cases, 'confusion matrix format',
|
||||
(len(cm_data['outputs'][0]['schema']) == 3),
|
||||
'the column number of the confusion matrix output is not equal to three'
|
||||
)
|
||||
elif args.testname == 'xgboost_training_cm':
|
||||
if args.testname == 'xgboost_training_cm':
|
||||
cm_tar_path = './confusion_matrix.tar.gz'
|
||||
utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path,
|
||||
'mlpipeline-ui-metadata')
|
||||
|
|
|
|||
|
|
@ -203,17 +203,6 @@ xgboost_training_cm_injection() {
|
|||
sed -i "s|gcr.io/ml-pipeline/ml-pipeline-local-roc:\([a-zA-Z0-9_.-]\)\+|${LOCAL_ROC_IMAGE}|g" ${TEST_NAME}.yaml
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Utility function to inject correct images to python files for
|
||||
# kubeflow_training_classification test.
|
||||
################################################################################
|
||||
kubeflow_training_classification_injection() {
  # Rewrites "${TEST_NAME}.py" in place, replacing each pinned
  # gcr.io/ml-pipeline image reference (name plus tag) with the image given by
  # the corresponding environment variable.
  # The file argument is quoted so that a TEST_NAME containing whitespace or
  # glob characters is still passed to sed as a single path.
  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:\([a-zA-Z0-9_.-]\)\+|${DATAFLOW_TFT_IMAGE}|g" "${TEST_NAME}.py"
  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:\([a-zA-Z0-9_.-]\)\+|${KUBEFLOW_DNNTRAINER_IMAGE}|g" "${TEST_NAME}.py"
  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:\([a-zA-Z0-9_.-]\)\+|${DATAFLOW_PREDICT_IMAGE}|g" "${TEST_NAME}.py"
  sed -i "s|gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:\([a-zA-Z0-9_.-]\)\+|${LOCAL_CONFUSIONMATRIX_IMAGE}|g" "${TEST_NAME}.py"
}
|
||||
|
||||
if [[ -z "$RESULTS_GCS_DIR" ]]; then
|
||||
usage
|
||||
exit 1
|
||||
|
|
@ -231,17 +220,7 @@ echo "Run the sample tests..."
|
|||
# Run the tests
|
||||
preparation ${TEST_NAME}
|
||||
|
||||
if [[ "${TEST_NAME}" == "kubeflow_training_classification" ]]; then
|
||||
#TODO(numerology): convert the sed commands to sed -e
|
||||
# 's|gcr.io/ml-pipeline/|gcr.io/ml-pipeline-test/' and tag replacement. Also
|
||||
# let the postsubmit tests refer to yaml files.
|
||||
if [ -n "${DATAFLOW_TFT_IMAGE}" ];then
|
||||
kubeflow_training_classification_injection
|
||||
fi
|
||||
|
||||
dsl-compile --py "${TEST_NAME}.py" --output "${TEST_NAME}.yaml"
|
||||
check_result ${TEST_NAME}
|
||||
elif [[ "${TEST_NAME}" == "tfx_cab_classification" ]]; then
|
||||
if [[ "${TEST_NAME}" == "tfx_cab_classification" ]]; then
|
||||
dsl-compile --py "${TEST_NAME}.py" --output "${TEST_NAME}.yaml"
|
||||
if [[ -n "${DATAFLOW_TFT_IMAGE}" ]]; then
|
||||
tfx_cab_classification_injection
|
||||
|
|
|
|||
|
|
@ -67,7 +67,6 @@ spec:
|
|||
- name: test-name
|
||||
value: "{{item}}"
|
||||
withItems:
|
||||
- kubeflow_training_classification
|
||||
- tfx_cab_classification
|
||||
- xgboost_training_cm
|
||||
- kubeflow_pipeline_using_TFX_OSS_components
|
||||
|
|
|
|||
Loading…
Reference in New Issue