From cd4aa7d71f045b51ffa744bca53d4b239e90196e Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 8 Jun 2021 01:13:45 -0700 Subject: [PATCH] refactor(components): De-hardcoded local output paths. (#4495) * Components - De-hardcoded local output paths. * pip install pathlib2 * Added component.yaml changes --- .../deprecated/dataflow/predict/component.yaml | 5 ++--- .../deprecated/dataflow/tfma/component.yaml | 5 ++--- .../deprecated/dataflow/tft/component.yaml | 3 +-- components/kubeflow/dnntrainer/component.yaml | 5 ++--- .../kubeflow/dnntrainer/src/trainer/task.py | 17 +++++++++++++---- .../local/confusion_matrix/component.yaml | 5 ++--- .../confusion_matrix/src/confusion_matrix.py | 18 ++++++++++++++---- components/local/roc/component.yaml | 5 ++--- components/local/roc/src/roc.py | 17 +++++++++++++---- 9 files changed, 51 insertions(+), 29 deletions(-) diff --git a/components/deprecated/dataflow/predict/component.yaml b/components/deprecated/dataflow/predict/component.yaml index e861e95a14..b15200d4a2 100644 --- a/components/deprecated/dataflow/predict/component.yaml +++ b/components/deprecated/dataflow/predict/component.yaml @@ -27,7 +27,6 @@ implementation: --project, {inputValue: GCP project}, --batchsize, {inputValue: Batch size}, --output, {inputValue: Predictions dir}, + --prediction-results-uri-pattern-output-path, {outputPath: Predictions dir}, + --ui-metadata-output-path, {outputPath: MLPipeline UI metadata}, ] - fileOutputs: - Predictions dir: /output.txt - MLPipeline UI metadata: /mlpipeline-ui-metadata.json diff --git a/components/deprecated/dataflow/tfma/component.yaml b/components/deprecated/dataflow/tfma/component.yaml index c764b5a6c5..cbb1cb321c 100644 --- a/components/deprecated/dataflow/tfma/component.yaml +++ b/components/deprecated/dataflow/tfma/component.yaml @@ -28,7 +28,6 @@ implementation: --project, {inputValue: GCP project}, --slice-columns, {inputValue: Slice columns}, --output, {inputValue: Analysis results dir}, + 
--output-dir-uri-output-path, {outputPath: Analysis results dir}, + --ui-metadata-output-path, {outputPath: MLPipeline UI metadata}, ] - fileOutputs: - Analysis results dir: /output.txt - MLPipeline UI metadata: /mlpipeline-ui-metadata.json diff --git a/components/deprecated/dataflow/tft/component.yaml b/components/deprecated/dataflow/tft/component.yaml index b777b966d9..2fda1259e0 100644 --- a/components/deprecated/dataflow/tft/component.yaml +++ b/components/deprecated/dataflow/tft/component.yaml @@ -22,6 +22,5 @@ implementation: --mode, {inputValue: Run mode}, --preprocessing-module, {inputValue: Preprocessing module}, --output, {inputValue: Transformed data dir}, + --output-dir-uri-output-path, {outputPath: Transformed data dir}, ] - fileOutputs: - Transformed data dir: /output.txt diff --git a/components/kubeflow/dnntrainer/component.yaml b/components/kubeflow/dnntrainer/component.yaml index 647f3081cb..ae63a61e66 100644 --- a/components/kubeflow/dnntrainer/component.yaml +++ b/components/kubeflow/dnntrainer/component.yaml @@ -29,7 +29,6 @@ implementation: --target, {inputValue: Target}, --preprocessing-module, {inputValue: Preprocessing module}, --job-dir, {inputValue: Training output dir}, + --exported-model-dir-uri-output-path, {outputPath: Training output dir}, + --ui-metadata-output-path, {outputPath: MLPipeline UI metadata}, ] - fileOutputs: - Training output dir: /output.txt - MLPipeline UI metadata: /mlpipeline-ui-metadata.json diff --git a/components/kubeflow/dnntrainer/src/trainer/task.py b/components/kubeflow/dnntrainer/src/trainer/task.py index 1176133c0d..76c62fdb19 100644 --- a/components/kubeflow/dnntrainer/src/trainer/task.py +++ b/components/kubeflow/dnntrainer/src/trainer/task.py @@ -16,6 +16,7 @@ import argparse import json import os +from pathlib import Path import tensorflow as tf import tensorflow_transform as tft import tensorflow_model_analysis as tfma @@ -80,6 +81,14 @@ def parse_arguments(): required=False, help=('GCS path to a
python file defining ' '"preprocess" and "get_feature_columns" functions.')) + parser.add_argument('--exported-model-dir-uri-output-path', + type=str, + default='/output.txt', + help='Local output path for the file containing exported model directory URI.') + parser.add_argument('--ui-metadata-output-path', + type=str, + default='/mlpipeline-ui-metadata.json', + help='Local output path for the file containing UI metadata JSON structure.') args = parser.parse_args() args.hidden_layer_size = [int(x.strip()) for x in args.hidden_layer_size.split(',')] @@ -341,11 +350,11 @@ def main(): 'source': args.job_dir, }] } - with open('/mlpipeline-ui-metadata.json', 'w') as f: - json.dump(metadata, f) + Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True) + Path(args.ui_metadata_output_path).write_text(json.dumps(metadata)) - with open('/output.txt', 'w') as f: - f.write(args.job_dir) + Path(args.exported_model_dir_uri_output_path).parent.mkdir(parents=True, exist_ok=True) + Path(args.exported_model_dir_uri_output_path).write_text(args.job_dir) if __name__ == '__main__': main() diff --git a/components/local/confusion_matrix/component.yaml b/components/local/confusion_matrix/component.yaml index f49f3ace65..cc322e5412 100644 --- a/components/local/confusion_matrix/component.yaml +++ b/components/local/confusion_matrix/component.yaml @@ -15,7 +15,6 @@ implementation: --predictions, {inputValue: Predictions}, --target_lambda, {inputValue: Target lambda}, --output, {inputValue: Output dir}, + --ui-metadata-output-path, {outputPath: MLPipeline UI metadata}, + --metrics-output-path, {outputPath: MLPipeline Metrics}, ] - fileOutputs: - MLPipeline UI metadata: /mlpipeline-ui-metadata.json - MLPipeline Metrics: /mlpipeline-metrics.json diff --git a/components/local/confusion_matrix/src/confusion_matrix.py b/components/local/confusion_matrix/src/confusion_matrix.py index f830d445bd..80b4c6e820 100644 --- a/components/local/confusion_matrix/src/confusion_matrix.py 
+++ b/components/local/confusion_matrix/src/confusion_matrix.py @@ -27,6 +27,7 @@ import json import os import urlparse import pandas as pd +from pathlib import Path from sklearn.metrics import confusion_matrix, accuracy_score from tensorflow.python.lib.io import file_io @@ -39,6 +40,15 @@ def main(argv=None): help='a lambda function as a string to compute target.' + 'For example, "lambda x: x[\'a\'] + x[\'b\']"' + 'If not set, the input must include a "target" column.') + parser.add_argument('--ui-metadata-output-path', + type=str, + default='/mlpipeline-ui-metadata.json', + help='Local output path for the file containing UI metadata JSON structure.') + parser.add_argument('--metrics-output-path', + type=str, + default='/mlpipeline-metrics.json', + help='Local output path for the file containing metrics JSON structure.') + args = parser.parse_args() storage_service_scheme = urlparse.urlparse(args.output).scheme @@ -85,8 +95,8 @@ def main(argv=None): 'labels': list(map(str, vocab)), }] } - with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f: - json.dump(metadata, f) + Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True) + Path(args.ui_metadata_output_path).write_text(json.dumps(metadata)) accuracy = accuracy_score(df['target'], df['predicted']) metrics = { @@ -96,8 +106,8 @@ def main(argv=None): 'format': "PERCENTAGE", }] } - with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f: - json.dump(metrics, f) + Path(args.metrics_output_path).parent.mkdir(parents=True, exist_ok=True) + Path(args.metrics_output_path).write_text(json.dumps(metrics)) if __name__== "__main__": main() diff --git a/components/local/roc/component.yaml b/components/local/roc/component.yaml index 3df620dd7f..53bcf24446 100644 --- a/components/local/roc/component.yaml +++ b/components/local/roc/component.yaml @@ -19,7 +19,6 @@ implementation: --true_score_column, {inputValue: True score column}, --target_lambda, {inputValue: Target lambda}, --output, 
{inputValue: Output dir}, + --ui-metadata-output-path, {outputPath: MLPipeline UI metadata}, + --metrics-output-path, {outputPath: MLPipeline Metrics}, ] - fileOutputs: - MLPipeline UI metadata: /mlpipeline-ui-metadata.json - MLPipeline Metrics: /mlpipeline-metrics.json diff --git a/components/local/roc/src/roc.py b/components/local/roc/src/roc.py index d9bf0c9ee9..17e7844be2 100644 --- a/components/local/roc/src/roc.py +++ b/components/local/roc/src/roc.py @@ -26,6 +26,7 @@ import json import os import urlparse import pandas as pd +from pathlib import Path from sklearn.metrics import roc_curve, roc_auc_score from tensorflow.python.lib.io import file_io @@ -44,6 +45,14 @@ def main(argv=None): 'For example, "lambda x: x[\'a\'] and x[\'b\']". If missing, ' + 'input must have a "target" column.') parser.add_argument('--output', type=str, help='GCS path of the output directory.') + parser.add_argument('--ui-metadata-output-path', + type=str, + default='/mlpipeline-ui-metadata.json', + help='Local output path for the file containing UI metadata JSON structure.') + parser.add_argument('--metrics-output-path', + type=str, + default='/mlpipeline-metrics.json', + help='Local output path for the file containing metrics JSON structure.') args = parser.parse_args() storage_service_scheme = urlparse.urlparse(args.output).scheme @@ -91,8 +100,8 @@ def main(argv=None): 'source': roc_file }] } - with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f: - json.dump(metadata, f) + Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True) + Path(args.ui_metadata_output_path).write_text(json.dumps(metadata)) metrics = { 'metrics': [{ @@ -100,8 +109,8 @@ def main(argv=None): 'numberValue': roc_auc, }] } - with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f: - json.dump(metrics, f) + Path(args.metrics_output_path).parent.mkdir(parents=True, exist_ok=True) + Path(args.metrics_output_path).write_text(json.dumps(metrics)) if __name__== "__main__": main()