refactor(components): De-hardcoded local output paths. (#4495)

* Components - De-hardcoded local output paths. * pip install pathlib2 * Added component.yaml changes
2021-06-08 01:13:45 -07:00 · 2021-06-08 01:13:45 -07:00 · cd4aa7d71f
parent 73780697c1
commit cd4aa7d71f
9 changed files with 51 additions and 29 deletions
--- a/components/deprecated/dataflow/predict/component.yaml
+++ b/components/deprecated/dataflow/predict/component.yaml
@ -27,7 +27,6 @@ implementation:
      --project,    {inputValue: GCP project},
      --batchsize,  {inputValue: Batch size},
      --output,     {inputValue: Predictions dir},
+      --prediction-results-uri-pattern-output-path, {outputPath: Predictions dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
    ]
-    fileOutputs:
-      Predictions dir: /output.txt
-      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
--- a/components/deprecated/dataflow/tfma/component.yaml
+++ b/components/deprecated/dataflow/tfma/component.yaml
@ -28,7 +28,6 @@ implementation:
      --project,  {inputValue: GCP project},
      --slice-columns, {inputValue: Slice columns},
      --output,   {inputValue: Analysis results dir},
+      --output-dir-uri-output-path, {outputPath: Analysis results dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
    ]
-    fileOutputs:
-      Analysis results dir: /output.txt
-      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
--- a/components/deprecated/dataflow/tft/component.yaml
+++ b/components/deprecated/dataflow/tft/component.yaml
@ -22,6 +22,5 @@ implementation:
      --mode,    {inputValue: Run mode},
      --preprocessing-module, {inputValue: Preprocessing module},
      --output,  {inputValue: Transformed data dir},
+      --output-dir-uri-output-path, {outputPath: Transformed data dir},
    ]
-    fileOutputs:
-      Transformed data dir: /output.txt
--- a/components/kubeflow/dnntrainer/component.yaml
+++ b/components/kubeflow/dnntrainer/component.yaml
@ -29,7 +29,6 @@ implementation:
      --target, {inputValue: Target},
      --preprocessing-module, {inputValue: Preprocessing module},
      --job-dir, {inputValue: Training output dir},
+      --exported-model-dir-uri-output-path, {outputPath: Training output dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
    ]
-    fileOutputs:
-      Training output dir: /output.txt
-      MLPipeline UI metadata:  /mlpipeline-ui-metadata.json
--- a/components/kubeflow/dnntrainer/src/trainer/task.py
+++ b/components/kubeflow/dnntrainer/src/trainer/task.py
@ -16,6 +16,7 @@
 import argparse
 import json
 import os
+from pathlib import Path
 import tensorflow as tf
 import tensorflow_transform as tft
 import tensorflow_model_analysis as tfma
@ -80,6 +81,14 @@ def parse_arguments():
                      required=False,
                      help=('GCS path to a python file defining '
                            '"preprocess" and "get_feature_columns" functions.'))
+  parser.add_argument('--exported-model-dir-uri-output-path',
+                      type=str,
+                      default='/output.txt',
+                      help='Local output path for the file containing exported model directory URI.')
+  parser.add_argument('--ui-metadata-output-path',
+                      type=str,
+                      default='/mlpipeline-ui-metadata.json',
+                      help='Local output path for the file containing UI metadata JSON structure.')

  args = parser.parse_args()
  args.hidden_layer_size = [int(x.strip()) for x in args.hidden_layer_size.split(',')]
@ -341,11 +350,11 @@ def main():
      'source': args.job_dir,
    }]
  }
-  with open('/mlpipeline-ui-metadata.json', 'w') as f:
-    json.dump(metadata, f)
+  Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.ui_metadata_output_path).write_text(json.dumps(metadata))

-  with open('/output.txt', 'w') as f:
-    f.write(args.job_dir)
+  Path(args.exported_model_dir_uri_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.exported_model_dir_uri_output_path).write_text(args.job_dir)

 if __name__ == '__main__':
  main()
--- a/components/local/confusion_matrix/component.yaml
+++ b/components/local/confusion_matrix/component.yaml
@ -15,7 +15,6 @@ implementation:
      --predictions, {inputValue: Predictions},
      --target_lambda, {inputValue: Target lambda},
      --output,      {inputValue: Output dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
+      --metrics-output-path, {outputPath: MLPipeline Metrics},
    ]
-    fileOutputs:
-      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
-      MLPipeline Metrics:     /mlpipeline-metrics.json
--- a/components/local/confusion_matrix/src/confusion_matrix.py
+++ b/components/local/confusion_matrix/src/confusion_matrix.py
@ -27,6 +27,7 @@ import json
 import os
 import urlparse
 import pandas as pd
+from pathlib import Path
 from sklearn.metrics import confusion_matrix, accuracy_score
 from tensorflow.python.lib.io import file_io

@ -39,6 +40,15 @@ def main(argv=None):
                      help='a lambda function as a string to compute target.' +
                           'For example, "lambda x: x[\'a\'] + x[\'b\']"' +
                           'If not set, the input must include a "target" column.')
+  parser.add_argument('--ui-metadata-output-path',
+                      type=str,
+                      default='/mlpipeline-ui-metadata.json',
+                      help='Local output path for the file containing UI metadata JSON structure.')
+  parser.add_argument('--metrics-output-path',
+                      type=str,
+                      default='/mlpipeline-metrics.json',
+                      help='Local output path for the file containing metrics JSON structure.')
+
  args = parser.parse_args()

  storage_service_scheme = urlparse.urlparse(args.output).scheme
@ -85,8 +95,8 @@ def main(argv=None):
      'labels': list(map(str, vocab)),
    }]
  }
-  with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
-    json.dump(metadata, f)
+  Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.ui_metadata_output_path).write_text(json.dumps(metadata))

  accuracy = accuracy_score(df['target'], df['predicted'])
  metrics = {
@ -96,8 +106,8 @@ def main(argv=None):
      'format': "PERCENTAGE",
    }]
  }
-  with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f:
-    json.dump(metrics, f)
+  Path(args.metrics_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.metrics_output_path).write_text(json.dumps(metrics))

 if __name__== "__main__":
  main()
--- a/components/local/roc/component.yaml
+++ b/components/local/roc/component.yaml
@ -19,7 +19,6 @@ implementation:
      --true_score_column,  {inputValue: True score column},
      --target_lambda,      {inputValue: Target lambda},
      --output,             {inputValue: Output dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
+      --metrics-output-path, {outputPath: MLPipeline Metrics},
    ]
-    fileOutputs:
-      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
-      MLPipeline Metrics:     /mlpipeline-metrics.json
--- a/components/local/roc/src/roc.py
+++ b/components/local/roc/src/roc.py
@ -26,6 +26,7 @@ import json
 import os
 import urlparse
 import pandas as pd
+from pathlib import Path
 from sklearn.metrics import roc_curve, roc_auc_score
 from tensorflow.python.lib.io import file_io

@ -44,6 +45,14 @@ def main(argv=None):
                           'For example, "lambda x: x[\'a\'] and x[\'b\']". If missing, ' +
                           'input must have a "target" column.')
  parser.add_argument('--output', type=str, help='GCS path of the output directory.')
+  parser.add_argument('--ui-metadata-output-path',
+                      type=str,
+                      default='/mlpipeline-ui-metadata.json',
+                      help='Local output path for the file containing UI metadata JSON structure.')
+  parser.add_argument('--metrics-output-path',
+                      type=str,
+                      default='/mlpipeline-metrics.json',
+                      help='Local output path for the file containing metrics JSON structure.')
  args = parser.parse_args()

  storage_service_scheme = urlparse.urlparse(args.output).scheme
@ -91,8 +100,8 @@ def main(argv=None):
      'source': roc_file
    }]
  }
-  with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
-    json.dump(metadata, f)
+  Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.ui_metadata_output_path).write_text(json.dumps(metadata))

  metrics = {
    'metrics': [{
@ -100,8 +109,8 @@ def main(argv=None):
      'numberValue':  roc_auc,
    }]
  }
-  with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f:
-    json.dump(metrics, f)
+  Path(args.metrics_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.metrics_output_path).write_text(json.dumps(metrics))

 if __name__== "__main__":
  main()