From cd4aa7d71f045b51ffa744bca53d4b239e90196e Mon Sep 17 00:00:00 2001
From: Alexey Volkov <alexey.volkov@ark-kun.com>
Date: Tue, 8 Jun 2021 01:13:45 -0700
Subject: [PATCH] refactor(components): De-hardcoded local output paths.
 (#4495)

* Components - De-hardcoded local output paths.

* pip install pathlib2

* Added component.yaml changes
---
 .../deprecated/dataflow/predict/component.yaml |  5 ++---
 .../deprecated/dataflow/tfma/component.yaml    |  5 ++---
 .../deprecated/dataflow/tft/component.yaml     |  3 +--
 components/kubeflow/dnntrainer/component.yaml  |  5 ++---
 .../kubeflow/dnntrainer/src/trainer/task.py    | 17 +++++++++++++----
 .../local/confusion_matrix/component.yaml      |  5 ++---
 .../confusion_matrix/src/confusion_matrix.py   | 18 ++++++++++++++----
 components/local/roc/component.yaml            |  5 ++---
 components/local/roc/src/roc.py                | 17 +++++++++++++----
 9 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/components/deprecated/dataflow/predict/component.yaml b/components/deprecated/dataflow/predict/component.yaml
index e861e95a14..b15200d4a2 100644
--- a/components/deprecated/dataflow/predict/component.yaml
+++ b/components/deprecated/dataflow/predict/component.yaml
@@ -27,7 +27,6 @@ implementation:
       --project,    {inputValue: GCP project},
       --batchsize,  {inputValue: Batch size},
       --output,     {inputValue: Predictions dir},
+      --prediction-results-uri-pattern-output-path, {outputPath: Predictions dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
     ]
-    fileOutputs:
-      Predictions dir: /output.txt
-      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
diff --git a/components/deprecated/dataflow/tfma/component.yaml b/components/deprecated/dataflow/tfma/component.yaml
index c764b5a6c5..cbb1cb321c 100644
--- a/components/deprecated/dataflow/tfma/component.yaml
+++ b/components/deprecated/dataflow/tfma/component.yaml
@@ -28,7 +28,6 @@ implementation:
       --project,  {inputValue: GCP project},
       --slice-columns, {inputValue: Slice columns},
       --output,   {inputValue: Analysis results dir},
+      --output-dir-uri-output-path, {outputPath: Analysis results dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
     ]
-    fileOutputs:
-      Analysis results dir: /output.txt
-      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
diff --git a/components/deprecated/dataflow/tft/component.yaml b/components/deprecated/dataflow/tft/component.yaml
index b777b966d9..2fda1259e0 100644
--- a/components/deprecated/dataflow/tft/component.yaml
+++ b/components/deprecated/dataflow/tft/component.yaml
@@ -22,6 +22,5 @@ implementation:
       --mode,    {inputValue: Run mode},
       --preprocessing-module, {inputValue: Preprocessing module},
       --output,  {inputValue: Transformed data dir},
+      --output-dir-uri-output-path, {outputPath: Transformed data dir},
     ]
-    fileOutputs:
-      Transformed data dir: /output.txt
diff --git a/components/kubeflow/dnntrainer/component.yaml b/components/kubeflow/dnntrainer/component.yaml
index 647f3081cb..ae63a61e66 100644
--- a/components/kubeflow/dnntrainer/component.yaml
+++ b/components/kubeflow/dnntrainer/component.yaml
@@ -29,7 +29,6 @@ implementation:
       --target, {inputValue: Target},
       --preprocessing-module, {inputValue: Preprocessing module},
       --job-dir, {inputValue: Training output dir},
+      --exported-model-dir-uri-output-path, {outputPath: Training output dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
     ]
-    fileOutputs:
-      Training output dir: /output.txt
-      MLPipeline UI metadata:  /mlpipeline-ui-metadata.json
diff --git a/components/kubeflow/dnntrainer/src/trainer/task.py b/components/kubeflow/dnntrainer/src/trainer/task.py
index 1176133c0d..76c62fdb19 100644
--- a/components/kubeflow/dnntrainer/src/trainer/task.py
+++ b/components/kubeflow/dnntrainer/src/trainer/task.py
@@ -16,6 +16,7 @@
 import argparse
 import json
 import os
+from pathlib import Path
 import tensorflow as tf
 import tensorflow_transform as tft
 import tensorflow_model_analysis as tfma
@@ -80,6 +81,14 @@ def parse_arguments():
                       required=False,
                       help=('GCS path to a python file defining '
                             '"preprocess" and "get_feature_columns" functions.'))
+  parser.add_argument('--exported-model-dir-uri-output-path',
+                      type=str,
+                      default='/output.txt',
+                      help='Local output path for the file containing exported model directory URI.')
+  parser.add_argument('--ui-metadata-output-path',
+                      type=str,
+                      default='/mlpipeline-ui-metadata.json',
+                      help='Local output path for the file containing UI metadata JSON structure.')
 
   args = parser.parse_args()
   args.hidden_layer_size = [int(x.strip()) for x in args.hidden_layer_size.split(',')]
@@ -341,11 +350,11 @@ def main():
       'source': args.job_dir,
     }]
   }
-  with open('/mlpipeline-ui-metadata.json', 'w') as f:
-    json.dump(metadata, f)
+  Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.ui_metadata_output_path).write_text(json.dumps(metadata))
 
-  with open('/output.txt', 'w') as f:
-    f.write(args.job_dir)
+  Path(args.exported_model_dir_uri_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.exported_model_dir_uri_output_path).write_text(args.job_dir)
 
 if __name__ == '__main__':
   main()
diff --git a/components/local/confusion_matrix/component.yaml b/components/local/confusion_matrix/component.yaml
index f49f3ace65..cc322e5412 100644
--- a/components/local/confusion_matrix/component.yaml
+++ b/components/local/confusion_matrix/component.yaml
@@ -15,7 +15,6 @@ implementation:
       --predictions, {inputValue: Predictions},
       --target_lambda, {inputValue: Target lambda},
       --output,      {inputValue: Output dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
+      --metrics-output-path, {outputPath: MLPipeline Metrics},
     ]
-    fileOutputs:
-      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
-      MLPipeline Metrics:     /mlpipeline-metrics.json
diff --git a/components/local/confusion_matrix/src/confusion_matrix.py b/components/local/confusion_matrix/src/confusion_matrix.py
index f830d445bd..80b4c6e820 100644
--- a/components/local/confusion_matrix/src/confusion_matrix.py
+++ b/components/local/confusion_matrix/src/confusion_matrix.py
@@ -27,6 +27,7 @@ import json
 import os
 import urlparse
 import pandas as pd
+from pathlib import Path
 from sklearn.metrics import confusion_matrix, accuracy_score
 from tensorflow.python.lib.io import file_io
 
@@ -39,6 +40,15 @@ def main(argv=None):
                       help='a lambda function as a string to compute target.' +
                            'For example, "lambda x: x[\'a\'] + x[\'b\']"' +
                            'If not set, the input must include a "target" column.')
+  parser.add_argument('--ui-metadata-output-path',
+                      type=str,
+                      default='/mlpipeline-ui-metadata.json',
+                      help='Local output path for the file containing UI metadata JSON structure.')
+  parser.add_argument('--metrics-output-path',
+                      type=str,
+                      default='/mlpipeline-metrics.json',
+                      help='Local output path for the file containing metrics JSON structure.')
+
   args = parser.parse_args()
 
   storage_service_scheme = urlparse.urlparse(args.output).scheme
@@ -85,8 +95,8 @@ def main(argv=None):
       'labels': list(map(str, vocab)),
     }]
   }
-  with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
-    json.dump(metadata, f)
+  Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.ui_metadata_output_path).write_text(json.dumps(metadata))
 
   accuracy = accuracy_score(df['target'], df['predicted'])
   metrics = {
@@ -96,8 +106,8 @@ def main(argv=None):
       'format': "PERCENTAGE",
     }]
   }
-  with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f:
-    json.dump(metrics, f)
+  Path(args.metrics_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.metrics_output_path).write_text(json.dumps(metrics))
 
 if __name__== "__main__":
   main()
diff --git a/components/local/roc/component.yaml b/components/local/roc/component.yaml
index 3df620dd7f..53bcf24446 100644
--- a/components/local/roc/component.yaml
+++ b/components/local/roc/component.yaml
@@ -19,7 +19,6 @@ implementation:
       --true_score_column,  {inputValue: True score column},
       --target_lambda,      {inputValue: Target lambda},
       --output,             {inputValue: Output dir},
+      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
+      --metrics-output-path, {outputPath: MLPipeline Metrics},
     ]
-    fileOutputs:
-      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
-      MLPipeline Metrics:     /mlpipeline-metrics.json
diff --git a/components/local/roc/src/roc.py b/components/local/roc/src/roc.py
index d9bf0c9ee9..17e7844be2 100644
--- a/components/local/roc/src/roc.py
+++ b/components/local/roc/src/roc.py
@@ -26,6 +26,7 @@ import json
 import os
 import urlparse
 import pandas as pd
+from pathlib import Path
 from sklearn.metrics import roc_curve, roc_auc_score
 from tensorflow.python.lib.io import file_io
 
@@ -44,6 +45,14 @@ def main(argv=None):
                            'For example, "lambda x: x[\'a\'] and x[\'b\']". If missing, ' +
                            'input must have a "target" column.')
   parser.add_argument('--output', type=str, help='GCS path of the output directory.')
+  parser.add_argument('--ui-metadata-output-path',
+                      type=str,
+                      default='/mlpipeline-ui-metadata.json',
+                      help='Local output path for the file containing UI metadata JSON structure.')
+  parser.add_argument('--metrics-output-path',
+                      type=str,
+                      default='/mlpipeline-metrics.json',
+                      help='Local output path for the file containing metrics JSON structure.')
   args = parser.parse_args()
 
   storage_service_scheme = urlparse.urlparse(args.output).scheme
@@ -91,8 +100,8 @@ def main(argv=None):
       'source': roc_file
     }]
   }
-  with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
-    json.dump(metadata, f)
+  Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.ui_metadata_output_path).write_text(json.dumps(metadata))
 
   metrics = {
     'metrics': [{
@@ -100,8 +109,8 @@ def main(argv=None):
       'numberValue':  roc_auc,
     }]
   }
-  with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f:
-    json.dump(metrics, f)
+  Path(args.metrics_output_path).parent.mkdir(parents=True, exist_ok=True)
+  Path(args.metrics_output_path).write_text(json.dumps(metrics))
 
 if __name__== "__main__":
   main()