pipelines/components/dataflow/tfma/component.yaml

33 lines
2.3 KiB
YAML

# Pipeline component definition: runs TensorFlow Model Analysis (TFMA) inside
# a container, either locally or on Cloud Dataflow depending on "Run mode".
name: TFX - Analyze model
description: |
  Runs Tensorflow Model Analysis. https://www.tensorflow.org/tfx/model_analysis/get_started
  TensorFlow Model Analysis allows you to perform model evaluations in the TFX pipeline, and view resultant metrics and plots in a Jupyter notebook. Specifically, it can provide:
  * metrics computed on entire training and holdout dataset, as well as next-day evaluations
  * tracking metrics over time
  * model quality performance on different feature slices

# Each input is forwarded to the container as a command-line argument (see
# `implementation.container.args`). The "Structured type" comments preserve the
# richer type annotations that were present only as comments in the original.
inputs:
  # Structured type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}
  - name: Model
    type: GCSPath
    description: GCS path to the model which will be evaluated.
  # Structured type: {GCSPath: {data_type: CSV}}
  - name: Evaluation data
    type: GCSPath
    description: GCS path of eval files.
  # Structured type: {GCSPath: {data_type: TFDV schema JSON}}
  - name: Schema
    type: GCSPath
    description: GCS json schema file path.
  - name: Run mode
    type: String
    default: local
    description: whether to run the job locally or in Cloud Dataflow.
  # Empty default: the project is only relevant in the `cloud` run mode.
  - name: GCP project
    type: GCPProjectID
    default: ''
    description: 'The GCP project to run the dataflow job, if running in the `cloud` mode.'
  - name: Slice columns
    type: String
    description: Comma-separated list of columns on which to slice for analysis.
  # Structured type: {GCSPath: {path_type: Directory}}
  - name: Analysis results dir
    type: GCSPath
    description: GCS or local directory where the analysis results should be written.

outputs:
  # Same name as the "Analysis results dir" input; its value comes from the
  # file listed under `fileOutputs` below.
  # Structured type: {GCSPath: {path_type: Directory}}
  - name: Analysis results dir
    type: GCSPath
    description: GCS or local directory where the analysis results were written.

implementation:
  container:
    image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:d4960d3379af4735fd04dc7167fab5fff82d0f22
    command: [python2, /ml/model_analysis.py]
    # Flag / value pairs; each {inputValue: ...} placeholder refers to the
    # input of the same name declared above.
    args:
      - --model
      - {inputValue: Model}
      - --eval
      - {inputValue: Evaluation data}
      - --schema
      - {inputValue: Schema}
      - --mode
      - {inputValue: Run mode}
      - --project
      - {inputValue: GCP project}
      - --slice-columns
      - {inputValue: Slice columns}
      - --output
      - {inputValue: Analysis results dir}
    # Maps the output name to the file inside the container that holds its
    # value (per the Kubeflow Pipelines component spec).
    fileOutputs:
      Analysis results dir: /output.txt