# pipelines/components/gcp/ml_engine/train/component.yaml

# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Human-readable name of this component.
name: Submitting a Cloud ML training job as a pipeline step
# Literal block scalar (`|`): the line break between the two lines and the
# trailing newline are part of the description value.
description: |
  A Kubeflow Pipeline component to submit a Cloud Machine Learning (Cloud ML)
  Engine training job as a step in a pipeline.
metadata:
  labels:
    # Quoted so the label value stays the string 'true' rather than a boolean.
    # NOTE(review): presumably read by the pipeline tooling to inject pod-level
    # environment variables into the step - confirm against the consumer.
    add-pod-env: 'true'
# Component inputs. Every input except `project_id` declares a default, and
# each one is forwarded to the container as a command-line flag of the same
# name under `implementation.container.args`.
inputs:
  - name: project_id
    description: 'Required. The ID of the parent project of the job.'
    type: GCPProjectID
  - name: python_module
    description: 'The Python module name to run after installing the packages.'
    default: ''
    type: String
  - name: package_uris
    description: >-
      The Cloud Storage location of the packages (that contain the training program
      and any additional dependencies). The maximum number of package URIs is 100.
    default: ''
    type: List
  - name: region
    description: 'The Compute Engine region in which the training job is run.'
    default: ''
    type: GCPRegion
  - name: args
    description: 'The command line arguments to pass to the program.'
    default: ''
    type: List
  - name: job_dir
    description: >-
      A Cloud Storage path in which to store the training outputs and other data
      needed for training. This path is passed to your TensorFlow program as the
      `job-dir` command-line argument. The benefit of specifying this field is
      that Cloud ML validates the path for use in training.
    default: ''
    type: GCSPath
  - name: python_version
    description: >-
      The version of Python used in training. If not set, the default
      version is `2.7`. Python `3.5` is available when runtimeVersion is set to `1.4`
      and above.
    default: ''
    type: String
  - name: runtime_version
    description: >-
      The Cloud ML Engine runtime version to use for training. If not set,
      Cloud ML Engine uses the default stable version, 1.0.
    default: ''
    type: String
  - name: master_image_uri
    description: >-
      The Docker image to run on the master replica. This image must be in
      Container Registry.
    default: ''
    type: GCRPath
  - name: worker_image_uri
    description: >-
      The Docker image to run on the worker replica. This image must be in
      Container Registry.
    default: ''
    type: GCRPath
  - name: training_input
    description: >-
      The input parameters to create a training job. It is the JSON payload
      of a [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput).
    default: ''
    type: Dict
  - name: job_id_prefix
    description: 'The prefix of the generated job id.'
    default: ''
    type: String
  - name: job_id
    description: >-
      The ID of the job to create, takes precedence over generated
      job id if set.
    default: ''
    type: String
  - name: wait_interval
    description: >-
      Optional. A time-interval to wait for between calls to get the job status.
      Defaults to 30.
    # Quoted so the default stays a string in the spec; the declared type
    # below is still Integer.
    default: '30'
    type: Integer
# Component outputs. The container receives a file path for each of them:
# `job_id` via --job_id_output_path, `job_dir` via --job_dir_output_path and
# `MLPipeline UI metadata` via --ui_metadata_path.
outputs:
  - name: job_id
    description: 'The ID of the created job.'
    type: String
  - name: job_dir
    description: >-
      The output path in Cloud Storage of the training job, which contains
      the trained model files.
    type: GCSPath
  - name: MLPipeline UI metadata
    type: UI metadata
# Container invocation for this component. `args` is one flat list: each flag
# is immediately followed by its value placeholder.
implementation:
  container:
    image: gcr.io/ml-pipeline/ml-pipeline-gcp:1.4.0-rc.1
    args:
      # Where the container should write the UI metadata output.
      - --ui_metadata_path
      - outputPath: MLPipeline UI metadata
      # Two positional arguments passed before the per-input flags.
      - kfp_component.google.ml_engine
      - train
      # One flag per declared input, in the same order as `inputs`.
      - --project_id
      - inputValue: project_id
      - --python_module
      - inputValue: python_module
      - --package_uris
      - inputValue: package_uris
      - --region
      - inputValue: region
      - --args
      - inputValue: args
      - --job_dir
      - inputValue: job_dir
      - --python_version
      - inputValue: python_version
      - --runtime_version
      - inputValue: runtime_version
      - --master_image_uri
      - inputValue: master_image_uri
      - --worker_image_uri
      - inputValue: worker_image_uri
      - --training_input
      - inputValue: training_input
      - --job_id_prefix
      - inputValue: job_id_prefix
      - --job_id
      - inputValue: job_id
      - --wait_interval
      - inputValue: wait_interval
      # File paths for the `job_id` and `job_dir` outputs.
      - --job_id_output_path
      - outputPath: job_id
      - --job_dir_output_path
      - outputPath: job_dir
    env:
      # Quoted so the leading `{` is not parsed as a YAML flow mapping.
      # NOTE(review): presumably a placeholder substituted by the workflow
      # engine at run time - confirm.
      KFP_POD_NAME: '{{pod.name}}'