# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Component identity: display name plus a human-readable summary.
name: Submitting a Cloud ML training job as a pipeline step
# Literal block scalar: the indented lines below form the description text.
description: |
  A Kubeflow Pipeline component to submit a Cloud Machine Learning (Cloud ML)
  Engine training job as a step in a pipeline.
metadata:
  labels:
    # Quoted so the label value stays the string 'true' rather than a boolean.
    # NOTE(review): presumably asks the pipeline backend to inject pod-related
    # environment variables into this step - confirm against the consumer.
    add-pod-env: 'true'
# Parameters accepted by the component. Every input except `project_id`
# declares a default: an empty string, or '30' for `wait_interval`.
# Long descriptions use folded block scalars (`>-`), so their line breaks
# are folded into single spaces and no trailing newline is kept.
inputs:
  - name: project_id
    description: 'Required. The ID of the parent project of the job.'
    type: GCPProjectID
  - name: python_module
    description: 'The Python module name to run after installing the packages.'
    default: ''
    type: String
  - name: package_uris
    description: >-
      The Cloud Storage location of the packages (that contain the training program
      and any additional dependencies). The maximum number of package URIs is 100.
    default: ''
    type: List
  - name: region
    description: 'The Compute Engine region in which the training job is run.'
    default: ''
    type: GCPRegion
  - name: args
    description: 'The command line arguments to pass to the program.'
    default: ''
    type: List
  - name: job_dir
    description: >-
      A Cloud Storage path in which to store the training outputs and other data
      needed for training. This path is passed to your TensorFlow program as the
      `job-dir` command-line argument. The benefit of specifying this field is
      that Cloud ML validates the path for use in training.
    default: ''
    type: GCSPath
  - name: python_version
    description: >-
      The version of Python used in training. If not set, the default
      version is `2.7`. Python `3.5` is available when runtimeVersion is set to `1.4`
      and above.
    default: ''
    type: String
  - name: runtime_version
    description: >-
      The Cloud ML Engine runtime version to use for training. If not set,
      Cloud ML Engine uses the default stable version, 1.0.
    default: ''
    type: String
  - name: master_image_uri
    description: >-
      The Docker image to run on the master replica. This image must be in
      Container Registry.
    default: ''
    type: GCRPath
  - name: worker_image_uri
    description: >-
      The Docker image to run on the worker replica. This image must be in
      Container Registry.
    default: ''
    type: GCRPath
  - name: training_input
    description: >-
      The input parameters to create a training job. It is the JSON payload
      of a [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput)
    default: ''
    type: Dict
  - name: job_id_prefix
    description: 'The prefix of the generated job id.'
    default: ''
    type: String
  - name: job_id
    description: >-
      The ID of the job to create, takes precedence over generated
      job id if set.
    default: ''
    type: String
  - name: wait_interval
    # The stray trailing quote that used to end this description has been
    # removed; inside a folded scalar it was part of the displayed text.
    description: >-
      Optional. A time-interval to wait for between calls to get the job status.
      Defaults to 30.
    # Kept as a quoted string, like every other default in this list.
    default: '30'
    type: Integer
# Values produced by the component. Each name here is referenced by an
# `outputPath` placeholder in the container arguments.
outputs:
  - name: job_id
    description: 'The ID of the created job.'
    type: String
  - name: job_dir
    description: >-
      The output path in Cloud Storage of the training job, which contains
      the trained model files.
    type: GCSPath
  # This output name contains spaces; it must match the `outputPath`
  # reference in the container arguments character for character.
  - name: MLPipeline UI metadata
    type: UI metadata
# How the component runs: a container image plus the argument list passed to
# it. `{inputValue: X}` placeholders refer to entries under `inputs`, and
# `{outputPath: Y}` placeholders refer to entries under `outputs`.
implementation:
  container:
    image: gcr.io/ml-pipeline/ml-pipeline-gcp:1.4.0-rc.1
    # No `command` is declared, so the image's own entrypoint receives these
    # arguments.
    # NOTE(review): `kfp_component.google.ml_engine, train` presumably selects
    # the module and function the entrypoint dispatches to - confirm.
    # Flags are kept one flag/value pair per line so each pair reads as a unit.
    args: [
      --ui_metadata_path, {outputPath: MLPipeline UI metadata},
      kfp_component.google.ml_engine, train,
      --project_id, {inputValue: project_id},
      --python_module, {inputValue: python_module},
      --package_uris, {inputValue: package_uris},
      --region, {inputValue: region},
      --args, {inputValue: args},
      --job_dir, {inputValue: job_dir},
      --python_version, {inputValue: python_version},
      --runtime_version, {inputValue: runtime_version},
      --master_image_uri, {inputValue: master_image_uri},
      --worker_image_uri, {inputValue: worker_image_uri},
      --training_input, {inputValue: training_input},
      --job_id_prefix, {inputValue: job_id_prefix},
      --job_id, {inputValue: job_id},
      --wait_interval, {inputValue: wait_interval},
      --job_id_output_path, {outputPath: job_id},
      --job_dir_output_path, {outputPath: job_dir},
    ]
    env:
      # Quoted so the leading `{` is not parsed as a YAML flow mapping.
      # NOTE(review): `{{pod.name}}` looks like a workflow-engine template
      # variable substituted at run time - confirm.
      KFP_POD_NAME: "{{pod.name}}"