[AWS SageMaker] Specify component input types (#3683)

* Replace all string types with Python types

* Update HPO yaml

* Update Batch YAML

* Update Deploy YAML

* Update GroundTruth YAML

* Update Model YAML

* Update Train YAML

* Update WorkTeam YAML

* Updated samples to remove strings

* Update to temporary image

* Remove unnecessary imports

* Update image to newer image

* Update components to python3

* Update bool parser type

* Remove empty ContentType in samples

* Update to temporary image

* Update to version 0.3.1

* Update deploy to login

* Update deploy load config path

* Fix export environment variable in deploy

* Fix env name

* Update deploy reflow env paths

* Add debug config line

* Use username and password directly

* Updated to 0.3.1

* Update field types to JsonObject and JsonArray
Nicholas Thomson 2020-05-11 22:06:21 -07:00 committed by GitHub
parent b9aa106bb5
commit bd8c1ddd38
22 changed files with 397 additions and 255 deletions

View File

@@ -1,4 +1,4 @@
-** Amazon SageMaker Components for Kubeflow Pipelines; version 0.3.0 --
+** Amazon SageMaker Components for Kubeflow Pipelines; version 0.3.1 --
 https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker
 Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 ** boto3; version 1.12.33 -- https://github.com/boto/boto3/

View File

@@ -4,78 +4,102 @@ description: |
 inputs:
   - name: region
     description: 'The region where the cluster launches.'
+    type: String
   - name: job_name
     description: 'The name of the batch transform job.'
     default: ''
+    type: String
   - name: model_name
     description: 'The name of the model that you want to use for the transform job.'
+    type: String
   - name: max_concurrent
     description: 'The maximum number of parallel requests that can be sent to each instance in a transform job.'
     default: '0'
+    type: Integer
   - name: max_payload
     description: 'The maximum allowed size of the payload, in MB.'
     default: '6'
+    type: Integer
   - name: batch_strategy
     description: 'The number of records to include in a mini-batch for an HTTP inference request.'
     default: ''
+    type: String
   - name: environment
     description: 'The environment variables to set in the Docker container. Up to 16 key-value entries in the map.'
     default: '{}'
+    type: JsonObject
   - name: input_location
     description: 'The S3 location of the data source that is associated with a channel.'
+    type: String
   - name: data_type
     description: 'Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.'
     default: 'S3Prefix'
+    type: String
   - name: content_type
     description: 'The multipurpose internet mail extension (MIME) type of the data.'
     default: ''
+    type: String
   - name: split_type
     description: 'The method to use to split the transform job data files into smaller batches.'
     default: 'None'
+    type: String
   - name: compression_type
     description: 'If the transform data is compressed, the specification of the compression type.'
     default: 'None'
+    type: String
   - name: output_location
     description: 'The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.'
+    type: String
   - name: accept
     description: 'The MIME type used to specify the output data.'
     default: ''
+    type: String
   - name: assemble_with
     description: 'Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.'
     default: ''
+    type: String
   - name: output_encryption_key
     description: 'The AWS Key Management Service ID of the key used to encrypt the output data.'
     default: ''
+    type: String
   - name: input_filter
     description: 'A JSONPath expression used to select a portion of the input data to pass to the algorithm.'
     default: ''
+    type: String
   - name: output_filter
     description: 'A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.'
     default: ''
+    type: String
   - name: join_source
     description: 'Specifies the source of the data to join with the transformed data.'
     default: 'None'
+    type: String
   - name: instance_type
     description: 'The ML compute instance type.'
     default: 'ml.m4.xlarge'
+    type: String
   - name: instance_count
     description: 'The number of ML compute instances to use in each training job.'
     default: '1'
+    type: Integer
   - name: resource_encryption_key
     description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
     default: ''
+    type: String
   - name: endpoint_url
     description: 'The endpoint URL for the private link VPC endpoint.'
     default: ''
+    type: String
   - name: tags
     description: 'Key-value pairs to categorize AWS resources.'
     default: '{}'
+    type: JsonObject
 outputs:
   - {name: output_location, description: 'S3 URI of the transform job results.'}
 implementation:
   container:
-    image: amazon/aws-sagemaker-kfp-components:0.3.0
+    image: amazon/aws-sagemaker-kfp-components:0.3.1
-    command: ['python']
+    command: ['python3']
     args: [
       batch_transform.py,
       --region, {inputValue: region},
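For orientation (not part of the diff above): once the inputs carry `type:` annotations, a pipeline built with the KFP v1 SDK can pass native Python values for the `Integer`, `Float`, and `JsonObject` parameters instead of pre-serialized strings. A minimal sketch, assuming a local checkout of the component file and using placeholder region, model name, and S3 paths:

```python
import kfp
from kfp import components, dsl

# Path assumes a local checkout of the kubeflow/pipelines repository.
sagemaker_batch_transform_op = components.load_component_from_file(
    'components/aws/sagemaker/batch_transform/component.yaml')


@dsl.pipeline(name='Batch transform example')
def batch_transform_pipeline():
    sagemaker_batch_transform_op(
        region='us-east-1',                     # placeholder values throughout
        model_name='my-model',
        input_location='s3://my-bucket/input',
        output_location='s3://my-bucket/output',
        instance_count=1,                       # Integer input, no quoting needed
        max_payload=6,                          # Integer
        environment={'LOG_LEVEL': 'DEBUG'},     # JsonObject
        tags={'team': 'ml'},                    # JsonObject
    )


if __name__ == '__main__':
    kfp.compiler.Compiler().compile(batch_transform_pipeline,
                                    'batch_transform_pipeline.tar.gz')
```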

View File

@@ -26,31 +26,31 @@ def create_parser():
   parser = argparse.ArgumentParser(description='SageMaker Batch Transformation Job')
   _utils.add_default_client_arguments(parser)
-  parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the transform job.', default='')
+  parser.add_argument('--job_name', type=str, required=False, help='The name of the transform job.', default='')
-  parser.add_argument('--model_name', type=str.strip, required=True, help='The name of the model that you want to use for the transform job.')
+  parser.add_argument('--model_name', type=str, required=True, help='The name of the model that you want to use for the transform job.')
-  parser.add_argument('--max_concurrent', type=_utils.str_to_int, required=False, help='The maximum number of parallel requests that can be sent to each instance in a transform job.', default='0')
+  parser.add_argument('--max_concurrent', type=int, required=False, help='The maximum number of parallel requests that can be sent to each instance in a transform job.', default='0')
-  parser.add_argument('--max_payload', type=_utils.str_to_int, required=False, help='The maximum allowed size of the payload, in MB.', default='6')
+  parser.add_argument('--max_payload', type=int, required=False, help='The maximum allowed size of the payload, in MB.', default='6')
-  parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''], type=str.strip, required=False, help='The number of records to include in a mini-batch for an HTTP inference request.', default='')
+  parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''], type=str, required=False, help='The number of records to include in a mini-batch for an HTTP inference request.', default='')
-  parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default='{}')
+  parser.add_argument('--environment', type=_utils.yaml_or_json_str, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default={})
-  parser.add_argument('--input_location', type=str.strip, required=True, help='The S3 location of the data source that is associated with a channel.')
+  parser.add_argument('--input_location', type=str, required=True, help='The S3 location of the data source that is associated with a channel.')
-  parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''], type=str.strip, required=False, help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.', default='S3Prefix')
+  parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''], type=str, required=False, help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.', default='S3Prefix')
-  parser.add_argument('--content_type', type=str.strip, required=False, help='The multipurpose internet mail extension (MIME) type of the data.', default='')
+  parser.add_argument('--content_type', type=str, required=False, help='The multipurpose internet mail extension (MIME) type of the data.', default='')
-  parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''], type=str.strip, required=False, help='The method to use to split the transform job data files into smaller batches.', default='None')
+  parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''], type=str, required=False, help='The method to use to split the transform job data files into smaller batches.', default='None')
-  parser.add_argument('--compression_type', choices=['None', 'Gzip', ''], type=str.strip, required=False, help='If the transform data is compressed, the specification of the compression type.', default='None')
+  parser.add_argument('--compression_type', choices=['None', 'Gzip', ''], type=str, required=False, help='If the transform data is compressed, the specification of the compression type.', default='None')
-  parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
+  parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
-  parser.add_argument('--accept', type=str.strip, required=False, help='The MIME type used to specify the output data.')
+  parser.add_argument('--accept', type=str, required=False, help='The MIME type used to specify the output data.')
-  parser.add_argument('--assemble_with', choices=['None', 'Line', ''], type=str.strip, required=False, help='Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.')
+  parser.add_argument('--assemble_with', choices=['None', 'Line', ''], type=str, required=False, help='Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.')
-  parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
+  parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
-  parser.add_argument('--input_filter', type=str.strip, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='')
+  parser.add_argument('--input_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='')
-  parser.add_argument('--output_filter', type=str.strip, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='')
+  parser.add_argument('--output_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='')
-  parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str.strip, required=False, help='Specifies the source of the data to join with the transformed data.', default='None')
+  parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str, required=False, help='Specifies the source of the data to join with the transformed data.', default='None')
   parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
     'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
-    'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge')
+    'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge')
-  parser.add_argument('--instance_count', type=_utils.str_to_int, required=False, help='The number of ML compute instances to use in the transform job.')
+  parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in the transform job.')
-  parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
+  parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
-  parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
+  parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
-  parser.add_argument('--output_location_file', type=str.strip, required=True, help='File path where the program will write the Amazon S3 URI of the transform job results.')
+  parser.add_argument('--output_location_file', type=str, required=True, help='File path where the program will write the Amazon S3 URI of the transform job results.')
   return parser
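A side note on why dropping the `str.strip`/`_utils.str_to_int` wrappers is safe: argparse applies the `type` callable to the incoming command-line string, and it also runs string defaults through the same callable, so `default='0'` still ends up as the integer 0. A small self-contained sketch (argument names mirror the parser above, but this snippet is illustrative, not repository code):

```python
import argparse

parser = argparse.ArgumentParser(description='argparse type-conversion sketch')
parser.add_argument('--instance_count', type=int, required=False)
parser.add_argument('--max_concurrent', type=int, required=False, default='0')

args = parser.parse_args(['--instance_count', '2'])
assert args.instance_count == 2   # '2' from the CLI is converted by int()
assert args.max_concurrent == 0   # the string default '0' is also run through int()
```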

View File

@@ -1,10 +1,10 @@
 version: 0.2
 phases:
   pre_build:
     commands:
       # Log in to Dockerhub
-      - mkdir -p ~/.docker
-      - echo $DOCKER_CONFIG > ~/.docker/config.json
+      - docker login -u $DOCKER_CONFIG_USERNAME -p $DOCKER_CONFIG_PASSWORD
   build:
     commands:

View File

@@ -5,6 +5,7 @@ set -e
 REMOTE_REPOSITORY="amazon/aws-sagemaker-kfp-components"
 DRYRUN="true"
 FULL_VERSION_TAG=""
+DOCKER_CONFIG_PATH=${DOCKER_CONFIG_PATH:-"/root/.docker"}
 
 while getopts ":d:v:" opt; do
   case ${opt} in
@@ -64,13 +65,13 @@ echo "Tagged image with ${MAJOR_VERSION_IMAGE}"
 # Push to the remote repository
 if [ "${DRYRUN}" == "false" ]; then
-  docker push "${FULL_VERSION_IMAGE}"
+  docker --config "$DOCKER_CONFIG_PATH" push "${FULL_VERSION_IMAGE}"
   echo "Successfully pushed tag ${FULL_VERSION_IMAGE} to Docker Hub"
-  docker push "${MINOR_VERSION_IMAGE}"
+  docker --config "$DOCKER_CONFIG_PATH" push "${MINOR_VERSION_IMAGE}"
   echo "Successfully pushed tag ${MINOR_VERSION_IMAGE} to Docker Hub"
-  docker push "${MAJOR_VERSION_IMAGE}"
+  docker --config "$DOCKER_CONFIG_PATH" push "${MAJOR_VERSION_IMAGE}"
   echo "Successfully pushed tag ${MAJOR_VERSION_IMAGE} to Docker Hub"
 else
   echo "Dry run detected. Not pushing images."

View File

@@ -13,6 +13,7 @@
 import os
 import argparse
 from time import gmtime, strftime
+from distutils.util import strtobool
 import time
 import string
 import random
@@ -63,7 +64,7 @@ def nullable_string_argument(value):
 def add_default_client_arguments(parser):
-  parser.add_argument('--region', type=str.strip, required=True, help='The region where the training job launches.')
+  parser.add_argument('--region', type=str, required=True, help='The region where the training job launches.')
   parser.add_argument('--endpoint_url', type=nullable_string_argument, required=False, help='The URL to use when communicating with the Sagemaker service.')
@@ -71,7 +72,7 @@ def get_component_version():
   """Get component version from the first line of License file"""
   component_version = 'NULL'
-  with open('/THIRD-PARTY-LICENSES.txt', 'r') as license_file:
+  with open('THIRD-PARTY-LICENSES.txt', 'r') as license_file:
     version_match = re.search('Amazon SageMaker Components for Kubeflow Pipelines; version (([0-9]+[.])+[0-9]+)',
       license_file.readline())
     if version_match is not None:
@@ -858,35 +859,15 @@ def enable_spot_instance_support(training_job_config, args):
 def id_generator(size=4, chars=string.ascii_uppercase + string.digits):
   return ''.join(random.choice(chars) for _ in range(size))
 
+def yaml_or_json_str(str):
+  if str == "" or str == None:
+    return None
+  try:
+    return json.loads(str)
+  except:
+    return yaml.safe_load(str)
+
-def str_to_bool(s):
-  if s.lower().strip() == 'true':
-    return True
-  elif s.lower().strip() == 'false':
-    return False
-  else:
-    raise argparse.ArgumentTypeError('"True" or "False" expected.')
-
-def str_to_int(s):
-  if s:
-    return int(s)
-  else:
-    return 0
-
-def str_to_float(s):
-  if s:
-    return float(s)
-  else:
-    return 0.0
-
-def str_to_json_dict(s):
-  if s != '':
-    return json.loads(s)
-  else:
-    return {}
-
-def str_to_json_list(s):
-  if s != '':
-    return json.loads(s)
-  else:
-    return []
+def str_to_bool(str):
+  # This distutils function returns an integer representation of the boolean
+  # rather than a True/False value. This simply hard casts it.
+  return bool(strtobool(str))
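For readability, here is the new helper restated as a runnable snippet (logic copied from the diff above, with the parameter renamed so it does not shadow the built-in `str`). Because it accepts either JSON or YAML text, a single function can stand in for both `str_to_json_dict` and `str_to_json_list`, and it maps the components' empty-string sentinel to `None`:

```python
import json

import yaml  # PyYAML, which _utils.py already relies on


def yaml_or_json_str(value):
    """Parse a JSON or YAML string; treat '' / None as 'not set'."""
    if value == "" or value is None:
        return None
    try:
        return json.loads(value)
    except Exception:
        return yaml.safe_load(value)


print(yaml_or_json_str('{"LOG_LEVEL": "DEBUG"}'))            # {'LOG_LEVEL': 'DEBUG'}
print(yaml_or_json_str('key1: value1\nkey2: value2'))        # {'key1': 'value1', 'key2': 'value2'}
print(yaml_or_json_str('[{"Key": "team", "Value": "ml"}]'))  # lists work as well
print(yaml_or_json_str(''))                                  # None
```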

View File

@@ -4,83 +4,108 @@ description: |
 inputs:
   - name: region
     description: 'The region to deploy your model endpoints.'
+    type: String
   - name: endpoint_config_name
     description: 'The name of the endpoint configuration.'
     default: ''
+    type: String
   - name: variant_name_1
     description: 'The name of the production variant.'
     default: 'variant-name-1'
+    type: String
   - name: model_name_1
     description: 'The model name used for endpoint deployment.'
+    type: String
   - name: initial_instance_count_1
     description: 'Number of instances to launch initially.'
     default: '1'
+    type: Integer
   - name: instance_type_1
     description: 'The ML compute instance type.'
     default: 'ml.m4.xlarge'
+    type: String
   - name: initial_variant_weight_1
     description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.'
     default: '1.0'
+    type: Float
   - name: accelerator_type_1
     description: 'The size of the Elastic Inference (EI) instance to use for the production variant.'
     default: ''
+    type: String
   - name: variant_name_2
     description: 'The name of the production variant.'
     default: 'variant-name-2'
+    type: String
   - name: model_name_2
     description: 'The model name used for endpoint deployment.'
     default: ''
+    type: String
   - name: initial_instance_count_2
     description: 'Number of instances to launch initially.'
     default: '1'
+    type: Integer
   - name: instance_type_2
     description: 'The ML compute instance type.'
     default: 'ml.m4.xlarge'
+    type: String
   - name: initial_variant_weight_2
     description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.'
     default: '1.0'
+    type: Float
   - name: accelerator_type_2
     description: 'The size of the Elastic Inference (EI) instance to use for the production variant.'
     default: ''
+    type: String
   - name: variant_name_3
     description: 'The name of the production variant.'
     default: 'variant-name-3'
+    type: String
   - name: model_name_3
     description: 'The model name used for endpoint deployment'
     default: ''
+    type: String
   - name: initial_instance_count_3
     description: 'Number of instances to launch initially.'
     default: '1'
+    type: Integer
   - name: instance_type_3
     description: 'The ML compute instance type.'
     default: 'ml.m4.xlarge'
+    type: String
   - name: initial_variant_weight_3
     description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.'
     default: '1.0'
+    type: Float
   - name: accelerator_type_3
     description: 'The size of the Elastic Inference (EI) instance to use for the production variant.'
     default: ''
+    type: String
   - name: resource_encryption_key
     description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.'
     default: ''
+    type: String
   - name: endpoint_url
     description: 'The endpoint URL for the private link VPC endpoint.'
     default: ''
+    type: String
   - name: endpoint_config_tags
     description: 'Key-value pairs to categorize AWS resources.'
     default: '{}'
+    type: JsonObject
   - name: endpoint_name
     description: 'The name of the endpoint.'
     default: ''
+    type: String
   - name: endpoint_tags
     description: 'Key-value pairs to categorize AWS resources.'
     default: '{}'
+    type: JsonObject
 outputs:
   - {name: endpoint_name, description: 'Endpoint name'}
 implementation:
   container:
-    image: amazon/aws-sagemaker-kfp-components:0.3.0
+    image: amazon/aws-sagemaker-kfp-components:0.3.1
-    command: ['python']
+    command: ['python3']
     args: [
       deploy.py,
       --region, {inputValue: region},

View File

@@ -19,36 +19,36 @@ def create_parser():
   parser = argparse.ArgumentParser(description='SageMaker Training Job')
   _utils.add_default_client_arguments(parser)
-  parser.add_argument('--endpoint_config_name', type=str.strip, required=False, help='The name of the endpoint configuration.', default='')
+  parser.add_argument('--endpoint_config_name', type=str, required=False, help='The name of the endpoint configuration.', default='')
-  parser.add_argument('--variant_name_1', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-1')
+  parser.add_argument('--variant_name_1', type=str, required=False, help='The name of the production variant.', default='variant-name-1')
-  parser.add_argument('--model_name_1', type=str.strip, required=True, help='The model name used for endpoint deployment.')
+  parser.add_argument('--model_name_1', type=str, required=True, help='The model name used for endpoint deployment.')
-  parser.add_argument('--initial_instance_count_1', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1)
+  parser.add_argument('--initial_instance_count_1', type=int, required=False, help='Number of instances to launch initially.', default=1)
   parser.add_argument('--instance_type_1', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
     'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
-    'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
+    'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
-  parser.add_argument('--initial_variant_weight_1', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
+  parser.add_argument('--initial_variant_weight_1', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
-  parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
+  parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
-  parser.add_argument('--variant_name_2', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-2')
+  parser.add_argument('--variant_name_2', type=str, required=False, help='The name of the production variant.', default='variant-name-2')
-  parser.add_argument('--model_name_2', type=str.strip, required=False, help='The model name used for endpoint deployment.', default='')
+  parser.add_argument('--model_name_2', type=str, required=False, help='The model name used for endpoint deployment.', default='')
-  parser.add_argument('--initial_instance_count_2', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1)
+  parser.add_argument('--initial_instance_count_2', type=int, required=False, help='Number of instances to launch initially.', default=1)
   parser.add_argument('--instance_type_2', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
     'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
-    'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
+    'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
-  parser.add_argument('--initial_variant_weight_2', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
+  parser.add_argument('--initial_variant_weight_2', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
-  parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
+  parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
-  parser.add_argument('--variant_name_3', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-3')
+  parser.add_argument('--variant_name_3', type=str, required=False, help='The name of the production variant.', default='variant-name-3')
-  parser.add_argument('--model_name_3', type=str.strip, required=False, help='The model name used for endpoint deployment.', default='')
+  parser.add_argument('--model_name_3', type=str, required=False, help='The model name used for endpoint deployment.', default='')
-  parser.add_argument('--initial_instance_count_3', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1)
+  parser.add_argument('--initial_instance_count_3', type=int, required=False, help='Number of instances to launch initially.', default=1)
   parser.add_argument('--instance_type_3', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
     'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
-    'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
+    'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
-  parser.add_argument('--initial_variant_weight_3', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
+  parser.add_argument('--initial_variant_weight_3', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
-  parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
+  parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
-  parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
+  parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
-  parser.add_argument('--endpoint_config_tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
+  parser.add_argument('--endpoint_config_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
-  parser.add_argument('--endpoint_name', type=str.strip, required=False, help='The name of the endpoint.', default='')
+  parser.add_argument('--endpoint_name', type=str, required=False, help='The name of the endpoint.', default='')
-  parser.add_argument('--endpoint_tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
+  parser.add_argument('--endpoint_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
   return parser

View File

@@ -4,92 +4,123 @@ description: |
 inputs:
   - name: region
     description: 'The region where the cluster launches.'
+    type: String
   - name: role
     description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
+    type: String
   - name: job_name
     description: 'The name of the labeling job.'
+    type: String
   - name: label_attribute_name
     description: 'The attribute name to use for the label in the output manifest file. Default is the job name.'
    default: ''
+    type: String
   - name: manifest_location
     description: 'The Amazon S3 location of the manifest file that describes the input data objects.'
+    type: String
   - name: output_location
     description: 'The Amazon S3 location to write output data.'
+    type: String
   - name: output_encryption_key
     description: 'The AWS Key Management Service ID of the key used to encrypt the output data.'
     default: ''
+    type: String
   - name: task_type
     description: 'Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.'
+    type: String
   - name: worker_type
     description: 'The workteam for data labeling, either public, private, or vendor.'
+    type: String
   - name: workteam_arn
     description: 'The ARN of the work team assigned to complete the tasks.'
     default: ''
+    type: String
   - name: no_adult_content
     description: 'If true, your data is free of adult content.'
     default: 'False'
+    type: Bool
   - name: no_ppi
     description: 'If true, your data is free of personally identifiable information.'
     default: 'False'
+    type: Bool
   - name: label_category_config
     description: 'The S3 URL of the JSON structured file that defines the categories used to label the data objects.'
     default: ''
+    type: String
   - name: max_human_labeled_objects
     description: 'The maximum number of objects that can be labeled by human workers.'
     default: ''
+    type: Integer
   - name: max_percent_objects
     description: 'The maximum number of input data objects that should be labeled.'
     default: ''
+    type: Integer
   - name: enable_auto_labeling
     description: 'Enables auto-labeling, only for bounding box, text classification, and image classification.'
     default: 'False'
+    type: Bool
   - name: initial_model_arn
     description: 'The ARN of the final model used for a previous auto-labeling job.'
     default: ''
+    type: String
   - name: resource_encryption_key
     description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
     default: ''
+    type: String
   - name: ui_template
     description: 'The Amazon S3 bucket location of the UI template.'
+    type: String
   - name: pre_human_task_function
     description: 'The ARN of a Lambda function that is run before a data object is sent to a human worker.'
     default: ''
+    type: String
   - name: post_human_task_function
     description: 'The ARN of a Lambda function implements the logic for annotation consolidation.'
     default: ''
+    type: String
   - name: task_keywords
     description: 'Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.'
     default: ''
+    type: String
   - name: title
     description: 'A title for the task for your human workers.'
+    type: String
   - name: description
     description: 'A description of the task for your human workers.'
+    type: String
   - name: num_workers_per_object
     description: 'The number of human workers that will label an object.'
+    type: Integer
   - name: time_limit
     description: 'The amount of time that a worker has to complete a task in seconds'
+    type: Integer
   - name: task_availibility
     description: 'The length of time that a task remains available for labeling by human workers.'
     default: ''
+    type: Integer
   - name: max_concurrent_tasks
     description: 'The maximum number of data objects that can be labeled by human workers at the same time.'
     default: ''
+    type: Integer
   - name: workforce_task_price
     description: 'The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".'
     default: '0.000'
+    type: Float
   - name: endpoint_url
     description: 'The endpoint URL for the private link VPC endpoint.'
     default: ''
+    type: String
   - name: tags
     description: 'Key-value pairs to categorize AWS resources.'
     default: '{}'
+    type: JsonObject
 outputs:
   - {name: output_manifest_location, description: 'The Amazon S3 bucket location of the manifest file for labeled data.'}
   - {name: active_learning_model_arn, description: 'The ARN for the most recent Amazon SageMaker model trained as part of automated data labeling.'}
 implementation:
   container:
-    image: amazon/aws-sagemaker-kfp-components:0.3.0
+    image: amazon/aws-sagemaker-kfp-components:0.3.1
-    command: ['python']
+    command: ['python3']
     args: [
       ground_truth.py,
       --region, {inputValue: region},

View File

@ -19,35 +19,35 @@ def create_parser():
parser = argparse.ArgumentParser(description='SageMaker Ground Truth Job') parser = argparse.ArgumentParser(description='SageMaker Ground Truth Job')
_utils.add_default_client_arguments(parser) _utils.add_default_client_arguments(parser)
parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
parser.add_argument('--job_name', type=str.strip, required=True, help='The name of the labeling job.') parser.add_argument('--job_name', type=str, required=True, help='The name of the labeling job.')
parser.add_argument('--label_attribute_name', type=str.strip, required=False, help='The attribute name to use for the label in the output manifest file. Default is the job name.', default='') parser.add_argument('--label_attribute_name', type=str, required=False, help='The attribute name to use for the label in the output manifest file. Default is the job name.', default='')
parser.add_argument('--manifest_location', type=str.strip, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.') parser.add_argument('--manifest_location', type=str, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.')
parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 location to write output data.') parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 location to write output data.')
parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
parser.add_argument('--task_type', type=str.strip, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.') parser.add_argument('--task_type', type=str, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.')
parser.add_argument('--worker_type', type=str.strip, required=True, help='The workteam for data labeling, either public, private, or vendor.') parser.add_argument('--worker_type', type=str, required=True, help='The workteam for data labeling, either public, private, or vendor.')
parser.add_argument('--workteam_arn', type=str.strip, required=False, help='The ARN of the work team assigned to complete the tasks.') parser.add_argument('--workteam_arn', type=str, required=False, help='The ARN of the work team assigned to complete the tasks.')
parser.add_argument('--no_adult_content', type=_utils.str_to_bool, required=False, help='If true, your data is free of adult content.', default='False') parser.add_argument('--no_adult_content', type=_utils.str_to_bool, required=False, help='If true, your data is free of adult content.', default='False')
parser.add_argument('--no_ppi', type=_utils.str_to_bool, required=False, help='If true, your data is free of personally identifiable information.', default='False') parser.add_argument('--no_ppi', type=_utils.str_to_bool, required=False, help='If true, your data is free of personally identifiable information.', default='False')
parser.add_argument('--label_category_config', type=str.strip, required=False, help='The S3 URL of the JSON structured file that defines the categories used to label the data objects.', default='') parser.add_argument('--label_category_config', type=str, required=False, help='The S3 URL of the JSON structured file that defines the categories used to label the data objects.', default='')
parser.add_argument('--max_human_labeled_objects', type=_utils.str_to_int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0) parser.add_argument('--max_human_labeled_objects', type=int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0)
parser.add_argument('--max_percent_objects', type=_utils.str_to_int, required=False, help='The maximum percentatge of input data objects that should be labeled.', default=0) parser.add_argument('--max_percent_objects', type=int, required=False, help='The maximum percentatge of input data objects that should be labeled.', default=0)
parser.add_argument('--enable_auto_labeling', type=_utils.str_to_bool, required=False, help='Enables auto-labeling, only for bounding box, text classification, and image classification.', default=False) parser.add_argument('--enable_auto_labeling', type=_utils.str_to_bool, required=False, help='Enables auto-labeling, only for bounding box, text classification, and image classification.', default=False)
parser.add_argument('--initial_model_arn', type=str.strip, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='') parser.add_argument('--initial_model_arn', type=str, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='')
parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
parser.add_argument('--ui_template', type=str.strip, required=True, help='The Amazon S3 bucket location of the UI template.') parser.add_argument('--ui_template', type=str, required=True, help='The Amazon S3 bucket location of the UI template.')
parser.add_argument('--pre_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='') parser.add_argument('--pre_human_task_function', type=str, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='')
parser.add_argument('--post_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='') parser.add_argument('--post_human_task_function', type=str, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='')
parser.add_argument('--task_keywords', type=str.strip, required=False, help='Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.', default='') parser.add_argument('--task_keywords', type=str, required=False, help='Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.', default='')
parser.add_argument('--title', type=str.strip, required=True, help='A title for the task for your human workers.') parser.add_argument('--title', type=str, required=True, help='A title for the task for your human workers.')
parser.add_argument('--description', type=str.strip, required=True, help='A description of the task for your human workers.') parser.add_argument('--description', type=str, required=True, help='A description of the task for your human workers.')
parser.add_argument('--num_workers_per_object', type=_utils.str_to_int, required=True, help='The number of human workers that will label an object.') parser.add_argument('--num_workers_per_object', type=int, required=True, help='The number of human workers that will label an object.')
parser.add_argument('--time_limit', type=_utils.str_to_int, required=True, help='The amount of time that a worker has to complete a task in seconds') parser.add_argument('--time_limit', type=int, required=True, help='The amount of time that a worker has to complete a task in seconds')
parser.add_argument('--task_availibility', type=_utils.str_to_int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0) parser.add_argument('--task_availibility', type=int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0)
parser.add_argument('--max_concurrent_tasks', type=_utils.str_to_int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0) parser.add_argument('--max_concurrent_tasks', type=int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0)
parser.add_argument('--workforce_task_price', type=_utils.str_to_float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000) parser.add_argument('--workforce_task_price', type=float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000)
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
return parser return parser
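The converter referenced above, _utils.yaml_or_json_str, lives in the component's shared _utils module, which is not part of this diff. As a rough, hedged sketch of the contract implied by the calls above (the body below is an assumption, not the committed implementation):

import yaml  # PyYAML; YAML is a superset of JSON, so JSON strings parse as well

def yaml_or_json_str(value):
    # Assumed behaviour: pass dicts/lists (the new non-string defaults) through
    # untouched, and parse strings coming from the component command line.
    if value is None or value == '':
        return None
    if isinstance(value, (dict, list)):
        return value
    return yaml.safe_load(value)

Since argparse only runs the type= converter on string values, non-string defaults such as default={} reach the program untouched, which is what makes this pattern work.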


@ -7,101 +7,136 @@ inputs:
- name: job_name - name: job_name
description: 'The name of the tuning job. Must be unique within the same AWS account and AWS region.' description: 'The name of the tuning job. Must be unique within the same AWS account and AWS region.'
default: '' default: ''
type: String
- name: role - name: role
description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
type: String
- name: image - name: image
description: 'The registry path of the Docker image that contains the training algorithm.' description: 'The registry path of the Docker image that contains the training algorithm.'
default: '' default: ''
type: String
- name: algorithm_name - name: algorithm_name
description: 'The name of the algorithm resource to use for the hyperparameter tuning job. Do not specify a value for this if using training image.' description: 'The name of the algorithm resource to use for the hyperparameter tuning job. Do not specify a value for this if using training image.'
default: '' default: ''
type: String
- name: training_input_mode - name: training_input_mode
description: 'The input mode that the algorithm supports. File or Pipe.' description: 'The input mode that the algorithm supports. File or Pipe.'
default: 'File' default: 'File'
type: String
- name: metric_definitions - name: metric_definitions
description: 'The dictionary of name-regex pairs specifying the metrics that the algorithm emits.' description: 'The dictionary of name-regex pairs specifying the metrics that the algorithm emits.'
default: '{}' default: '{}'
type: JsonObject
- name: strategy - name: strategy
description: 'How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.' description: 'How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.'
default: 'Bayesian' default: 'Bayesian'
type: String
- name: metric_name - name: metric_name
description: 'The name of the metric to use for the objective metric.' description: 'The name of the metric to use for the objective metric.'
type: String
- name: metric_type - name: metric_type
description: 'Whether to minimize or maximize the objective metric.' description: 'Whether to minimize or maximize the objective metric.'
type: String
- name: early_stopping_type - name: early_stopping_type
description: 'Whether to use early stopping for training jobs launched by the tuning job.' description: 'Whether to use early stopping for training jobs launched by the tuning job.'
default: 'Off' default: 'Off'
type: String
- name: static_parameters - name: static_parameters
description: 'The values of hyperparameters that do not change for the tuning job.' description: 'The values of hyperparameters that do not change for the tuning job.'
default: '{}' default: '{}'
type: JsonObject
- name: integer_parameters - name: integer_parameters
description: 'The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.' description: 'The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.'
default: '[]' default: '[]'
type: JsonArray
- name: continuous_parameters - name: continuous_parameters
description: 'The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.' description: 'The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.'
default: '[]' default: '[]'
type: JsonObject
- name: categorical_parameters - name: categorical_parameters
description: 'The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.' description: 'The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.'
default: '[]' default: '[]'
type: JsonArray
- name: channels - name: channels
description: 'A list of dicts specifying the input channels. Must have at least one.' description: 'A list of dicts specifying the input channels. Must have at least one.'
type: JsonArray
- name: output_location - name: output_location
description: 'The Amazon S3 path where you want Amazon SageMaker to store the model artifacts from the best training job.' description: 'The Amazon S3 path where you want Amazon SageMaker to store the model artifacts from the best training job.'
type: String
- name: output_encryption_key - name: output_encryption_key
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.' description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.'
default: '' default: ''
type: String
- name: instance_type - name: instance_type
description: 'The ML compute instance type.' description: 'The ML compute instance type.'
default: 'ml.m4.xlarge' default: 'ml.m4.xlarge'
type: String
- name: instance_count - name: instance_count
description: 'The number of ML compute instances to use in each training job.' description: 'The number of ML compute instances to use in each training job.'
default: '1' default: '1'
type: Integer
- name: volume_size - name: volume_size
description: 'The size of the ML storage volume that you want to provision.' description: 'The size of the ML storage volume that you want to provision.'
default: '30' default: '30'
type: Integer
- name: max_num_jobs - name: max_num_jobs
description: 'The maximum number of training jobs that a hyperparameter tuning job can launch.' description: 'The maximum number of training jobs that a hyperparameter tuning job can launch.'
type: Integer
- name: max_parallel_jobs - name: max_parallel_jobs
description: 'The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.' description: 'The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.'
type: Integer
- name: max_run_time - name: max_run_time
description: 'The maximum run time in seconds per training job.' description: 'The maximum run time in seconds per training job.'
default: '86400' default: '86400'
type: Integer
- name: resource_encryption_key - name: resource_encryption_key
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
default: '' default: ''
type: String
- name: vpc_security_group_ids - name: vpc_security_group_ids
description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
default: '' default: ''
type: String
- name: vpc_subnets - name: vpc_subnets
description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
default: '' default: ''
type: String
- name: network_isolation - name: network_isolation
description: 'Isolates the training container.' description: 'Isolates the training container.'
default: 'True' default: 'True'
type: Bool
- name: traffic_encryption - name: traffic_encryption
description: 'Encrypts all communications between ML compute instances in distributed training.' description: 'Encrypts all communications between ML compute instances in distributed training.'
default: 'False' default: 'False'
type: Bool
- name: spot_instance - name: spot_instance
description: 'Use managed spot training.' description: 'Use managed spot training.'
default: 'False' default: 'False'
type: Bool
- name: max_wait_time - name: max_wait_time
description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.' description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.'
default: '86400' default: '86400'
type: Integer
- name: checkpoint_config - name: checkpoint_config
description: 'Dictionary of information about the output location for managed spot training checkpoint data.' description: 'Dictionary of information about the output location for managed spot training checkpoint data.'
default: '{}' default: '{}'
type: JsonObject
- name: warm_start_type - name: warm_start_type
description: 'Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"' description: 'Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"'
default: '' default: ''
type: String
- name: parent_hpo_jobs - name: parent_hpo_jobs
description: 'List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.' description: 'List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.'
default: '' default: ''
type: String
- name: endpoint_url - name: endpoint_url
description: 'The endpoint URL for the private link VPC endpoint.' description: 'The endpoint URL for the private link VPC endpoint.'
default: '' default: ''
type: String
- name: tags - name: tags
description: 'Key-value pairs, to categorize AWS resources.' description: 'Key-value pairs, to categorize AWS resources.'
default: '{}' default: '{}'
type: JsonObject
outputs: outputs:
- name: hpo_job_name - name: hpo_job_name
description: 'The name of the hyper parameter tuning job' description: 'The name of the hyper parameter tuning job'
@ -115,8 +150,8 @@ outputs:
description: 'The registry path of the Docker image that contains the training algorithm' description: 'The registry path of the Docker image that contains the training algorithm'
implementation: implementation:
container: container:
image: amazon/aws-sagemaker-kfp-components:0.3.0 image: amazon/aws-sagemaker-kfp-components:0.3.1
command: ['python'] command: ['python3']
args: [ args: [
hyperparameter_tuning.py, hyperparameter_tuning.py,
--region, {inputValue: region}, --region, {inputValue: region},

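With the inputs above typed as Integer, Bool, JsonObject and JsonArray, pipeline code can hand the component native Python values instead of pre-serialized strings. A hedged illustration of the idea; the component path, bucket, role and parameter values are placeholders, not values from this commit:

from kfp import components, dsl

# Placeholder path: point this at the hyperparameter tuning component.yaml.
sagemaker_hpo_op = components.load_component_from_file('hyperparameter_tuning/component.yaml')

@dsl.pipeline(name='hpo-typed-inputs-demo')
def hpo_demo(role_arn: str = ''):
    sagemaker_hpo_op(
        region='us-west-2',
        role=role_arn,
        algorithm_name='K-Means',
        metric_name='test:msd',
        metric_type='Minimize',
        static_parameters={'k': '10', 'feature_dim': '784'},  # plain dict, no json.dumps
        channels=[{'ChannelName': 'train'}],                  # plain list
        output_location='s3://example-bucket/hpo-output',
        max_num_jobs=1,                                       # plain int, not '1'
        max_parallel_jobs=1,
    )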

@ -20,46 +20,46 @@ def create_parser():
parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job') parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job')
_utils.add_default_client_arguments(parser) _utils.add_default_client_arguments(parser)
parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the tuning job. Must be unique within the same AWS account and AWS region.') parser.add_argument('--job_name', type=str, required=False, help='The name of the tuning job. Must be unique within the same AWS account and AWS region.')
parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
parser.add_argument('--image', type=str.strip, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='')
parser.add_argument('--algorithm_name', type=str.strip, required=False, help='The name of the resource algorithm to use for the hyperparameter tuning job.', default='') parser.add_argument('--algorithm_name', type=str, required=False, help='The name of the resource algorithm to use for the hyperparameter tuning job.', default='')
parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, required=False, help='The input mode that the algorithm supports. File or Pipe.', default='File') parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, required=False, help='The input mode that the algorithm supports. File or Pipe.', default='File')
parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, help='The dictionary of name-regex pairs specifying the metrics that the algorithm emits.', default='{}') parser.add_argument('--metric_definitions', type=_utils.yaml_or_json_str, required=False, help='The dictionary of name-regex pairs specifying the metrics that the algorithm emits.', default={})
parser.add_argument('--strategy', choices=['Bayesian', 'Random'], type=str.strip, required=False, help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.', default='Bayesian') parser.add_argument('--strategy', choices=['Bayesian', 'Random'], type=str, required=False, help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.', default='Bayesian')
parser.add_argument('--metric_name', type=str.strip, required=True, help='The name of the metric to use for the objective metric.') parser.add_argument('--metric_name', type=str, required=True, help='The name of the metric to use for the objective metric.')
parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], type=str.strip, required=True, help='Whether to minimize or maximize the objective metric.') parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], type=str, required=True, help='Whether to minimize or maximize the objective metric.')
parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], type=str.strip, required=False, help='Whether to use early stopping for training jobs launched by the tuning job.', default='Off') parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], type=str, required=False, help='Whether to use early stopping for training jobs launched by the tuning job.', default='Off')
parser.add_argument('--static_parameters', type=_utils.str_to_json_dict, required=False, help='The values of hyperparameters that do not change for the tuning job.', default='{}') parser.add_argument('--static_parameters', type=_utils.yaml_or_json_str, required=False, help='The values of hyperparameters that do not change for the tuning job.', default={})
parser.add_argument('--integer_parameters', type=_utils.str_to_json_list, required=False, help='The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.', default='[]') parser.add_argument('--integer_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.', default=[])
parser.add_argument('--continuous_parameters', type=_utils.str_to_json_list, required=False, help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.', default='[]') parser.add_argument('--continuous_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.', default=[])
parser.add_argument('--categorical_parameters', type=_utils.str_to_json_list, required=False, help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.', default='[]') parser.add_argument('--categorical_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.', default=[])
parser.add_argument('--channels', type=_utils.str_to_json_list, required=True, help='A list of dicts specifying the input channels. Must have at least one.') parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_count', type=_utils.str_to_int, required=False, help='The number of ML compute instances to use in each training job.', default=1) parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in each training job.', default=1)
parser.add_argument('--volume_size', type=_utils.str_to_int, required=False, help='The size of the ML storage volume that you want to provision.', default=1) parser.add_argument('--volume_size', type=int, required=False, help='The size of the ML storage volume that you want to provision.', default=1)
parser.add_argument('--max_num_jobs', type=_utils.str_to_int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.') parser.add_argument('--max_num_jobs', type=int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.')
parser.add_argument('--max_parallel_jobs', type=_utils.str_to_int, required=True, help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.') parser.add_argument('--max_parallel_jobs', type=int, required=True, help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.')
parser.add_argument('--max_run_time', type=_utils.str_to_int, required=False, help='The maximum run time in seconds per training job.', default=86400) parser.add_argument('--max_run_time', type=int, required=False, help='The maximum run time in seconds per training job.', default=86400)
parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.') parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.') parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True) parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True)
parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all communications between ML compute instances in distributed training.', default=False) parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all communications between ML compute instances in distributed training.', default=False)
parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''], type=str.strip, required=False, help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"') parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''], type=str, required=False, help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"')
parser.add_argument('--parent_hpo_jobs', type=str.strip, required=False, help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.', default='') parser.add_argument('--parent_hpo_jobs', type=str, required=False, help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.', default='')
### Start spot instance support ### Start spot instance support
parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False) parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False)
parser.add_argument('--max_wait_time', type=_utils.str_to_int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) parser.add_argument('--max_wait_time', type=int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400)
parser.add_argument('--checkpoint_config', type=_utils.str_to_json_dict, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default='{}') parser.add_argument('--checkpoint_config', type=_utils.yaml_or_json_str, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default={})
### End spot instance support ### End spot instance support
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
return parser return parser
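A hedged smoke test of the converted parser above: the component passes every input to the container as a command-line string, and the new type= converters hand back native Python objects. This assumes hyperparameter_tuning.py is importable next to _utils.py and that add_default_client_arguments only registers client flags such as --region and --endpoint_url; the argument values are examples.

from hyperparameter_tuning import create_parser

args = create_parser().parse_args([
    '--region', 'us-west-2',
    '--role', 'arn:aws:iam::111122223333:role/example-role',
    '--image', '123456789012.dkr.ecr.us-west-2.amazonaws.com/kmeans:1',
    '--metric_name', 'test:msd',
    '--metric_type', 'Minimize',
    '--static_parameters', '{"k": "10", "feature_dim": "784"}',
    '--channels', '[{"ChannelName": "train"}]',
    '--output_location', 's3://example-bucket/hpo-output',
    '--max_num_jobs', '1',
    '--max_parallel_jobs', '1',
])
assert isinstance(args.static_parameters, dict)  # parsed by yaml_or_json_str
assert isinstance(args.channels, list)
assert isinstance(args.max_num_jobs, int)        # plain int() now, no str_to_int
assert args.network_isolation is True            # default stays a real bool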


@ -4,49 +4,63 @@ description: |
inputs: inputs:
- name: region - name: region
description: 'The region where the training job launches.' description: 'The region where the training job launches.'
type: String
- name: model_name - name: model_name
description: 'The name of the new model.' description: 'The name of the new model.'
type: String
- name: role - name: role
description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
type: String
- name: container_host_name - name: container_host_name
description: 'When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.' description: 'When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.'
default: '' default: ''
type: String
- name: image - name: image
description: 'The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.' description: 'The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.'
default: '' default: ''
type: String
- name: model_artifact_url - name: model_artifact_url
description: 'The S3 path where the model artifacts are stored.' description: 'The S3 path where the model artifacts are stored.'
default: '' default: ''
type: String
- name: environment - name: environment
description: 'The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.' description: 'The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.'
default: '{}' default: '{}'
type: JsonObject
- name: model_package - name: model_package
description: 'The name or Amazon Resource Name (ARN) of the model package to use to create the model.' description: 'The name or Amazon Resource Name (ARN) of the model package to use to create the model.'
default: '' default: ''
type: String
- name: secondary_containers - name: secondary_containers
description: 'A list of dicts that specifies the additional containers in the inference pipeline.' description: 'A list of dicts that specifies the additional containers in the inference pipeline.'
default: '[]' default: '[]'
type: JsonArray
- name: vpc_security_group_ids - name: vpc_security_group_ids
description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
default: '' default: ''
type: String
- name: vpc_subnets - name: vpc_subnets
description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
default: '' default: ''
type: String
- name: network_isolation - name: network_isolation
description: 'Isolates the training container.' description: 'Isolates the training container.'
default: 'True' default: 'True'
type: Bool
- name: endpoint_url - name: endpoint_url
description: 'The endpoint URL for the private link VPC endpoint.' description: 'The endpoint URL for the private link VPC endpoint.'
default: '' default: ''
type: String
- name: tags - name: tags
description: 'Key-value pairs to categorize AWS resources.' description: 'Key-value pairs to categorize AWS resources.'
default: '{}' default: '{}'
type: JsonObject
outputs: outputs:
- {name: model_name, description: 'The model name Sagemaker created'} - {name: model_name, description: 'The model name Sagemaker created'}
implementation: implementation:
container: container:
image: amazon/aws-sagemaker-kfp-components:0.3.0 image: amazon/aws-sagemaker-kfp-components:0.3.1
command: ['python'] command: ['python3']
args: [ args: [
create_model.py, create_model.py,
--region, {inputValue: region}, --region, {inputValue: region},


@ -19,18 +19,18 @@ def create_parser():
parser = argparse.ArgumentParser(description='SageMaker Training Job') parser = argparse.ArgumentParser(description='SageMaker Training Job')
_utils.add_default_client_arguments(parser) _utils.add_default_client_arguments(parser)
parser.add_argument('--model_name', type=str.strip, required=True, help='The name of the new model.') parser.add_argument('--model_name', type=str, required=True, help='The name of the new model.')
parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
parser.add_argument('--container_host_name', type=str.strip, required=False, help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.', default='') parser.add_argument('--container_host_name', type=str, required=False, help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.', default='')
parser.add_argument('--image', type=str.strip, required=False, help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.', default='') parser.add_argument('--image', type=str, required=False, help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.', default='')
parser.add_argument('--model_artifact_url', type=str.strip, required=False, help='The S3 path where the model artifacts are stored.', default='') parser.add_argument('--model_artifact_url', type=str, required=False, help='The S3 path where the model artifacts are stored.', default='')
parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default='{}') parser.add_argument('--environment', type=_utils.yaml_or_json_str, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default={})
parser.add_argument('--model_package', type=str.strip, required=False, help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.', default='') parser.add_argument('--model_package', type=str, required=False, help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.', default='')
parser.add_argument('--secondary_containers', type=_utils.str_to_json_list, required=False, help='A list of dicts that specifies the additional containers in the inference pipeline.', default='{}') parser.add_argument('--secondary_containers', type=_utils.yaml_or_json_str, required=False, help='A list of dicts that specifies the additional containers in the inference pipeline.', default={})
parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.', default='') parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.', default='')
parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.', default='') parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.', default='')
parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True) parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True)
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
return parser return parser


@ -4,83 +4,108 @@ description: |
inputs: inputs:
- name: region - name: region
description: 'The region where the training job launches.' description: 'The region where the training job launches.'
type: String
- name: job_name - name: job_name
description: 'The name of the batch training job.' description: 'The name of the batch training job.'
default: '' default: ''
type: String
- name: role - name: role
description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
type: String
- name: image - name: image
description: 'The registry path of the Docker image that contains the training algorithm.' description: 'The registry path of the Docker image that contains the training algorithm.'
default: '' default: ''
type: String
- name: algorithm_name - name: algorithm_name
description: 'The name of the algorithm resource to use for the training job. Do not specify a value for this if using training image.' description: 'The name of the algorithm resource to use for the training job. Do not specify a value for this if using training image.'
default: '' default: ''
type: String
- name: metric_definitions - name: metric_definitions
description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.' description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.'
default: '{}' default: '{}'
type: JsonObject
- name: training_input_mode - name: training_input_mode
description: 'The input mode that the algorithm supports. File or Pipe.' description: 'The input mode that the algorithm supports. File or Pipe.'
default: 'File' default: 'File'
type: String
- name: hyperparameters - name: hyperparameters
description: 'Dictionary of hyperparameters for the algorithm.' description: 'Dictionary of hyperparameters for the algorithm.'
default: '{}' default: '{}'
type: JsonObject
- name: channels - name: channels
description: 'A list of dicts specifying the input channels. Must have at least one.' description: 'A list of dicts specifying the input channels. Must have at least one.'
type: JsonArray
- name: instance_type - name: instance_type
description: 'The ML compute instance type.' description: 'The ML compute instance type.'
default: 'ml.m4.xlarge' default: 'ml.m4.xlarge'
type: String
- name: instance_count - name: instance_count
description: 'The number of ML compute instances to use in each training job.' description: 'The number of ML compute instances to use in each training job.'
default: '1' default: '1'
type: Integer
- name: volume_size - name: volume_size
description: 'The size of the ML storage volume that you want to provision.' description: 'The size of the ML storage volume that you want to provision.'
default: '30' default: '30'
type: Integer
- name: resource_encryption_key - name: resource_encryption_key
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
default: '' default: ''
type: String
- name: max_run_time - name: max_run_time
description: 'The maximum run time in seconds for the training job.' description: 'The maximum run time in seconds for the training job.'
default: '86400' default: '86400'
type: Integer
- name: model_artifact_path - name: model_artifact_path
description: 'Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.' description: 'Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.'
type: String
- name: output_encryption_key - name: output_encryption_key
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.' description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.'
default: '' default: ''
type: String
- name: vpc_security_group_ids - name: vpc_security_group_ids
description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
default: '' default: ''
type: String
- name: vpc_subnets - name: vpc_subnets
description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
default: '' default: ''
type: String
- name: network_isolation - name: network_isolation
description: 'Isolates the training container.' description: 'Isolates the training container.'
default: 'True' default: 'True'
type: Bool
- name: traffic_encryption - name: traffic_encryption
description: 'Encrypts all communications between ML compute instances in distributed training.' description: 'Encrypts all communications between ML compute instances in distributed training.'
default: 'False' default: 'False'
type: Bool
- name: spot_instance - name: spot_instance
description: 'Use managed spot training.' description: 'Use managed spot training.'
default: 'False' default: 'False'
type: Bool
- name: max_wait_time - name: max_wait_time
description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.' description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.'
default: '86400' default: '86400'
type: Integer
- name: checkpoint_config - name: checkpoint_config
description: 'Dictionary of information about the output location for managed spot training checkpoint data.' description: 'Dictionary of information about the output location for managed spot training checkpoint data.'
default: '{}' default: '{}'
type: JsonObject
- name: endpoint_url - name: endpoint_url
description: 'The endpoint URL for the private link VPC endpoint.' description: 'The endpoint URL for the private link VPC endpoint.'
default: '' default: ''
type: String
- name: tags - name: tags
description: 'Key-value pairs, to categorize AWS resources.' description: 'Key-value pairs, to categorize AWS resources.'
default: '{}' default: '{}'
type: JsonObject
outputs: outputs:
- {name: model_artifact_url, description: 'Model artifacts url'} - {name: model_artifact_url, description: 'Model artifacts url'}
- {name: job_name, description: 'Training job name'} - {name: job_name, description: 'Training job name'}
- {name: training_image, description: 'The registry path of the Docker image that contains the training algorithm'} - {name: training_image, description: 'The registry path of the Docker image that contains the training algorithm'}
implementation: implementation:
container: container:
image: amazon/aws-sagemaker-kfp-components:0.3.0 image: amazon/aws-sagemaker-kfp-components:0.3.1
command: ['python'] command: ['python3']
args: [ args: [
train.py, train.py,
--region, {inputValue: region}, --region, {inputValue: region},

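The sample pipelines chain these components together, so the typed spec above also shapes how results flow downstream: the training component's String outputs feed the model component's String inputs directly, while dict/list/int inputs stay native. A hedged sketch; the component paths, bucket and ECR image are placeholders:

from kfp import components, dsl

sagemaker_train_op = components.load_component_from_file('train/component.yaml')
sagemaker_model_op = components.load_component_from_file('model/component.yaml')

@dsl.pipeline(name='train-then-register-model')
def train_then_model(role_arn: str = ''):
    training = sagemaker_train_op(
        region='us-west-2',
        role=role_arn,
        image='123456789012.dkr.ecr.us-west-2.amazonaws.com/kmeans:1',
        channels=[{'ChannelName': 'train'}],   # JsonArray input: a plain list
        instance_count=1,                      # Integer input: a plain int
        model_artifact_path='s3://example-bucket/models',
    )
    sagemaker_model_op(
        region='us-west-2',
        role=role_arn,
        model_name=training.outputs['job_name'],
        image=training.outputs['training_image'],
        model_artifact_url=training.outputs['model_artifact_url'],
    )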

@ -19,35 +19,35 @@ def create_parser():
parser = argparse.ArgumentParser(description='SageMaker Training Job') parser = argparse.ArgumentParser(description='SageMaker Training Job')
_utils.add_default_client_arguments(parser) _utils.add_default_client_arguments(parser)
parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the training job.', default='') parser.add_argument('--job_name', type=str, required=False, help='The name of the training job.', default='')
parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
parser.add_argument('--image', type=str.strip, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='')
parser.add_argument('--algorithm_name', type=str.strip, required=False, help='The name of the resource algorithm to use for the training job.', default='') parser.add_argument('--algorithm_name', type=str, required=False, help='The name of the resource algorithm to use for the training job.', default='')
parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, help='The dictionary of name-regex pairs specifying the metrics that the algorithm emits.', default='{}') parser.add_argument('--metric_definitions', type=_utils.yaml_or_json_str, required=False, help='The dictionary of name-regex pairs specifying the metrics that the algorithm emits.', default={})
parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, help='The input mode that the algorithm supports. File or Pipe.', default='File') parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, help='The input mode that the algorithm supports. File or Pipe.', default='File')
parser.add_argument('--hyperparameters', type=_utils.str_to_json_dict, help='Dictionary of hyperparameters for the algorithm.', default='{}') parser.add_argument('--hyperparameters', type=_utils.yaml_or_json_str, help='Dictionary of hyperparameters for the algorithm.', default={})
parser.add_argument('--channels', type=_utils.str_to_json_list, required=True, help='A list of dicts specifying the input channels. Must have at least one.') parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
parser.add_argument('--instance_type', required=True, choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', parser.add_argument('--instance_type', required=True, choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, help='The ML compute instance type.', default='ml.m4.xlarge') 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_count', required=True, type=_utils.str_to_int, help='The number of ML compute instances to use in each training job.', default=1) parser.add_argument('--instance_count', required=True, type=int, help='The number of ML compute instances to use in each training job.', default=1)
parser.add_argument('--volume_size', type=_utils.str_to_int, required=True, help='The size of the ML storage volume that you want to provision.', default=1) parser.add_argument('--volume_size', type=int, required=True, help='The size of the ML storage volume that you want to provision.', default=1)
parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
parser.add_argument('--max_run_time', type=_utils.str_to_int, required=True, help='The maximum run time in seconds for the training job.', default=86400) parser.add_argument('--max_run_time', type=int, required=True, help='The maximum run time in seconds for the training job.', default=86400)
parser.add_argument('--model_artifact_path', type=str.strip, required=True, help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.') parser.add_argument('--model_artifact_path', type=str, required=True, help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.') parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.') parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True) parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True)
parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all communications between ML compute instances in distributed training.', default=False) parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all communications between ML compute instances in distributed training.', default=False)
### Start spot instance support ### Start spot instance support
parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False) parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False)
parser.add_argument('--max_wait_time', type=_utils.str_to_int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) parser.add_argument('--max_wait_time', type=int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400)
parser.add_argument('--checkpoint_config', type=_utils.str_to_json_dict, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default='{}') parser.add_argument('--checkpoint_config', type=_utils.yaml_or_json_str, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default={})
### End spot instance support ### End spot instance support
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
return parser return parser
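The Bool-typed inputs above still reach this parser as the strings 'True'/'False' from the component spec, which is why the code keeps _utils.str_to_bool rather than switching to the builtin bool: argparse passes the raw string through, and bool('False') is truthy. A self-contained illustration of the pitfall, using a simplified stand-in for the helper:

import argparse

naive = argparse.ArgumentParser()
naive.add_argument('--spot_instance', type=bool, default=False)
print(naive.parse_args(['--spot_instance', 'False']).spot_instance)  # True: any non-empty string is truthy

def str_to_bool(value):
    # Simplified stand-in; the real helper lives in the component's _utils module.
    return str(value).strip().lower() in ('true', '1', 'yes')

fixed = argparse.ArgumentParser()
fixed.add_argument('--spot_instance', type=str_to_bool, default=False)
print(fixed.parse_args(['--spot_instance', 'False']).spot_instance)  # False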


@ -4,31 +4,40 @@ description: |
inputs: inputs:
- name: region - name: region
description: 'The region where the cluster launches.' description: 'The region where the cluster launches.'
type: String
- name: team_name - name: team_name
description: 'The name of your work team.' description: 'The name of your work team.'
type: String
- name: description - name: description
description: 'A description of the work team.' description: 'A description of the work team.'
type: String
- name: user_pool - name: user_pool
description: 'An identifier for a user pool. The user pool must be in the same region as the service that you are calling.' description: 'An identifier for a user pool. The user pool must be in the same region as the service that you are calling.'
type: String
- name: user_groups - name: user_groups
description: 'An identifier for a user group.' description: 'An identifier for a user group.'
type: String
- name: client_id - name: client_id
description: 'An identifier for an application client. You must create the app client ID using Amazon Cognito.' description: 'An identifier for an application client. You must create the app client ID using Amazon Cognito.'
type: String
- name: sns_topic - name: sns_topic
description: 'The ARN for the SNS topic to which notifications should be published.' description: 'The ARN for the SNS topic to which notifications should be published.'
default: '' default: ''
type: String
- name: endpoint_url - name: endpoint_url
description: 'The endpoint URL for the private link VPC endpoint.' description: 'The endpoint URL for the private link VPC endpoint.'
default: '' default: ''
type: String
- name: tags - name: tags
description: 'Key-value pairs to categorize AWS resources.' description: 'Key-value pairs to categorize AWS resources.'
default: '{}' default: '{}'
type: JsonObject
outputs: outputs:
- {name: workteam_arn, description: 'The ARN of the workteam.'} - {name: workteam_arn, description: 'The ARN of the workteam.'}
implementation: implementation:
container: container:
image: amazon/aws-sagemaker-kfp-components:0.3.0 image: amazon/aws-sagemaker-kfp-components:0.3.1
command: ['python'] command: ['python3']
args: [ args: [
workteam.py, workteam.py,
--region, {inputValue: region}, --region, {inputValue: region},


@ -19,13 +19,13 @@ def create_parser():
parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job') parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job')
_utils.add_default_client_arguments(parser) _utils.add_default_client_arguments(parser)
parser.add_argument('--team_name', type=str.strip, required=True, help='The name of your work team.') parser.add_argument('--team_name', type=str, required=True, help='The name of your work team.')
parser.add_argument('--description', type=str.strip, required=True, help='A description of the work team.') parser.add_argument('--description', type=str, required=True, help='A description of the work team.')
parser.add_argument('--user_pool', type=str.strip, required=False, help='An identifier for a user pool. The user pool must be in the same region as the service that you are calling.', default='') parser.add_argument('--user_pool', type=str, required=False, help='An identifier for a user pool. The user pool must be in the same region as the service that you are calling.', default='')
parser.add_argument('--user_groups', type=str.strip, required=False, help='A list of identifiers for user groups separated by commas.', default='') parser.add_argument('--user_groups', type=str, required=False, help='A list of identifiers for user groups separated by commas.', default='')
parser.add_argument('--client_id', type=str.strip, required=False, help='An identifier for an application client. You must create the app client ID using Amazon Cognito.', default='') parser.add_argument('--client_id', type=str, required=False, help='An identifier for an application client. You must create the app client ID using Amazon Cognito.', default='')
parser.add_argument('--sns_topic', type=str.strip, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='') parser.add_argument('--sns_topic', type=str, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='')
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
return parser return parser


@ -51,18 +51,18 @@ def ground_truth_test(region='us-west-2',
ground_truth_ui_template='s3://your-bucket-name/mini-image-classification/ground-truth-demo/instructions.template', ground_truth_ui_template='s3://your-bucket-name/mini-image-classification/ground-truth-demo/instructions.template',
ground_truth_title='Mini image classification', ground_truth_title='Mini image classification',
ground_truth_description='Test for Ground Truth KFP component', ground_truth_description='Test for Ground Truth KFP component',
ground_truth_num_workers_per_object='1', ground_truth_num_workers_per_object=1,
ground_truth_time_limit='30', ground_truth_time_limit=30,
ground_truth_task_availibility='3600', ground_truth_task_availibility=3600,
ground_truth_max_concurrent_tasks='20', ground_truth_max_concurrent_tasks=20,
training_algorithm_name='image classification', training_algorithm_name='image classification',
training_input_mode='Pipe', training_input_mode='Pipe',
training_hyperparameters='{"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"}', training_hyperparameters={"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"},
training_output_location='s3://your-bucket-name/mini-image-classification/training-output', training_output_location='s3://your-bucket-name/mini-image-classification/training-output',
training_instance_type='ml.p2.xlarge', training_instance_type='ml.p2.xlarge',
training_instance_count='1', training_instance_count=1,
training_volume_size='50', training_volume_size=50,
training_max_run_time='3600', training_max_run_time=3600,
role_arn='' role_arn=''
): ):


@ -21,7 +21,6 @@ channelObj = {
'S3DataDistributionType': 'FullyReplicated' 'S3DataDistributionType': 'FullyReplicated'
} }
}, },
'ContentType': '',
'CompressionType': 'None', 'CompressionType': 'None',
'RecordWrapperType': 'None', 'RecordWrapperType': 'None',
'InputMode': 'File' 'InputMode': 'File'
@@ -44,37 +43,37 @@ def hpo_test(region='us-west-2',
     image='',
     algorithm_name='K-Means',
     training_input_mode='File',
-    metric_definitions='{}',
+    metric_definitions={},
     strategy='Bayesian',
     metric_name='test:msd',
     metric_type='Minimize',
     early_stopping_type='Off',
-    static_parameters='{"k": "10", "feature_dim": "784"}',
-    integer_parameters='[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \
-                        {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]',
-    continuous_parameters='[]',
-    categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]',
-    channels=json.dumps(channelObjList),
+    static_parameters={"k": "10", "feature_dim": "784"},
+    integer_parameters=[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \
+                        {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}],
+    continuous_parameters=[],
+    categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}],
+    channels=channelObjList,
     output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
     output_encryption_key='',
     instance_type='ml.p2.16xlarge',
-    instance_count='1',
-    volume_size='50',
-    max_num_jobs='1',
-    max_parallel_jobs='1',
+    instance_count=1,
+    volume_size=50,
+    max_num_jobs=1,
+    max_parallel_jobs=1,
     resource_encryption_key='',
-    max_run_time='3600',
+    max_run_time=3600,
     vpc_security_group_ids='',
     vpc_subnets='',
     endpoint_url='',
-    network_isolation='True',
-    traffic_encryption='False',
+    network_isolation=True,
+    traffic_encryption=False,
     warm_start_type='',
     parent_hpo_jobs='',
-    spot_instance='False',
-    max_wait_time='3600',
-    checkpoint_config='{}',
-    tags='{}',
+    spot_instance=False,
+    max_wait_time=3600,
+    checkpoint_config={},
+    tags={},
     role_arn='',
 ):
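The dict and list arguments above (static_parameters, integer_parameters, channels, and so on) are now handed to the component ops directly instead of being pre-serialized with json.dumps. A small sketch of the round trip this relies on, under the assumption that such inputs are JSON-serialized by the SDK for the container command line and parsed back inside the component:

import json

static_parameters = {"k": "10", "feature_dim": "784"}

# What the component is assumed to receive as --static_parameters ...
serialized = json.dumps(static_parameters)
# ... and what a JSON/YAML-aware parser on the component side recovers.
assert json.loads(serialized) == static_parameters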

View File

@@ -26,7 +26,6 @@ channelObj = {
             'S3DataDistributionType': 'FullyReplicated'
         }
     },
-    'ContentType': '',
     'CompressionType': 'None',
     'RecordWrapperType': 'None',
     'InputMode': 'File'
@@ -52,37 +51,37 @@ def mnist_classification(region='us-west-2',
     hpo_metric_name='test:msd',
     hpo_metric_type='Minimize',
     hpo_early_stopping_type='Off',
-    hpo_static_parameters='{"k": "10", "feature_dim": "784"}',
-    hpo_integer_parameters='[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]',
-    hpo_continuous_parameters='[]',
-    hpo_categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]',
-    hpo_channels=json.dumps(hpoChannels),
-    hpo_spot_instance='False',
-    hpo_max_wait_time='3600',
-    hpo_checkpoint_config='{}',
+    hpo_static_parameters={"k": "10", "feature_dim": "784"},
+    hpo_integer_parameters=[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}],
+    hpo_continuous_parameters=[],
+    hpo_categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}],
+    hpo_channels=hpoChannels,
+    hpo_spot_instance=False,
+    hpo_max_wait_time=3600,
+    hpo_checkpoint_config={},
     output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
     output_encryption_key='',
     instance_type='ml.p2.16xlarge',
-    instance_count='1',
-    volume_size='50',
-    hpo_max_num_jobs='9',
-    hpo_max_parallel_jobs='3',
-    max_run_time='3600',
+    instance_count=1,
+    volume_size=50,
+    hpo_max_num_jobs=9,
+    hpo_max_parallel_jobs=3,
+    max_run_time=3600,
     endpoint_url='',
-    network_isolation='True',
-    traffic_encryption='False',
-    train_channels=json.dumps(trainChannels),
-    train_spot_instance='False',
-    train_max_wait_time='3600',
-    train_checkpoint_config='{}',
+    network_isolation=True,
+    traffic_encryption=False,
+    train_channels=trainChannels,
+    train_spot_instance=False,
+    train_max_wait_time=3600,
+    train_checkpoint_config={},
     batch_transform_instance_type='ml.m4.xlarge',
     batch_transform_input='s3://kubeflow-pipeline-data/mnist_kmeans_example/input',
     batch_transform_data_type='S3Prefix',
     batch_transform_content_type='text/csv',
     batch_transform_compression_type='None',
     batch_transform_ouput='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
-    batch_transform_max_concurrent='4',
-    batch_transform_max_payload='6',
+    batch_transform_max_concurrent=4,
+    batch_transform_max_payload=6,
     batch_strategy='MultiRecord',
     batch_transform_split_type='Line',
     role_arn=''

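The boolean sample arguments (network_isolation, traffic_encryption, spot_instance, and the like) also switch from the strings 'True'/'False' to real booleans. Because every pipeline parameter still reaches the container as a string, the component side presumably needs a parser stricter than argparse's type=bool, which would treat the string 'False' as truthy. A hypothetical sketch of such a helper (the name str_to_bool and its behavior are assumptions; the components' actual parser is not shown in this part of the diff):

def str_to_bool(value):
    # Accept values that are already booleans (e.g. non-string argparse defaults).
    if isinstance(value, bool):
        return value
    # Treat the usual truthy spellings as True; everything else is False.
    return str(value).strip().lower() in ('true', '1', 'yes')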
View File

@@ -20,7 +20,6 @@ channelObj = {
             'S3DataDistributionType': 'FullyReplicated'
         }
     },
-    'ContentType': '',
     'CompressionType': 'None',
     'RecordWrapperType': 'None',
     'InputMode': 'File'
@@ -40,19 +39,19 @@ def training(
     endpoint_url='',
     image='382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:1',
     training_input_mode='File',
-    hyperparameters='{"k": "10", "feature_dim": "784"}',
-    channels=json.dumps(channelObjList),
+    hyperparameters={"k": "10", "feature_dim": "784"},
+    channels=channelObjList,
     instance_type='ml.p2.xlarge',
-    instance_count='1',
-    volume_size='50',
-    max_run_time='3600',
+    instance_count=1,
+    volume_size=50,
+    max_run_time=3600,
     model_artifact_path='s3://kubeflow-pipeline-data/mnist_kmeans_example/data',
     output_encryption_key='',
-    network_isolation='True',
-    traffic_encryption='False',
-    spot_instance='False',
-    max_wait_time='3600',
-    checkpoint_config='{}',
+    network_isolation=True,
+    traffic_encryption=False,
+    spot_instance=False,
+    max_wait_time=3600,
+    checkpoint_config={},
     role=''
 ):
     training = sagemaker_train_op(