---
# Component header: display name and a short description.
name: 'SageMaker - Hyperparameter Tuning'
# Literal block scalar; the indented line below is the description text.
description: |
  Hyperparameter Tuning Jobs in SageMaker
# Component inputs. Every `default` is written as a quoted string.
# Inputs declared without a `default` key: region, role, metric_name,
# metric_type, channels, output_location, max_num_jobs, max_parallel_jobs.
inputs:
  # --- Job identity and credentials ---
  - name: region
    description: 'The region where the cluster launches.'
  - name: job_name
    description: 'The name of the tuning job. Must be unique within the same AWS account and AWS region.'
    default: ''
  - name: role
    description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'

  # --- Algorithm selection (training image or algorithm resource) ---
  - name: image
    description: 'The registry path of the Docker image that contains the training algorithm.'
    default: ''
  - name: algorithm_name
    description: 'The name of the algorithm resource to use for the hyperparameter tuning job. Do not specify a value for this if using training image.'
    default: ''
  - name: training_input_mode
    description: 'The input mode that the algorithm supports. File or Pipe.'
    default: 'File'

  # --- Objective metric and tuning strategy ---
  - name: metric_definitions
    description: 'The dictionary of name-regex pairs that specify the metrics that the algorithm emits.'
    default: '{}'
  - name: strategy
    description: 'How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.'
    default: 'Bayesian'
  - name: metric_name
    description: 'The name of the metric to use for the objective metric.'
  - name: metric_type
    description: 'Whether to minimize or maximize the objective metric.'
  - name: early_stopping_type
    description: 'Whether to use early stopping for training jobs launched by the tuning job.'
    # Quoted so it stays the string 'Off' rather than a YAML 1.1 boolean.
    default: 'Off'

  # --- Hyperparameter search space ---
  - name: static_parameters
    description: 'The values of hyperparameters that do not change for the tuning job.'
    default: '{}'
  - name: integer_parameters
    description: 'The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.'
    default: '[]'
  - name: continuous_parameters
    description: 'The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.'
    default: '[]'
  - name: categorical_parameters
    description: 'The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.'
    default: '[]'

  # --- Input data channels ---
  - name: channels
    description: 'A list of dicts specifying the input channels. Must have at least one.'
  - name: data_location_1
    description: 'The S3 URI of the input data source for channel 1.'
    default: ''
  - name: data_location_2
    description: 'The S3 URI of the input data source for channel 2.'
    default: ''
  - name: data_location_3
    description: 'The S3 URI of the input data source for channel 3.'
    default: ''
  - name: data_location_4
    description: 'The S3 URI of the input data source for channel 4.'
    default: ''
  - name: data_location_5
    description: 'The S3 URI of the input data source for channel 5.'
    default: ''
  - name: data_location_6
    description: 'The S3 URI of the input data source for channel 6.'
    default: ''
  - name: data_location_7
    description: 'The S3 URI of the input data source for channel 7.'
    default: ''
  - name: data_location_8
    description: 'The S3 URI of the input data source for channel 8.'
    default: ''

  # --- Model artifact output ---
  - name: output_location
    description: 'The Amazon S3 path where you want Amazon SageMaker to store the model artifacts from the best training job.'
  - name: output_encryption_key
    description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.'
    default: ''

  # --- Compute resources and job limits ---
  - name: instance_type
    description: 'The ML compute instance type.'
    default: 'ml.m4.xlarge'
  - name: instance_count
    description: 'The number of ML compute instances to use in each training job.'
    default: '1'
  - name: volume_size
    description: 'The size of the ML storage volume that you want to provision.'
    default: '1'
  - name: max_num_jobs
    description: 'The maximum number of training jobs that a hyperparameter tuning job can launch.'
  - name: max_parallel_jobs
    description: 'The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.'
  - name: max_run_time
    description: 'The maximum run time in seconds per training job.'
    default: '86400'
  - name: resource_encryption_key
    description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
    default: ''

  # --- Networking and isolation ---
  - name: vpc_security_group_ids
    description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
    default: ''
  - name: vpc_subnets
    description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
    default: ''
  - name: network_isolation
    description: 'Isolates the training container.'
    default: 'True'
  - name: traffic_encryption
    description: 'Encrypts all communications between ML compute instances in distributed training.'
    default: 'False'

  # --- Warm start ---
  - name: warm_start_type
    description: 'Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"'
    default: ''
  - name: parent_hpo_jobs
    description: 'List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.'
    default: ''

  # --- Resource tagging ---
  - name: tags
    description: 'Key-value pairs, to categorize AWS resources.'
    default: '{}'
# Component outputs. Each name here has a matching entry under
# implementation.container.fileOutputs.
outputs:
  - name: model_artifact_url
    description: 'Model artifacts url'
  - name: best_job_name
    description: 'Best training job in the hyperparameter tuning job'
  - name: best_hyperparameters
    description: 'Tuned hyperparameters'
  - name: training_image
    description: 'The registry path of the Docker image that contains the training algorithm'
# Container implementation: runs `python hyperparameter_tuning.py` in the
# image below, passing every declared input as a `--<input_name> <value>` pair.
implementation:
  container:
    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
    command: ['python']
    # Flow sequence kept so each flag sits beside its {inputValue: ...}
    # placeholder; one flag/value pair per line, same order as the original.
    args: [
      hyperparameter_tuning.py,
      --region, {inputValue: region},
      --job_name, {inputValue: job_name},
      --role, {inputValue: role},
      --image, {inputValue: image},
      --algorithm_name, {inputValue: algorithm_name},
      --training_input_mode, {inputValue: training_input_mode},
      --metric_definitions, {inputValue: metric_definitions},
      --strategy, {inputValue: strategy},
      --metric_name, {inputValue: metric_name},
      --metric_type, {inputValue: metric_type},
      --early_stopping_type, {inputValue: early_stopping_type},
      --static_parameters, {inputValue: static_parameters},
      --integer_parameters, {inputValue: integer_parameters},
      --continuous_parameters, {inputValue: continuous_parameters},
      --categorical_parameters, {inputValue: categorical_parameters},
      --channels, {inputValue: channels},
      --data_location_1, {inputValue: data_location_1},
      --data_location_2, {inputValue: data_location_2},
      --data_location_3, {inputValue: data_location_3},
      --data_location_4, {inputValue: data_location_4},
      --data_location_5, {inputValue: data_location_5},
      --data_location_6, {inputValue: data_location_6},
      --data_location_7, {inputValue: data_location_7},
      --data_location_8, {inputValue: data_location_8},
      --output_location, {inputValue: output_location},
      --output_encryption_key, {inputValue: output_encryption_key},
      --instance_type, {inputValue: instance_type},
      --instance_count, {inputValue: instance_count},
      --volume_size, {inputValue: volume_size},
      --max_num_jobs, {inputValue: max_num_jobs},
      --max_parallel_jobs, {inputValue: max_parallel_jobs},
      --resource_encryption_key, {inputValue: resource_encryption_key},
      --max_run_time, {inputValue: max_run_time},
      --vpc_security_group_ids, {inputValue: vpc_security_group_ids},
      --vpc_subnets, {inputValue: vpc_subnets},
      --network_isolation, {inputValue: network_isolation},
      --traffic_encryption, {inputValue: traffic_encryption},
      --warm_start_type, {inputValue: warm_start_type},
      --parent_hpo_jobs, {inputValue: parent_hpo_jobs},
      --tags, {inputValue: tags}
    ]
    # Maps each declared output name to a file path under /tmp.
    fileOutputs:
      model_artifact_url: /tmp/model_artifact_url.txt
      best_job_name: /tmp/best_job_name.txt
      best_hyperparameters: /tmp/best_hyperparameters.txt
      training_image: /tmp/training_image.txt