# 200 lines, 8.2 KiB, YAML
# Component display name.
name: 'SageMaker - Hyperparameter Tuning'
# Short summary of the component; the literal block scalar keeps the trailing
# newline of the original value.
description: |
  Hyperparameter Tuning Jobs in SageMaker
# Component inputs. Each entry declares a name and a description; `default`
# and `type` appear where the spec provides them. Every input listed here is
# forwarded to the container as a `--<name>` flag in `implementation.args`.
inputs:
  # --- Job identity and algorithm selection ---
  # NOTE(review): `region` is the only input without a declared type.
  - name: region
    description: 'The region where the cluster launches.'
  - name: job_name
    description: 'The name of the tuning job. Must be unique within the same AWS account and AWS region.'
    default: ''
    type: String
  - name: role
    description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
    type: String
  - name: image
    description: 'The registry path of the Docker image that contains the training algorithm.'
    default: ''
    type: String
  - name: algorithm_name
    description: 'The name of the algorithm resource to use for the hyperparameter tuning job. Do not specify a value for this if using training image.'
    default: ''
    type: String
  - name: training_input_mode
    description: 'The input mode that the algorithm supports. File or Pipe.'
    default: 'File'
    type: String

  # --- Objective metric and search strategy ---
  - name: metric_definitions
    description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.'
    default: '{}'
    type: JsonObject
  - name: strategy
    description: 'How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.'
    default: 'Bayesian'
    type: String
  - name: metric_name
    description: 'The name of the metric to use for the objective metric.'
    type: String
  - name: metric_type
    description: 'Whether to minimize or maximize the objective metric.'
    type: String
  - name: early_stopping_type
    description: 'Whether to use early stopping for training jobs launched by the tuning job.'
    default: 'Off'
    type: String

  # --- Hyperparameters: fixed values and search ranges ---
  - name: static_parameters
    description: 'The values of hyperparameters that do not change for the tuning job.'
    default: '{}'
    type: JsonObject
  - name: integer_parameters
    description: 'The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.'
    default: '[]'
    type: JsonArray
  # Declared as JsonArray: the default is '[]' and the description calls this
  # an array, matching integer_parameters and categorical_parameters.
  - name: continuous_parameters
    description: 'The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.'
    default: '[]'
    type: JsonArray
  - name: categorical_parameters
    description: 'The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.'
    default: '[]'
    type: JsonArray

  # --- Input data and output artifacts ---
  - name: channels
    description: 'A list of dicts specifying the input channels. Must have at least one.'
    type: JsonArray
  # NOTE(review): the description wording ("artifacts is from") reads like a
  # typo; the string is left unchanged here.
  - name: output_location
    description: 'The Amazon S3 path where you want Amazon SageMaker to store the model artifacts is from the best training job.'
    type: String
  - name: output_encryption_key
    description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.'
    default: ''
    type: String

  # --- Compute resources and job limits ---
  # Integer defaults are written as quoted strings, as in the original spec.
  - name: instance_type
    description: 'The ML compute instance type.'
    default: 'ml.m4.xlarge'
    type: String
  - name: instance_count
    description: 'The number of ML compute instances to use in each training job.'
    default: '1'
    type: Integer
  - name: volume_size
    description: 'The size of the ML storage volume that you want to provision.'
    default: '30'
    type: Integer
  - name: max_num_jobs
    description: 'The maximum number of training jobs that a hyperparameter tuning job can launch.'
    type: Integer
  - name: max_parallel_jobs
    description: 'The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.'
    type: Integer
  - name: max_run_time
    description: 'The maximum run time in seconds per training job.'
    default: '86400'
    type: Integer
  - name: resource_encryption_key
    description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
    default: ''
    type: String

  # --- Networking and isolation ---
  # Bool defaults are written as the quoted strings 'True' / 'False'.
  - name: vpc_security_group_ids
    description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
    default: ''
    type: String
  - name: vpc_subnets
    description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
    default: ''
    type: String
  - name: network_isolation
    description: 'Isolates the training container.'
    default: 'True'
    type: Bool
  - name: traffic_encryption
    description: 'Encrypts all communications between ML compute instances in distributed training.'
    default: 'False'
    type: Bool

  # --- Managed spot training ---
  - name: spot_instance
    description: 'Use managed spot training.'
    default: 'False'
    type: Bool
  - name: max_wait_time
    description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.'
    default: '86400'
    type: Integer
  - name: checkpoint_config
    description: 'Dictionary of information about the output location for managed spot training checkpoint data.'
    default: '{}'
    type: JsonObject

  # --- Warm start ---
  - name: warm_start_type
    description: 'Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"'
    default: ''
    type: String
  - name: parent_hpo_jobs
    description: 'List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.'
    default: ''
    type: String

  # --- Endpoint override and resource tagging ---
  - name: endpoint_url
    description: 'The endpoint URL for the private link VPC endpoint.'
    default: ''
    type: String
  - name: tags
    description: 'Key-value pairs, to categorize AWS resources.'
    default: '{}'
    type: JsonObject
# Component outputs. No types are declared. Each name below has a matching
# entry under `implementation.container.fileOutputs`.
outputs:
  - name: hpo_job_name
    description: 'The name of the hyper parameter tuning job'
  - name: model_artifact_url
    description: 'Model artifacts url'
  - name: best_job_name
    description: 'Best training job in the hyper parameter tuning job'
  - name: best_hyperparameters
    description: 'Tuned hyperparameters'
  - name: training_image
    description: 'The registry path of the Docker image that contains the training algorithm'
# Container implementation: runs `python3 hyperparameter_tuning.py` in the
# pinned image, passing each declared input as a `--<name>` flag immediately
# followed by its value placeholder.
implementation:
  container:
    image: amazon/aws-sagemaker-kfp-components:0.3.1
    command:
      - python3
    # Flags and values alternate; the order matches the original spec exactly.
    args:
      - hyperparameter_tuning.py
      - '--region'
      - {inputValue: region}
      - '--endpoint_url'
      - {inputValue: endpoint_url}
      - '--job_name'
      - {inputValue: job_name}
      - '--role'
      - {inputValue: role}
      - '--image'
      - {inputValue: image}
      - '--algorithm_name'
      - {inputValue: algorithm_name}
      - '--training_input_mode'
      - {inputValue: training_input_mode}
      - '--metric_definitions'
      - {inputValue: metric_definitions}
      - '--strategy'
      - {inputValue: strategy}
      - '--metric_name'
      - {inputValue: metric_name}
      - '--metric_type'
      - {inputValue: metric_type}
      - '--early_stopping_type'
      - {inputValue: early_stopping_type}
      - '--static_parameters'
      - {inputValue: static_parameters}
      - '--integer_parameters'
      - {inputValue: integer_parameters}
      - '--continuous_parameters'
      - {inputValue: continuous_parameters}
      - '--categorical_parameters'
      - {inputValue: categorical_parameters}
      - '--channels'
      - {inputValue: channels}
      - '--output_location'
      - {inputValue: output_location}
      - '--output_encryption_key'
      - {inputValue: output_encryption_key}
      - '--instance_type'
      - {inputValue: instance_type}
      - '--instance_count'
      - {inputValue: instance_count}
      - '--volume_size'
      - {inputValue: volume_size}
      - '--max_num_jobs'
      - {inputValue: max_num_jobs}
      - '--max_parallel_jobs'
      - {inputValue: max_parallel_jobs}
      - '--resource_encryption_key'
      - {inputValue: resource_encryption_key}
      - '--max_run_time'
      - {inputValue: max_run_time}
      - '--vpc_security_group_ids'
      - {inputValue: vpc_security_group_ids}
      - '--vpc_subnets'
      - {inputValue: vpc_subnets}
      - '--network_isolation'
      - {inputValue: network_isolation}
      - '--traffic_encryption'
      - {inputValue: traffic_encryption}
      - '--spot_instance'
      - {inputValue: spot_instance}
      - '--max_wait_time'
      - {inputValue: max_wait_time}
      - '--checkpoint_config'
      - {inputValue: checkpoint_config}
      - '--warm_start_type'
      - {inputValue: warm_start_type}
      - '--parent_hpo_jobs'
      - {inputValue: parent_hpo_jobs}
      - '--tags'
      - {inputValue: tags}
    # Maps each declared output name to a file path under /tmp in the
    # container (presumably written by the script; confirm against
    # hyperparameter_tuning.py).
    fileOutputs:
      hpo_job_name: /tmp/hpo_job_name.txt
      model_artifact_url: /tmp/model_artifact_url.txt
      best_job_name: /tmp/best_job_name.txt
      best_hyperparameters: /tmp/best_hyperparameters.txt
      training_image: /tmp/training_image.txt