189 lines
8.4 KiB
YAML
189 lines
8.4 KiB
YAML
# Component metadata: display name and one-line summary.
name: 'SageMaker - Hyperparameter Tuning'
description: 'Hyperparameter Tuning Jobs in SageMaker'
# Input parameters of the component. Every entry is forwarded to the
# container as a `--<name>` command-line flag (see implementation.args).
# Defaults are kept as quoted strings so that YAML does not re-type them.
inputs:
  # Managed spot training and run-time limits.
  - name: spot_instance
    type: Bool
    description: Use managed spot training.
    default: 'False'
  - name: max_wait_time
    type: Integer
    description: >-
      The maximum time in seconds you are willing to wait for a managed spot
      training job to complete.
    default: '86400'
  - name: max_run_time
    type: Integer
    description: The maximum run time in seconds for the training job.
    default: '86400'
  - name: checkpoint_config
    type: JsonObject
    description: >-
      Dictionary of information about the output location for managed spot
      training checkpoint data.
    default: '{}'

  # Connection settings and resource tagging.
  - name: region
    type: String
    description: The region for the SageMaker resource.
  - name: endpoint_url
    type: String
    description: The URL to use when communicating with the SageMaker service.
    default: ''
  - name: assume_role
    type: String
    description: >-
      The ARN of an IAM role to assume when connecting to SageMaker.
    default: ''
  - name: tags
    type: JsonObject
    description: An array of key-value pairs, to categorize AWS resources.
    default: '{}'

  # Tuning job identity and training algorithm.
  - name: job_name
    type: String
    description: >-
      The name of the tuning job. Must be unique within the same AWS account
      and AWS region.
    default: ''
  - name: role
    type: String
    description: >-
      The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform
      tasks on your behalf.
  - name: image
    type: String
    description: >-
      The registry path of the Docker image that contains the training
      algorithm.
    default: ''
  - name: algorithm_name
    type: String
    description: >-
      The name of the resource algorithm to use for the hyperparameter tuning
      job.
    default: ''
  - name: training_input_mode
    type: String
    description: The input mode that the algorithm supports. File or Pipe.
    default: File

  # Objective metric and search strategy.
  - name: metric_definitions
    type: JsonObject
    # Grammar fix: the original read "pairs specify the metrics".
    description: >-
      The dictionary of name-regex pairs that specify the metrics that the
      algorithm emits.
    default: '{}'
  - name: strategy
    type: String
    description: >-
      How hyperparameter tuning chooses the combinations of hyperparameter
      values to use for the training job it launches.
    default: Bayesian
  - name: metric_name
    type: String
    description: The name of the metric to use for the objective metric.
  - name: metric_type
    type: String
    description: Whether to minimize or maximize the objective metric.
  - name: early_stopping_type
    type: String
    # Fixed: this description used to be a copy of the metric_type text.
    description: >-
      Whether to use early stopping for training jobs launched by the
      hyperparameter tuning job.
    # Quoted on purpose: a bare Off is read as boolean false by YAML 1.1.
    default: 'Off'

  # Hyperparameters: fixed values and search ranges.
  - name: static_parameters
    type: JsonObject
    description: >-
      The values of hyperparameters that do not change for the tuning job.
    default: '{}'
  - name: integer_parameters
    type: JsonArray
    description: >-
      The array of IntegerParameterRange objects that specify ranges of
      integer hyperparameters that you want to search.
    default: '[]'
  - name: continuous_parameters
    type: JsonArray
    description: >-
      The array of ContinuousParameterRange objects that specify ranges of
      continuous hyperparameters that you want to search.
    default: '[]'
  - name: categorical_parameters
    type: JsonArray
    description: >-
      The array of CategoricalParameterRange objects that specify ranges of
      categorical hyperparameters that you want to search.
    default: '[]'

  # Input data and output artifacts.
  - name: channels
    type: JsonArray
    description: >-
      A list of dicts specifying the input channels. Must have at least one.
  - name: output_location
    type: String
    # Fixed: the original text referred to "the transform job", which does
    # not apply to a tuning component.
    description: >-
      The Amazon S3 path where you want Amazon SageMaker to store the model
      artifacts of the training jobs.
  - name: output_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt the model
      artifacts.
    default: ''

  # Compute resources and job limits.
  - name: instance_type
    type: String
    description: The ML compute instance type.
    default: ml.m4.xlarge
  - name: instance_count
    type: Integer
    description: >-
      The number of ML compute instances to use in each training job.
    default: '1'
  - name: volume_size
    type: Integer
    description: The size of the ML storage volume that you want to provision.
    default: '30'
  - name: max_num_jobs
    type: Integer
    description: >-
      The maximum number of training jobs that a hyperparameter tuning job
      can launch.
  - name: max_parallel_jobs
    type: Integer
    description: >-
      The maximum number of concurrent training jobs that a hyperparameter
      tuning job can launch.
  - name: resource_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt data on the
      storage volume attached to the ML compute instance(s).
    default: ''

  # Networking and isolation.
  - name: vpc_security_group_ids
    type: String
    description: The VPC security group IDs, in the form sg-xxxxxxxx.
    default: ''
  - name: vpc_subnets
    type: String
    description: >-
      The ID of the subnets in the VPC to which you want to connect your hpo
      job.
    default: ''
  - name: network_isolation
    type: Bool
    description: Isolates the training container.
    default: 'True'
  - name: traffic_encryption
    type: Bool
    description: >-
      Encrypts all communications between ML compute instances in distributed
      training.
    default: 'False'

  # Warm start from earlier tuning jobs.
  - name: warm_start_type
    type: String
    description: >-
      Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"
    default: ''
  - name: parent_hpo_jobs
    type: String
    description: >-
      List of previously completed or stopped hyperparameter tuning jobs to
      be used as a starting point.
    default: ''
# Values produced by the component. Each one is paired with a
# `--<name>_output_path` flag in implementation.args.
outputs:
  - name: hpo_job_name
    description: The name of the hyper parameter tuning job.
  - name: model_artifact_url
    description: The output model artifacts S3 url.
  - name: best_job_name
    description: Best training job in the hyper parameter tuning job.
  - name: best_hyperparameters
    description: The resulting tuned hyperparameters.
  - name: training_image
    description: >-
      The registry path of the Docker image that contains the training
      algorithm.
# Container invocation: the image to run, the command, and its argument
# list. The order of args is significant: each flag is immediately
# followed by its value placeholder.
implementation:
  container:
    image: public.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components:1.1.2
    command:
      - python3
    args:
      # Script path, the first argument after the python3 command.
      - hyperparameter_tuning/src/sagemaker_tuning_component.py

      # One flag per declared input, followed by that input's value.
      - --spot_instance
      - inputValue: spot_instance
      - --max_wait_time
      - inputValue: max_wait_time
      - --max_run_time
      - inputValue: max_run_time
      - --checkpoint_config
      - inputValue: checkpoint_config
      - --region
      - inputValue: region
      - --endpoint_url
      - inputValue: endpoint_url
      - --assume_role
      - inputValue: assume_role
      - --tags
      - inputValue: tags
      - --job_name
      - inputValue: job_name
      - --role
      - inputValue: role
      - --image
      - inputValue: image
      - --algorithm_name
      - inputValue: algorithm_name
      - --training_input_mode
      - inputValue: training_input_mode
      - --metric_definitions
      - inputValue: metric_definitions
      - --strategy
      - inputValue: strategy
      - --metric_name
      - inputValue: metric_name
      - --metric_type
      - inputValue: metric_type
      - --early_stopping_type
      - inputValue: early_stopping_type
      - --static_parameters
      - inputValue: static_parameters
      - --integer_parameters
      - inputValue: integer_parameters
      - --continuous_parameters
      - inputValue: continuous_parameters
      - --categorical_parameters
      - inputValue: categorical_parameters
      - --channels
      - inputValue: channels
      - --output_location
      - inputValue: output_location
      - --output_encryption_key
      - inputValue: output_encryption_key
      - --instance_type
      - inputValue: instance_type
      - --instance_count
      - inputValue: instance_count
      - --volume_size
      - inputValue: volume_size
      - --max_num_jobs
      - inputValue: max_num_jobs
      - --max_parallel_jobs
      - inputValue: max_parallel_jobs
      - --resource_encryption_key
      - inputValue: resource_encryption_key
      - --vpc_security_group_ids
      - inputValue: vpc_security_group_ids
      - --vpc_subnets
      - inputValue: vpc_subnets
      - --network_isolation
      - inputValue: network_isolation
      - --traffic_encryption
      - inputValue: traffic_encryption
      - --warm_start_type
      - inputValue: warm_start_type
      - --parent_hpo_jobs
      - inputValue: parent_hpo_jobs

      # One flag per declared output, followed by that output's
      # outputPath placeholder.
      - --hpo_job_name_output_path
      - outputPath: hpo_job_name
      - --model_artifact_url_output_path
      - outputPath: model_artifact_url
      - --best_job_name_output_path
      - outputPath: best_job_name
      - --best_hyperparameters_output_path
      - outputPath: best_hyperparameters
      - --training_image_output_path
      - outputPath: training_image