# Component specification for launching a SageMaker hyperparameter tuning job.
# The container below runs `hyperparameter_tuning.py` and is handed every
# declared input as a `--flag value` pair; results are exposed through the
# files listed under `fileOutputs`.
name: 'SageMaker - Hyperparameter Tuning'
description: |
  Hyperparameter Tuning Jobs in SageMaker

# Inputs without a `default` must be supplied by the caller.
inputs:
  # --- Job identity and algorithm selection ---
  - name: region
    description: 'The region where the cluster launches.'
    type: String
  - name: job_name
    description: 'The name of the tuning job. Must be unique within the same AWS account and AWS region.'
    default: ''
    type: String
  - name: role
    description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
    type: String
  # `image` and `algorithm_name` are alternatives (see the algorithm_name
  # description); both default to the empty string.
  - name: image
    description: 'The registry path of the Docker image that contains the training algorithm.'
    default: ''
    type: String
  - name: algorithm_name
    description: 'The name of the algorithm resource to use for the hyperparameter tuning job. Do not specify a value for this if using training image.'
    default: ''
    type: String
  - name: training_input_mode
    description: 'The input mode that the algorithm supports. File or Pipe.'
    default: 'File'
    type: String
  - name: metric_definitions
    description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.'
    default: '{}'
    type: JsonObject

  # --- Tuning strategy and objective ---
  - name: strategy
    description: 'How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.'
    default: 'Bayesian'
    type: String
  - name: metric_name
    description: 'The name of the metric to use for the objective metric.'
    type: String
  - name: metric_type
    description: 'Whether to minimize or maximize the objective metric.'
    type: String
  # Quoted on purpose: a bare Off would be read as a boolean by YAML 1.1 parsers.
  - name: early_stopping_type
    description: 'Whether to use early stopping for training jobs launched by the tuning job.'
    default: 'Off'
    type: String

  # --- Hyperparameters: fixed values and search ranges ---
  - name: static_parameters
    description: 'The values of hyperparameters that do not change for the tuning job.'
    default: '{}'
    type: JsonObject
  - name: integer_parameters
    description: 'The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.'
    default: '[]'
    type: JsonArray
  # Declared as JsonArray to match its '[]' default, its description, and the
  # sibling integer/categorical range inputs.
  - name: continuous_parameters
    description: 'The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.'
    default: '[]'
    type: JsonArray
  - name: categorical_parameters
    description: 'The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.'
    default: '[]'
    type: JsonArray

  # --- Data input and output ---
  - name: channels
    description: 'A list of dicts specifying the input channels. Must have at least one.'
    type: JsonArray
  - name: output_location
    description: 'The Amazon S3 path where you want Amazon SageMaker to store the model artifacts from the best training job.'
    type: String
  - name: output_encryption_key
    description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.'
    default: ''
    type: String

  # --- Compute resources and job limits ---
  - name: instance_type
    description: 'The ML compute instance type.'
    default: 'ml.m4.xlarge'
    type: String
  - name: instance_count
    description: 'The number of ML compute instances to use in each training job.'
    default: '1'
    type: Integer
  - name: volume_size
    description: 'The size of the ML storage volume that you want to provision.'
    default: '30'
    type: Integer
  - name: max_num_jobs
    description: 'The maximum number of training jobs that a hyperparameter tuning job can launch.'
    type: Integer
  - name: max_parallel_jobs
    description: 'The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.'
    type: Integer
  - name: max_run_time
    description: 'The maximum run time in seconds per training job.'
    default: '86400'
    type: Integer
  - name: resource_encryption_key
    description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
    default: ''
    type: String

  # --- Networking and isolation ---
  - name: vpc_security_group_ids
    description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
    default: ''
    type: String
  - name: vpc_subnets
    description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
    default: ''
    type: String
  # Bool defaults are kept as the quoted strings 'True' / 'False'.
  - name: network_isolation
    description: 'Isolates the training container.'
    default: 'True'
    type: Bool
  - name: traffic_encryption
    description: 'Encrypts all communications between ML compute instances in distributed training.'
    default: 'False'
    type: Bool

  # --- Managed spot training ---
  - name: spot_instance
    description: 'Use managed spot training.'
    default: 'False'
    type: Bool
  - name: max_wait_time
    description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.'
    default: '86400'
    type: Integer
  - name: checkpoint_config
    description: 'Dictionary of information about the output location for managed spot training checkpoint data.'
    default: '{}'
    type: JsonObject

  # --- Warm start ---
  - name: warm_start_type
    description: 'Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"'
    default: ''
    type: String
  - name: parent_hpo_jobs
    description: 'List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.'
    default: ''
    type: String

  # --- Miscellaneous ---
  - name: endpoint_url
    description: 'The endpoint URL for the private link VPC endpoint.'
    default: ''
    type: String
  - name: tags
    description: 'Key-value pairs, to categorize AWS resources.'
    default: '{}'
    type: JsonObject

# Every output name here has a matching entry under `fileOutputs` below.
outputs:
  - name: hpo_job_name
    description: 'The name of the hyper parameter tuning job'
  - name: model_artifact_url
    description: 'Model artifacts url'
  - name: best_job_name
    description: 'Best training job in the hyper parameter tuning job'
  - name: best_hyperparameters
    description: 'Tuned hyperparameters'
  - name: training_image
    description: 'The registry path of the Docker image that contains the training algorithm'

implementation:
  container:
    image: amazon/aws-sagemaker-kfp-components:0.3.1
    command: ['python3']
    # Script name followed by one `--flag`, `{inputValue: ...}` pair per input.
    # The order is unchanged from the original flow-style list.
    args:
      - hyperparameter_tuning.py
      - --region
      - {inputValue: region}
      - --endpoint_url
      - {inputValue: endpoint_url}
      - --job_name
      - {inputValue: job_name}
      - --role
      - {inputValue: role}
      - --image
      - {inputValue: image}
      - --algorithm_name
      - {inputValue: algorithm_name}
      - --training_input_mode
      - {inputValue: training_input_mode}
      - --metric_definitions
      - {inputValue: metric_definitions}
      - --strategy
      - {inputValue: strategy}
      - --metric_name
      - {inputValue: metric_name}
      - --metric_type
      - {inputValue: metric_type}
      - --early_stopping_type
      - {inputValue: early_stopping_type}
      - --static_parameters
      - {inputValue: static_parameters}
      - --integer_parameters
      - {inputValue: integer_parameters}
      - --continuous_parameters
      - {inputValue: continuous_parameters}
      - --categorical_parameters
      - {inputValue: categorical_parameters}
      - --channels
      - {inputValue: channels}
      - --output_location
      - {inputValue: output_location}
      - --output_encryption_key
      - {inputValue: output_encryption_key}
      - --instance_type
      - {inputValue: instance_type}
      - --instance_count
      - {inputValue: instance_count}
      - --volume_size
      - {inputValue: volume_size}
      - --max_num_jobs
      - {inputValue: max_num_jobs}
      - --max_parallel_jobs
      - {inputValue: max_parallel_jobs}
      - --resource_encryption_key
      - {inputValue: resource_encryption_key}
      - --max_run_time
      - {inputValue: max_run_time}
      - --vpc_security_group_ids
      - {inputValue: vpc_security_group_ids}
      - --vpc_subnets
      - {inputValue: vpc_subnets}
      - --network_isolation
      - {inputValue: network_isolation}
      - --traffic_encryption
      - {inputValue: traffic_encryption}
      - --spot_instance
      - {inputValue: spot_instance}
      - --max_wait_time
      - {inputValue: max_wait_time}
      - --checkpoint_config
      - {inputValue: checkpoint_config}
      - --warm_start_type
      - {inputValue: warm_start_type}
      - --parent_hpo_jobs
      - {inputValue: parent_hpo_jobs}
      - --tags
      - {inputValue: tags}
    # Output name -> file path inside the container for that output's value.
    fileOutputs:
      hpo_job_name: /tmp/hpo_job_name.txt
      model_artifact_url: /tmp/model_artifact_url.txt
      best_job_name: /tmp/best_job_name.txt
      best_hyperparameters: /tmp/best_hyperparameters.txt
      training_image: /tmp/training_image.txt