# File listing metadata: 112 lines, 4.8 KiB, YAML
# Component metadata: the display name and a one-sentence summary of what the
# component does. The description is a folded scalar (`>-`), so it is read as
# a single line with no trailing newline.
name: SageMaker - Processing Job
description: >-
  Perform data pre-processing, post-processing, feature engineering, data
  validation, model evaluation, and interpretation using SageMaker

# Parameters accepted by the component. Every input listed here is forwarded
# to the container as a `--<name>` command-line flag (see
# implementation.container.args). Inputs without a `default` (region, role)
# have no fallback value declared in this file.
#
# Defaults are kept as quoted strings, exactly as originally written --
# presumably the component loader expects string-typed defaults; confirm
# before converting them to native YAML integers or booleans.
inputs:
  # --- Connection / account settings ---
  - name: region
    type: String
    description: The region for the SageMaker resource.
  - name: endpoint_url
    type: String
    description: The URL to use when communicating with the SageMaker service.
    default: ''
  - name: assume_role
    type: String
    description: The ARN of an IAM role to assume when connecting to SageMaker.
    default: ''
  - name: tags
    type: JsonObject
    description: An array of key-value pairs, to categorize AWS resources.
    default: '{}'

  # --- Job identity and container image ---
  - name: job_name
    type: String
    description: The name of the processing job.
    default: ''
  - name: role
    type: String
    description: >-
      The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform
      tasks on your behalf.
  - name: image
    type: String
    description: >-
      The registry path of the Docker image that contains the processing
      container.
    default: ''

  # --- Compute resources ---
  - name: instance_type
    type: String
    description: The ML compute instance type.
    default: ml.m4.xlarge
  - name: instance_count
    type: Integer
    description: >-
      The number of ML compute instances to use in each processing job.
    default: '1'
  - name: volume_size
    type: Integer
    description: The size of the ML storage volume that you want to provision.
    default: '30'

  # --- Encryption ---
  - name: resource_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt data on the
      storage volume attached to the ML compute instance(s).
    default: ''
  - name: output_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt the processing
      artifacts.
    default: ''

  # --- Runtime limits and container invocation ---
  - name: max_run_time
    type: Integer
    description: The maximum run time in seconds for the processing job.
    default: '86400'
  - name: environment
    type: JsonObject
    description: >-
      The dictionary of the environment variables to set in the Docker
      container. Up to 16 key-value entries in the map.
    default: '{}'
  - name: container_entrypoint
    type: JsonArray
    description: >-
      The entrypoint for the processing job. This is in the form of a list of
      strings that make a command.
    default: '[]'
  - name: container_arguments
    type: JsonArray
    description: A list of string arguments to be passed to a processing job.
    default: '[]'

  # --- Data channels ---
  - name: output_config
    type: JsonArray
    description: Parameters that specify Amazon S3 outputs for a processing job.
    default: '[]'
  - name: input_config
    type: JsonArray
    description: Parameters that specify Amazon S3 inputs for a processing job.
    default: '[]'

  # --- Networking ---
  - name: vpc_security_group_ids
    type: String
    description: The VPC security group IDs, in the form sg-xxxxxxxx.
    default: ''
  - name: vpc_subnets
    type: String
    # Wording refers to the processing job; the earlier text said "hpo job".
    description: >-
      The ID of the subnets in the VPC to which you want to connect your
      processing job.
    default: ''
  - name: network_isolation
    type: Bool
    description: Isolates the processing container.
    # Quoted on purpose: the default is the string "True", not a YAML boolean.
    default: 'True'
  - name: traffic_encryption
    type: Bool
    # Wording refers to processing; the earlier text said "distributed training".
    description: >-
      Encrypts all communications between ML compute instances in distributed
      processing.
    # Quoted on purpose: the default is the string "False", not a YAML boolean.
    default: 'False'

# Values produced by the component. The file path for each output is handed to
# the container through the matching `--<name>_output_path` argument in
# implementation.container.args.
outputs:
  - name: job_name
    description: Processing job name.
  - name: output_artifacts
    description: A dictionary containing the output S3 artifacts.

# How the component is executed: `python3` is run inside the pinned container
# image with the script path as its first argument, followed by one
# `--flag <value>` pair per declared input and one `--<name>_output_path`
# pair per declared output.
implementation:
  container:
    image: public.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components:1.1.2
    command:
      - python3
    args:
      # Script passed to python3.
      - process/src/sagemaker_process_component.py

      # Connection / account settings.
      - --region
      - inputValue: region
      - --endpoint_url
      - inputValue: endpoint_url
      - --assume_role
      - inputValue: assume_role
      - --tags
      - inputValue: tags

      # Job identity and container image.
      - --job_name
      - inputValue: job_name
      - --role
      - inputValue: role
      - --image
      - inputValue: image

      # Compute resources.
      - --instance_type
      - inputValue: instance_type
      - --instance_count
      - inputValue: instance_count
      - --volume_size
      - inputValue: volume_size

      # Encryption keys.
      - --resource_encryption_key
      - inputValue: resource_encryption_key
      - --output_encryption_key
      - inputValue: output_encryption_key

      # Runtime limits and container invocation.
      - --max_run_time
      - inputValue: max_run_time
      - --environment
      - inputValue: environment
      - --container_entrypoint
      - inputValue: container_entrypoint
      - --container_arguments
      - inputValue: container_arguments

      # Data channels.
      - --output_config
      - inputValue: output_config
      - --input_config
      - inputValue: input_config

      # Networking.
      - --vpc_security_group_ids
      - inputValue: vpc_security_group_ids
      - --vpc_subnets
      - inputValue: vpc_subnets
      - --network_isolation
      - inputValue: network_isolation
      - --traffic_encryption
      - inputValue: traffic_encryption

      # Output file locations, one per entry under `outputs`.
      - --job_name_output_path
      - outputPath: job_name
      - --output_artifacts_output_path
      - outputPath: output_artifacts