112 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			YAML
		
	
	
	
			
		
		
	
	
			112 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			YAML
		
	
	
	
# Display name and summary of this component.
name: SageMaker - Processing Job
description: >-
  Perform data pre-processing, post-processing, feature engineering, data
  validation, and model evaluation, and interpretation on using SageMaker
# Inputs accepted by the component. Every entry is passed to the container as
# a command-line flag of the same name (see implementation.container.args).
# Defaults are written as quoted strings, including those of Integer, Bool,
# JsonObject and JsonArray inputs.
inputs:
# --- AWS connection settings ---
- name: region
  type: String
  description: The region for the SageMaker resource.
- name: endpoint_url
  type: String
  description: The URL to use when communicating with the SageMaker service.
  default: ''
- name: assume_role
  type: String
  description: The ARN of an IAM role to assume when connecting to SageMaker.
  default: ''
- name: tags
  type: JsonObject
  description: An array of key-value pairs, to categorize AWS resources.
  default: '{}'
# --- Job identity and compute resources ---
- name: job_name
  type: String
  description: The name of the processing job.
  default: ''
- name: role
  type: String
  description: >-
    The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform
    tasks on your behalf.
- name: image
  type: String
  description: >-
    The registry path of the Docker image that contains the processing
    container.
  default: ''
- name: instance_type
  type: String
  description: The ML compute instance type.
  default: ml.m4.xlarge
- name: instance_count
  type: Integer
  description: >-
    The number of ML compute instances to use in each processing job.
  default: '1'
- name: volume_size
  type: Integer
  description: >-
    The size of the ML storage volume that you want to provision.
  default: '30'
- name: resource_encryption_key
  type: String
  description: >-
    The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage
    volume attached to the ML compute instance(s).
  default: ''
- name: output_encryption_key
  type: String
  description: >-
    The AWS KMS key that Amazon SageMaker uses to encrypt the processing
    artifacts.
  default: ''
- name: max_run_time
  type: Integer
  description: The maximum run time in seconds for the processing job.
  default: '86400'
# --- Container configuration ---
- name: environment
  type: JsonObject
  description: >-
    The dictionary of the environment variables to set in the Docker
    container. Up to 16 key-value entries in the map.
  default: '{}'
- name: container_entrypoint
  type: JsonArray
  description: >-
    The entrypoint for the processing job. This is in the form of a list of
    strings that make a command.
  default: '[]'
- name: container_arguments
  type: JsonArray
  description: A list of string arguments to be passed to a processing job.
  default: '[]'
# --- Data channels ---
- name: output_config
  type: JsonArray
  description: >-
    Parameters that specify Amazon S3 outputs for a processing job.
  default: '[]'
- name: input_config
  type: JsonArray
  description: >-
    Parameters that specify Amazon S3 inputs for a processing job.
  default: '[]'
# --- Networking ---
- name: vpc_security_group_ids
  type: String
  description: The VPC security group IDs, in the form sg-xxxxxxxx.
  default: ''
- name: vpc_subnets
  type: String
  # Previously said "hpo job"; this component creates a processing job.
  description: >-
    The ID of the subnets in the VPC to which you want to connect your
    processing job.
  default: ''
- name: network_isolation
  type: Bool
  description: Isolates the processing container.
  default: "True"
- name: traffic_encryption
  type: Bool
  # Previously said "distributed training"; this component runs processing.
  description: >-
    Encrypts all communications between ML compute instances in distributed
    processing jobs.
  default: "False"
# Values produced by the component; each one is referenced by an outputPath
# placeholder in implementation.container.args.
outputs:
- name: job_name
  description: Processing job name.
- name: output_artifacts
  description: A dictionary containing the output S3 artifacts.
# Container invocation: runs the processing component script with python3 and
# forwards every declared input as a "--flag value" pair.
implementation:
  container:
    image: amazon/aws-sagemaker-kfp-components:1.1.0
    command:
    - python3
    args:
    # Script passed to python3 as its first argument.
    - process/src/sagemaker_process_component.py
    # AWS connection settings.
    - --region
    - inputValue: region
    - --endpoint_url
    - inputValue: endpoint_url
    - --assume_role
    - inputValue: assume_role
    - --tags
    - inputValue: tags
    # Job identity and compute resources.
    - --job_name
    - inputValue: job_name
    - --role
    - inputValue: role
    - --image
    - inputValue: image
    - --instance_type
    - inputValue: instance_type
    - --instance_count
    - inputValue: instance_count
    - --volume_size
    - inputValue: volume_size
    - --resource_encryption_key
    - inputValue: resource_encryption_key
    - --output_encryption_key
    - inputValue: output_encryption_key
    - --max_run_time
    - inputValue: max_run_time
    # Container configuration.
    - --environment
    - inputValue: environment
    - --container_entrypoint
    - inputValue: container_entrypoint
    - --container_arguments
    - inputValue: container_arguments
    # Data channels.
    - --output_config
    - inputValue: output_config
    - --input_config
    - inputValue: input_config
    # Networking.
    - --vpc_security_group_ids
    - inputValue: vpc_security_group_ids
    - --vpc_subnets
    - inputValue: vpc_subnets
    - --network_isolation
    - inputValue: network_isolation
    - --traffic_encryption
    - inputValue: traffic_encryption
    # Output placeholders, one per entry declared under `outputs`.
    - --job_name_output_path
    - outputPath: job_name
    - --output_artifacts_output_path
    - outputPath: output_artifacts