name: 'SageMaker - Batch Transformation'
description: |
  Batch Transformation Jobs in SageMaker
inputs:
  - name: region
    description: 'The region where the batch transform job launches.'
  - name: job_name
    description: 'The name of the batch transform job.'
    default: ''
  - name: model_name
    description: 'The name of the model that you want to use for the transform job.'
  - name: max_concurrent
    description: 'The maximum number of parallel requests that can be sent to each instance in a transform job.'
    default: '0'
  - name: max_payload
    description: 'The maximum allowed size of the payload, in MB.'
    default: '6'
  - name: batch_strategy
    description: 'The number of records to include in a mini-batch for an HTTP inference request: MultiRecord or SingleRecord.'
    default: ''
  - name: environment
    description: 'The environment variables to set in the Docker container. Up to 16 key-value entries in the map.'
    default: '{}'
  - name: input_location
    description: 'The S3 location of the data source that is associated with a channel.'
  - name: data_type
    description: 'Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.'
    default: 'S3Prefix'
  - name: content_type
    description: 'The multipurpose internet mail extension (MIME) type of the data.'
    default: ''
  - name: split_type
    description: 'The method to use to split the transform job data files into smaller batches.'
    default: 'None'
  - name: compression_type
    description: 'If the transform data is compressed, the specification of the compression type.'
    default: 'None'
  - name: output_location
    description: 'The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.'
  - name: accept
    description: 'The MIME type used to specify the output data.'
    default: ''
  - name: assemble_with
    description: 'Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.'
    default: ''
  - name: output_encryption_key
    description: 'The AWS Key Management Service ID of the key used to encrypt the output data.'
    default: ''
  - name: input_filter
    description: 'A JSONPath expression used to select a portion of the input data to pass to the algorithm.'
    default: ''
  - name: output_filter
    description: 'A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.'
    default: ''
  - name: join_source
    description: 'Specifies the source of the data to join with the transformed data.'
    default: 'None'
  - name: instance_type
    description: 'The ML compute instance type.'
    default: 'ml.m4.xlarge'
  - name: instance_count
    description: 'The number of ML compute instances to use in each transform job.'
    default: '1'
  - name: resource_encryption_key
    description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
    default: ''
  - name: tags
    description: 'Key-value pairs to categorize AWS resources.'
    default: '{}'
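  # Note (assumption, inferred from the CLI-style args below): dict-valued
  # inputs such as environment and tags are expected as JSON-encoded strings,
  # e.g. environment='{"MY_LOG_LEVEL": "debug"}', not as native YAML maps.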
outputs:
  - {name: output_location, description: 'S3 URI of the transform job results.'}
implementation:
  container:
    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
    command: ['python']
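    # Each {inputValue: ...} placeholder below is substituted by KFP with the
    # literal value of the named input when the pipeline runs; the
    # {outputPath: output_location} placeholder is replaced with a local file
    # path that batch_transform.py writes the resulting S3 URI to, which KFP
    # then surfaces as the component's output_location output.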
    args: [
      batch_transform.py,
      --region, {inputValue: region},
      --job_name, {inputValue: job_name},
      --model_name, {inputValue: model_name},
      --max_concurrent, {inputValue: max_concurrent},
      --max_payload, {inputValue: max_payload},
      --batch_strategy, {inputValue: batch_strategy},
      --environment, {inputValue: environment},
      --input_location, {inputValue: input_location},
      --data_type, {inputValue: data_type},
      --content_type, {inputValue: content_type},
      --split_type, {inputValue: split_type},
      --compression_type, {inputValue: compression_type},
      --output_location, {inputValue: output_location},
      --accept, {inputValue: accept},
      --assemble_with, {inputValue: assemble_with},
      --output_encryption_key, {inputValue: output_encryption_key},
      --input_filter, {inputValue: input_filter},
      --output_filter, {inputValue: output_filter},
      --join_source, {inputValue: join_source},
      --instance_type, {inputValue: instance_type},
      --instance_count, {inputValue: instance_count},
      --resource_encryption_key, {inputValue: resource_encryption_key},
      --tags, {inputValue: tags},
      --output_location_file, {outputPath: output_location}
    ]
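
# Usage sketch (not part of the spec): a minimal KFP v1 pipeline that loads
# this file with kfp.components.load_component_from_file. The pipeline name,
# bucket paths, and model name are illustrative assumptions, not values from
# this component.
#
#   import kfp
#   from kfp import components
#
#   sagemaker_batch_transform_op = components.load_component_from_file(
#       'component.yaml')
#
#   @kfp.dsl.pipeline(name='batch-transform-example')
#   def pipeline():
#       sagemaker_batch_transform_op(
#           region='us-west-2',
#           model_name='my-trained-model',          # hypothetical model
#           input_location='s3://my-bucket/input',  # hypothetical bucket
#           output_location='s3://my-bucket/output')
#
#   kfp.compiler.Compiler().compile(pipeline, 'pipeline.yaml')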