# Component specification for running a SageMaker batch transform job.
#
# Layout:
#   inputs         - parameters accepted by the component
#   outputs        - values the component produces
#   implementation - container image, entry point and command-line arguments
#
# NOTE(review): the inputValue/outputPath placeholders and the image name
# suggest this follows the Kubeflow Pipelines component format - confirm
# against the consuming tool before changing key names.
name: SageMaker - Batch Transformation
description: Batch Transformation Jobs in SageMaker

# Inputs without a `default` key must be supplied by the caller.
# Defaults are kept as quoted strings, including those of Integer inputs.
inputs:
  # --- Connection and job identity ---
  - name: region
    type: String
    description: The region for the SageMaker resource.
  - name: endpoint_url
    type: String
    description: >-
      The URL to use when communicating with the SageMaker service.
    default: ''
  - name: assume_role
    type: String
    description: >-
      The ARN of an IAM role to assume when connecting to SageMaker.
    default: ''
  - name: tags
    type: JsonObject
    description: >-
      An array of key-value pairs, to categorize AWS resources.
    default: '{}'
  - name: job_name
    type: String
    description: The name of the transform job.
    default: ''
  - name: model_name
    type: String
    description: >-
      The name of the model that you want to use for the transform job.

  # --- Request behaviour ---
  - name: max_concurrent
    type: Integer
    description: >-
      The maximum number of parallel requests that can be sent to each
      instance in a transform job.
    default: '0'
  - name: max_payload
    type: Integer
    description: The maximum allowed size of the payload, in MB.
    default: '6'
  - name: batch_strategy
    type: String
    description: >-
      The number of records to include in a mini-batch for an HTTP
      inference request.
    default: ''
  - name: environment
    type: JsonObject
    description: >-
      The dictionary of the environment variables to set in the Docker
      container. Up to 16 key-value entries in the map.
    default: '{}'

  # --- Transform input ---
  - name: input_location
    type: String
    description: >-
      The S3 location of the data source that is associated with a channel.
  - name: data_type
    type: String
    description: >-
      Data type of the input. Can be ManifestFile, S3Prefix, or
      AugmentedManifestFile.
    default: S3Prefix
  - name: content_type
    type: String
    description: >-
      The multipurpose internet mail extension (MIME) type of the data.
    default: ''
  # 'None' defaults are quoted so they are unambiguously the literal
  # string "None" rather than a null value.
  - name: split_type
    type: String
    description: >-
      The method to use to split the transform job data files into smaller
      batches.
    default: 'None'
  - name: compression_type
    type: String
    description: >-
      If the transform data is compressed, the specification of the
      compression type.
    default: 'None'

  # --- Transform output ---
  - name: output_location
    type: String
    description: >-
      The Amazon S3 path where you want Amazon SageMaker to store the
      results of the transform job.
  - name: accept
    type: String
    description: The MIME type used to specify the output data.
    default: ''
  - name: assemble_with
    type: String
    description: >-
      Defines how to assemble the results of the transform job as a single
      S3 object. Either None or Line.
    default: ''
  - name: output_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt the model
      artifacts.
    default: ''

  # --- Data processing (filtering and joining) ---
  - name: input_filter
    type: String
    description: >-
      A JSONPath expression used to select a portion of the input data to
      pass to the algorithm.
    default: ''
  - name: output_filter
    type: String
    description: >-
      A JSONPath expression used to select a portion of the joined dataset
      to save in the output file for a batch transform job.
    default: ''
  - name: join_source
    type: String
    description: >-
      Specifies the source of the data to join with the transformed data.
    default: 'None'

  # --- Compute resources ---
  - name: instance_type
    type: String
    description: The ML compute instance type for the transform job.
    default: ml.m4.xlarge
  - name: instance_count
    type: Integer
    description: >-
      The number of ML compute instances to use in the transform job.
    default: '1'
  - name: resource_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt data on the
      storage volume attached to the ML compute instance(s).
    default: ''

outputs:
  - name: output_location
    description: S3 URI of the transform job results.

implementation:
  container:
    image: public.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components:1.1.2
    command: [python3]
    # The script path comes first, then one `--flag` / placeholder pair per
    # input, in the same order as the `inputs` list above. The final pair
    # passes the path for the `output_location` output.
    args:
      - batch_transform/src/sagemaker_transform_component.py
      - --region
      - {inputValue: region}
      - --endpoint_url
      - {inputValue: endpoint_url}
      - --assume_role
      - {inputValue: assume_role}
      - --tags
      - {inputValue: tags}
      - --job_name
      - {inputValue: job_name}
      - --model_name
      - {inputValue: model_name}
      - --max_concurrent
      - {inputValue: max_concurrent}
      - --max_payload
      - {inputValue: max_payload}
      - --batch_strategy
      - {inputValue: batch_strategy}
      - --environment
      - {inputValue: environment}
      - --input_location
      - {inputValue: input_location}
      - --data_type
      - {inputValue: data_type}
      - --content_type
      - {inputValue: content_type}
      - --split_type
      - {inputValue: split_type}
      - --compression_type
      - {inputValue: compression_type}
      - --output_location
      - {inputValue: output_location}
      - --accept
      - {inputValue: accept}
      - --assemble_with
      - {inputValue: assemble_with}
      - --output_encryption_key
      - {inputValue: output_encryption_key}
      - --input_filter
      - {inputValue: input_filter}
      - --output_filter
      - {inputValue: output_filter}
      - --join_source
      - {inputValue: join_source}
      - --instance_type
      - {inputValue: instance_type}
      - --instance_count
      - {inputValue: instance_count}
      - --resource_encryption_key
      - {inputValue: resource_encryption_key}
      - --output_location_output_path
      - {outputPath: output_location}