# pipelines/components/aws/sagemaker/ground_truth/component.yaml

# 161 lines
# 6.8 KiB
# YAML

# Component metadata: display name and a short free-text description.
name: 'SageMaker - Ground Truth'
# Literal block scalar; the body must be indented deeper than the key.
description: |
  Ground Truth Jobs in SageMaker
# Component inputs. Every input below is forwarded to ground_truth.py as the
# flag of the same name (--<name>) by implementation.container.args.
# Inputs that declare a `default` may be omitted by the caller; the rest
# (no `default` key) have no fallback value here.
inputs:
  # --- AWS session / identity ---
  - name: region
    description: 'The region where the cluster launches.'
    type: String
  - name: role
    description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
    type: String

  # --- Labeling job identity and data locations ---
  - name: job_name
    description: 'The name of the labeling job.'
    type: String
  - name: label_attribute_name
    description: 'The attribute name to use for the label in the output manifest file. Default is the job name.'
    default: ''
    type: String
  - name: manifest_location
    description: 'The Amazon S3 location of the manifest file that describes the input data objects.'
    type: String
  - name: output_location
    description: 'The Amazon S3 location to write output data.'
    type: String
  - name: output_encryption_key
    description: 'The AWS Key Management Service ID of the key used to encrypt the output data.'
    default: ''
    type: String

  # --- Task and workforce selection ---
  - name: task_type
    description: 'Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.'
    type: String
  - name: worker_type
    description: 'The workteam for data labeling, either public, private, or vendor.'
    type: String
  - name: workteam_arn
    description: 'The ARN of the work team assigned to complete the tasks.'
    default: ''
    type: String

  # --- Content declarations (Bool defaults are given as quoted strings) ---
  - name: no_adult_content
    description: 'If true, your data is free of adult content.'
    default: 'False'
    type: Bool
  - name: no_ppi
    description: 'If true, your data is free of personally identifiable information.'
    default: 'False'
    type: Bool

  # --- Label categories, stopping conditions and auto-labeling ---
  - name: label_category_config
    description: 'The S3 URL of the JSON structured file that defines the categories used to label the data objects.'
    default: ''
    type: String
  - name: max_human_labeled_objects
    description: 'The maximum number of objects that can be labeled by human workers.'
    default: '0'
    type: Integer
  - name: max_percent_objects
    description: 'The maximum number of input data objects that should be labeled.'
    default: '0'
    type: Integer
  - name: enable_auto_labeling
    description: 'Enables auto-labeling, only for bounding box, text classification, and image classification.'
    default: 'False'
    type: Bool
  - name: initial_model_arn
    description: 'The ARN of the final model used for a previous auto-labeling job.'
    default: ''
    type: String
  - name: resource_encryption_key
    description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
    default: ''
    type: String

  # --- Human task configuration ---
  - name: ui_template
    description: 'The Amazon S3 bucket location of the UI template.'
    type: String
  - name: pre_human_task_function
    description: 'The ARN of a Lambda function that is run before a data object is sent to a human worker.'
    default: ''
    type: String
  - name: post_human_task_function
    description: 'The ARN of a Lambda function implements the logic for annotation consolidation.'
    default: ''
    type: String
  - name: task_keywords
    description: 'Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.'
    default: ''
    type: String
  - name: title
    description: 'A title for the task for your human workers.'
    type: String
  - name: description
    description: 'A description of the task for your human workers.'
    type: String
  - name: num_workers_per_object
    description: 'The number of human workers that will label an object.'
    type: Integer
  - name: time_limit
    description: 'The amount of time that a worker has to complete a task in seconds'
    type: Integer
  # The spelling "availibility" is part of the public input name and of the
  # --task_availibility flag in implementation.args, so it is kept as-is.
  # Default is '0' (not '') so an omitted value is still a parseable Integer,
  # matching max_human_labeled_objects / max_percent_objects above.
  # NOTE(review): assumes ground_truth.py treats 0 as "not set" - confirm.
  - name: task_availibility
    description: 'The length of time that a task remains available for labeling by human workers.'
    default: '0'
    type: Integer
  # Default is '0' (not '') for the same reason as task_availibility.
  # NOTE(review): assumes ground_truth.py treats 0 as "not set" - confirm.
  - name: max_concurrent_tasks
    description: 'The maximum number of data objects that can be labeled by human workers at the same time.'
    default: '0'
    type: Integer
  # Quoted so the three-decimal format is preserved as a string.
  - name: workforce_task_price
    description: 'The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".'
    default: '0.000'
    type: Float

  # --- Miscellaneous ---
  - name: endpoint_url
    description: 'The endpoint URL for the private link VPC endpoint.'
    default: ''
    type: String
  # JSON object serialized as a string; '{}' means no tags.
  - name: tags
    description: 'Key-value pairs to categorize AWS resources.'
    default: '{}'
    type: JsonObject
# Values produced by the component. Each name here has a matching entry
# under implementation.container.fileOutputs.
outputs:
  - name: output_manifest_location
    description: 'The Amazon S3 bucket location of the manifest file for labeled data.'
  - name: active_learning_model_arn
    description: 'The ARN for the most recent Amazon SageMaker model trained as part of automated data labeling.'
# Container implementation: runs `python3 ground_truth.py` in the published
# component image, passing every declared input as a --<name> <value> pair.
implementation:
  container:
    image: amazon/aws-sagemaker-kfp-components:0.4.0
    command:
      - python3
    # Flag/value pairs, in the same order as the original flow sequence.
    # Each `inputValue` entry is a placeholder naming one of the inputs.
    args:
      - ground_truth.py
      - --region
      - inputValue: region
      - --endpoint_url
      - inputValue: endpoint_url
      - --role
      - inputValue: role
      - --job_name
      - inputValue: job_name
      - --label_attribute_name
      - inputValue: label_attribute_name
      - --manifest_location
      - inputValue: manifest_location
      - --output_location
      - inputValue: output_location
      - --output_encryption_key
      - inputValue: output_encryption_key
      - --task_type
      - inputValue: task_type
      - --worker_type
      - inputValue: worker_type
      - --workteam_arn
      - inputValue: workteam_arn
      - --no_adult_content
      - inputValue: no_adult_content
      - --no_ppi
      - inputValue: no_ppi
      - --label_category_config
      - inputValue: label_category_config
      - --max_human_labeled_objects
      - inputValue: max_human_labeled_objects
      - --max_percent_objects
      - inputValue: max_percent_objects
      - --enable_auto_labeling
      - inputValue: enable_auto_labeling
      - --initial_model_arn
      - inputValue: initial_model_arn
      - --resource_encryption_key
      - inputValue: resource_encryption_key
      - --ui_template
      - inputValue: ui_template
      - --pre_human_task_function
      - inputValue: pre_human_task_function
      - --post_human_task_function
      - inputValue: post_human_task_function
      - --task_keywords
      - inputValue: task_keywords
      - --title
      - inputValue: title
      - --description
      - inputValue: description
      - --num_workers_per_object
      - inputValue: num_workers_per_object
      - --time_limit
      - inputValue: time_limit
      - --task_availibility
      - inputValue: task_availibility
      - --max_concurrent_tasks
      - inputValue: max_concurrent_tasks
      - --workforce_task_price
      - inputValue: workforce_task_price
      - --tags
      - inputValue: tags
    # Container-local file paths associated with each declared output.
    fileOutputs:
      output_manifest_location: /tmp/output_manifest_location.txt
      active_learning_model_arn: /tmp/active_learning_model_arn.txt