154 lines
7.0 KiB
YAML
154 lines
7.0 KiB
YAML
name: SageMaker - Ground Truth
|
|
description: Ground Truth Jobs in SageMaker
|
|
inputs:
|
|
- {name: region, type: String, description: The region for the SageMaker resource.}
|
|
- {name: endpoint_url, type: String, description: The URL to use when communicating
|
|
with the SageMaker service., default: ''}
|
|
- {name: assume_role, type: String, description: The ARN of an IAM role to assume
|
|
when connecting to SageMaker., default: ''}
|
|
- {name: tags, type: JsonObject, description: 'An array of key-value pairs, to categorize
|
|
AWS resources.', default: '{}'}
|
|
- {name: role, type: String, description: The Amazon Resource Name (ARN) that Amazon
|
|
SageMaker assumes to perform tasks on your behalf.}
|
|
- {name: job_name, type: String, description: The name of the labeling job., default: ''}
|
|
- {name: label_attribute_name, type: String, description: The attribute name to use
|
|
for the label in the output manifest file. Default is the job name., default: ''}
|
|
- {name: manifest_location, type: String, description: The Amazon S3 location of the
|
|
manifest file that describes the input data objects.}
|
|
- {name: output_location, type: String, description: The Amazon S3 location to write
|
|
output data.}
|
|
- {name: output_encryption_key, type: String, description: The AWS KMS key that Amazon
|
|
SageMaker uses to encrypt the model artifacts., default: ''}
|
|
- {name: task_type, type: String, description: 'Built in image classification, bounding
|
|
box, text classification, or semantic segmentation, or custom. If custom, please
|
|
provide pre- and post-labeling task lambda functions.'}
|
|
- {name: worker_type, type: String, description: 'The workteam for data labeling,
|
|
either public, private, or vendor.'}
|
|
- {name: workteam_arn, type: String, description: The ARN of the work team assigned
|
|
to complete the tasks., default: ''}
|
|
- name: no_adult_content
|
|
type: Bool
|
|
description: If true, your data is free of adult content.
|
|
default: "False"
|
|
- name: no_ppi
|
|
type: Bool
|
|
description: If true, your data is free of personally identifiable information.
|
|
default: "False"
|
|
- {name: label_category_config, type: String, description: The S3 URL of the JSON
|
|
structured file that defines the categories used to label the data objects., default: ''}
|
|
- {name: max_human_labeled_objects, type: Integer, description: The maximum number
|
|
of objects that can be labeled by human workers., default: '0'}
|
|
- {name: max_percent_objects, type: Integer, description: The maximum percentage
|
|
of input data objects that should be labeled., default: '0'}
|
|
- name: enable_auto_labeling
|
|
type: Bool
|
|
description: Enables auto-labeling, only for bounding box, text classification,
|
|
and image classification.
|
|
default: "False"
|
|
- {name: initial_model_arn, type: String, description: The ARN of the final model
|
|
used for a previous auto-labeling job., default: ''}
|
|
- {name: resource_encryption_key, type: String, description: The AWS KMS key that
|
|
Amazon SageMaker uses to encrypt data on the storage volume attached to the ML
|
|
compute instance(s)., default: ''}
|
|
- {name: ui_template, type: String, description: The Amazon S3 bucket location of
|
|
the UI template.}
|
|
- {name: pre_human_task_function, type: String, description: The ARN of a Lambda function
|
|
that is run before a data object is sent to a human worker., default: ''}
|
|
- {name: post_human_task_function, type: String, description: The ARN of a Lambda
|
|
function that implements the logic for annotation consolidation., default: ''}
|
|
- {name: task_keywords, type: String, description: Keywords used to describe the task
|
|
so that workers on Amazon Mechanical Turk can discover the task., default: ''}
|
|
- {name: title, type: String, description: A title for the task for your human workers.}
|
|
- {name: description, type: String, description: A description of the task for your
|
|
human workers.}
|
|
- {name: num_workers_per_object, type: Integer, description: The number of human workers
|
|
that will label an object.}
|
|
- {name: time_limit, type: Integer, description: The amount of time that a worker
|
|
has to complete a task in seconds.}
|
|
- {name: task_availibility, type: Integer, description: The length of time that a
|
|
task remains available for labeling by human workers., default: '0'}
|
|
- {name: max_concurrent_tasks, type: Integer, description: The maximum number of data
|
|
objects that can be labeled by human workers at the same time., default: '0'}
|
|
- {name: workforce_task_price, type: String, description: The price that you pay for
|
|
each task performed by a public worker in USD. Specify to the tenth fractions
|
|
of a cent. Format as '0.000'., default: '0.000'}
|
|
outputs:
|
|
- {name: output_manifest_location, description: The Amazon S3 bucket location of the
|
|
manifest file for labeled data.}
|
|
- {name: active_learning_model_arn, description: The ARN for the most recent Amazon
|
|
SageMaker model trained as part of automated data labeling.}
|
|
implementation:
|
|
container:
|
|
image: amazon/aws-sagemaker-kfp-components:1.1.1
|
|
command: [python3]
|
|
args:
|
|
- ground_truth/src/sagemaker_ground_truth_component.py
|
|
- --region
|
|
- {inputValue: region}
|
|
- --endpoint_url
|
|
- {inputValue: endpoint_url}
|
|
- --assume_role
|
|
- {inputValue: assume_role}
|
|
- --tags
|
|
- {inputValue: tags}
|
|
- --role
|
|
- {inputValue: role}
|
|
- --job_name
|
|
- {inputValue: job_name}
|
|
- --label_attribute_name
|
|
- {inputValue: label_attribute_name}
|
|
- --manifest_location
|
|
- {inputValue: manifest_location}
|
|
- --output_location
|
|
- {inputValue: output_location}
|
|
- --output_encryption_key
|
|
- {inputValue: output_encryption_key}
|
|
- --task_type
|
|
- {inputValue: task_type}
|
|
- --worker_type
|
|
- {inputValue: worker_type}
|
|
- --workteam_arn
|
|
- {inputValue: workteam_arn}
|
|
- --no_adult_content
|
|
- {inputValue: no_adult_content}
|
|
- --no_ppi
|
|
- {inputValue: no_ppi}
|
|
- --label_category_config
|
|
- {inputValue: label_category_config}
|
|
- --max_human_labeled_objects
|
|
- {inputValue: max_human_labeled_objects}
|
|
- --max_percent_objects
|
|
- {inputValue: max_percent_objects}
|
|
- --enable_auto_labeling
|
|
- {inputValue: enable_auto_labeling}
|
|
- --initial_model_arn
|
|
- {inputValue: initial_model_arn}
|
|
- --resource_encryption_key
|
|
- {inputValue: resource_encryption_key}
|
|
- --ui_template
|
|
- {inputValue: ui_template}
|
|
- --pre_human_task_function
|
|
- {inputValue: pre_human_task_function}
|
|
- --post_human_task_function
|
|
- {inputValue: post_human_task_function}
|
|
- --task_keywords
|
|
- {inputValue: task_keywords}
|
|
- --title
|
|
- {inputValue: title}
|
|
- --description
|
|
- {inputValue: description}
|
|
- --num_workers_per_object
|
|
- {inputValue: num_workers_per_object}
|
|
- --time_limit
|
|
- {inputValue: time_limit}
|
|
- --task_availibility
|
|
- {inputValue: task_availibility}
|
|
- --max_concurrent_tasks
|
|
- {inputValue: max_concurrent_tasks}
|
|
- --workforce_task_price
|
|
- {inputValue: workforce_task_price}
|
|
- --output_manifest_location_output_path
|
|
- {outputPath: output_manifest_location}
|
|
- --active_learning_model_arn_output_path
|
|
- {outputPath: active_learning_model_arn}
|