# pipelines/components/aws/sagemaker/ground_truth/component.yaml

# Component identity: display name and one-line summary.
name: 'SageMaker - Ground Truth'
description: 'Ground Truth Jobs in SageMaker'
# Component inputs. Each input is forwarded to the container as the value of
# a command-line flag with the same name (see implementation.container.args).
# All defaults are kept as quoted strings, exactly as originally declared.
inputs:
# --- Connection and common AWS settings ---
- name: region
  type: String
  description: The region for the SageMaker resource.
- name: endpoint_url
  type: String
  description: The URL to use when communicating with the SageMaker service.
  default: ''
- name: assume_role
  type: String
  description: The ARN of an IAM role to assume when connecting to SageMaker.
  default: ''
- name: tags
  type: JsonObject
  description: An array of key-value pairs, to categorize AWS resources.
  default: '{}'
- name: role
  type: String
  description: >-
    The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform
    tasks on your behalf.
# --- Labeling job definition ---
- name: job_name
  type: String
  description: The name of the labeling job.
  default: ''
- name: label_attribute_name
  type: String
  description: >-
    The attribute name to use for the label in the output manifest file.
    Default is the job name.
  default: ''
- name: manifest_location
  type: String
  description: >-
    The Amazon S3 location of the manifest file that describes the input
    data objects.
- name: output_location
  type: String
  description: The Amazon S3 location to write output data.
- name: output_encryption_key
  type: String
  description: >-
    The AWS KMS key that Amazon SageMaker uses to encrypt the model
    artifacts.
  default: ''
- name: task_type
  type: String
  description: >-
    Built in image classification, bounding box, text classification, or
    semantic segmentation, or custom. If custom, please provide pre- and
    post-labeling task lambda functions.
- name: worker_type
  type: String
  description: >-
    The workteam for data labeling, either public, private, or vendor.
- name: workteam_arn
  type: String
  description: The ARN of the work team assigned to complete the tasks.
  default: ''
# --- Content declarations ---
- name: no_adult_content
  type: Bool
  description: If true, your data is free of adult content.
  default: 'False'
- name: no_ppi
  type: Bool
  description: >-
    If true, your data is free of personally identifiable information.
  default: 'False'
# --- Label categories, stopping conditions and auto-labeling ---
- name: label_category_config
  type: String
  description: >-
    The S3 URL of the JSON structured file that defines the categories used
    to label the data objects.
  default: ''
- name: max_human_labeled_objects
  type: Integer
  description: >-
    The maximum number of objects that can be labeled by human workers.
  default: '0'
- name: max_percent_objects
  type: Integer
  description: >-
    The maximum percentage of input data objects that should be labeled.
  default: '0'
- name: enable_auto_labeling
  type: Bool
  description: >-
    Enables auto-labeling, only for bounding box, text classification, and
    image classification.
  default: 'False'
- name: initial_model_arn
  type: String
  description: >-
    The ARN of the final model used for a previous auto-labeling job.
  default: ''
- name: resource_encryption_key
  type: String
  description: >-
    The AWS KMS key that Amazon SageMaker uses to encrypt data on the
    storage volume attached to the ML compute instance(s).
  default: ''
# --- Human task configuration ---
- name: ui_template
  type: String
  description: The Amazon S3 bucket location of the UI template.
- name: pre_human_task_function
  type: String
  description: >-
    The ARN of a Lambda function that is run before a data object is sent
    to a human worker.
  default: ''
- name: post_human_task_function
  type: String
  description: >-
    The ARN of a Lambda function that implements the logic for annotation
    consolidation.
  default: ''
- name: task_keywords
  type: String
  description: >-
    Keywords used to describe the task so that workers on Amazon Mechanical
    Turk can discover the task.
  default: ''
- name: title
  type: String
  description: A title for the task for your human workers.
- name: description
  type: String
  description: A description of the task for your human workers.
- name: num_workers_per_object
  type: Integer
  description: The number of human workers that will label an object.
- name: time_limit
  type: Integer
  description: >-
    The amount of time that a worker has to complete a task, in seconds.
# NOTE: "availibility" is misspelled, but the name is part of the component's
# public interface and matches the --task_availibility flag passed to the
# container, so it must not be renamed here.
- name: task_availibility
  type: Integer
  description: >-
    The length of time that a task remains available for labeling by human
    workers.
  default: '0'
- name: max_concurrent_tasks
  type: Integer
  description: >-
    The maximum number of data objects that can be labeled by human workers
    at the same time.
  default: '0'
# Declared as String (not a number) so the '0.000' formatting is preserved.
- name: workforce_task_price
  type: String
  description: >-
    The price that you pay for each task performed by a public worker in
    USD. Specify to the tenth fractions of a cent. Format as '0.000'.
  default: '0.000'
# Component outputs. Each one is written by the container to the file path
# supplied through the matching *_output_path argument.
outputs:
- name: output_manifest_location
  description: >-
    The Amazon S3 bucket location of the manifest file for labeled data.
- name: active_learning_model_arn
  description: >-
    The ARN for the most recent Amazon SageMaker model trained as part of
    automated data labeling.
# Container invocation: runs python3 with the Ground Truth component script,
# followed by one "--flag value" pair per declared input and one
# "--<output>_output_path path" pair per declared output.
implementation:
  container:
    image: amazon/aws-sagemaker-kfp-components:1.1.1
    command:
    - python3
    args:
    # Script handed to python3 as its first argument.
    - ground_truth/src/sagemaker_ground_truth_component.py
    # Connection and common AWS settings.
    - --region
    - inputValue: region
    - --endpoint_url
    - inputValue: endpoint_url
    - --assume_role
    - inputValue: assume_role
    - --tags
    - inputValue: tags
    - --role
    - inputValue: role
    # Labeling job definition.
    - --job_name
    - inputValue: job_name
    - --label_attribute_name
    - inputValue: label_attribute_name
    - --manifest_location
    - inputValue: manifest_location
    - --output_location
    - inputValue: output_location
    - --output_encryption_key
    - inputValue: output_encryption_key
    - --task_type
    - inputValue: task_type
    - --worker_type
    - inputValue: worker_type
    - --workteam_arn
    - inputValue: workteam_arn
    # Content declarations.
    - --no_adult_content
    - inputValue: no_adult_content
    - --no_ppi
    - inputValue: no_ppi
    # Label categories, stopping conditions and auto-labeling.
    - --label_category_config
    - inputValue: label_category_config
    - --max_human_labeled_objects
    - inputValue: max_human_labeled_objects
    - --max_percent_objects
    - inputValue: max_percent_objects
    - --enable_auto_labeling
    - inputValue: enable_auto_labeling
    - --initial_model_arn
    - inputValue: initial_model_arn
    - --resource_encryption_key
    - inputValue: resource_encryption_key
    # Human task configuration.
    - --ui_template
    - inputValue: ui_template
    - --pre_human_task_function
    - inputValue: pre_human_task_function
    - --post_human_task_function
    - inputValue: post_human_task_function
    - --task_keywords
    - inputValue: task_keywords
    - --title
    - inputValue: title
    - --description
    - inputValue: description
    - --num_workers_per_object
    - inputValue: num_workers_per_object
    - --time_limit
    - inputValue: time_limit
    # Flag spelling ("availibility") matches the declared input name.
    - --task_availibility
    - inputValue: task_availibility
    - --max_concurrent_tasks
    - inputValue: max_concurrent_tasks
    - --workforce_task_price
    - inputValue: workforce_task_price
    # Output file paths, one per declared output.
    - --output_manifest_location_output_path
    - outputPath: output_manifest_location
    - --active_learning_model_arn_output_path
    - outputPath: active_learning_model_arn