[AWS SageMaker] Specify component input types (#3683)
* Replace all string types with Python types
* Update HPO yaml
* Update Batch YAML
* Update Deploy YAML
* Update GroundTruth YAML
* Update Model YAML
* Update Train YAML
* Update WorkTeam YAML
* Updated samples to remove strings
* Update to temporary image
* Remove unnecessary imports
* Update image to newer image
* Update components to python3
* Update bool parser type
* Remove empty ContentType in samples
* Update to temporary image
* Update to version 0.3.1
* Update deploy to login
* Update deploy load config path
* Fix export environment variable in deploy
* Fix env name
* Update deploy reflow env paths
* Add debug config line
* Use username and password directly
* Updated to 0.3.1
* Update field types to JsonObject and JsonArray
This commit is contained in:
parent
b9aa106bb5
commit
bd8c1ddd38
|
@ -1,4 +1,4 @@
|
|||
** Amazon SageMaker Components for Kubeflow Pipelines; version 0.3.0 --
|
||||
** Amazon SageMaker Components for Kubeflow Pipelines; version 0.3.1 --
|
||||
https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker
|
||||
Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
** boto3; version 1.12.33 -- https://github.com/boto/boto3/
|
||||
|
|
|
@ -4,78 +4,102 @@ description: |
|
|||
inputs:
|
||||
- name: region
|
||||
description: 'The region where the cluster launches.'
|
||||
type: String
|
||||
- name: job_name
|
||||
description: 'The name of the batch transform job.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: model_name
|
||||
description: 'The name of the model that you want to use for the transform job.'
|
||||
type: String
|
||||
- name: max_concurrent
|
||||
description: 'The maximum number of parallel requests that can be sent to each instance in a transform job.'
|
||||
default: '0'
|
||||
type: Integer
|
||||
- name: max_payload
|
||||
description: 'The maximum allowed size of the payload, in MB.'
|
||||
default: '6'
|
||||
type: Integer
|
||||
- name: batch_strategy
|
||||
description: 'The number of records to include in a mini-batch for an HTTP inference request.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: environment
|
||||
description: 'The environment variables to set in the Docker container. Up to 16 key-value entries in the map.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: input_location
|
||||
description: 'The S3 location of the data source that is associated with a channel.'
|
||||
type: String
|
||||
- name: data_type
|
||||
description: 'Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.'
|
||||
default: 'S3Prefix'
|
||||
type: String
|
||||
- name: content_type
|
||||
description: 'The multipurpose internet mail extension (MIME) type of the data.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: split_type
|
||||
description: 'The method to use to split the transform job data files into smaller batches.'
|
||||
default: 'None'
|
||||
type: String
|
||||
- name: compression_type
|
||||
description: 'If the transform data is compressed, the specification of the compression type.'
|
||||
default: 'None'
|
||||
type: String
|
||||
- name: output_location
|
||||
description: 'The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.'
|
||||
type: String
|
||||
- name: accept
|
||||
description: 'The MIME type used to specify the output data.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: assemble_with
|
||||
description: 'Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: output_encryption_key
|
||||
description: 'The AWS Key Management Service ID of the key used to encrypt the output data.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: input_filter
|
||||
description: 'A JSONPath expression used to select a portion of the input data to pass to the algorithm.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: output_filter
|
||||
description: 'A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: join_source
|
||||
description: 'Specifies the source of the data to join with the transformed data.'
|
||||
default: 'None'
|
||||
type: String
|
||||
- name: instance_type
|
||||
description: 'The ML compute instance type.'
|
||||
default: 'ml.m4.xlarge'
|
||||
type: String
|
||||
- name: instance_count
|
||||
description: 'The number of ML compute instances to use in each training job.'
|
||||
default: '1'
|
||||
type: Integer
|
||||
- name: resource_encryption_key
|
||||
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
|
||||
default: ''
|
||||
type: String
|
||||
- name: endpoint_url
|
||||
description: 'The endpoint URL for the private link VPC endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: tags
|
||||
description: 'Key-value pairs to categorize AWS resources.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
outputs:
|
||||
- {name: output_location, description: 'S3 URI of the transform job results.'}
|
||||
implementation:
|
||||
container:
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.0
|
||||
command: ['python']
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.1
|
||||
command: ['python3']
|
||||
args: [
|
||||
batch_transform.py,
|
||||
--region, {inputValue: region},
|
||||
|
|
|
@ -26,31 +26,31 @@ def create_parser():
|
|||
parser = argparse.ArgumentParser(description='SageMaker Batch Transformation Job')
|
||||
_utils.add_default_client_arguments(parser)
|
||||
|
||||
parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the transform job.', default='')
|
||||
parser.add_argument('--model_name', type=str.strip, required=True, help='The name of the model that you want to use for the transform job.')
|
||||
parser.add_argument('--max_concurrent', type=_utils.str_to_int, required=False, help='The maximum number of parallel requests that can be sent to each instance in a transform job.', default='0')
|
||||
parser.add_argument('--max_payload', type=_utils.str_to_int, required=False, help='The maximum allowed size of the payload, in MB.', default='6')
|
||||
parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''], type=str.strip, required=False, help='The number of records to include in a mini-batch for an HTTP inference request.', default='')
|
||||
parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default='{}')
|
||||
parser.add_argument('--input_location', type=str.strip, required=True, help='The S3 location of the data source that is associated with a channel.')
|
||||
parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''], type=str.strip, required=False, help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.', default='S3Prefix')
|
||||
parser.add_argument('--content_type', type=str.strip, required=False, help='The multipurpose internet mail extension (MIME) type of the data.', default='')
|
||||
parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''], type=str.strip, required=False, help='The method to use to split the transform job data files into smaller batches.', default='None')
|
||||
parser.add_argument('--compression_type', choices=['None', 'Gzip', ''], type=str.strip, required=False, help='If the transform data is compressed, the specification of the compression type.', default='None')
|
||||
parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
|
||||
parser.add_argument('--accept', type=str.strip, required=False, help='The MIME type used to specify the output data.')
|
||||
parser.add_argument('--assemble_with', choices=['None', 'Line', ''], type=str.strip, required=False, help='Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.')
|
||||
parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--input_filter', type=str.strip, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='')
|
||||
parser.add_argument('--output_filter', type=str.strip, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='')
|
||||
parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str.strip, required=False, help='Specifies the source of the data to join with the transformed data.', default='None')
|
||||
parser.add_argument('--job_name', type=str, required=False, help='The name of the transform job.', default='')
|
||||
parser.add_argument('--model_name', type=str, required=True, help='The name of the model that you want to use for the transform job.')
|
||||
parser.add_argument('--max_concurrent', type=int, required=False, help='The maximum number of parallel requests that can be sent to each instance in a transform job.', default='0')
|
||||
parser.add_argument('--max_payload', type=int, required=False, help='The maximum allowed size of the payload, in MB.', default='6')
|
||||
parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''], type=str, required=False, help='The number of records to include in a mini-batch for an HTTP inference request.', default='')
|
||||
parser.add_argument('--environment', type=_utils.yaml_or_json_str, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default={})
|
||||
parser.add_argument('--input_location', type=str, required=True, help='The S3 location of the data source that is associated with a channel.')
|
||||
parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''], type=str, required=False, help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.', default='S3Prefix')
|
||||
parser.add_argument('--content_type', type=str, required=False, help='The multipurpose internet mail extension (MIME) type of the data.', default='')
|
||||
parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''], type=str, required=False, help='The method to use to split the transform job data files into smaller batches.', default='None')
|
||||
parser.add_argument('--compression_type', choices=['None', 'Gzip', ''], type=str, required=False, help='If the transform data is compressed, the specification of the compression type.', default='None')
|
||||
parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
|
||||
parser.add_argument('--accept', type=str, required=False, help='The MIME type used to specify the output data.')
|
||||
parser.add_argument('--assemble_with', choices=['None', 'Line', ''], type=str, required=False, help='Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.')
|
||||
parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--input_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='')
|
||||
parser.add_argument('--output_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='')
|
||||
parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str, required=False, help='Specifies the source of the data to join with the transformed data.', default='None')
|
||||
parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', type=_utils.str_to_int, required=False, help='The number of ML compute instances to use in the transform job.')
|
||||
parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
|
||||
parser.add_argument('--output_location_file', type=str.strip, required=True, help='File path where the program will write the Amazon S3 URI of the transform job results.')
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in the transform job.')
|
||||
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
parser.add_argument('--output_location_file', type=str, required=True, help='File path where the program will write the Amazon S3 URI of the transform job results.')
|
||||
|
||||
return parser
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
version: 0.2
|
||||
version: 0.2
|
||||
|
||||
phases:
|
||||
pre_build:
|
||||
commands:
|
||||
# Log in to Dockerhub
|
||||
- mkdir -p ~/.docker
|
||||
- echo $DOCKER_CONFIG > ~/.docker/config.json
|
||||
- docker login -u $DOCKER_CONFIG_USERNAME -p $DOCKER_CONFIG_PASSWORD
|
||||
|
||||
build:
|
||||
commands:
|
||||
|
|
|
@ -5,6 +5,7 @@ set -e
|
|||
REMOTE_REPOSITORY="amazon/aws-sagemaker-kfp-components"
|
||||
DRYRUN="true"
|
||||
FULL_VERSION_TAG=""
|
||||
DOCKER_CONFIG_PATH=${DOCKER_CONFIG_PATH:-"/root/.docker"}
|
||||
|
||||
while getopts ":d:v:" opt; do
|
||||
case ${opt} in
|
||||
|
@ -64,13 +65,13 @@ echo "Tagged image with ${MAJOR_VERSION_IMAGE}"
|
|||
|
||||
# Push to the remote repository
|
||||
if [ "${DRYRUN}" == "false" ]; then
|
||||
docker push "${FULL_VERSION_IMAGE}"
|
||||
docker --config "$DOCKER_CONFIG_PATH" push "${FULL_VERSION_IMAGE}"
|
||||
echo "Successfully pushed tag ${FULL_VERSION_IMAGE} to Docker Hub"
|
||||
|
||||
docker push "${MINOR_VERSION_IMAGE}"
|
||||
docker --config "$DOCKER_CONFIG_PATH" push "${MINOR_VERSION_IMAGE}"
|
||||
echo "Successfully pushed tag ${MINOR_VERSION_IMAGE} to Docker Hub"
|
||||
|
||||
docker push "${MAJOR_VERSION_IMAGE}"
|
||||
docker --config "$DOCKER_CONFIG_PATH" push "${MAJOR_VERSION_IMAGE}"
|
||||
echo "Successfully pushed tag ${MAJOR_VERSION_IMAGE} to Docker Hub"
|
||||
else
|
||||
echo "Dry run detected. Not pushing images."
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
import os
|
||||
import argparse
|
||||
from time import gmtime, strftime
|
||||
from distutils.util import strtobool
|
||||
import time
|
||||
import string
|
||||
import random
|
||||
|
@ -63,7 +64,7 @@ def nullable_string_argument(value):
|
|||
|
||||
|
||||
def add_default_client_arguments(parser):
|
||||
parser.add_argument('--region', type=str.strip, required=True, help='The region where the training job launches.')
|
||||
parser.add_argument('--region', type=str, required=True, help='The region where the training job launches.')
|
||||
parser.add_argument('--endpoint_url', type=nullable_string_argument, required=False, help='The URL to use when communicating with the Sagemaker service.')
|
||||
|
||||
|
||||
|
@ -71,7 +72,7 @@ def get_component_version():
|
|||
"""Get component version from the first line of License file"""
|
||||
component_version = 'NULL'
|
||||
|
||||
with open('/THIRD-PARTY-LICENSES.txt', 'r') as license_file:
|
||||
with open('THIRD-PARTY-LICENSES.txt', 'r') as license_file:
|
||||
version_match = re.search('Amazon SageMaker Components for Kubeflow Pipelines; version (([0-9]+[.])+[0-9]+)',
|
||||
license_file.readline())
|
||||
if version_match is not None:
|
||||
|
@ -858,35 +859,15 @@ def enable_spot_instance_support(training_job_config, args):
|
|||
def id_generator(size=4, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier of *size* characters drawn from *chars*."""
    picked = [random.choice(chars) for _ in range(size)]
    return "".join(picked)
|
||||
|
||||
def yaml_or_json_str(payload):
    """Parse *payload* as JSON, falling back to YAML; '' or None yield None.

    Used as an argparse ``type=`` converter for component inputs, so it is
    always called positionally with the raw CLI string.

    Fixes vs. original: the parameter no longer shadows the builtin ``str``,
    ``None`` is compared with ``is``, and the bare ``except:`` is narrowed so
    it cannot swallow KeyboardInterrupt/SystemExit.
    """
    # Empty component inputs arrive as '' — treat them like "not provided".
    if payload is None or payload == "":
        return None
    try:
        return json.loads(payload)
    except Exception:
        # Not valid JSON — fall back to YAML (JSON is a subset of YAML).
        return yaml.safe_load(payload)
|
||||
|
||||
def str_to_bool(s):
    """Convert the CLI strings 'true'/'false' (any case, padded) to bool.

    Raises argparse.ArgumentTypeError for anything else.
    """
    normalized = s.lower().strip()
    if normalized == 'true':
        return True
    if normalized == 'false':
        return False
    raise argparse.ArgumentTypeError('"True" or "False" expected.')
|
||||
|
||||
def str_to_int(s):
    """Parse *s* as an int; an empty/falsy value maps to 0."""
    return int(s) if s else 0
|
||||
|
||||
def str_to_float(s):
    """Parse *s* as a float; an empty/falsy value maps to 0.0."""
    return float(s) if s else 0.0
|
||||
|
||||
def str_to_json_dict(s):
    """Deserialize a JSON object string; '' maps to an empty dict."""
    return {} if s == '' else json.loads(s)
|
||||
|
||||
def str_to_json_list(s):
    """Deserialize a JSON array string; '' maps to an empty list."""
    return [] if s == '' else json.loads(s)
|
||||
def str_to_bool(value):
    """Convert a truthy/falsy CLI string to a real bool.

    Accepts the same vocabulary as ``distutils.util.strtobool`` ('y', 'yes',
    't', 'true', 'on', '1' / 'n', 'no', 'f', 'false', 'off', '0', any case),
    but implements it inline because distutils is deprecated (PEP 632) and
    removed in Python 3.12. Also avoids shadowing the builtin ``str``.

    Raises:
        ValueError: if *value* is not a recognized boolean string.
    """
    normalized = value.strip().lower()
    if normalized in ('y', 'yes', 't', 'true', 'on', '1'):
        return True
    if normalized in ('n', 'no', 'f', 'false', 'off', '0'):
        return False
    raise ValueError(f"invalid truth value {value!r}")
|
|
@ -4,83 +4,108 @@ description: |
|
|||
inputs:
|
||||
- name: region
|
||||
description: 'The region to deploy your model endpoints.'
|
||||
type: String
|
||||
- name: endpoint_config_name
|
||||
description: 'The name of the endpoint configuration.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: variant_name_1
|
||||
description: 'The name of the production variant.'
|
||||
default: 'variant-name-1'
|
||||
type: String
|
||||
- name: model_name_1
|
||||
description: 'The model name used for endpoint deployment.'
|
||||
type: String
|
||||
- name: initial_instance_count_1
|
||||
description: 'Number of instances to launch initially.'
|
||||
default: '1'
|
||||
type: Integer
|
||||
- name: instance_type_1
|
||||
description: 'The ML compute instance type.'
|
||||
default: 'ml.m4.xlarge'
|
||||
type: String
|
||||
- name: initial_variant_weight_1
|
||||
description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.'
|
||||
default: '1.0'
|
||||
type: Float
|
||||
- name: accelerator_type_1
|
||||
description: 'The size of the Elastic Inference (EI) instance to use for the production variant.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: variant_name_2
|
||||
description: 'The name of the production variant.'
|
||||
default: 'variant-name-2'
|
||||
type: String
|
||||
- name: model_name_2
|
||||
description: 'The model name used for endpoint deployment.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: initial_instance_count_2
|
||||
description: 'Number of instances to launch initially.'
|
||||
default: '1'
|
||||
type: Integer
|
||||
- name: instance_type_2
|
||||
description: 'The ML compute instance type.'
|
||||
default: 'ml.m4.xlarge'
|
||||
type: String
|
||||
- name: initial_variant_weight_2
|
||||
description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.'
|
||||
default: '1.0'
|
||||
type: Float
|
||||
- name: accelerator_type_2
|
||||
description: 'The size of the Elastic Inference (EI) instance to use for the production variant.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: variant_name_3
|
||||
description: 'The name of the production variant.'
|
||||
default: 'variant-name-3'
|
||||
type: String
|
||||
- name: model_name_3
|
||||
description: 'The model name used for endpoint deployment'
|
||||
default: ''
|
||||
type: String
|
||||
- name: initial_instance_count_3
|
||||
description: 'Number of instances to launch initially.'
|
||||
default: '1'
|
||||
type: Integer
|
||||
- name: instance_type_3
|
||||
description: 'The ML compute instance type.'
|
||||
default: 'ml.m4.xlarge'
|
||||
type: String
|
||||
- name: initial_variant_weight_3
|
||||
description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.'
|
||||
default: '1.0'
|
||||
type: Float
|
||||
- name: accelerator_type_3
|
||||
description: 'The size of the Elastic Inference (EI) instance to use for the production variant.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: resource_encryption_key
|
||||
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: endpoint_url
|
||||
description: 'The endpoint URL for the private link VPC endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: endpoint_config_tags
|
||||
description: 'Key-value pairs to categorize AWS resources.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: endpoint_name
|
||||
description: 'The name of the endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: endpoint_tags
|
||||
description: 'Key-value pairs to categorize AWS resources.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
outputs:
|
||||
- {name: endpoint_name, description: 'Endpoint name'}
|
||||
implementation:
|
||||
container:
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.0
|
||||
command: ['python']
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.1
|
||||
command: ['python3']
|
||||
args: [
|
||||
deploy.py,
|
||||
--region, {inputValue: region},
|
||||
|
|
|
@ -19,36 +19,36 @@ def create_parser():
|
|||
parser = argparse.ArgumentParser(description='SageMaker Training Job')
|
||||
_utils.add_default_client_arguments(parser)
|
||||
|
||||
parser.add_argument('--endpoint_config_name', type=str.strip, required=False, help='The name of the endpoint configuration.', default='')
|
||||
parser.add_argument('--variant_name_1', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-1')
|
||||
parser.add_argument('--model_name_1', type=str.strip, required=True, help='The model name used for endpoint deployment.')
|
||||
parser.add_argument('--initial_instance_count_1', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
parser.add_argument('--endpoint_config_name', type=str, required=False, help='The name of the endpoint configuration.', default='')
|
||||
parser.add_argument('--variant_name_1', type=str, required=False, help='The name of the production variant.', default='variant-name-1')
|
||||
parser.add_argument('--model_name_1', type=str, required=True, help='The model name used for endpoint deployment.')
|
||||
parser.add_argument('--initial_instance_count_1', type=int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
parser.add_argument('--instance_type_1', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_1', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--variant_name_2', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-2')
|
||||
parser.add_argument('--model_name_2', type=str.strip, required=False, help='The model name used for endpoint deployment.', default='')
|
||||
parser.add_argument('--initial_instance_count_2', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_1', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--variant_name_2', type=str, required=False, help='The name of the production variant.', default='variant-name-2')
|
||||
parser.add_argument('--model_name_2', type=str, required=False, help='The model name used for endpoint deployment.', default='')
|
||||
parser.add_argument('--initial_instance_count_2', type=int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
parser.add_argument('--instance_type_2', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_2', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--variant_name_3', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-3')
|
||||
parser.add_argument('--model_name_3', type=str.strip, required=False, help='The model name used for endpoint deployment.', default='')
|
||||
parser.add_argument('--initial_instance_count_3', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_2', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--variant_name_3', type=str, required=False, help='The name of the production variant.', default='variant-name-3')
|
||||
parser.add_argument('--model_name_3', type=str, required=False, help='The model name used for endpoint deployment.', default='')
|
||||
parser.add_argument('--initial_instance_count_3', type=int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
parser.add_argument('--instance_type_3', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_3', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--endpoint_config_tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_3', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--endpoint_config_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
parser.add_argument('--endpoint_name', type=str.strip, required=False, help='The name of the endpoint.', default='')
|
||||
parser.add_argument('--endpoint_tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
|
||||
parser.add_argument('--endpoint_name', type=str, required=False, help='The name of the endpoint.', default='')
|
||||
parser.add_argument('--endpoint_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
return parser
|
||||
|
||||
|
|
|
@ -4,92 +4,123 @@ description: |
|
|||
inputs:
|
||||
- name: region
|
||||
description: 'The region where the cluster launches.'
|
||||
type: String
|
||||
- name: role
|
||||
description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
|
||||
type: String
|
||||
- name: job_name
|
||||
description: 'The name of the labeling job.'
|
||||
type: String
|
||||
- name: label_attribute_name
|
||||
description: 'The attribute name to use for the label in the output manifest file. Default is the job name.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: manifest_location
|
||||
description: 'The Amazon S3 location of the manifest file that describes the input data objects.'
|
||||
type: String
|
||||
- name: output_location
|
||||
description: 'The Amazon S3 location to write output data.'
|
||||
type: String
|
||||
- name: output_encryption_key
|
||||
description: 'The AWS Key Management Service ID of the key used to encrypt the output data.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: task_type
|
||||
description: 'Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.'
|
||||
type: String
|
||||
- name: worker_type
|
||||
description: 'The workteam for data labeling, either public, private, or vendor.'
|
||||
type: String
|
||||
- name: workteam_arn
|
||||
description: 'The ARN of the work team assigned to complete the tasks.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: no_adult_content
|
||||
description: 'If true, your data is free of adult content.'
|
||||
default: 'False'
|
||||
type: Bool
|
||||
- name: no_ppi
|
||||
description: 'If true, your data is free of personally identifiable information.'
|
||||
default: 'False'
|
||||
type: Bool
|
||||
- name: label_category_config
|
||||
description: 'The S3 URL of the JSON structured file that defines the categories used to label the data objects.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: max_human_labeled_objects
|
||||
description: 'The maximum number of objects that can be labeled by human workers.'
|
||||
default: ''
|
||||
type: Integer
|
||||
- name: max_percent_objects
|
||||
description: 'The maximum number of input data objects that should be labeled.'
|
||||
default: ''
|
||||
type: Integer
|
||||
- name: enable_auto_labeling
|
||||
description: 'Enables auto-labeling, only for bounding box, text classification, and image classification.'
|
||||
default: 'False'
|
||||
type: Bool
|
||||
- name: initial_model_arn
|
||||
description: 'The ARN of the final model used for a previous auto-labeling job.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: resource_encryption_key
|
||||
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
|
||||
default: ''
|
||||
type: String
|
||||
- name: ui_template
|
||||
description: 'The Amazon S3 bucket location of the UI template.'
|
||||
type: String
|
||||
- name: pre_human_task_function
|
||||
description: 'The ARN of a Lambda function that is run before a data object is sent to a human worker.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: post_human_task_function
|
||||
description: 'The ARN of a Lambda function implements the logic for annotation consolidation.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: task_keywords
|
||||
description: 'Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: title
|
||||
description: 'A title for the task for your human workers.'
|
||||
type: String
|
||||
- name: description
|
||||
description: 'A description of the task for your human workers.'
|
||||
type: String
|
||||
- name: num_workers_per_object
|
||||
description: 'The number of human workers that will label an object.'
|
||||
type: Integer
|
||||
- name: time_limit
|
||||
description: 'The amount of time that a worker has to complete a task in seconds'
|
||||
type: Integer
|
||||
- name: task_availibility
|
||||
description: 'The length of time that a task remains available for labeling by human workers.'
|
||||
default: ''
|
||||
type: Integer
|
||||
- name: max_concurrent_tasks
|
||||
description: 'The maximum number of data objects that can be labeled by human workers at the same time.'
|
||||
default: ''
|
||||
type: Integer
|
||||
- name: workforce_task_price
|
||||
description: 'The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".'
|
||||
default: '0.000'
|
||||
type: Float
|
||||
- name: endpoint_url
|
||||
description: 'The endpoint URL for the private link VPC endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: tags
|
||||
description: 'Key-value pairs to categorize AWS resources.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
outputs:
|
||||
- {name: output_manifest_location, description: 'The Amazon S3 bucket location of the manifest file for labeled data.'}
|
||||
- {name: active_learning_model_arn, description: 'The ARN for the most recent Amazon SageMaker model trained as part of automated data labeling.'}
|
||||
implementation:
|
||||
container:
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.0
|
||||
command: ['python']
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.1
|
||||
command: ['python3']
|
||||
args: [
|
||||
ground_truth.py,
|
||||
--region, {inputValue: region},
|
||||
|
|
|
@ -19,35 +19,35 @@ def create_parser():
|
|||
parser = argparse.ArgumentParser(description='SageMaker Ground Truth Job')
|
||||
_utils.add_default_client_arguments(parser)
|
||||
|
||||
parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--job_name', type=str.strip, required=True, help='The name of the labeling job.')
|
||||
parser.add_argument('--label_attribute_name', type=str.strip, required=False, help='The attribute name to use for the label in the output manifest file. Default is the job name.', default='')
|
||||
parser.add_argument('--manifest_location', type=str.strip, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.')
|
||||
parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 location to write output data.')
|
||||
parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--task_type', type=str.strip, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.')
|
||||
parser.add_argument('--worker_type', type=str.strip, required=True, help='The workteam for data labeling, either public, private, or vendor.')
|
||||
parser.add_argument('--workteam_arn', type=str.strip, required=False, help='The ARN of the work team assigned to complete the tasks.')
|
||||
parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--job_name', type=str, required=True, help='The name of the labeling job.')
|
||||
parser.add_argument('--label_attribute_name', type=str, required=False, help='The attribute name to use for the label in the output manifest file. Default is the job name.', default='')
|
||||
parser.add_argument('--manifest_location', type=str, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.')
|
||||
parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 location to write output data.')
|
||||
parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--task_type', type=str, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.')
|
||||
parser.add_argument('--worker_type', type=str, required=True, help='The workteam for data labeling, either public, private, or vendor.')
|
||||
parser.add_argument('--workteam_arn', type=str, required=False, help='The ARN of the work team assigned to complete the tasks.')
|
||||
parser.add_argument('--no_adult_content', type=_utils.str_to_bool, required=False, help='If true, your data is free of adult content.', default='False')
|
||||
parser.add_argument('--no_ppi', type=_utils.str_to_bool, required=False, help='If true, your data is free of personally identifiable information.', default='False')
|
||||
parser.add_argument('--label_category_config', type=str.strip, required=False, help='The S3 URL of the JSON structured file that defines the categories used to label the data objects.', default='')
|
||||
parser.add_argument('--max_human_labeled_objects', type=_utils.str_to_int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0)
|
||||
parser.add_argument('--max_percent_objects', type=_utils.str_to_int, required=False, help='The maximum percentatge of input data objects that should be labeled.', default=0)
|
||||
parser.add_argument('--label_category_config', type=str, required=False, help='The S3 URL of the JSON structured file that defines the categories used to label the data objects.', default='')
|
||||
parser.add_argument('--max_human_labeled_objects', type=int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0)
|
||||
parser.add_argument('--max_percent_objects', type=int, required=False, help='The maximum percentatge of input data objects that should be labeled.', default=0)
|
||||
parser.add_argument('--enable_auto_labeling', type=_utils.str_to_bool, required=False, help='Enables auto-labeling, only for bounding box, text classification, and image classification.', default=False)
|
||||
parser.add_argument('--initial_model_arn', type=str.strip, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='')
|
||||
parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--ui_template', type=str.strip, required=True, help='The Amazon S3 bucket location of the UI template.')
|
||||
parser.add_argument('--pre_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='')
|
||||
parser.add_argument('--post_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='')
|
||||
parser.add_argument('--task_keywords', type=str.strip, required=False, help='Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.', default='')
|
||||
parser.add_argument('--title', type=str.strip, required=True, help='A title for the task for your human workers.')
|
||||
parser.add_argument('--description', type=str.strip, required=True, help='A description of the task for your human workers.')
|
||||
parser.add_argument('--num_workers_per_object', type=_utils.str_to_int, required=True, help='The number of human workers that will label an object.')
|
||||
parser.add_argument('--time_limit', type=_utils.str_to_int, required=True, help='The amount of time that a worker has to complete a task in seconds')
|
||||
parser.add_argument('--task_availibility', type=_utils.str_to_int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0)
|
||||
parser.add_argument('--max_concurrent_tasks', type=_utils.str_to_int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0)
|
||||
parser.add_argument('--workforce_task_price', type=_utils.str_to_float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000)
|
||||
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
|
||||
parser.add_argument('--initial_model_arn', type=str, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='')
|
||||
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--ui_template', type=str, required=True, help='The Amazon S3 bucket location of the UI template.')
|
||||
parser.add_argument('--pre_human_task_function', type=str, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='')
|
||||
parser.add_argument('--post_human_task_function', type=str, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='')
|
||||
parser.add_argument('--task_keywords', type=str, required=False, help='Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.', default='')
|
||||
parser.add_argument('--title', type=str, required=True, help='A title for the task for your human workers.')
|
||||
parser.add_argument('--description', type=str, required=True, help='A description of the task for your human workers.')
|
||||
parser.add_argument('--num_workers_per_object', type=int, required=True, help='The number of human workers that will label an object.')
|
||||
parser.add_argument('--time_limit', type=int, required=True, help='The amount of time that a worker has to complete a task in seconds')
|
||||
parser.add_argument('--task_availibility', type=int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0)
|
||||
parser.add_argument('--max_concurrent_tasks', type=int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0)
|
||||
parser.add_argument('--workforce_task_price', type=float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000)
|
||||
parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
return parser
|
||||
|
||||
|
|
|
@ -7,101 +7,136 @@ inputs:
|
|||
- name: job_name
|
||||
description: 'The name of the tuning job. Must be unique within the same AWS account and AWS region.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: role
|
||||
description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
|
||||
type: String
|
||||
- name: image
|
||||
description: 'The registry path of the Docker image that contains the training algorithm.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: algorithm_name
|
||||
description: 'The name of the algorithm resource to use for the hyperparameter tuning job. Do not specify a value for this if using training image.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: training_input_mode
|
||||
description: 'The input mode that the algorithm supports. File or Pipe.'
|
||||
default: 'File'
|
||||
type: String
|
||||
- name: metric_definitions
|
||||
description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: strategy
|
||||
description: 'How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.'
|
||||
default: 'Bayesian'
|
||||
type: String
|
||||
- name: metric_name
|
||||
description: 'The name of the metric to use for the objective metric.'
|
||||
type: String
|
||||
- name: metric_type
|
||||
description: 'Whether to minimize or maximize the objective metric.'
|
||||
type: String
|
||||
- name: early_stopping_type
|
||||
description: 'Whether to use early stopping for training jobs launched by the tuning job.'
|
||||
default: 'Off'
|
||||
type: String
|
||||
- name: static_parameters
|
||||
description: 'The values of hyperparameters that do not change for the tuning job.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: integer_parameters
|
||||
description: 'The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.'
|
||||
default: '[]'
|
||||
type: JsonArray
|
||||
- name: continuous_parameters
|
||||
description: 'The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.'
|
||||
default: '[]'
|
||||
type: JsonObject
|
||||
- name: categorical_parameters
|
||||
description: 'The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.'
|
||||
default: '[]'
|
||||
type: JsonArray
|
||||
- name: channels
|
||||
description: 'A list of dicts specifying the input channels. Must have at least one.'
|
||||
type: JsonArray
|
||||
- name: output_location
|
||||
description: 'The Amazon S3 path where you want Amazon SageMaker to store the model artifacts is from the best training job.'
|
||||
type: String
|
||||
- name: output_encryption_key
|
||||
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: instance_type
|
||||
description: 'The ML compute instance type.'
|
||||
default: 'ml.m4.xlarge'
|
||||
type: String
|
||||
- name: instance_count
|
||||
description: 'The number of ML compute instances to use in each training job.'
|
||||
default: '1'
|
||||
type: Integer
|
||||
- name: volume_size
|
||||
description: 'The size of the ML storage volume that you want to provision.'
|
||||
default: '30'
|
||||
type: Integer
|
||||
- name: max_num_jobs
|
||||
description: 'The maximum number of training jobs that a hyperparameter tuning job can launch.'
|
||||
type: Integer
|
||||
- name: max_parallel_jobs
|
||||
description: 'The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.'
|
||||
type: Integer
|
||||
- name: max_run_time
|
||||
description: 'The maximum run time in seconds per training job.'
|
||||
default: '86400'
|
||||
type: Integer
|
||||
- name: resource_encryption_key
|
||||
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
|
||||
default: ''
|
||||
type: String
|
||||
- name: vpc_security_group_ids
|
||||
description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: vpc_subnets
|
||||
description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: network_isolation
|
||||
description: 'Isolates the training container.'
|
||||
default: 'True'
|
||||
type: Bool
|
||||
- name: traffic_encryption
|
||||
description: 'Encrypts all communications between ML compute instances in distributed training.'
|
||||
default: 'False'
|
||||
type: Bool
|
||||
- name: spot_instance
|
||||
description: 'Use managed spot training.'
|
||||
default: 'False'
|
||||
type: Bool
|
||||
- name: max_wait_time
|
||||
description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.'
|
||||
default: '86400'
|
||||
type: Integer
|
||||
- name: checkpoint_config
|
||||
description: 'Dictionary of information about the output location for managed spot training checkpoint data.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: warm_start_type
|
||||
description: 'Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"'
|
||||
default: ''
|
||||
type: String
|
||||
- name: parent_hpo_jobs
|
||||
description: 'List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: endpoint_url
|
||||
description: 'The endpoint URL for the private link VPC endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: tags
|
||||
description: 'Key-value pairs, to categorize AWS resources.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
outputs:
|
||||
- name: hpo_job_name
|
||||
description: 'The name of the hyper parameter tuning job'
|
||||
|
@ -115,8 +150,8 @@ outputs:
|
|||
description: 'The registry path of the Docker image that contains the training algorithm'
|
||||
implementation:
|
||||
container:
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.0
|
||||
command: ['python']
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.1
|
||||
command: ['python3']
|
||||
args: [
|
||||
hyperparameter_tuning.py,
|
||||
--region, {inputValue: region},
|
||||
|
|
|
@ -20,46 +20,46 @@ def create_parser():
|
|||
parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job')
|
||||
_utils.add_default_client_arguments(parser)
|
||||
|
||||
parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the tuning job. Must be unique within the same AWS account and AWS region.')
|
||||
parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--image', type=str.strip, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='')
|
||||
parser.add_argument('--algorithm_name', type=str.strip, required=False, help='The name of the resource algorithm to use for the hyperparameter tuning job.', default='')
|
||||
parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, required=False, help='The input mode that the algorithm supports. File or Pipe.', default='File')
|
||||
parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default='{}')
|
||||
parser.add_argument('--strategy', choices=['Bayesian', 'Random'], type=str.strip, required=False, help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.', default='Bayesian')
|
||||
parser.add_argument('--metric_name', type=str.strip, required=True, help='The name of the metric to use for the objective metric.')
|
||||
parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], type=str.strip, required=True, help='Whether to minimize or maximize the objective metric.')
|
||||
parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], type=str.strip, required=False, help='Whether to minimize or maximize the objective metric.', default='Off')
|
||||
parser.add_argument('--static_parameters', type=_utils.str_to_json_dict, required=False, help='The values of hyperparameters that do not change for the tuning job.', default='{}')
|
||||
parser.add_argument('--integer_parameters', type=_utils.str_to_json_list, required=False, help='The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.', default='[]')
|
||||
parser.add_argument('--continuous_parameters', type=_utils.str_to_json_list, required=False, help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.', default='[]')
|
||||
parser.add_argument('--categorical_parameters', type=_utils.str_to_json_list, required=False, help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.', default='[]')
|
||||
parser.add_argument('--channels', type=_utils.str_to_json_list, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
|
||||
parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
|
||||
parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--job_name', type=str, required=False, help='The name of the tuning job. Must be unique within the same AWS account and AWS region.')
|
||||
parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='')
|
||||
parser.add_argument('--algorithm_name', type=str, required=False, help='The name of the resource algorithm to use for the hyperparameter tuning job.', default='')
|
||||
parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, required=False, help='The input mode that the algorithm supports. File or Pipe.', default='File')
|
||||
parser.add_argument('--metric_definitions', type=_utils.yaml_or_json_str, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default={})
|
||||
parser.add_argument('--strategy', choices=['Bayesian', 'Random'], type=str, required=False, help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.', default='Bayesian')
|
||||
parser.add_argument('--metric_name', type=str, required=True, help='The name of the metric to use for the objective metric.')
|
||||
parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], type=str, required=True, help='Whether to minimize or maximize the objective metric.')
|
||||
parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], type=str, required=False, help='Whether to minimize or maximize the objective metric.', default='Off')
|
||||
parser.add_argument('--static_parameters', type=_utils.yaml_or_json_str, required=False, help='The values of hyperparameters that do not change for the tuning job.', default={})
|
||||
parser.add_argument('--integer_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.', default=[])
|
||||
parser.add_argument('--continuous_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.', default=[])
|
||||
parser.add_argument('--categorical_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.', default=[])
|
||||
parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
|
||||
parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
|
||||
parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', type=_utils.str_to_int, required=False, help='The number of ML compute instances to use in each training job.', default=1)
|
||||
parser.add_argument('--volume_size', type=_utils.str_to_int, required=False, help='The size of the ML storage volume that you want to provision.', default=1)
|
||||
parser.add_argument('--max_num_jobs', type=_utils.str_to_int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.')
|
||||
parser.add_argument('--max_parallel_jobs', type=_utils.str_to_int, required=True, help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.')
|
||||
parser.add_argument('--max_run_time', type=_utils.str_to_int, required=False, help='The maximum run time in seconds per training job.', default=86400)
|
||||
parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
|
||||
parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in each training job.', default=1)
|
||||
parser.add_argument('--volume_size', type=int, required=False, help='The size of the ML storage volume that you want to provision.', default=1)
|
||||
parser.add_argument('--max_num_jobs', type=int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.')
|
||||
parser.add_argument('--max_parallel_jobs', type=int, required=True, help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.')
|
||||
parser.add_argument('--max_run_time', type=int, required=False, help='The maximum run time in seconds per training job.', default=86400)
|
||||
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
|
||||
parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
|
||||
parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True)
|
||||
parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all communications between ML compute instances in distributed training.', default=False)
|
||||
parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''], type=str.strip, required=False, help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"')
|
||||
parser.add_argument('--parent_hpo_jobs', type=str.strip, required=False, help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.', default='')
|
||||
parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''], type=str, required=False, help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"')
|
||||
parser.add_argument('--parent_hpo_jobs', type=str, required=False, help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.', default='')
|
||||
|
||||
### Start spot instance support
|
||||
parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False)
|
||||
parser.add_argument('--max_wait_time', type=_utils.str_to_int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400)
|
||||
parser.add_argument('--checkpoint_config', type=_utils.str_to_json_dict, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default='{}')
|
||||
parser.add_argument('--max_wait_time', type=int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400)
|
||||
parser.add_argument('--checkpoint_config', type=_utils.yaml_or_json_str, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default={})
|
||||
### End spot instance support
|
||||
|
||||
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
|
||||
parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
return parser
|
||||
|
||||
|
|
|
@ -4,49 +4,63 @@ description: |
|
|||
inputs:
|
||||
- name: region
|
||||
description: 'The region where the training job launches.'
|
||||
type: String
|
||||
- name: model_name
|
||||
description: 'The name of the new model.'
|
||||
type: String
|
||||
- name: role
|
||||
description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
|
||||
type: String
|
||||
- name: container_host_name
|
||||
description: 'When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: image
|
||||
description: 'The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: model_artifact_url
|
||||
description: 'S3 path where Amazon SageMaker to store the model artifacts.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: environment
|
||||
description: 'The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: model_package
|
||||
description: 'The name or Amazon Resource Name (ARN) of the model package to use to create the model.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: secondary_containers
|
||||
description: 'A list of dicts that specifies the additional containers in the inference pipeline.'
|
||||
default: '[]'
|
||||
type: JsonArray
|
||||
- name: vpc_security_group_ids
|
||||
description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: vpc_subnets
|
||||
description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: network_isolation
|
||||
description: 'Isolates the training container.'
|
||||
default: 'True'
|
||||
type: Bool
|
||||
- name: endpoint_url
|
||||
description: 'The endpoint URL for the private link VPC endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: tags
|
||||
description: 'Key-value pairs to categorize AWS resources.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
outputs:
|
||||
- {name: model_name, description: 'The model name Sagemaker created'}
|
||||
implementation:
|
||||
container:
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.0
|
||||
command: ['python']
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.1
|
||||
command: ['python3']
|
||||
args: [
|
||||
create_model.py,
|
||||
--region, {inputValue: region},
|
||||
|
|
|
@ -19,18 +19,18 @@ def create_parser():
|
|||
parser = argparse.ArgumentParser(description='SageMaker Training Job')
|
||||
_utils.add_default_client_arguments(parser)
|
||||
|
||||
parser.add_argument('--model_name', type=str.strip, required=True, help='The name of the new model.')
|
||||
parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--container_host_name', type=str.strip, required=False, help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.', default='')
|
||||
parser.add_argument('--image', type=str.strip, required=False, help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.', default='')
|
||||
parser.add_argument('--model_artifact_url', type=str.strip, required=False, help='S3 path where Amazon SageMaker to store the model artifacts.', default='')
|
||||
parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default='{}')
|
||||
parser.add_argument('--model_package', type=str.strip, required=False, help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.', default='')
|
||||
parser.add_argument('--secondary_containers', type=_utils.str_to_json_list, required=False, help='A list of dicts that specifies the additional containers in the inference pipeline.', default='{}')
|
||||
parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.', default='')
|
||||
parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.', default='')
|
||||
parser.add_argument('--model_name', type=str, required=True, help='The name of the new model.')
|
||||
parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--container_host_name', type=str, required=False, help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.', default='')
|
||||
parser.add_argument('--image', type=str, required=False, help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.', default='')
|
||||
parser.add_argument('--model_artifact_url', type=str, required=False, help='S3 path where Amazon SageMaker to store the model artifacts.', default='')
|
||||
parser.add_argument('--environment', type=_utils.yaml_or_json_str, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default={})
|
||||
parser.add_argument('--model_package', type=str, required=False, help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.', default='')
|
||||
parser.add_argument('--secondary_containers', type=_utils.yaml_or_json_str, required=False, help='A list of dicts that specifies the additional containers in the inference pipeline.', default={})
|
||||
parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.', default='')
|
||||
parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.', default='')
|
||||
parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True)
|
||||
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
|
||||
parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
return parser
|
||||
|
||||
|
|
|
@ -4,83 +4,108 @@ description: |
|
|||
inputs:
|
||||
- name: region
|
||||
description: 'The region where the training job launches.'
|
||||
type: String
|
||||
- name: job_name
|
||||
description: 'The name of the batch training job.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: role
|
||||
description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.'
|
||||
type: String
|
||||
- name: image
|
||||
description: 'The registry path of the Docker image that contains the training algorithm.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: algorithm_name
|
||||
description: 'The name of the algorithm resource to use for the training job. Do not specify a value for this if using training image.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: metric_definitions
|
||||
description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: training_input_mode
|
||||
description: 'The input mode that the algorithm supports. File or Pipe.'
|
||||
default: 'File'
|
||||
type: String
|
||||
- name: hyperparameters
|
||||
description: 'Dictionary of hyperparameters for the the algorithm.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: channels
|
||||
description: 'A list of dicts specifying the input channels. Must have at least one.'
|
||||
type: JsonArray
|
||||
- name: instance_type
|
||||
description: 'The ML compute instance type.'
|
||||
default: 'ml.m4.xlarge'
|
||||
type: String
|
||||
- name: instance_count
|
||||
description: 'The number of ML compute instances to use in each training job.'
|
||||
default: '1'
|
||||
type: Integer
|
||||
- name: volume_size
|
||||
description: 'The size of the ML storage volume that you want to provision.'
|
||||
default: '30'
|
||||
type: Integer
|
||||
- name: resource_encryption_key
|
||||
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).'
|
||||
default: ''
|
||||
type: String
|
||||
- name: max_run_time
|
||||
description: 'The maximum run time in seconds for the training job.'
|
||||
default: '86400'
|
||||
type: Integer
|
||||
- name: model_artifact_path
|
||||
description: 'Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.'
|
||||
type: String
|
||||
- name: output_encryption_key
|
||||
description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: vpc_security_group_ids
|
||||
description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: vpc_subnets
|
||||
description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: network_isolation
|
||||
description: 'Isolates the training container.'
|
||||
default: 'True'
|
||||
type: Bool
|
||||
- name: traffic_encryption
|
||||
description: 'Encrypts all communications between ML compute instances in distributed training.'
|
||||
default: 'False'
|
||||
type: Bool
|
||||
- name: spot_instance
|
||||
description: 'Use managed spot training.'
|
||||
default: 'False'
|
||||
type: Bool
|
||||
- name: max_wait_time
|
||||
description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.'
|
||||
default: '86400'
|
||||
type: Integer
|
||||
- name: checkpoint_config
|
||||
description: 'Dictionary of information about the output location for managed spot training checkpoint data.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
- name: endpoint_url
|
||||
description: 'The endpoint URL for the private link VPC endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: tags
|
||||
description: 'Key-value pairs, to categorize AWS resources.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
outputs:
|
||||
- {name: model_artifact_url, description: 'Model artifacts url'}
|
||||
- {name: job_name, description: 'Training job name'}
|
||||
- {name: training_image, description: 'The registry path of the Docker image that contains the training algorithm'}
|
||||
implementation:
|
||||
container:
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.0
|
||||
command: ['python']
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.1
|
||||
command: ['python3']
|
||||
args: [
|
||||
train.py,
|
||||
--region, {inputValue: region},
|
||||
|
|
|
@ -19,35 +19,35 @@ def create_parser():
|
|||
parser = argparse.ArgumentParser(description='SageMaker Training Job')
|
||||
_utils.add_default_client_arguments(parser)
|
||||
|
||||
parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the training job.', default='')
|
||||
parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--image', type=str.strip, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='')
|
||||
parser.add_argument('--algorithm_name', type=str.strip, required=False, help='The name of the resource algorithm to use for the training job.', default='')
|
||||
parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default='{}')
|
||||
parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, help='The input mode that the algorithm supports. File or Pipe.', default='File')
|
||||
parser.add_argument('--hyperparameters', type=_utils.str_to_json_dict, help='Dictionary of hyperparameters for the the algorithm.', default='{}')
|
||||
parser.add_argument('--channels', type=_utils.str_to_json_list, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
|
||||
parser.add_argument('--job_name', type=str, required=False, help='The name of the training job.', default='')
|
||||
parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='')
|
||||
parser.add_argument('--algorithm_name', type=str, required=False, help='The name of the resource algorithm to use for the training job.', default='')
|
||||
parser.add_argument('--metric_definitions', type=_utils.yaml_or_json_str, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default={})
|
||||
parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, help='The input mode that the algorithm supports. File or Pipe.', default='File')
|
||||
parser.add_argument('--hyperparameters', type=_utils.yaml_or_json_str, help='Dictionary of hyperparameters for the the algorithm.', default={})
|
||||
parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
|
||||
parser.add_argument('--instance_type', required=True, choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', required=True, type=_utils.str_to_int, help='The registry path of the Docker image that contains the training algorithm.', default=1)
|
||||
parser.add_argument('--volume_size', type=_utils.str_to_int, required=True, help='The size of the ML storage volume that you want to provision.', default=1)
|
||||
parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--max_run_time', type=_utils.str_to_int, required=True, help='The maximum run time in seconds for the training job.', default=86400)
|
||||
parser.add_argument('--model_artifact_path', type=str.strip, required=True, help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
|
||||
parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
|
||||
parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', required=True, type=int, help='The registry path of the Docker image that contains the training algorithm.', default=1)
|
||||
parser.add_argument('--volume_size', type=int, required=True, help='The size of the ML storage volume that you want to provision.', default=1)
|
||||
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--max_run_time', type=int, required=True, help='The maximum run time in seconds for the training job.', default=86400)
|
||||
parser.add_argument('--model_artifact_path', type=str, required=True, help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
|
||||
parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
|
||||
parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
|
||||
parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True)
|
||||
parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all communications between ML compute instances in distributed training.', default=False)
|
||||
|
||||
### Start spot instance support
|
||||
parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False)
|
||||
parser.add_argument('--max_wait_time', type=_utils.str_to_int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400)
|
||||
parser.add_argument('--checkpoint_config', type=_utils.str_to_json_dict, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default='{}')
|
||||
parser.add_argument('--max_wait_time', type=int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400)
|
||||
parser.add_argument('--checkpoint_config', type=_utils.yaml_or_json_str, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default={})
|
||||
### End spot instance support
|
||||
|
||||
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
|
||||
parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
return parser
|
||||
|
||||
|
|
|
@ -4,31 +4,40 @@ description: |
|
|||
inputs:
|
||||
- name: region
|
||||
description: 'The region where the cluster launches.'
|
||||
type: String
|
||||
- name: team_name
|
||||
description: 'The name of your work team.'
|
||||
type: String
|
||||
- name: description
|
||||
description: 'A description of the work team.'
|
||||
type: String
|
||||
- name: user_pool
|
||||
description: 'An identifier for a user pool. The user pool must be in the same region as the service that you are calling.'
|
||||
type: String
|
||||
- name: user_groups
|
||||
description: 'An identifier for a user group.'
|
||||
type: String
|
||||
- name: client_id
|
||||
description: 'An identifier for an application client. You must create the app client ID using Amazon Cognito.'
|
||||
type: String
|
||||
- name: sns_topic
|
||||
description: 'The ARN for the SNS topic to which notifications should be published.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: endpoint_url
|
||||
description: 'The endpoint URL for the private link VPC endpoint.'
|
||||
default: ''
|
||||
type: String
|
||||
- name: tags
|
||||
description: 'Key-value pairs to categorize AWS resources.'
|
||||
default: '{}'
|
||||
type: JsonObject
|
||||
outputs:
|
||||
- {name: workteam_arn, description: 'The ARN of the workteam.'}
|
||||
implementation:
|
||||
container:
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.0
|
||||
command: ['python']
|
||||
image: amazon/aws-sagemaker-kfp-components:0.3.1
|
||||
command: ['python3']
|
||||
args: [
|
||||
workteam.py,
|
||||
--region, {inputValue: region},
|
||||
|
|
|
@ -19,13 +19,13 @@ def create_parser():
|
|||
parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job')
|
||||
_utils.add_default_client_arguments(parser)
|
||||
|
||||
parser.add_argument('--team_name', type=str.strip, required=True, help='The name of your work team.')
|
||||
parser.add_argument('--description', type=str.strip, required=True, help='A description of the work team.')
|
||||
parser.add_argument('--user_pool', type=str.strip, required=False, help='An identifier for a user pool. The user pool must be in the same region as the service that you are calling.', default='')
|
||||
parser.add_argument('--user_groups', type=str.strip, required=False, help='A list of identifiers for user groups separated by commas.', default='')
|
||||
parser.add_argument('--client_id', type=str.strip, required=False, help='An identifier for an application client. You must create the app client ID using Amazon Cognito.', default='')
|
||||
parser.add_argument('--sns_topic', type=str.strip, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='')
|
||||
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
|
||||
parser.add_argument('--team_name', type=str, required=True, help='The name of your work team.')
|
||||
parser.add_argument('--description', type=str, required=True, help='A description of the work team.')
|
||||
parser.add_argument('--user_pool', type=str, required=False, help='An identifier for a user pool. The user pool must be in the same region as the service that you are calling.', default='')
|
||||
parser.add_argument('--user_groups', type=str, required=False, help='A list of identifiers for user groups separated by commas.', default='')
|
||||
parser.add_argument('--client_id', type=str, required=False, help='An identifier for an application client. You must create the app client ID using Amazon Cognito.', default='')
|
||||
parser.add_argument('--sns_topic', type=str, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='')
|
||||
parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
return parser
|
||||
|
||||
|
|
|
@ -51,18 +51,18 @@ def ground_truth_test(region='us-west-2',
|
|||
ground_truth_ui_template='s3://your-bucket-name/mini-image-classification/ground-truth-demo/instructions.template',
|
||||
ground_truth_title='Mini image classification',
|
||||
ground_truth_description='Test for Ground Truth KFP component',
|
||||
ground_truth_num_workers_per_object='1',
|
||||
ground_truth_time_limit='30',
|
||||
ground_truth_task_availibility='3600',
|
||||
ground_truth_max_concurrent_tasks='20',
|
||||
ground_truth_num_workers_per_object=1,
|
||||
ground_truth_time_limit=30,
|
||||
ground_truth_task_availibility=3600,
|
||||
ground_truth_max_concurrent_tasks=20,
|
||||
training_algorithm_name='image classification',
|
||||
training_input_mode='Pipe',
|
||||
training_hyperparameters='{"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"}',
|
||||
training_hyperparameters={"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"},
|
||||
training_output_location='s3://your-bucket-name/mini-image-classification/training-output',
|
||||
training_instance_type='ml.p2.xlarge',
|
||||
training_instance_count='1',
|
||||
training_volume_size='50',
|
||||
training_max_run_time='3600',
|
||||
training_instance_count=1,
|
||||
training_volume_size=50,
|
||||
training_max_run_time=3600,
|
||||
role_arn=''
|
||||
):
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ channelObj = {
|
|||
'S3DataDistributionType': 'FullyReplicated'
|
||||
}
|
||||
},
|
||||
'ContentType': '',
|
||||
'CompressionType': 'None',
|
||||
'RecordWrapperType': 'None',
|
||||
'InputMode': 'File'
|
||||
|
@ -44,37 +43,37 @@ def hpo_test(region='us-west-2',
|
|||
image='',
|
||||
algorithm_name='K-Means',
|
||||
training_input_mode='File',
|
||||
metric_definitions='{}',
|
||||
metric_definitions={},
|
||||
strategy='Bayesian',
|
||||
metric_name='test:msd',
|
||||
metric_type='Minimize',
|
||||
early_stopping_type='Off',
|
||||
static_parameters='{"k": "10", "feature_dim": "784"}',
|
||||
integer_parameters='[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \
|
||||
{"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]',
|
||||
continuous_parameters='[]',
|
||||
categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]',
|
||||
channels=json.dumps(channelObjList),
|
||||
static_parameters={"k": "10", "feature_dim": "784"},
|
||||
integer_parameters=[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \
|
||||
{"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}],
|
||||
continuous_parameters=[],
|
||||
categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}],
|
||||
channels=channelObjList,
|
||||
output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
|
||||
output_encryption_key='',
|
||||
instance_type='ml.p2.16xlarge',
|
||||
instance_count='1',
|
||||
volume_size='50',
|
||||
max_num_jobs='1',
|
||||
max_parallel_jobs='1',
|
||||
instance_count=1,
|
||||
volume_size=50,
|
||||
max_num_jobs=1,
|
||||
max_parallel_jobs=1,
|
||||
resource_encryption_key='',
|
||||
max_run_time='3600',
|
||||
max_run_time=3600,
|
||||
vpc_security_group_ids='',
|
||||
vpc_subnets='',
|
||||
endpoint_url='',
|
||||
network_isolation='True',
|
||||
traffic_encryption='False',
|
||||
network_isolation=True,
|
||||
traffic_encryption=False,
|
||||
warm_start_type='',
|
||||
parent_hpo_jobs='',
|
||||
spot_instance='False',
|
||||
max_wait_time='3600',
|
||||
checkpoint_config='{}',
|
||||
tags='{}',
|
||||
spot_instance=False,
|
||||
max_wait_time=3600,
|
||||
checkpoint_config={},
|
||||
tags={},
|
||||
role_arn='',
|
||||
):
|
||||
|
||||
|
|
|
@ -26,7 +26,6 @@ channelObj = {
|
|||
'S3DataDistributionType': 'FullyReplicated'
|
||||
}
|
||||
},
|
||||
'ContentType': '',
|
||||
'CompressionType': 'None',
|
||||
'RecordWrapperType': 'None',
|
||||
'InputMode': 'File'
|
||||
|
@ -52,37 +51,37 @@ def mnist_classification(region='us-west-2',
|
|||
hpo_metric_name='test:msd',
|
||||
hpo_metric_type='Minimize',
|
||||
hpo_early_stopping_type='Off',
|
||||
hpo_static_parameters='{"k": "10", "feature_dim": "784"}',
|
||||
hpo_integer_parameters='[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]',
|
||||
hpo_continuous_parameters='[]',
|
||||
hpo_categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]',
|
||||
hpo_channels=json.dumps(hpoChannels),
|
||||
hpo_spot_instance='False',
|
||||
hpo_max_wait_time='3600',
|
||||
hpo_checkpoint_config='{}',
|
||||
hpo_static_parameters={"k": "10", "feature_dim": "784"},
|
||||
hpo_integer_parameters=[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}],
|
||||
hpo_continuous_parameters=[],
|
||||
hpo_categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}],
|
||||
hpo_channels=hpoChannels,
|
||||
hpo_spot_instance=False,
|
||||
hpo_max_wait_time=3600,
|
||||
hpo_checkpoint_config={},
|
||||
output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
|
||||
output_encryption_key='',
|
||||
instance_type='ml.p2.16xlarge',
|
||||
instance_count='1',
|
||||
volume_size='50',
|
||||
hpo_max_num_jobs='9',
|
||||
hpo_max_parallel_jobs='3',
|
||||
max_run_time='3600',
|
||||
instance_count=1,
|
||||
volume_size=50,
|
||||
hpo_max_num_jobs=9,
|
||||
hpo_max_parallel_jobs=3,
|
||||
max_run_time=3600,
|
||||
endpoint_url='',
|
||||
network_isolation='True',
|
||||
traffic_encryption='False',
|
||||
train_channels=json.dumps(trainChannels),
|
||||
train_spot_instance='False',
|
||||
train_max_wait_time='3600',
|
||||
train_checkpoint_config='{}',
|
||||
network_isolation=True,
|
||||
traffic_encryption=False,
|
||||
train_channels=trainChannels,
|
||||
train_spot_instance=False,
|
||||
train_max_wait_time=3600,
|
||||
train_checkpoint_config={},
|
||||
batch_transform_instance_type='ml.m4.xlarge',
|
||||
batch_transform_input='s3://kubeflow-pipeline-data/mnist_kmeans_example/input',
|
||||
batch_transform_data_type='S3Prefix',
|
||||
batch_transform_content_type='text/csv',
|
||||
batch_transform_compression_type='None',
|
||||
batch_transform_ouput='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
|
||||
batch_transform_max_concurrent='4',
|
||||
batch_transform_max_payload='6',
|
||||
batch_transform_max_concurrent=4,
|
||||
batch_transform_max_payload=6,
|
||||
batch_strategy='MultiRecord',
|
||||
batch_transform_split_type='Line',
|
||||
role_arn=''
|
||||
|
|
|
@ -20,7 +20,6 @@ channelObj = {
|
|||
'S3DataDistributionType': 'FullyReplicated'
|
||||
}
|
||||
},
|
||||
'ContentType': '',
|
||||
'CompressionType': 'None',
|
||||
'RecordWrapperType': 'None',
|
||||
'InputMode': 'File'
|
||||
|
@ -40,19 +39,19 @@ def training(
|
|||
endpoint_url='',
|
||||
image='382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:1',
|
||||
training_input_mode='File',
|
||||
hyperparameters='{"k": "10", "feature_dim": "784"}',
|
||||
channels=json.dumps(channelObjList),
|
||||
hyperparameters={"k": "10", "feature_dim": "784"},
|
||||
channels=channelObjList,
|
||||
instance_type='ml.p2.xlarge',
|
||||
instance_count='1',
|
||||
volume_size='50',
|
||||
max_run_time='3600',
|
||||
instance_count=1,
|
||||
volume_size=50,
|
||||
max_run_time=3600,
|
||||
model_artifact_path='s3://kubeflow-pipeline-data/mnist_kmeans_example/data',
|
||||
output_encryption_key='',
|
||||
network_isolation='True',
|
||||
traffic_encryption='False',
|
||||
spot_instance='False',
|
||||
max_wait_time='3600',
|
||||
checkpoint_config='{}',
|
||||
network_isolation=True,
|
||||
traffic_encryption=False,
|
||||
spot_instance=False,
|
||||
max_wait_time=3600,
|
||||
checkpoint_config={},
|
||||
role=''
|
||||
):
|
||||
training = sagemaker_train_op(
|
||||
|
|
Loading…
Reference in New Issue