AWS SageMaker : Use IAM Roles for Service Account (#3719)
* don't use aws-secret and update readme for sample pipelines
* Addressed comments on PR and few more readme changes
* small changes to readme
* nit change
* Address comments
parent 291f5b3d7a
commit d18ad7a563
@ -44,9 +44,7 @@ def create_parser():
|
|||
parser.add_argument('--input_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='')
|
||||
parser.add_argument('--output_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='')
|
||||
parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str, required=False, help='Specifies the source of the data to join with the transformed data.', default='None')
|
||||
parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_type', type=str, required=False, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in the transform job.')
|
||||
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ Argument | Description | Optional (in pipeline definition
|
|||
:--- | :---------- | :---------- | :---------- | :----------| :---------- | :----------|
|
||||
model_name_[1, 3] | The name of the model that you want to host. This is the name that you specified when creating the model | No | No | String | | |
|
||||
variant_name_[1, 3] | The name of the production variant | Yes | Yes | String | | variant_name_[1, 3] |
|
||||
instance_type_[1, 3] | The ML compute instance type | Yes | Yes | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge | ml.m4.xlarge |
|
||||
instance_type_[1, 3] | The ML compute instance type | Yes | Yes | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge [and many more](https://aws.amazon.com/sagemaker/pricing/instance-types/)| ml.m4.xlarge |
|
||||
initial_instance_count_[1, 3] | Number of instances to launch initially | Yes | Yes | Integer | ≥ 1 | 1 |
|
||||
initial_variant_weight_[1, 3] | Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. The traffic to a production variant is determined by the ratio of the VariantWeight to the sum of all VariantWeight values across all ProductionVariants. | Yes | Yes | Float | Minimum value of 0 | |
|
||||
accelerator_type_[1, 3] | The size of the Elastic Inference (EI) instance to use for the production variant | Yes | Yes | String| ml.eia1.medium, ml.eia1.large, ml.eia1.xlarge | |
|
||||
|
|
|
|||
|
|
@ -23,30 +23,23 @@ def create_parser():
|
|||
parser.add_argument('--variant_name_1', type=str, required=False, help='The name of the production variant.', default='variant-name-1')
|
||||
parser.add_argument('--model_name_1', type=str, required=True, help='The model name used for endpoint deployment.')
|
||||
parser.add_argument('--initial_instance_count_1', type=int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
parser.add_argument('--instance_type_1', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_type_1', type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_1', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--variant_name_2', type=str, required=False, help='The name of the production variant.', default='variant-name-2')
|
||||
parser.add_argument('--model_name_2', type=str, required=False, help='The model name used for endpoint deployment.', default='')
|
||||
parser.add_argument('--initial_instance_count_2', type=int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
parser.add_argument('--instance_type_2', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_type_2', type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_2', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--variant_name_3', type=str, required=False, help='The name of the production variant.', default='variant-name-3')
|
||||
parser.add_argument('--model_name_3', type=str, required=False, help='The model name used for endpoint deployment.', default='')
|
||||
parser.add_argument('--initial_instance_count_3', type=int, required=False, help='Number of instances to launch initially.', default=1)
|
||||
parser.add_argument('--instance_type_3', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_type_3', type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--initial_variant_weight_3', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
|
||||
parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
|
||||
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
parser.add_argument('--endpoint_config_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
parser.add_argument('--endpoint_name', type=str, required=False, help='The name of the endpoint.', default='')
|
||||
parser.add_argument('--endpoint_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ categorical_parameters | The array of CategoricalParameterRange objects that spe
|
|||
channels | A list of dicts specifying the input channels (at least one); refer to [documentation](https://github.com/awsdocs/amazon-sagemaker-developer-guide/blob/master/doc_source/API_Channel.md) for parameters | No | No | List of Dicts | | |
|
||||
output_location | The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job | No | No | String | | |
|
||||
output_encryption_key | The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts | Yes | Yes | String | | |
|
||||
instance_type | The ML compute instance type | Yes | No | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge | ml.m4.xlarge |
|
||||
instance_type | The ML compute instance type | Yes | No | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge [and many more](https://aws.amazon.com/sagemaker/pricing/instance-types/)| ml.m4.xlarge |
|
||||
instance_count | The number of ML compute instances to use in each training job | Yes | Yes | Int | ≥ 1 | 1 |
|
||||
volume_size | The size of the ML storage volume that you want to provision in GB | Yes | Yes | Int | ≥ 1 | 30 |
|
||||
max_num_jobs | The maximum number of training jobs that a hyperparameter tuning job can launch | No | No | Int | [1, 500] | |
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ from common import _utils
|
|||
def create_parser():
|
||||
parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job')
|
||||
_utils.add_default_client_arguments(parser)
|
||||
|
||||
|
||||
parser.add_argument('--job_name', type=str, required=False, help='The name of the tuning job. Must be unique within the same AWS account and AWS region.')
|
||||
parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
|
||||
parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='')
|
||||
|
|
@ -37,9 +37,7 @@ def create_parser():
|
|||
parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
|
||||
parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
|
||||
parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
|
||||
parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_type', type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in each training job.', default=1)
|
||||
parser.add_argument('--volume_size', type=int, required=False, help='The size of the ML storage volume that you want to provision.', default=30)
|
||||
parser.add_argument('--max_num_jobs', type=int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.')
|
||||
|
|
|
|||
|
|
@ -20,8 +20,8 @@ algorithm_name | The name of the algorithm resource to use for the hyperparamete
|
|||
metric_definitions | The dictionary of name-regex pairs specify the metrics that the algorithm emits | Yes | Dict | | {} |
|
||||
training_input_mode | The input mode that the algorithm supports | No | String | File, Pipe | File |
|
||||
hyperparameters | Hyperparameters for the selected algorithm | No | Dict | [Depends on Algo](https://docs.aws.amazon.com/sagemaker/latest/dg/k-means-api-config.html)| |
|
||||
channels | A list of dicts specifying the input channels (at least one); refer to [documentation](https://github.com/awsdocs/amazon-sagemaker-developer-guide/blob/master/doc_source/API_Channel.md) for parameters | No | No | List of Dicts | | |
|
||||
instance_type | The ML compute instance type | Yes | No | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge | ml.m4.xlarge |
|
||||
channels | A list of dicts specifying the input channels (at least one); refer to [documentation](https://github.com/awsdocs/amazon-sagemaker-developer-guide/blob/master/doc_source/API_Channel.md) for parameters | No | List of Dicts | | |
|
||||
instance_type | The ML compute instance type | Yes | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge [and many more](https://aws.amazon.com/sagemaker/pricing/instance-types/) | ml.m4.xlarge |
|
||||
instance_count | The number of ML compute instances to use in each training job | Yes | Int | ≥ 1 | 1 |
|
||||
volume_size | The size of the ML storage volume that you want to provision in GB | Yes | Int | ≥ 1 | 30 |
|
||||
resource_encryption_key | The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s) | Yes | String | | |
|
||||
|
|
@ -42,7 +42,7 @@ tags | Key-value pairs to categorize AWS resources | Yes | Dict | | {} |
|
|||
Stores the model in the S3 bucket you specified
|
||||
|
||||
# Example code
|
||||
Simple example pipeline with only Train component : [simple_train_pipeline](https://github.com/kubeflow/pipelines/tree/documents/samples/contrib/aws-samples/simple_train_pipeline)
|
||||
Simple example pipeline with only Train component : [simple_train_pipeline](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/simple_train_pipeline)
|
||||
|
||||
# Resources
|
||||
* [Using Amazon built-in algorithms](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html)
|
||||
|
|
|
|||
|
|
@ -28,9 +28,7 @@ def create_parser():
|
|||
parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, help='The input mode that the algorithm supports. File or Pipe.', default='File')
|
||||
parser.add_argument('--hyperparameters', type=_utils.yaml_or_json_str, help='Dictionary of hyperparameters for the the algorithm.', default={})
|
||||
parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
|
||||
parser.add_argument('--instance_type', required=True, choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
|
||||
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
|
||||
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_type', required=False, type=str, help='The ML compute instance type.', default='ml.m4.xlarge')
|
||||
parser.add_argument('--instance_count', required=True, type=int, help='The number of ML compute instances to use in the training job.', default=1)
|
||||
parser.add_argument('--volume_size', type=int, required=True, help='The size of the ML storage volume that you want to provision.', default=30)
|
||||
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
|
||||
|
|
|
|||
|
|
@ -0,0 +1,197 @@
# Sample AWS SageMaker Kubeflow Pipelines

This folder contains example pipelines that use the [AWS SageMaker Components for KFP](https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker). The following sections explain the setup needed to run these pipelines. Once you are done with the setup, [simple_train_pipeline](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/simple_train_pipeline) is a good place to start if you have never used these components before.


## Prerequisites

1. You need a cluster with Kubeflow installed on it. [Install Kubeflow on AWS cluster](https://www.kubeflow.org/docs/aws/deploy/install-kubeflow/)
2. Install the following on your local machine or EC2 instance (these are recommended tools; not all of them are strictly required):
    1. [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html). If you are using an IAM user, configure your [Access Key ID, Secret Access Key](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) and preferred AWS Region by running: `aws configure`
    2. [aws-iam-authenticator](https://docs.aws.amazon.com/eks/latest/userguide/install-aws-iam-authenticator.html) version 0.1.31 and above
    3. [eksctl](https://github.com/weaveworks/eksctl) version above 0.15
    4. [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/#install-kubectl) version within one minor version of your cluster's Kubernetes version
    5. [KFP SDK](https://www.kubeflow.org/docs/pipelines/sdk/install-sdk/#install-the-kubeflow-pipelines-sdk) (installs the `dsl-compile` and `kfp` CLIs)


## IAM Permissions

To use the AWS KFP components, the KFP component pods need access to AWS SageMaker.
There are two ways to give them this access.
(Option 1 requires an EKS cluster.)

**Option 1** (Recommended) [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html).
1. Enable OIDC support on the EKS cluster
```bash
eksctl utils associate-iam-oidc-provider --cluster <cluster_name> \
    --region <cluster_region> --approve
```
2. Take note of the OIDC issuer URL. It has the form `oidc.eks.<region>.amazonaws.com/id/<OIDC_ID>`.
```bash
aws eks describe-cluster --name <cluster_name> --query "cluster.identity.oidc.issuer" --output text
```
3. Create a file named trust.json with the following content.
Replace `<OIDC_URL>` with your OIDC issuer URL **(don't include https://)** and `<AWS_ACCOUNT_NUMBER>` with your AWS account number.
```bash
# Replace these two with proper values
OIDC_URL="<OIDC_URL>"
AWS_ACC_NUM="<AWS_ACCOUNT_NUMBER>"

# Run this to create the trust.json file
cat <<EOF > trust.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Federated": "arn:aws:iam::$AWS_ACC_NUM:oidc-provider/$OIDC_URL"
      },
      "Action": "sts:AssumeRoleWithWebIdentity",
      "Condition": {
        "StringEquals": {
          "$OIDC_URL:aud": "sts.amazonaws.com",
          "$OIDC_URL:sub": "system:serviceaccount:kubeflow:pipeline-runner"
        }
      }
    }
  ]
}
EOF
```
4. Create an IAM role using trust.json. Make a note of the ARN returned in the output.
```bash
aws iam create-role --role-name kfp-example-pod-role --assume-role-policy-document file://trust.json
aws iam attach-role-policy --role-name kfp-example-pod-role --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
aws iam get-role --role-name kfp-example-pod-role --output text --query 'Role.Arn'
```
5. Edit your pipeline-runner service account.
```bash
kubectl edit -n kubeflow serviceaccount pipeline-runner
```
Add `eks.amazonaws.com/role-arn: <role_arn>` to the annotations, then save the file. Example: **(add only line 5)** An equivalent one-line command is shown after this example.
```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  annotations:
    eks.amazonaws.com/role-arn: <role_arn>
  creationTimestamp: "2020-04-16T05:48:06Z"
  labels:
    app: pipeline-runner
    app.kubernetes.io/component: pipelines-runner
    app.kubernetes.io/instance: pipelines-runner-0.2.0
    app.kubernetes.io/managed-by: kfctl
    app.kubernetes.io/name: pipelines-runner
    app.kubernetes.io/part-of: kubeflow
    app.kubernetes.io/version: 0.2.0
  name: pipeline-runner
  namespace: kubeflow
  resourceVersion: "11787"
  selfLink: /api/v1/namespaces/kubeflow/serviceaccounts/pipeline-runner
  uid: d86234bd-7fa5-11ea-a8f2-02934be6dc88
secrets:
- name: pipeline-runner-token-dkjrk
```
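If you prefer not to edit the ServiceAccount by hand, the same annotation can be applied with a single command. This is a sketch assuming the default `kubeflow` namespace and the `pipeline-runner` service account shown above; substitute the role ARN returned in step 4.
```bash
# Hypothetical one-liner equivalent of the edit in step 5; adjust names if yours differ.
kubectl annotate -n kubeflow serviceaccount pipeline-runner \
    eks.amazonaws.com/role-arn=arn:aws:iam::<AWS_ACCOUNT_NUMBER>:role/kfp-example-pod-role --overwrite
```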

**Option 2** Store the IAM credentials as an `aws-secret` in the Kubernetes cluster, then use it in the components.
1. You need credentials for an IAM user with SageMakerFullAccess. Apply them to the k8s cluster.
Replace `AWS_ACCESS_KEY_IN_BASE64` and `AWS_SECRET_ACCESS_IN_BASE64`.
> Note: To get the base64 string you can do `echo -n $AWS_ACCESS_KEY_ID | base64`
```bash
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Secret
metadata:
  name: aws-secret
  namespace: kubeflow
type: Opaque
data:
  AWS_ACCESS_KEY_ID: <AWS_ACCESS_KEY_IN_BASE64>
  AWS_SECRET_ACCESS_KEY: <AWS_SECRET_ACCESS_IN_BASE64>
EOF
```
2. Use the stored `aws-secret` in pipeline code by adding this line to each component in your pipeline: `.apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))`. A minimal sketch is shown after the links below.
[Kubeflow Document](https://www.kubeflow.org/docs/aws/pipeline/)
[Example Code](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py#L76) (uncomment this line)
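For reference, a minimal sketch of what step 2 looks like in a pipeline definition. The stand-in container op below is only illustrative (it is not one of the SageMaker components); the pattern is the same for any op in your pipeline.
```python
#!/usr/bin/env python3
# Minimal sketch: attaching the aws-secret credentials to a pipeline step.
import kfp
from kfp import dsl
from kfp.aws import use_aws_secret

@dsl.pipeline(name='aws-secret example', description='Sketch of using aws-secret with a component')
def example_pipeline():
    # Any SageMaker component op is used the same way; this trivial op is just a placeholder.
    op = dsl.ContainerOp(name='stand-in', image='amazon/aws-cli', command=['aws', '--version'])
    op.apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))

if __name__ == '__main__':
    kfp.compiler.Compiler().compile(example_pipeline, __file__ + '.zip')
```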

## Inputs to the pipeline

### Sample MNIST dataset

Use the following Python script to copy train_data, test_data, and valid_data to your bucket.
[Create a bucket](https://docs.aws.amazon.com/AmazonS3/latest/gsg/CreatingABucket.html) in the `us-east-1` region if you don't have one already.
For the purposes of this demonstration, all resources will be created in the us-east-1 region.

Create a new file named s3_sample_data_creator.py with the following content:
```python
import pickle, gzip, numpy, urllib.request, json
from urllib.parse import urlparse

###################################################################
# This is the only thing that you need to change to run this code
# Give the name of your S3 bucket
bucket = '<bucket-name>'

# If you are going to use the default values of the pipeline then
# give a bucket name which is in the us-east-1 region
###################################################################


# Load the dataset
urllib.request.urlretrieve("http://deeplearning.net/data/mnist/mnist.pkl.gz", "mnist.pkl.gz")
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')


# Upload dataset to S3
from sagemaker.amazon.common import write_numpy_to_dense_tensor
import io
import boto3

train_data_key = 'mnist_kmeans_example/train_data'
test_data_key = 'mnist_kmeans_example/test_data'
train_data_location = 's3://{}/{}'.format(bucket, train_data_key)
test_data_location = 's3://{}/{}'.format(bucket, test_data_key)
print('training data will be uploaded to: {}'.format(train_data_location))
print('test data will be uploaded to: {}'.format(test_data_location))

# Convert the training data into the format required by the SageMaker KMeans algorithm
buf = io.BytesIO()
write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])
buf.seek(0)

boto3.resource('s3').Bucket(bucket).Object(train_data_key).upload_fileobj(buf)

# Convert the test data into the format required by the SageMaker KMeans algorithm
buf = io.BytesIO()  # use a fresh buffer so the test file does not also contain the training records
write_numpy_to_dense_tensor(buf, test_set[0], test_set[1])
buf.seek(0)

boto3.resource('s3').Bucket(bucket).Object(test_data_key).upload_fileobj(buf)

# Save the validation data as CSV and upload it (used as inference input by the sample pipelines)
numpy.savetxt('valid-data.csv', valid_set[0], delimiter=',', fmt='%g')
s3_client = boto3.client('s3')
input_key = "{}/valid_data.csv".format("mnist_kmeans_example/input")
s3_client.upload_file('valid-data.csv', bucket, input_key)
```
Run this file: `python s3_sample_data_creator.py`

### Role Input

This role is used by the SageMaker jobs created by the KFP to access the S3 buckets and other AWS resources.
Run these commands to create the sagemaker-execution-role.
Note down the Role ARN. You need to give this Role ARN as an input to the pipeline.

```bash
TRUST="{ \"Version\": \"2012-10-17\", \"Statement\": [ { \"Effect\": \"Allow\", \"Principal\": { \"Service\": \"sagemaker.amazonaws.com\" }, \"Action\": \"sts:AssumeRole\" } ] }"
aws iam create-role --role-name kfp-example-sagemaker-execution-role --assume-role-policy-document "$TRUST"
aws iam attach-role-policy --role-name kfp-example-sagemaker-execution-role --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
aws iam attach-role-policy --role-name kfp-example-sagemaker-execution-role --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess
aws iam get-role --role-name kfp-example-sagemaker-execution-role --output text --query 'Role.Arn'

# note down the Role ARN.
```
@ -4,6 +4,10 @@ This sample is based on [this example](https://github.com/awslabs/amazon-sagemak
|
|||
|
||||
The sample goes through the workflow of creating a private workteam, creating data labeling jobs for that team, and running a training job using the new labeled data.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Make sure you have the setup explained in this [README.md](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/aws-samples/README.md)
(This pipeline does not use the MNIST dataset. Follow the instructions below to get the sample dataset.)
|
||||
|
||||
## Prep the dataset, label categories, and UI template
|
||||
|
||||
|
|
@ -34,26 +38,6 @@ client_ID = App client
|
|||
|
||||
> Note: Once you start a run of the pipeline, you will receive the Ground Truth labeling jobs at the "Labeling portal sign-in URL" link
|
||||
|
||||
## SageMaker permission
|
||||
|
||||
In order to run this pipeline, we need to prepare an IAM Role to run Sagemaker jobs. You need this `role_arn` to run a pipeline. Check [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) for details.
|
||||
|
||||
This pipeline also use aws-secret to get access to Sagemaker services, please also make sure you have a `aws-secret` in the kubeflow namespace.
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: aws-secret
|
||||
namespace: kubeflow
|
||||
type: Opaque
|
||||
data:
|
||||
AWS_ACCESS_KEY_ID: YOUR_BASE64_ACCESS_KEY
|
||||
AWS_SECRET_ACCESS_KEY: YOUR_BASE64_SECRET_ACCESS
|
||||
```
|
||||
|
||||
> Note: To get base64 string, try `echo -n $AWS_ACCESS_KEY_ID | base64`
|
||||
|
||||
|
||||
## Compiling the pipeline template
|
||||
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ def ground_truth_test(region='us-west-2',
|
|||
training_input_mode='Pipe',
|
||||
training_hyperparameters={"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"},
|
||||
training_output_location='s3://your-bucket-name/mini-image-classification/training-output',
|
||||
training_instance_type='ml.p2.xlarge',
|
||||
training_instance_type='ml.m5.2xlarge',
|
||||
training_instance_count=1,
|
||||
training_volume_size=50,
|
||||
training_max_run_time=3600,
|
||||
|
|
@ -73,7 +73,7 @@ def ground_truth_test(region='us-west-2',
|
|||
user_pool=user_pool,
|
||||
user_groups=user_groups,
|
||||
client_id=client_id
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
ground_truth_train = sagemaker_gt_op(
|
||||
region=region,
|
||||
|
|
@ -93,7 +93,7 @@ def ground_truth_test(region='us-west-2',
|
|||
time_limit=ground_truth_time_limit,
|
||||
task_availibility=ground_truth_task_availibility,
|
||||
max_concurrent_tasks=ground_truth_max_concurrent_tasks
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
ground_truth_validation = sagemaker_gt_op(
|
||||
region=region,
|
||||
|
|
@ -113,7 +113,7 @@ def ground_truth_test(region='us-west-2',
|
|||
time_limit=ground_truth_time_limit,
|
||||
task_availibility=ground_truth_task_availibility,
|
||||
max_concurrent_tasks=ground_truth_max_concurrent_tasks
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
channelObj['ChannelName'] = 'train'
|
||||
channelObj['DataSource']['S3DataSource']['S3Uri'] = str(ground_truth_train.outputs['output_manifest_location'])
|
||||
|
|
@ -134,7 +134,8 @@ def ground_truth_test(region='us-west-2',
|
|||
max_run_time=training_max_run_time,
|
||||
model_artifact_path=training_output_location,
|
||||
role=role_arn
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
kfp.compiler.Compiler().compile(ground_truth_test, __file__ + '.zip')
|
||||
|
|
|
|||
|
|
@ -1,93 +1,10 @@
|
|||
The `mnist-classification-pipeline.py` sample runs a pipeline to train a classification model using K-Means with the MNIST dataset on SageMaker.
The `kmeans-hpo-pipeline.py` is a single-component hyperparameter optimisation pipeline with default values set to use K-Means.

If you do not have `train_data`, `test_data`, and `valid_data`, you can use the following code to get sample data.
(This data can be used for both of these pipelines.)
|
||||
|
||||
## The sample dataset
|
||||
## Prerequisites
|
||||
|
||||
This sample is based on the [Train a Model with a Built-in Algorithm and Deploy it](https://docs.aws.amazon.com/sagemaker/latest/dg/ex1.html).
|
||||
|
||||
The sample trains and deploy a model based on the [MNIST dataset](http://www.deeplearning.net/tutorial/gettingstarted.html).
|
||||
|
||||
|
||||
Create an S3 bucket and use the following python script to copy `train_data`, `test_data`, and `valid_data.csv` to your buckets.
|
||||
(create the bucket in `us-west-2` region if you are gonna use default values of the pipeline)
|
||||
https://docs.aws.amazon.com/AmazonS3/latest/gsg/CreatingABucket.html
|
||||
|
||||
Create a new file named `s3_sample_data_creator.py` with following content :
|
||||
```python
|
||||
import pickle, gzip, numpy, urllib.request, json
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Load the dataset
|
||||
urllib.request.urlretrieve("http://deeplearning.net/data/mnist/mnist.pkl.gz", "mnist.pkl.gz")
|
||||
with gzip.open('mnist.pkl.gz', 'rb') as f:
|
||||
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
|
||||
|
||||
|
||||
# Upload dataset to S3
|
||||
from sagemaker.amazon.common import write_numpy_to_dense_tensor
|
||||
import io
|
||||
import boto3
|
||||
|
||||
###################################################################
|
||||
# This is the only thing that you need to change to run this code
|
||||
# Give the name of your S3 bucket
|
||||
bucket = 'bucket-name'
|
||||
|
||||
# If you are gonna use the default values of the pipeline then
|
||||
# give a bucket name which is in us-west-2 region
|
||||
###################################################################
|
||||
|
||||
train_data_key = 'mnist_kmeans_example/train_data'
|
||||
test_data_key = 'mnist_kmeans_example/test_data'
|
||||
train_data_location = 's3://{}/{}'.format(bucket, train_data_key)
|
||||
test_data_location = 's3://{}/{}'.format(bucket, test_data_key)
|
||||
print('training data will be uploaded to: {}'.format(train_data_location))
|
||||
print('training data will be uploaded to: {}'.format(test_data_location))
|
||||
|
||||
# Convert the training data into the format required by the SageMaker KMeans algorithm
|
||||
buf = io.BytesIO()
|
||||
write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])
|
||||
buf.seek(0)
|
||||
|
||||
boto3.resource('s3').Bucket(bucket).Object(train_data_key).upload_fileobj(buf)
|
||||
|
||||
# Convert the test data into the format required by the SageMaker KMeans algorithm
|
||||
write_numpy_to_dense_tensor(buf, test_set[0], test_set[1])
|
||||
buf.seek(0)
|
||||
|
||||
boto3.resource('s3').Bucket(bucket).Object(test_data_key).upload_fileobj(buf)
|
||||
|
||||
# Convert the valid data into the format required by the SageMaker KMeans algorithm
|
||||
numpy.savetxt('valid-data.csv', valid_set[0], delimiter=',', fmt='%g')
|
||||
s3_client = boto3.client('s3')
|
||||
input_key = "{}/valid_data.csv".format("mnist_kmeans_example/input")
|
||||
s3_client.upload_file('valid-data.csv', bucket, input_key)
|
||||
|
||||
```
|
||||
|
||||
Run this file `python s3_sample_data_creator.py`
|
||||
## SageMaker permission
|
||||
|
||||
In order to run this pipeline, we need to prepare an IAM Role to run Sagemaker jobs. You need this `role_arn` to run a pipeline. Check [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) for details.
|
||||
|
||||
This pipeline also use aws-secret to get access to Sagemaker services, please also make sure you have a `aws-secret` in the kubeflow namespace.
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: aws-secret
|
||||
namespace: kubeflow
|
||||
type: Opaque
|
||||
data:
|
||||
AWS_ACCESS_KEY_ID: YOUR_BASE64_ACCESS_KEY
|
||||
AWS_SECRET_ACCESS_KEY: YOUR_BASE64_SECRET_ACCESS
|
||||
```
|
||||
|
||||
> Note: To get base64 string, try `echo -n $AWS_ACCESS_KEY_ID | base64`
|
||||
Make sure you have the setup explained in this [README.md](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/aws-samples/README.md)
|
||||
|
||||
|
||||
## Compiling the pipeline template
|
||||
|
|
@ -98,6 +15,7 @@ Follow the guide to [building a pipeline](https://www.kubeflow.org/docs/guides/p
|
|||
dsl-compile --py mnist-classification-pipeline.py --output mnist-classification-pipeline.tar.gz
|
||||
```
|
||||
|
||||
|
||||
## Deploying the pipeline
|
||||
|
||||
Open the Kubeflow pipelines UI. Create a new pipeline, and then upload the compiled specification (`.tar.gz` file) as a new pipeline template.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import kfp
|
||||
import json
|
||||
import copy
|
||||
|
|
@ -38,7 +39,7 @@ channelObjList.append(copy.deepcopy(channelObj))
|
|||
name='MNIST HPO test pipeline',
|
||||
description='SageMaker hyperparameter tuning job test'
|
||||
)
|
||||
def hpo_test(region='us-west-2',
|
||||
def hpo_test(region='us-east-1',
|
||||
hpo_job_name='HPO-kmeans-sample',
|
||||
image='',
|
||||
algorithm_name='K-Means',
|
||||
|
|
@ -56,7 +57,7 @@ def hpo_test(region='us-west-2',
|
|||
channels=channelObjList,
|
||||
output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
|
||||
output_encryption_key='',
|
||||
instance_type='ml.p2.16xlarge',
|
||||
instance_type='ml.m5.2xlarge',
|
||||
instance_count=1,
|
||||
volume_size=50,
|
||||
max_num_jobs=1,
|
||||
|
|
@ -114,7 +115,8 @@ def hpo_test(region='us-west-2',
|
|||
checkpoint_config=checkpoint_config,
|
||||
tags=tags,
|
||||
role=role_arn,
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
kfp.compiler.Compiler().compile(hpo_test, __file__ + '.zip')
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import kfp
|
||||
import json
|
||||
import copy
|
||||
|
|
@ -44,8 +45,8 @@ hpoChannels.append(copy.deepcopy(channelObj))
|
|||
name='MNIST Classification pipeline',
|
||||
description='MNIST Classification using KMEANS in SageMaker'
|
||||
)
|
||||
def mnist_classification(region='us-west-2',
|
||||
image='174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1',
|
||||
def mnist_classification(region='us-east-1',
|
||||
image='382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:1',
|
||||
training_input_mode='File',
|
||||
hpo_strategy='Bayesian',
|
||||
hpo_metric_name='test:msd',
|
||||
|
|
@ -61,7 +62,7 @@ def mnist_classification(region='us-west-2',
|
|||
hpo_checkpoint_config={},
|
||||
output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
|
||||
output_encryption_key='',
|
||||
instance_type='ml.p2.16xlarge',
|
||||
instance_type='ml.m5.2xlarge',
|
||||
instance_count=1,
|
||||
volume_size=50,
|
||||
hpo_max_num_jobs=9,
|
||||
|
|
@ -115,7 +116,7 @@ def mnist_classification(region='us-west-2',
|
|||
max_wait_time=hpo_max_wait_time,
|
||||
checkpoint_config=hpo_checkpoint_config,
|
||||
role=role_arn,
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
training = sagemaker_train_op(
|
||||
region=region,
|
||||
|
|
@ -136,7 +137,7 @@ def mnist_classification(region='us-west-2',
|
|||
max_wait_time=train_max_wait_time,
|
||||
checkpoint_config=train_checkpoint_config,
|
||||
role=role_arn,
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
create_model = sagemaker_model_op(
|
||||
region=region,
|
||||
|
|
@ -146,13 +147,13 @@ def mnist_classification(region='us-west-2',
|
|||
model_artifact_url=training.outputs['model_artifact_url'],
|
||||
network_isolation=network_isolation,
|
||||
role=role_arn
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
prediction = sagemaker_deploy_op(
|
||||
region=region,
|
||||
endpoint_url=endpoint_url,
|
||||
model_name_1=create_model.output,
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
batch_transform = sagemaker_batch_transform_op(
|
||||
region=region,
|
||||
|
|
@ -169,7 +170,7 @@ def mnist_classification(region='us-west-2',
|
|||
split_type=batch_transform_split_type,
|
||||
compression_type=batch_transform_compression_type,
|
||||
output_location=batch_transform_ouput
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
kfp.compiler.Compiler().compile(mnist_classification, __file__ + '.zip')
|
||||
|
|
|
|||
|
|
@ -2,100 +2,17 @@
|
|||
|
||||
An example pipeline with only [train component](https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker/train).
|
||||
|
||||
# Prerequisites
|
||||
1. Install Kubeflow on an EKS cluster in AWS. https://www.kubeflow.org/docs/aws/deploy/install-kubeflow/
|
||||
2. Get and store data in S3 buckets. You can get sample data using this code.
|
||||
Create a new file `s3_sample_data_creator.py` with following content :
|
||||
```buildoutcfg
|
||||
import io
|
||||
import boto3
|
||||
import pickle, gzip, numpy, urllib.request, json
|
||||
from urllib.parse import urlparse
|
||||
from sagemaker.amazon.common import write_numpy_to_dense_tensor
|
||||
|
||||
|
||||
###########################################################################################
|
||||
# This is the only thing that you need to change in this code
|
||||
# Give the name of your S3 bucket
|
||||
# To use the example input below give a bucket name which is in us-east-1 region
|
||||
bucket = '<bucket-name>'
|
||||
## Prerequisites
|
||||
|
||||
###########################################################################################
|
||||
|
||||
# Load the dataset
|
||||
urllib.request.urlretrieve("http://deeplearning.net/data/mnist/mnist.pkl.gz", "mnist.pkl.gz")
|
||||
with gzip.open('mnist.pkl.gz', 'rb') as f:
|
||||
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
|
||||
Make sure you have the setup explained in this [README.md](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/aws-samples/README.md)
|
||||
|
||||
|
||||
# Upload dataset to S3
|
||||
data_key = 'mnist_kmeans_example/data'
|
||||
data_location = 's3://{}/{}'.format(bucket, data_key)
|
||||
print('Data will be uploaded to: {}'.format(data_location))
|
||||
|
||||
# Convert the training data into the format required by the SageMaker KMeans algorithm
|
||||
buf = io.BytesIO()
|
||||
write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])
|
||||
buf.seek(0)
|
||||
|
||||
boto3.resource('s3').Bucket(bucket).Object(data_key).upload_fileobj(buf)
|
||||
```
|
||||
Run this file `python s3_sample_data_creator.py`
|
||||
3. Prepare an IAM role with permissions to run SageMaker jobs and access to S3 buckets.
|
||||
|
||||
create a new file "trust.json" with following content
|
||||
```buildoutcfg
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [
|
||||
{
|
||||
"Sid": "",
|
||||
"Effect": "Allow",
|
||||
"Principal": {
|
||||
"Service": "sagemaker.amazonaws.com"
|
||||
},
|
||||
"Action": "sts:AssumeRole"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
```buildoutcfg
|
||||
|
||||
# run these commands to create a role named "SageMakerExecutorKFP" with SageMaker and S3 access
|
||||
aws iam create-role --role-name SageMakerExecutorKFP --assume-role-policy-document file://trust.json
|
||||
aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess --role-name SageMakerExecutorKFP
|
||||
aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess --role-name SageMakerExecutorKFP
|
||||
|
||||
# Note down the role ARN
|
||||
aws iam get-role --role-name SageMakerExecutorKFP # | jq .Role.Arn
|
||||
```
|
||||
4. Add 'aws-secret' to your Kubeflow namespace.
|
||||
```
|
||||
# 1. get aws key and secret in base64 format:
|
||||
|
||||
echo -n "<AWS_ACCESS_KEY_ID>" | base64
|
||||
echo -n "<AWS_SECRET_ACCESS_KEY>" | base64
|
||||
|
||||
# 2. Create new file secret.yaml with following content
|
||||
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: aws-secret
|
||||
namespace: kubeflow
|
||||
type: Opaque
|
||||
data:
|
||||
AWS_ACCESS_KEY_ID: <base64_AWS_ACCESS_KEY_ID>
|
||||
AWS_SECRET_ACCESS_KEY: <base64_AWS_SECRET_ACCESS_KEY>
|
||||
|
||||
# 3. Now apply to the cluster's kubeflow namespace:
|
||||
|
||||
kubectl -n kubeflow apply -f secret.yaml
|
||||
```
|
||||
5. Compile the pipeline:
|
||||
## Steps
|
||||
1. Compile the pipeline:
|
||||
`dsl-compile --py training-pipeline.py --output training-pipeline.tar.gz`
|
||||
6. In the Kubeflow UI, upload this compiled pipeline specification (the .tar.gz file) and click on create run.
|
||||
7. Once the pipeline completes, you can see the outputs under 'Output parameters' in the HPO component's Input/Output section.
|
||||
2. In the Kubeflow UI, upload this compiled pipeline specification (the .tar.gz file) and click on create run.
|
||||
3. Once the pipeline completes, you can see the outputs under 'Output parameters' in the training component's Input/Output section.
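If you prefer to submit the run from Python instead of the UI, here is a rough sketch using the KFP SDK. The host address, run name, and the `role` argument name are assumptions to adapt to your setup (check the parameter names in `training-pipeline.py`).
```python
import kfp

# Assumes the KFP API is reachable, e.g. via `kubectl port-forward -n kubeflow svc/ml-pipeline-ui 8080:80`.
client = kfp.Client(host='http://localhost:8080')
client.create_run_from_pipeline_package(
    'training-pipeline.tar.gz',
    arguments={
        # Placeholder value; use the SageMaker execution role ARN you created earlier.
        'role': 'arn:aws:iam::<account_id>:role/SageMakerExecutorKFP',
    },
    run_name='simple-train-sample',
)
```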
|
||||
|
||||
Example inputs to this pipeline :
|
||||
```buildoutcfg
|
||||
|
|
@ -111,7 +28,7 @@ channels : In this JSON, along with other parameters you need to pass the S3 Uri
|
|||
"ChannelName": "train",
|
||||
"DataSource": {
|
||||
"S3DataSource": {
|
||||
"S3Uri": "s3://<your_bucket_name>/mnist_kmeans_example/data",
|
||||
"S3Uri": "s3://<your_bucket_name>/mnist_kmeans_example/train_data",
|
||||
"S3DataType": "S3Prefix",
|
||||
"S3DataDistributionType": "FullyReplicated"
|
||||
}
|
||||
|
|
@ -123,7 +40,7 @@ channels : In this JSON, along with other parameters you need to pass the S3 Uri
|
|||
}
|
||||
]
|
||||
|
||||
instance_type : ml.p2.xlarge
|
||||
instance_type : ml.m5.2xlarge
|
||||
instance_count : 1
|
||||
volume_size : 50
|
||||
max_run_time : 3600
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Uncomment the apply(use_aws_secret()) below if you are not using OIDC
|
||||
# more info : https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/README.md
|
||||
|
||||
import kfp
|
||||
import json
|
||||
import copy
|
||||
|
|
@ -41,7 +44,7 @@ def training(
|
|||
training_input_mode='File',
|
||||
hyperparameters={"k": "10", "feature_dim": "784"},
|
||||
channels=channelObjList,
|
||||
instance_type='ml.p2.xlarge',
|
||||
instance_type='ml.m5.2xlarge',
|
||||
instance_count=1,
|
||||
volume_size=50,
|
||||
max_run_time=3600,
|
||||
|
|
@ -73,7 +76,8 @@ def training(
|
|||
max_wait_time=max_wait_time,
|
||||
checkpoint_config=checkpoint_config,
|
||||
role=role,
|
||||
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
)#.apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
kfp.compiler.Compiler().compile(training, __file__ + '.zip')
|
||||
|
|
|
|||