Update images, bug fixes, clean up code (#1778)

* Update docker images and minor refactoring

* Update image tag, bug fixes, remove unneeded imports

* Revert to using image version, use origin batch transform output method

* Forgot to save 2 changes
carolynwang 2019-08-09 15:25:13 -07:00 committed by Kubernetes Prow Robot
parent 9c79163287
commit 69ca3c7e4b
18 changed files with 24 additions and 61 deletions

View File

@@ -71,7 +71,7 @@ outputs:
- {name: output_location, description: 'S3 URI of the transform job results.'}
implementation:
container:
image: carowang/kubeflow-pipeline-aws-sm:20190801-01
image: carowang/kubeflow-pipeline-aws-sm:20190809-02
command: ['python']
args: [
batch_transform.py,
@@ -98,5 +98,5 @@ implementation:
--instance_count, {inputValue: instance_count},
--resource_encryption_key, {inputValue: resource_encryption_key},
--tags, {inputValue: tags},
--output_location_file, {outputPath: output_location},
--output_location_file, {outputPath: output_location}
]

View File

@@ -12,9 +12,6 @@
import argparse
import logging
import random
import json
from datetime import datetime
from pathlib2 import Path
from common import _utils
@@ -56,7 +53,6 @@ def main(argv=None):
batch_job_name = _utils.create_transform_job(client, vars(args))
logging.info('Batch Job request submitted. Waiting for completion...')
_utils.wait_for_transform_job(client, batch_job_name)
_utils.print_tranformation_job_result(args.output_location)
Path(args.output_location_file).parent.mkdir(parents=True, exist_ok=True)
Path(args.output_location_file).write_text(unicode(args.output_location))
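
For context, the two Path lines above are the standard KFP file-output pattern: component.yaml maps {outputPath: output_location} to a local file path, and the program writes the S3 URI to that path so downstream steps can read it. A minimal Python 2 sketch of the same pattern, using a hypothetical path and URI:

    from pathlib2 import Path  # Python 2 backport of pathlib

    output_location = 's3://example-bucket/transform-output'    # hypothetical S3 URI
    output_location_file = '/tmp/outputs/output_location/data'  # hypothetical KFP-provided path

    # Create the parent directory KFP expects, then write the URI as the output.
    Path(output_location_file).parent.mkdir(parents=True, exist_ok=True)
    Path(output_location_file).write_text(unicode(output_location))  # unicode(): Python 2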

View File

@@ -10,8 +10,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import argparse
from time import gmtime, strftime
import time
@@ -19,7 +17,6 @@ import string
import random
import json
import yaml
from urlparse import urlparse
import boto3
from botocore.exceptions import ClientError
@@ -80,7 +77,7 @@ def create_training_job_request(args):
request['AlgorithmSpecification']['TrainingImage'] = args['image']
request['AlgorithmSpecification'].pop('AlgorithmName')
else:
# TODO: determine if users can make custom algorithm resources that have the same name as built-in algorithm names
# TODO: Adjust this implementation to account for custom algorithm resource names that are the same as built-in algorithm names
algo_name = args['algorithm_name'].lower().strip()
if algo_name in built_in_algos.keys():
request['AlgorithmSpecification']['TrainingImage'] = get_image_uri(args['region'], built_in_algos[algo_name])
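
The branch above normalizes the user-supplied algorithm name and, on a match, resolves the regional image URI for the built-in algorithm. A rough sketch of the lookup; the built_in_algos entries are an assumed subset, and get_image_uri is the sagemaker SDK helper the diff already uses:

    from sagemaker.amazon.amazon_estimator import get_image_uri

    # Assumed subset of the component's built_in_algos mapping.
    built_in_algos = {'xgboost': 'xgboost', 'kmeans': 'kmeans'}

    def resolve_training_image(region, algorithm_name):
        algo_name = algorithm_name.lower().strip()  # normalize like the component does
        if algo_name in built_in_algos:
            return get_image_uri(region, built_in_algos[algo_name])
        return None  # fall through to custom algorithm resource handling

    print(resolve_training_image('us-west-2', ' XGBoost '))
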
@@ -114,7 +111,7 @@ def create_training_job_request(args):
request['InputDataConfig'] = args['channels']
# Max number of input channels/data locations is 20, but currently only 8 data location parameters are exposed separately.
# Source: Input data configuration description in the SageMaker create training job form
for i in range(1, len(args['channels'] + 1)):
for i in range(1, len(args['channels']) + 1):
if args['data_location_' + str(i)]:
request['InputDataConfig'][i-1]['DataSource']['S3DataSource']['S3Uri'] = args['data_location_' + str(i)]
else:
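
The loop fix above moves a single parenthesis: the old code adds 1 to the channels list itself rather than to its length, which raises a TypeError before the loop ever runs. A quick demonstration:

    channels = [{'ChannelName': 'train'}, {'ChannelName': 'validation'}]

    try:
        range(1, len(channels + 1))           # old form: list + int raises TypeError
    except TypeError as err:
        print('old form fails: {}'.format(err))

    print(list(range(1, len(channels) + 1)))  # fixed form: [1, 2]
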
@@ -412,7 +409,7 @@ def create_transform_job(client, args):
client.create_transform_job(**request)
batch_job_name = request['TransformJobName']
logging.info("Created Transform Job with name: " + batch_job_name)
logging.info("Transform job in SageMaker: https://{}.console.aws.amazon.com/sagemaker/home?region={}#/jobs/{}"
logging.info("Transform job in SageMaker: https://{}.console.aws.amazon.com/sagemaker/home?region={}#/transform-jobs/{}"
.format(args['region'], args['region'], batch_job_name))
logging.info("CloudWatch logs: https://{}.console.aws.amazon.com/cloudwatch/home?region={}#logStream:group=/aws/sagemaker/TransformJobs;prefix={};streamFilter=typeLogStreamPrefix"
.format(args['region'], args['region'], batch_job_name))
@@ -437,17 +434,6 @@ def wait_for_transform_job(client, batch_job_name):
time.sleep(30)
def print_tranformation_job_result(output_location):
### Fetch the transform output
bucket = urlparse(output_location).netloc
output_key = "{}/valid_data.csv.out".format(urlparse(output_location).path.lstrip('/'))
s3_client = boto3.client('s3')
s3_client.download_file(bucket, output_key, 'valid-result')
with open('valid-result') as f:
results = f.readlines()
logging.info("Sample transform result: {}".format(results[0]))
def create_hyperparameter_tuning_job_request(args):
### Documentation: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_hyper_parameter_tuning_job
with open('/app/common/hpo.template.yaml', 'r') as f:
@@ -514,9 +500,9 @@ def create_hyperparameter_tuning_job_request(args):
request['TrainingJobDefinition']['InputDataConfig'] = args['channels']
# Max number of input channels/data locations is 20, but currently only 8 data location parameters are exposed separately.
# Source: Input data configuration description in the SageMaker create hyperparameter tuning job form
for i in range(1, len(args['channels'] + 1)):
for i in range(1, len(args['channels']) + 1):
if args['data_location_' + str(i)]:
request['InputDataConfig'][i-1]['DataSource']['S3DataSource']['S3Uri'] = args['data_location_' + str(i)]
request['TrainingJobDefinition']['InputDataConfig'][i-1]['DataSource']['S3DataSource']['S3Uri'] = args['data_location_' + str(i)]
else:
logging.error("Must specify at least one input channel.")
raise Exception('Could not make job request')
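
This hunk carries the same parenthesis fix plus a targeting fix: for hyperparameter tuning, the channels sit under TrainingJobDefinition, so the loop must patch request['TrainingJobDefinition']['InputDataConfig'] rather than a top-level key. A skeletal view of the nesting, with placeholder values:

    request = {
        'HyperParameterTuningJobName': 'example-tuning-job',  # placeholder
        'TrainingJobDefinition': {
            'InputDataConfig': [
                {'ChannelName': 'train',
                 'DataSource': {'S3DataSource': {'S3Uri': ''}}},
            ],
        },
    }

    data_locations = ['s3://your-bucket-name/train']  # placeholder, one per channel
    for i in range(1, len(data_locations) + 1):
        channel = request['TrainingJobDefinition']['InputDataConfig'][i - 1]
        channel['DataSource']['S3DataSource']['S3Uri'] = data_locations[i - 1]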

View File

@@ -76,7 +76,7 @@ outputs:
- {name: endpoint_name, description: 'Endpoint name'}
implementation:
container:
image: carowang/kubeflow-pipeline-aws-sm:20190801-01
image: carowang/kubeflow-pipeline-aws-sm:20190809-02
command: ['python']
args: [
deploy.py,

View File

@@ -12,12 +12,9 @@
import argparse
import logging
import random
from datetime import datetime
from common import _utils
def main(argv=None):
parser = argparse.ArgumentParser(description='SageMaker Training Job')
parser.add_argument('--region', type=str.strip, required=True, help='The region where the cluster launches.')

View File

@@ -85,7 +85,7 @@ outputs:
- {name: active_learning_model_arn, description: 'The ARN for the most recent Amazon SageMaker model trained as part of automated data labeling.'}
implementation:
container:
image: carowang/kubeflow-pipeline-aws-sm:20190801-01
image: carowang/kubeflow-pipeline-aws-sm:20190809-02
command: ['python']
args: [
ground_truth.py,

View File

@@ -12,10 +12,6 @@
import argparse
import logging
import random
import json
from datetime import datetime
from pathlib2 import Path
from common import _utils

View File

@@ -125,7 +125,7 @@ outputs:
description: 'The registry path of the Docker image that contains the training algorithm'
implementation:
container:
image: carowang/kubeflow-pipeline-aws-sm:20190801-01
image: carowang/kubeflow-pipeline-aws-sm:20190809-02
command: ['python']
args: [
hyperparameter_tuning.py,

View File

@@ -12,10 +12,7 @@
import argparse
import logging
import random
import json
from datetime import datetime
from pathlib2 import Path
from common import _utils

View File

@@ -42,7 +42,7 @@ outputs:
- {name: model_name, description: 'The model name Sagemaker created'}
implementation:
container:
image: carowang/kubeflow-pipeline-aws-sm:20190801-01
image: carowang/kubeflow-pipeline-aws-sm:20190809-02
command: ['python']
args: [
create_model.py,

View File

@@ -12,8 +12,6 @@
import argparse
import logging
import random
from datetime import datetime
from common import _utils

View File

@@ -91,7 +91,7 @@ outputs:
- {name: training_image, description: 'The registry path of the Docker image that contains the training algorithm'}
implementation:
container:
image: carowang/kubeflow-pipeline-aws-sm:20190801-01
image: carowang/kubeflow-pipeline-aws-sm:20190809-02
command: ['python']
args: [
train.py,

View File

@@ -12,9 +12,6 @@
import argparse
import logging
import random
import json
from datetime import datetime
from common import _utils

View File

@@ -24,7 +24,7 @@ outputs:
- {name: workteam_arn, description: 'The ARN of the workteam.'}
implementation:
container:
image: carowang/kubeflow-pipeline-aws-sm:20190801-01
image: carowang/kubeflow-pipeline-aws-sm:20190809-02
command: ['python']
args: [
workteam.py,

View File

@@ -12,10 +12,6 @@
import argparse
import logging
import random
import json
from datetime import datetime
from pathlib2 import Path
from common import _utils
@@ -28,7 +24,7 @@ def main(argv=None):
parser.add_argument('--user_groups', type=str.strip, required=False, help='A list of identifiers for user groups separated by commas.', default='')
parser.add_argument('--client_id', type=str.strip, required=False, help='An identifier for an application client. You must create the app client ID using Amazon Cognito.', default='')
parser.add_argument('--sns_topic', type=str.strip, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='')
parser.add_argument('--tags', type=json.loads, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
args = parser.parse_args()
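
The --tags change swaps json.loads for a shared _utils.str_to_json_dict helper. The helper's body is not shown in this diff; a plausible minimal reconstruction, assuming it only strips the raw string before parsing:

    import json

    def str_to_json_dict(raw):
        # Hypothetical implementation: tolerate surrounding whitespace, then parse JSON.
        return json.loads(raw.strip())

    print(str_to_json_dict(' {"team": "mlops"} '))  # -> {'team': 'mlops'}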

View File

@@ -22,13 +22,13 @@ def ground_truth_test(region='us-west-2',
ground_truth_train_job_name='mini-image-classification-demo-train',
ground_truth_validation_job_name='mini-image-classification-demo-validation',
ground_truth_label_attribute_name='category',
ground_truth_train_manifest_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/train.manifest',
ground_truth_validation_manifest_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/validation.manifest',
ground_truth_output_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/output',
ground_truth_train_manifest_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/train.manifest',
ground_truth_validation_manifest_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/validation.manifest',
ground_truth_output_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/output',
ground_truth_task_type='image classification',
ground_truth_worker_type='private',
ground_truth_label_category_config='s3://your-bucket-name/gt-demo-images/ground-truth-demo/class_labels.json',
ground_truth_ui_template='s3://your-bucket-name/gt-demo-images/ground-truth-demo/instructions.template',
ground_truth_label_category_config='s3://your-bucket-name/mini-image-classification/ground-truth-demo/class_labels.json',
ground_truth_ui_template='s3://your-bucket-name/mini-image-classification/ground-truth-demo/instructions.template',
ground_truth_title='Mini image classification',
ground_truth_description='Test for Ground Truth KFP component',
ground_truth_num_workers_per_object='1',
@@ -62,7 +62,7 @@ def ground_truth_test(region='us-west-2',
"ContentType": "application/x-recordio", \
"CompressionType": "None", \
"RecordWrapperType": "RecordIO"}]',
training_output_location='s3://your-bucket-name/gt-demo-images/training-output',
training_output_location='s3://your-bucket-name/mini-image-classification/training-output',
training_instance_type='ml.p2.xlarge',
training_instance_count='1',
training_volume_size='50',

View File

@@ -27,7 +27,7 @@ ims['Fruit'] = [label[0] for label in all_labels if (label[2] == '/m/02xwb' and
ims['Fruit'].remove('02a54f6864478101') # This image contains personal information, let's remove it from our dataset.
num_classes = len(ims)
# If running the short version of the demo, reduce each class count 40 times.
# If running the short version of the demo, reduce each class count 50 times.
for key in ims.keys():
ims[key] = set(ims[key][:int(len(ims[key]) / 50)])
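
The updated comment now matches the code, which keeps the first 1/50th of each class (the old comment said 40). With an assumed class of 500 images:

    ims = {'Fruit': ['img-%04d' % i for i in range(500)]}   # assumed 500 images per class
    for key in ims.keys():
        ims[key] = set(ims[key][:int(len(ims[key]) / 50)])  # 500 -> first 10 images
    print(len(ims['Fruit']))                                # 10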

View File

@@ -6,9 +6,9 @@ from kfp import dsl
from kfp import gcp
from kfp.aws import use_aws_secret
emr_create_cluster_op = components.load_component_from_file('../../../components/aws/emr/create_cluster/component.yaml')
emr_submit_spark_job_op = components.load_component_from_file('../../../components/aws/emr/submit_spark_job/component.yaml')
emr_delete_cluster_op = components.load_component_from_file('../../../components/aws/emr/delete_cluster/component.yaml')
emr_create_cluster_op = components.load_component_from_file('../../../../components/aws/emr/create_cluster/component.yaml')
emr_submit_spark_job_op = components.load_component_from_file('../../../../components/aws/emr/submit_spark_job/component.yaml')
emr_delete_cluster_op = components.load_component_from_file('../../../../components/aws/emr/delete_cluster/component.yaml')
@dsl.pipeline(
name='Titanic Survival Prediction Pipeline',
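
On the EMR path fix above: load_component_from_file reads the YAML relative to the process working directory, so when a notebook moves one directory deeper each path needs an extra ../, otherwise the pipeline fails before compilation. A short sketch of the loader in use:

    from kfp import components

    # The loader returns an op factory built from the component's YAML definition.
    emr_create_cluster_op = components.load_component_from_file(
        '../../../../components/aws/emr/create_cluster/component.yaml')

    # Inside a @dsl.pipeline function, calling the factory with the inputs declared
    # in component.yaml adds the corresponding step to the pipeline.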