Update images, bug fixes, clean up code (#1778)
* Update docker images and minor refactoring
* Update image tag, bug fixes, remove unneeded imports
* Revert to using image version, use original batch transform output method
* Forgot to save 2 changes
parent 9c79163287
commit 69ca3c7e4b

@@ -71,7 +71,7 @@ outputs:
   - {name: output_location, description: 'S3 URI of the transform job results.'}
 implementation:
   container:
-    image: carowang/kubeflow-pipeline-aws-sm:20190801-01
+    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
     command: ['python']
     args: [
       batch_transform.py,
@@ -98,5 +98,5 @@ implementation:
       --instance_count, {inputValue: instance_count},
       --resource_encryption_key, {inputValue: resource_encryption_key},
       --tags, {inputValue: tags},
-      --output_location_file, {outputPath: output_location},
+      --output_location_file, {outputPath: output_location}
     ]
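
For context on the {outputPath: output_location} placeholder above: at runtime KFP substitutes a container-local file path for it, and whatever batch_transform.py writes to that path becomes the step's output_location output. A minimal consumer-side sketch (component path and inputs are illustrative, not part of this commit):

    from kfp import components, dsl

    # Load the batch transform component; the path here is illustrative.
    batch_transform_op = components.load_component_from_file('batch_transform/component.yaml')

    @dsl.pipeline(name='batch-transform-example')
    def pipeline():
      transform = batch_transform_op(...)  # required SageMaker inputs elided
      # The S3 URI written to --output_location_file surfaces here:
      s3_uri = transform.outputs['output_location']
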
@@ -12,9 +12,6 @@

 import argparse
 import logging
-import random
-import json
-from datetime import datetime
 from pathlib2 import Path

 from common import _utils
@@ -56,7 +53,6 @@ def main(argv=None):
   batch_job_name = _utils.create_transform_job(client, vars(args))
   logging.info('Batch Job request submitted. Waiting for completion...')
   _utils.wait_for_transform_job(client, batch_job_name)
-  _utils.print_tranformation_job_result(args.output_location)

   Path(args.output_location_file).parent.mkdir(parents=True, exist_ok=True)
   Path(args.output_location_file).write_text(unicode(args.output_location))
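
A note on the last line: unicode() pins this script to Python 2 (the urlparse import removed from common/_utils.py below points the same way), since pathlib2's write_text rejects byte strings on Python 2. A version-agnostic sketch, assuming the six library is available in the image:

    from pathlib2 import Path
    import six

    def write_output(path, value):
      # six.text_type is unicode on Python 2 and str on Python 3,
      # satisfying pathlib2's write_text in either case.
      Path(path).parent.mkdir(parents=True, exist_ok=True)
      Path(path).write_text(six.text_type(value))
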
@@ -10,8 +10,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.


-import datetime
-import argparse
 from time import gmtime, strftime
 import time
@@ -19,7 +17,6 @@ import string
 import random
 import json
 import yaml
-from urlparse import urlparse

 import boto3
 from botocore.exceptions import ClientError
@@ -80,7 +77,7 @@ def create_training_job_request(args):
     request['AlgorithmSpecification']['TrainingImage'] = args['image']
     request['AlgorithmSpecification'].pop('AlgorithmName')
   else:
-    # TODO: determine if users can make custom algorithm resources that have the same name as built-in algorithm names
+    # TODO: Adjust this implementation to account for custom algorithm resources names that are the same as built-in algorithm names
     algo_name = args['algorithm_name'].lower().strip()
     if algo_name in built_in_algos.keys():
       request['AlgorithmSpecification']['TrainingImage'] = get_image_uri(args['region'], built_in_algos[algo_name])
@@ -114,7 +111,7 @@ def create_training_job_request(args):
     request['InputDataConfig'] = args['channels']
     # Max number of input channels/data locations is 20, but currently only 8 data location parameters are exposed separately.
     # Source: Input data configuration description in the SageMaker create training job form
-    for i in range(1, len(args['channels'] + 1)):
+    for i in range(1, len(args['channels']) + 1):
       if args['data_location_' + str(i)]:
         request['InputDataConfig'][i-1]['DataSource']['S3DataSource']['S3Uri'] = args['data_location_' + str(i)]
   else:
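
The old loop fails at runtime, not just in style: the misplaced parenthesis makes Python evaluate args['channels'] + 1, and adding an int to a list raises TypeError before len is ever called. A minimal repro with illustrative values (the same fix appears again in the hyperparameter tuning hunk below):

    channels = [{'ChannelName': 'train'}, {'ChannelName': 'validation'}]

    # Broken: TypeError: can only concatenate list (not "int") to list
    # for i in range(1, len(channels + 1)):

    # Fixed: 1-based indices 1..len(channels), matching data_location_1..N
    for i in range(1, len(channels) + 1):
      print('data_location_' + str(i))
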
@@ -412,7 +409,7 @@ def create_transform_job(client, args):
   client.create_transform_job(**request)
   batch_job_name = request['TransformJobName']
   logging.info("Created Transform Job with name: " + batch_job_name)
-  logging.info("Transform job in SageMaker: https://{}.console.aws.amazon.com/sagemaker/home?region={}#/jobs/{}"
+  logging.info("Transform job in SageMaker: https://{}.console.aws.amazon.com/sagemaker/home?region={}#/transform-jobs/{}"
     .format(args['region'], args['region'], batch_job_name))
   logging.info("CloudWatch logs: https://{}.console.aws.amazon.com/cloudwatch/home?region={}#logStream:group=/aws/sagemaker/TransformJobs;prefix={};streamFilter=typeLogStreamPrefix"
     .format(args['region'], args['region'], batch_job_name))
@@ -437,17 +434,6 @@ def wait_for_transform_job(client, batch_job_name):
     time.sleep(30)


-def print_tranformation_job_result(output_location):
-  ### Fetch the transform output
-  bucket = urlparse(output_location).netloc
-  output_key = "{}/valid_data.csv.out".format(urlparse(output_location).path.lstrip('/'))
-  s3_client = boto3.client('s3')
-  s3_client.download_file(bucket, output_key, 'valid-result')
-  with open('valid-result') as f:
-    results = f.readlines()
-  logging.info("Sample transform result: {}".format(results[0]))
-
-
 def create_hyperparameter_tuning_job_request(args):
   ### Documentation: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_hyper_parameter_tuning_job
   with open('/app/common/hpo.template.yaml', 'r') as f:
@@ -514,9 +500,9 @@ def create_hyperparameter_tuning_job_request(args):
     request['TrainingJobDefinition']['InputDataConfig'] = args['channels']
     # Max number of input channels/data locations is 20, but currently only 8 data location parameters are exposed separately.
     # Source: Input data configuration description in the SageMaker create hyperparameter tuning job form
-    for i in range(1, len(args['channels'] + 1):
+    for i in range(1, len(args['channels']) + 1):
       if args['data_location_' + str(i)]:
-        request['InputDataConfig'][i-1]['DataSource']['S3DataSource']['S3Uri'] = args['data_location_' + str(i)]
+        request['TrainingJobDefinition']['InputDataConfig'][i-1]['DataSource']['S3DataSource']['S3Uri'] = args['data_location_' + str(i)]
   else:
     logging.error("Must specify at least one input channel.")
     raise Exception('Could not make job request')
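
The second fix in this hunk matters because a hyperparameter tuning request nests the training configuration one level deeper than a plain training request: InputDataConfig lives under TrainingJobDefinition, so the old assignment added a stray top-level key instead of overriding the channel's S3 URI. A trimmed sketch of the boto3 request shape (most fields elided; values are placeholders):

    # Shape accepted by SageMaker.Client.create_hyper_parameter_tuning_job
    request = {
      'HyperParameterTuningJobName': 'example-tuning-job',
      'HyperParameterTuningJobConfig': {},   # strategy, objective, resource limits
      'TrainingJobDefinition': {
        'AlgorithmSpecification': {},        # image / algorithm name, input mode
        'InputDataConfig': [                 # <- the list patched in the loop above
          {'ChannelName': 'train',
           'DataSource': {'S3DataSource': {'S3Uri': 's3://bucket/train'}}},
        ],
      },
    }

    # request['InputDataConfig'] would create a new top-level key that the
    # CreateHyperParameterTuningJob API does not accept.
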
@@ -76,7 +76,7 @@ outputs:
   - {name: endpoint_name, description: 'Endpoint name'}
 implementation:
   container:
-    image: carowang/kubeflow-pipeline-aws-sm:20190801-01
+    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
     command: ['python']
     args: [
       deploy.py,

@@ -12,12 +12,9 @@

 import argparse
 import logging
-import random
-from datetime import datetime
-
 from common import _utils


 def main(argv=None):
   parser = argparse.ArgumentParser(description='SageMaker Training Job')
   parser.add_argument('--region', type=str.strip, required=True, help='The region where the cluster launches.')

@@ -85,7 +85,7 @@ outputs:
   - {name: active_learning_model_arn, description: 'The ARN for the most recent Amazon SageMaker model trained as part of automated data labeling.'}
 implementation:
   container:
-    image: carowang/kubeflow-pipeline-aws-sm:20190801-01
+    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
     command: ['python']
     args: [
       ground_truth.py,

@@ -12,10 +12,6 @@

 import argparse
 import logging
-import random
-import json
-from datetime import datetime
-from pathlib2 import Path

 from common import _utils

@@ -125,7 +125,7 @@ outputs:
     description: 'The registry path of the Docker image that contains the training algorithm'
 implementation:
   container:
-    image: carowang/kubeflow-pipeline-aws-sm:20190801-01
+    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
     command: ['python']
     args: [
       hyperparameter_tuning.py,

@@ -12,10 +12,7 @@

 import argparse
 import logging
-import random
-import json
-from datetime import datetime
 from pathlib2 import Path

 from common import _utils

@@ -42,7 +42,7 @@ outputs:
   - {name: model_name, description: 'The model name Sagemaker created'}
 implementation:
   container:
-    image: carowang/kubeflow-pipeline-aws-sm:20190801-01
+    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
     command: ['python']
     args: [
       create_model.py,

@@ -12,8 +12,6 @@

 import argparse
 import logging
-import random
-from datetime import datetime

 from common import _utils

@@ -91,7 +91,7 @@ outputs:
   - {name: training_image, description: 'The registry path of the Docker image that contains the training algorithm'}
 implementation:
   container:
-    image: carowang/kubeflow-pipeline-aws-sm:20190801-01
+    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
     command: ['python']
     args: [
       train.py,

@@ -12,9 +12,6 @@

 import argparse
 import logging
-import random
-import json
-from datetime import datetime

 from common import _utils

@@ -24,7 +24,7 @@ outputs:
   - {name: workteam_arn, description: 'The ARN of the workteam.'}
 implementation:
   container:
-    image: carowang/kubeflow-pipeline-aws-sm:20190801-01
+    image: carowang/kubeflow-pipeline-aws-sm:20190809-02
     command: ['python']
     args: [
       workteam.py,

@@ -12,10 +12,6 @@

 import argparse
 import logging
-import random
-import json
-from datetime import datetime
-from pathlib2 import Path

 from common import _utils
@@ -28,7 +24,7 @@ def main(argv=None):
   parser.add_argument('--user_groups', type=str.strip, required=False, help='A list of identifiers for user groups separated by commas.', default='')
   parser.add_argument('--client_id', type=str.strip, required=False, help='An identifier for an application client. You must create the app client ID using Amazon Cognito.', default='')
   parser.add_argument('--sns_topic', type=str.strip, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='')
-  parser.add_argument('--tags', type=json.loads, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')
+  parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')

   args = parser.parse_args()

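
_utils.str_to_json_dict is new here, but its definition is not part of this diff; presumably it trims the stray whitespace that type=str.strip handles for the other flags before parsing JSON. A hypothetical sketch of what it might look like (name exists in the diff; the behavior is assumed, not confirmed by this commit):

    import json

    def str_to_json_dict(text):
      # Hypothetical helper: strip surrounding whitespace from the CLI value,
      # then parse it as JSON, so '--tags  {"k": "v"} ' still yields a dict.
      return json.loads(text.strip())
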
@@ -22,13 +22,13 @@ def ground_truth_test(region='us-west-2',
                       ground_truth_train_job_name='mini-image-classification-demo-train',
                       ground_truth_validation_job_name='mini-image-classification-demo-validation',
                       ground_truth_label_attribute_name='category',
-                      ground_truth_train_manifest_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/train.manifest',
-                      ground_truth_validation_manifest_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/validation.manifest',
-                      ground_truth_output_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/output',
+                      ground_truth_train_manifest_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/train.manifest',
+                      ground_truth_validation_manifest_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/validation.manifest',
+                      ground_truth_output_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/output',
                       ground_truth_task_type='image classification',
                       ground_truth_worker_type='private',
-                      ground_truth_label_category_config='s3://your-bucket-name/gt-demo-images/ground-truth-demo/class_labels.json',
-                      ground_truth_ui_template='s3://your-bucket-name/gt-demo-images/ground-truth-demo/instructions.template',
+                      ground_truth_label_category_config='s3://your-bucket-name/mini-image-classification/ground-truth-demo/class_labels.json',
+                      ground_truth_ui_template='s3://your-bucket-name/mini-image-classification/ground-truth-demo/instructions.template',
                       ground_truth_title='Mini image classification',
                       ground_truth_description='Test for Ground Truth KFP component',
                       ground_truth_num_workers_per_object='1',
@@ -62,7 +62,7 @@ def ground_truth_test(region='us-west-2',
                       "ContentType": "application/x-recordio", \
                       "CompressionType": "None", \
                       "RecordWrapperType": "RecordIO"}]',
-                      training_output_location='s3://your-bucket-name/gt-demo-images/training-output',
+                      training_output_location='s3://your-bucket-name/mini-image-classification/training-output',
                       training_instance_type='ml.p2.xlarge',
                       training_instance_count='1',
                       training_volume_size='50',

@@ -27,7 +27,7 @@ ims['Fruit'] = [label[0] for label in all_labels if (label[2] == '/m/02xwb' and
 ims['Fruit'].remove('02a54f6864478101') # This image contains personal information, let's remove it from our dataset.
 num_classes = len(ims)

-# If running the short version of the demo, reduce each class count 40 times.
+# If running the short version of the demo, reduce each class count 50 times.
 for key in ims.keys():
     ims[key] = set(ims[key][:int(len(ims[key]) / 50)])

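
This one is a comment-only fix: the code divides by 50 while the old comment said 40, so each class keeps the first 2% of its image IDs. A quick check of the arithmetic with illustrative values:

    ids = ['img%04d' % i for i in range(1000)]   # a class with 1,000 image IDs
    short = set(ids[:int(len(ids) / 50)])        # keep the first 1000/50 = 20
    print(len(short))                            # -> 20
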
@@ -6,9 +6,9 @@ from kfp import dsl
 from kfp import gcp
 from kfp.aws import use_aws_secret

-emr_create_cluster_op = components.load_component_from_file('../../../components/aws/emr/create_cluster/component.yaml')
-emr_submit_spark_job_op = components.load_component_from_file('../../../components/aws/emr/submit_spark_job/component.yaml')
-emr_delete_cluster_op = components.load_component_from_file('../../../components/aws/emr/delete_cluster/component.yaml')
+emr_create_cluster_op = components.load_component_from_file('../../../../components/aws/emr/create_cluster/component.yaml')
+emr_submit_spark_job_op = components.load_component_from_file('../../../../components/aws/emr/submit_spark_job/component.yaml')
+emr_delete_cluster_op = components.load_component_from_file('../../../../components/aws/emr/delete_cluster/component.yaml')

 @dsl.pipeline(
     name='Titanic Suvival Prediction Pipeline',
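
The extra ../ in the EMR component paths suggests this sample now sits one directory deeper relative to the components tree. Relative loads like these depend on the caller's working directory; a sketch of a more robust pattern, anchoring on the script's own location (illustrative only, not part of this commit):

    import os
    from kfp import components

    # Resolve component.yaml files relative to this script instead of the CWD.
    _HERE = os.path.dirname(os.path.abspath(__file__))

    def _load(rel_path):
      return components.load_component_from_file(os.path.join(_HERE, rel_path))

    emr_create_cluster_op = _load('../../../../components/aws/emr/create_cluster/component.yaml')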