fix indentation

Rebecca McFadden 2019-08-02 15:18:30 -07:00
parent 733923410c
commit 10f3a1fc32
6 changed files with 495 additions and 502 deletions

View File

@ -1,8 +1,8 @@
import json
import time
import requests
import datetime
import numpy as np
from PIL import Image
from io import BytesIO
import tensorflow as tf
@ -10,81 +10,81 @@ import tensorflow as tf
from azureml.core.model import Model
def init():
    global model

    try:
        model_path = Model.get_model_path('tacosandburritos')
    except:
        model_path = '/model/latest.h5'

    print('Attempting to load model')
    model = tf.keras.models.load_model(model_path)
    model.summary()
    print('Done!')

    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))

def run(raw_data):
    global model
    prev_time = time.time()

    post = json.loads(raw_data)
    img_path = post['image']

    tensor = process_image(img_path, 160)
    t = tf.reshape(tensor, [-1, 160, 160, 3])
    o = model.predict(t, steps=1)  # [0][0]
    print(o)
    o = o[0][0]

    # take the timestamp after the prediction so inference_time covers
    # preprocessing and predict rather than just the JSON parse
    current_time = time.time()
    inference_time = datetime.timedelta(seconds=current_time - prev_time)
    payload = {
        'time': inference_time.total_seconds(),
        'prediction': 'burrito' if o > 0.5 else 'tacos',
        'scores': str(o)
    }

    print('Input ({}), Prediction ({})'.format(post['image'], payload))

    return payload

def process_image(path, image_size):
    # Extract image (from web or path)
    if path.startswith('http'):
        response = requests.get(path)
        img = np.array(Image.open(BytesIO(response.content)))
    else:
        img = np.array(Image.open(path))

    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
    # tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final


def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)

if __name__ == "__main__":
    images = {
        'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
        'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
    }

    init()

    for k, v in images.items():
        print('{} => {}'.format(k, v))

    info('Taco Test')
    taco = json.dumps({'image': images['tacos']})
    print(taco)
    run(taco)

    info('Burrito Test')
    burrito = json.dumps({'image': images['burrito']})
    print(burrito)
    run(burrito)
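
Once the pipeline's deploy step has published this scoring script behind an Azure Container Instances web service, the same JSON contract that run() expects applies over HTTP. A minimal sketch, assuming a hypothetical scoring URI (the real one comes from the deployed service, and an authorization header may also be required):

import json
import requests

scoring_uri = 'http://<your-aci-service>.azurecontainer.io/score'  # placeholder, not from this commit
headers = {'Content-Type': 'application/json'}
body = json.dumps({'image': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'})

resp = requests.post(scoring_uri, data=body, headers=headers)
print(resp.json())  # e.g. {'time': ..., 'prediction': 'burrito', 'scores': '...'}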

View File

@ -1,75 +1,73 @@
import kfp.dsl as dsl
import kfp.compiler as compiler
from kubernetes import client as k8s_client
@dsl.pipeline(
    name='Tacos vs. Burritos',
    description='Simple TF CNN for binary classifier between burritos and tacos'
)
def tacosandburritos_train(
    tenant_id,
    service_principal_id,
    service_principal_password,
    subscription_id,
    resource_group,
    workspace,
    persistent_volume_path='/mnt/azure',
    data_download='https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
    epochs=5,
    batch=32,
    learning_rate=0.0001,
    model_name='tacosandburritos',
    profile_name='tacoprofile'
):
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'

    # preprocess data
    operations['preprocess'] = dsl.ContainerOp(
        name='preprocess',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/data.py',
            '--base_path', persistent_volume_path,
            '--data', training_folder,
            '--target', training_dataset,
            '--img_size', image_size,
            '--zipfile', data_download
        ]
    )

    # train
    operations['training'] = dsl.ContainerOp(
        name='training',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/train.py',
            '--base_path', persistent_volume_path,
            '--data', training_folder,
            '--epochs', epochs,
            '--batch', batch,
            '--image_size', image_size,
            '--lr', learning_rate,
            '--outputs', model_folder,
            '--dataset', training_dataset
        ]
    )
    operations['training'].after(operations['preprocess'])

    # register model
    operations['register'] = dsl.ContainerOp(
        name='register',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/register.py',
            '--base_path', persistent_volume_path,
            '--model', 'latest.h5',
@ -80,15 +78,15 @@ def tacosandburritos_train(
            '--subscription_id', subscription_id,
            '--resource_group', resource_group,
            '--workspace', workspace
        ]
    )
    operations['register'].after(operations['training'])

    operations['profile'] = dsl.ContainerOp(
        name='profile',
        image='insert your image here',
        command=['sh'],
        arguments=[
            '/scripts/profile.sh',
            '-n', profile_name,
            '-m', model_name,
@ -101,45 +99,43 @@ def tacosandburritos_train(
            '-p', service_principal_password,
            '-u', subscription_id,
            '-b', persistent_volume_path
        ]
    )
    operations['profile'].after(operations['register'])

    operations['deploy'] = dsl.ContainerOp(
        name='deploy',
        image='insert your image here',
        command=['sh'],
        arguments=[
            '/scripts/deploy.sh',
            '-n', model_name,
            '-m', model_name,
            '-i', '/scripts/inferenceconfig.json',
            '-d', '/scripts/deploymentconfig.json',
            '-t', tenant_id,
            '-r', resource_group,
            '-w', workspace,
            '-s', service_principal_id,
            '-p', service_principal_password,
            '-u', subscription_id,
            '-b', persistent_volume_path
        ]
    )
    operations['deploy'].after(operations['profile'])

    for _, op in operations.items():
        op.container.set_image_pull_policy("Always")
        op.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-disk')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure',
            name='azure')
        )


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')
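
Besides compiling to a .tar.gz as above, the same pipeline function can be submitted directly to a Kubeflow Pipelines endpoint with the kfp client. A rough sketch, assuming a reachable KFP host and that the 'insert your image here' placeholders have been filled in; the host URL and argument values below are placeholders, not values from this commit:

import kfp

client = kfp.Client(host='http://<kfp-host>/pipeline')  # placeholder host
client.create_run_from_pipeline_func(
    tacosandburritos_train,
    arguments={
        'tenant_id': '<tenant-id>',
        'service_principal_id': '<sp-id>',
        'service_principal_password': '<sp-password>',
        'subscription_id': '<subscription-id>',
        'resource_group': '<resource-group>',
        'workspace': '<aml-workspace>'
    })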

View File

@ -1,5 +1,4 @@
import os
import shutil
import wget
import zipfile
@ -9,108 +8,108 @@ import tensorflow as tf
from pathlib2 import Path
def check_dir(path, check=False):
    if check:
        assert os.path.exists(path), '{} does not exist!'.format(path)
    else:
        if not os.path.exists(path):
            os.makedirs(path)
    return Path(path).resolve(strict=False)


def download(source, target, force_clear=False):
    if force_clear and os.path.exists(target):
        print('Removing {}...'.format(target))
        shutil.rmtree(target)

    check_dir(target)

    target_file = str(Path(target).joinpath('data.zip'))
    if os.path.exists(target_file) and not force_clear:
        print('data already exists, skipping download')
        return

    if source.startswith('http'):
        print("Downloading from {} to {}".format(source, target))
        wget.download(source, target_file)
        print("Done!")
    else:
        print("Copying from {} to {}".format(source, target))
        shutil.copyfile(source, target_file)

    print('Unzipping {}'.format(target_file))
    zipr = zipfile.ZipFile(target_file)
    zipr.extractall(target)
    zipr.close()

def process_image(path, image_size=160):
    img_raw = tf.io.read_file(path)
    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final

def walk_images(path, image_size=160):
    images = []
    print('Scanning {}'.format(path))
    # find subdirectories in base path
    # (they should be the labels)
    labels = []
    for (_, dirs, _) in os.walk(path):
        print('Found {}'.format(dirs))
        labels = dirs
        break

    for d in labels:
        # keep the base path intact while iterating over label folders
        label_path = os.path.join(path, d)
        print('Processing {}'.format(label_path))
        # only care about files in directory
        for item in os.listdir(label_path):
            if not item.lower().endswith('.jpg'):
                print('skipping {}'.format(item))
                continue

            image = os.path.join(label_path, item)
            try:
                img = process_image(image, image_size)
                assert img.shape[2] == 3, "Invalid channel count"
                # write out good images
                images.append(image)
            except Exception as e:
                print('{}\n{}\n'.format(e, image))

    return images

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='data cleaning for binary image task')
    parser.add_argument('-b', '--base_path', help='directory to base data', default='../../data')
    parser.add_argument('-d', '--data', help='directory to training data', default='train')
    parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
    parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
    parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
    parser.add_argument('-f', '--force', help='force clear all data', default=False, action='store_true')
    args = parser.parse_args()
    print(args)

    print('Using TensorFlow v.{}'.format(tf.__version__))

    base_path = Path(args.base_path).resolve(strict=False)
    print('Base Path: {}'.format(base_path))
    data_path = base_path.joinpath(args.data).resolve(strict=False)
    print('Train Path: {}'.format(data_path))
    target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
    print('Train File: {}'.format(target_path))
    zip_path = args.zipfile

    print('Acquiring data...')
    # use the -z/--zipfile argument rather than a hard-coded URL
    download(zip_path, str(base_path), args.force)

    if os.path.exists(str(target_path)):
        print('dataset text file already exists, skipping check')
    else:
        print('Testing images...')
        images = walk_images(str(data_path), args.img_size)

        # save file
        print('writing dataset to {}'.format(target_path))
        with open(str(target_path), 'w+') as f:
            f.write('\n'.join(images))

# python data.py -z https://aiadvocate.blob.core.windows.net/public/tacodata.zip -t train.txt
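
The train.txt written above is just a newline-separated list of image paths; the label is recovered downstream (in train.py's load_dataset) from each file's parent folder name. A simplified sketch of that convention, using hypothetical paths:

from pathlib import Path

# hypothetical entries as they might appear in train.txt
listing = [
    '/mnt/azure/train/tacos/0001.jpg',
    '/mnt/azure/train/burrito/0002.jpg',
]
labels = {name: idx for idx, name in enumerate(sorted({Path(p).parent.name for p in listing}))}
pairs = [(p, labels[Path(p).parent.name]) for p in listing]
print(labels)  # e.g. {'burrito': 0, 'tacos': 1}
print(pairs)   # [(path, label), ...] as consumed by the training step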

View File

@ -1,8 +1,8 @@
import json
import time
import requests
import datetime
import numpy as np
from PIL import Image
from io import BytesIO
import tensorflow as tf
@ -10,81 +10,80 @@ import tensorflow as tf
from azureml.core.model import Model
def init():
    global model

    try:
        model_path = Model.get_model_path('tacosandburritos')
    except:
        model_path = '/model/latest.h5'

    print('Attempting to load model')
    model = tf.keras.models.load_model(model_path)
    model.summary()
    print('Done!')

    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))

def run(raw_data):
    prev_time = time.time()

    post = json.loads(raw_data)
    img_path = post['image']

    tensor = process_image(img_path, 160)
    t = tf.reshape(tensor, [-1, 160, 160, 3])
    o = model.predict(t, steps=1)  # [0][0]
    print(o)
    o = o[0][0]

    # take the timestamp after the prediction so inference_time covers
    # preprocessing and predict rather than just the JSON parse
    current_time = time.time()
    inference_time = datetime.timedelta(seconds=current_time - prev_time)
    payload = {
        'time': inference_time.total_seconds(),
        'prediction': 'burrito' if o > 0.5 else 'tacos',
        'scores': str(o)
    }

    print('Input ({}), Prediction ({})'.format(post['image'], payload))

    return payload

def process_image(path, image_size):
    # Extract image (from web or path)
    if path.startswith('http'):
        response = requests.get(path)
        img = np.array(Image.open(BytesIO(response.content)))
    else:
        img = np.array(Image.open(path))

    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
    # tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final


def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)

if __name__ == "__main__":
    images = {
        'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
        'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
    }

    init()

    for k, v in images.items():
        print('{} => {}'.format(k, v))

    info('Taco Test')
    taco = json.dumps({'image': images['tacos']})
    print(taco)
    run(taco)

    info('Burrito Test')
    burrito = json.dumps({'image': images['burrito']})
    print(burrito)
    run(burrito)

View File

@ -1,4 +1,3 @@
import json
from os.path import relpath
import azureml
@ -11,84 +10,84 @@ from azureml.core.webservice import Webservice, AciWebservice
from azureml.core.authentication import ServicePrincipalAuthentication
def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)


def run(model_path, model_name, tenant_id, service_principal_id,
        service_principal_password, subscription_id, resource_group, workspace, tags):
    auth_args = {
        'tenant_id': tenant_id,
        'service_principal_id': service_principal_id,
        'service_principal_password': service_principal_password
    }

    ws_args = {
        'auth': ServicePrincipalAuthentication(**auth_args),
        'subscription_id': subscription_id,
        'resource_group': resource_group
    }

    ws = Workspace.get(workspace, **ws_args)

    print(ws.get_details())

    print('\nSaving model {} to {}'.format(model_path, model_name))

    # Model path needs to be relative
    model_path = relpath(model_path, '.')

    model = Model.register(ws, model_name=model_name, model_path=model_path, tags=tags)
    print('Done!')

if __name__ == "__main__":
    # argparse stuff for model path and model name
    parser = argparse.ArgumentParser(description='sanity check on model')
    parser.add_argument('-b', '--base_path', help='directory to base folder', default='../../data')
    parser.add_argument('-m', '--model', help='path to model file', default='/model/latest.h5')
    parser.add_argument('-n', '--model_name', help='AML Model name', default='tacosandburritos')
    parser.add_argument('-t', '--tenant_id', help='tenant_id')
    parser.add_argument('-s', '--service_principal_id', help='service_principal_id')
    parser.add_argument('-p', '--service_principal_password', help='service_principal_password')
    parser.add_argument('-u', '--subscription_id', help='subscription_id')
    parser.add_argument('-r', '--resource_group', help='resource_group')
    parser.add_argument('-w', '--workspace', help='workspace')
    args = parser.parse_args()

    print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))

    args.model = 'model/' + args.model
    model_path = str(Path(args.base_path).resolve(strict=False).joinpath(args.model).resolve(strict=False))
    params_path = str(Path(args.base_path).resolve(strict=False).joinpath('params.json').resolve(strict=False))

    rgs = {
        'model_path': model_path,
        'model_name': args.model_name,
        'tenant_id': args.tenant_id,
        'service_principal_id': args.service_principal_id,
        'service_principal_password': args.service_principal_password,
        'subscription_id': args.subscription_id,
        'resource_group': args.resource_group,
        'workspace': args.workspace
    }

    # printing out args for posterity
    for i in rgs:
        if i == 'service_principal_password':
            print('{} => **********'.format(i))
        else:
            print('{} => {}'.format(i, rgs[i]))

    with open(str(params_path), 'r') as f:
        tags = json.load(f)

    print('\n\nUsing the following tags:')
    for tag in tags:
        print('{} => {}'.format(tag, tags[tag]))

    rgs['tags'] = tags

    run(**rgs)

# python register.py --model_path v --model_name c --tenant_id c
# --service_principal_id v --service_principal_password v
# --subscription_id v --resource_group x --workspace c
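
The params.json loaded above as tags is the file that train.py (next file) writes under base_path after training. An illustrative sketch of its shape, with hypothetical values; the keys mirror what train.py serializes:

example_tags = {
    "data_path": "/mnt/azure/train",          # hypothetical value
    "image_size": 160,
    "epochs": 5,
    "batch_size": 32,
    "learning_rate": 0.0001,
    "output": "/mnt/azure/model",
    "dataset": "/mnt/azure/train.txt",
    "dataset_signature": "<HMAC-SHA256 of train.txt, uppercased>",
    "model_signature": "<HMAC-SHA256 of latest.h5, uppercased>",
    "model_type": "tfv2-MobileNetV2"
}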

View File

@ -15,179 +15,179 @@ from tensorflow.data import Dataset
global image_size
def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)


def check_dir(path, check=False):
    if check:
        assert os.path.exists(path), '{} does not exist!'.format(path)
    else:
        if not os.path.exists(path):
            os.makedirs(path)
    return Path(path).resolve(strict=False)

def process_image(path, label):
    img_raw = tf.io.read_file(path)
    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final, label


def load_dataset(base_path, dataset, split=[8, 1, 1]):
    # normalize splits
    splits = np.array(split) / np.sum(np.array(split))

    # find labels - parent folder names
    labels = {}
    for (_, dirs, _) in os.walk(base_path):
        print('found {}'.format(dirs))
        labels = {k: v for (v, k) in enumerate(dirs)}
        print('using {}'.format(labels))
        break

    # load all files along with idx label
    print('loading dataset from {}'.format(dataset))
    with open(dataset, 'r') as d:
        data = [(str(Path(f.strip()).absolute()), labels[Path(f.strip()).parent.name]) for f in d.readlines()]

    print('dataset size: {}\nshuffling data...'.format(len(data)))
    # shuffle data
    shuffle(data)

    print('splitting data...')
    # split data
    train_idx = int(len(data) * splits[0])
    eval_idx = int(len(data) * splits[1])

    return data[:train_idx], \
        data[train_idx:train_idx + eval_idx], \
        data[train_idx + eval_idx:], \
        labels

#@print_info
def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
    img_shape = (image_size, image_size, 3)

    info('Loading Data Set')
    # load dataset
    train, test, val, labels = load_dataset(data_path, dataset)

    # training data
    train_data, train_labels = zip(*train)
    train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
                            Dataset.from_tensor_slices(list(train_labels))))

    train_ds = train_ds.map(map_func=process_image,
                            num_parallel_calls=5)

    train_ds = train_ds.apply(tf.data.experimental.ignore_errors())

    train_ds = train_ds.batch(batch_size)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat()

    # model
    info('Creating Model')
    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
                                                   include_top=False,
                                                   weights='imagenet')
    base_model.trainable = True

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # training
    info('Training')
    steps_per_epoch = math.ceil(len(train) / batch_size)
    history = model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)

    # save model
    info('Saving Model')

    # check existence of base model folder
    output = check_dir(output)

    print('Serializing into saved_model format')
    tf.saved_model.save(model, str(output))
    print('Done!')

    # add time prefix folder
    file_output = str(Path(output).joinpath('latest.h5'))
    print('Serializing h5 model to:\n{}'.format(file_output))
    model.save(file_output)

    return generate_hash(file_output, 'kf_pipeline')

def generate_hash(file, key):
    print('Generating hash for {}'.format(file))
    m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
    BUF_SIZE = 65536
    with open(str(file), 'rb') as f:
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            m.update(data)

    return m.hexdigest()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='transfer learning for binary image task')
    parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
    parser.add_argument('-d', '--data', help='directory to training and test data', default='train')
    parser.add_argument('-e', '--epochs', help='number of epochs', default=10, type=int)
    parser.add_argument('-b', '--batch', help='batch size', default=32, type=int)
    parser.add_argument('-i', '--image_size', help='image size', default=160, type=int)
    parser.add_argument('-l', '--lr', help='learning rate', default=0.0001, type=float)
    parser.add_argument('-o', '--outputs', help='output directory', default='model')
    parser.add_argument('-f', '--dataset', help='cleaned data listing')
    args = parser.parse_args()

    info('Using TensorFlow v.{}'.format(tf.__version__))

    params = Path(args.base_path).joinpath('params.json')
    data_path = Path(args.base_path).joinpath(args.data).resolve(strict=False)
    target_path = Path(args.base_path).resolve(strict=False).joinpath(args.outputs)
    dataset = Path(args.base_path).joinpath(args.dataset)
    image_size = args.image_size

    args = {
        "data_path": str(data_path),
        "image_size": image_size,
        "epochs": args.epochs,
        "batch_size": args.batch,
        "learning_rate": args.lr,
        "output": str(target_path),
        "dataset": str(dataset)
    }

    dataset_signature = generate_hash(dataset, 'kf_pipeline')

    # printing out args for posterity
    for i in args:
        print('{} => {}'.format(i, args[i]))

    model_signature = run(**args)

    args['dataset_signature'] = dataset_signature.upper()
    args['model_signature'] = model_signature.upper()
    args['model_type'] = 'tfv2-MobileNetV2'
    print('Writing out params...', end='')
    with open(str(params), 'w') as f:
        json.dump(args, f)

    print(' Saved to {}'.format(str(params)))

# python train.py -d train -e 3 -b 32 -l 0.0001 -o model -f train.txt