fixed some lint errors

Rebecca McFadden 2019-08-16 01:04:15 -04:00
parent 153d9d87aa
commit d81d83512a
6 changed files with 301 additions and 275 deletions

View File

@@ -1,33 +1,33 @@
import json
import time
from io import BytesIO
import datetime
import requests
import numpy as np
import datetime
from PIL import Image
from io import BytesIO
import tensorflow as tf
from azureml.core.model import Model
def init():
global model
try:
def init():
if Model.get_model_path('tacosandburritos'):
model_path = Model.get_model_path('tacosandburritos')
except:
else:
model_path = '/model/latest.h5'
print('Attempting to load model')
print 'Attempting to load model'
model = tf.keras.models.load_model(model_path)
model.summary()
print('Done!')
print 'Done!'
print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
print 'Initialized model "{}" at {}'.format(model_path, datetime.datetime.now())
return model
def run(raw_data):
global model
def run(raw_data, model):
prev_time = time.time()
post = json.loads(raw_data)
img_path = post['image']
@@ -35,8 +35,8 @@ def run(raw_data):
tensor = process_image(img_path, 160)
t = tf.reshape(tensor, [-1, 160, 160, 3])
o = model.predict(t, steps=1)#[0][0]
print(o)
o = model.predict(t, steps=1) # [0][0]
print o
o = o[0][0]
inference_time = datetime.timedelta(seconds=current_time - prev_time)
payload = {
@@ -45,28 +45,31 @@ def run(raw_data):
'scores': str(o)
}
print('Input ({}), Prediction ({})'.format(post['image'], payload))
print 'Input ({}), Prediction ({})'.format(post['image'], payload)
return payload
def process_image(path, image_size):
# Extract image (from web or path)
if(path.startswith('http')):
if path.startswith('http'):
response = requests.get(path)
img = np.array(Image.open(BytesIO(response.content)))
else:
img = np.array(Image.open(path))
img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
#tf.image.decode_jpeg(img_raw, channels=3)
# tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def info(msg, char = "#", width = 75):
print("")
print(char * width)
print(char + " %0*s" % ((-1*width)+5, msg) + char)
print(char * width)
def info(msg, char="#", width=75):
print ""
print char * width
print char + " %0*s" % ((-1 * width) + 5, msg) + char
print char * width
if __name__ == "__main__":
images = {
@@ -74,17 +77,17 @@ if __name__ == "__main__":
'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
}
init()
my_model = init()
for k, v in images.items():
print('{} => {}'.format(k, v))
print '{} => {}'.format(k, v)
info('Taco Test')
taco = json.dumps({ 'image': images['tacos'] })
print(taco)
run(taco)
taco = json.dumps({'image': images['tacos']})
print taco
run(taco, my_model)
info('Burrito Test')
burrito = json.dumps({ 'image': images['burrito'] })
print(burrito)
run(burrito)
burrito = json.dumps({'image': images['burrito']})
print burrito
run(burrito, my_model)

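As an aside, a minimal sketch (not from the commit) of one way to keep the scoring script's logging working under both Python 2 and Python 3: importing print_function makes print a real function in Python 2, so the parenthesized calls stay valid either way. The log() helper below is hypothetical.
# Hedged sketch, not part of the repo. With the future import, print() is a
# function under Python 2 as well, so the same call runs on either interpreter.
from __future__ import print_function
import datetime

def log(msg):
    # prefix each message with a timestamp, as the scoring script does manually
    print('{} {}'.format(datetime.datetime.now(), msg))

log('Attempting to load model')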
View File

@@ -1,27 +1,29 @@
"""Main pipeline file"""
from kubernetes import client as k8s_client
import kfp.dsl as dsl
import kfp.compiler as compiler
from kubernetes import client as k8s_client
@dsl.pipeline(
name='Tacos vs. Burritos',
description='Simple TF CNN for binary classifier between burritos and tacos'
description='Simple TF CNN'
)
def tacosandburritos_train(
tenant_id,
service_principal_id,
service_principal_password,
subscription_id,
resource_group,
workspace,
persistent_volume_path='/mnt/azure',
data_download='https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
epochs=5,
batch=32,
learning_rate=0.0001,
model_name='tacosandburritos',
profile_name='tacoprofile'
tenant_id,
service_principal_id,
service_principal_password,
subscription_id,
resource_group,
workspace
):
"""Pipeline steps"""
persistent_volume_path = '/mnt/azure'
data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'
epochs = 5
batch = 32
learning_rate = 0.0001
model_name = 'tacosandburritos'
profile_name = 'tacoprofile'
operations = {}
image_size = 160
training_folder = 'train'
@@ -30,112 +32,109 @@ def tacosandburritos_train(
# preprocess data
operations['preprocess'] = dsl.ContainerOp(
name='preprocess',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/data.py',
'--base_path', persistent_volume_path,
'--data', training_folder,
'--target', training_dataset,
'--img_size', image_size,
'--zipfile', data_download
]
)
# train
operations['training'] = dsl.ContainerOp(
name='training',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/train.py',
'--base_path', persistent_volume_path,
'--data', training_folder,
'--epochs', epochs,
'--batch', batch,
'--image_size', image_size,
'--lr', learning_rate,
'--outputs', model_folder,
'--dataset', training_dataset
]
)
operations['training'].after(operations['preprocess'])
# register model
operations['register'] = dsl.ContainerOp(
name='register',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/register.py',
'--base_path', persistent_volume_path,
'--model', 'latest.h5',
'--model_name', model_name,
'--tenant_id', tenant_id,
'--service_principal_id', service_principal_id,
'--service_principal_password', service_principal_password,
'--subscription_id', subscription_id,
'--resource_group', resource_group,
'--workspace', workspace
]
)
operations['register'].after(operations['training'])
operations['profile'] = dsl.ContainerOp(
name='profile',
image='insert your image here',
command=['sh'],
arguments=[
'/scripts/profile.sh',
'-n', profile_name,
'-m', model_name,
'-i', '/scripts/inferenceconfig.json',
'-d', '{"image":"https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg"}',
'-t', tenant_id,
'-r', resource_group,
'-w', workspace,
'-s', service_principal_id,
'-p', service_principal_password,
'-u', subscription_id,
'-b', persistent_volume_path
]
)
operations['profile'].after(operations['register'])
operations['deploy'] = dsl.ContainerOp(
name='deploy',
image='insert your image here',
command=['sh'],
arguments=[
'/scripts/deploy.sh',
'-n', model_name,
'-m', model_name,
'-i', '/scripts/inferenceconfig.json',
'-d', '/scripts/deploymentconfig.json',
'-t', tenant_id,
'-r', resource_group,
'-w', workspace,
'-s', service_principal_id,
'-p', service_principal_password,
'-u', subscription_id,
'-b', persistent_volume_path
]
)
operations['deploy'].after(operations['profile'])
for _, op in operations.items():
op.container.set_image_pull_policy("Always")
op.add_volume(
k8s_client.V1Volume(
name='azure',
persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
claim_name='azure-managed-disk')
)
for _, op_1 in operations.items():
op_1.container.set_image_pull_policy("Always")
op_1.add_volume(
k8s_client.V1Volume(
name='azure',
persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
claim_name='azure-managed-disk')
)
).add_volume_mount(k8s_client.V1VolumeMount(
mount_path='/mnt/azure',
name='azure')
)
mount_path='/mnt/azure', name='azure'))
if __name__ == '__main__':
compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')

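For context, a hedged sketch of submitting the compiled package to a Kubeflow Pipelines endpoint, with tacosandburritos_train in scope. The host URL and every argument value are placeholders, and create_run_from_pipeline_func assumes a reasonably recent kfp SDK.
# Hedged sketch: submit the pipeline defined above to a KFP endpoint.
# Host and argument values are placeholders, not taken from the repo.
import kfp

client = kfp.Client(host='http://localhost:8080')  # assumed port-forwarded endpoint
client.create_run_from_pipeline_func(
    tacosandburritos_train,
    arguments={
        'tenant_id': '<tenant-id>',
        'service_principal_id': '<service-principal-id>',
        'service_principal_password': '<service-principal-password>',
        'subscription_id': '<subscription-id>',
        'resource_group': '<resource-group>',
        'workspace': '<aml-workspace>',
    })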
View File

@@ -1,69 +1,69 @@
import os
import shutil
import wget
import zipfile
import argparse
import numpy as np
import wget
import tensorflow as tf
from pathlib2 import Path
def check_dir(path, check=False):
if check:
assert os.path.exists(path), '{} does not exist!'.format(path)
else:
if not os.path.exists(path):
os.makedirs(path)
return Path(path).resolve(strict=False)
def check_dir(path):
if not os.path.exists(path):
os.makedirs(path)
return Path(path).resolve(strict=False)
def download(source, target, force_clear=False):
if force_clear and os.path.exists(target):
print('Removing {}...'.format(target))
print 'Removing {}...'.format(target)
shutil.rmtree(target)
check_dir(target)
targt_file = str(Path(target).joinpath('data.zip'))
if os.path.exists(targt_file) and not force_clear:
print('data already exists, skipping download')
print 'data already exists, skipping download'
return
if source.startswith('http'):
print("Downloading from {} to {}".format(source, target))
wget.download(source, targt_file)
print("Done!")
print "Downloading from {} to {}".format(source, target)
wget.download(source, targt_file)
print "Done!"
else:
print("Copying from {} to {}".format(source, target))
print "Copying from {} to {}".format(source, target)
shutil.copyfile(source, targt_file)
print('Unzipping {}'.format(targt_file))
print 'Unzipping {}'.format(targt_file)
zipr = zipfile.ZipFile(targt_file)
zipr.extractall(target)
zipr.close()
def process_image(path, image_size=160):
img_raw = tf.io.read_file(path)
img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def walk_images(path, image_size=160):
images = []
print('Scanning {}'.format(path))
imgs = []
print 'Scanning {}'.format(path)
# find subdirectories in base path
# (they should be the labels)
labels = []
for (_, dirs, _) in os.walk(path):
print('Found {}'.format(dirs))
print 'Found {}'.format(dirs)
labels = dirs
break
for d in labels:
path = os.path.join(path, d)
print('Processing {}'.format(path))
print 'Processing {}'.format(path)
# only care about files in directory
for item in os.listdir(path):
if not item.lower().endswith('.jpg'):
print('skipping {}'.format(item))
print 'skipping {}'.format(item)
continue
image = os.path.join(path, item)
@@ -71,11 +71,12 @@ def walk_images(path, image_size=160):
img = process_image(image, image_size)
assert img.shape[2] == 3, "Invalid channel count"
# write out good images
images.append(image)
imgs.append(image)
except Exception as e:
print('{}\n{}\n'.format(e, image))
print '{}\n{}\n'.format(e, image)
return imgs
return images
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='data cleaning for binary image task')
@@ -84,31 +85,33 @@ if __name__ == "__main__":
parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
parser.add_argument('-f', '--force', help='force clear all data', default=False, action='store_true')
parser.add_argument('-f', '--force',
help='force clear all data', default=False, action='store_true')
args = parser.parse_args()
print(args)
print args
print('Using TensorFlow v.{}'.format(tf.__version__))
print 'Using TensorFlow v.{}'.format(tf.__version__)
base_path = Path(args.base_path).resolve(strict=False)
print('Base Path: {}'.format(base_path))
print 'Base Path: {}'.format(base_path)
data_path = base_path.joinpath(args.data).resolve(strict=False)
print('Train Path: {}'.format(data_path))
print 'Train Path: {}'.format(data_path)
target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
print('Train File: {}'.format(target_path))
print 'Train File: {}'.format(target_path)
zip_path = args.zipfile
print('Acquiring data...')
download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip', str(base_path), args.force)
print 'Acquiring data...'
download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
str(base_path), args.force)
if os.path.exists(str(target_path)):
print('dataset text file already exists, skipping check')
print 'dataset text file already exists, skipping check'
else:
print('Testing images...')
print 'Testing images...'
images = walk_images(str(data_path), args.img_size)
# save file
print('writing dataset to {}'.format(target_path))
print 'writing dataset to {}'.format(target_path)
with open(str(target_path), 'w+') as f:
f.write('\n'.join(images))

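A short usage sketch for the helpers above, with download() and walk_images() in scope. The local paths are placeholders, and it assumes the extracted archive contains the train folder the pipeline expects.
# Hedged sketch: exercise the data-prep helpers locally. Paths are placeholders.
from pathlib2 import Path

base = Path('/tmp/tacodata').resolve(strict=False)
download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip', str(base))
good_images = walk_images(str(base.joinpath('train')), image_size=160)
with open(str(base.joinpath('train.txt')), 'w+') as dataset_file:
    # persist the list of verified images, as the script's __main__ block does
    dataset_file.write('\n'.join(good_images))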
View File

@@ -1,32 +1,35 @@
import json
import time
import datetime
from io import BytesIO
import requests
import numpy as np
import datetime
from PIL import Image
from io import BytesIO
import tensorflow as tf
from azureml.core.model import Model
def init():
global model
try:
def init():
if Model.get_model_path('tacosandburritos'):
model_path = Model.get_model_path('tacosandburritos')
except:
else:
model_path = '/model/latest.h5'
print('Attempting to load model')
print 'Attempting to load model'
model = tf.keras.models.load_model(model_path)
model.summary()
print('Done!')
print 'Done!'
print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
print 'Initialized model "{}" at {}'.format(model_path, datetime.datetime.now())
return model
def run(raw_data):
def run(raw_data, model):
prev_time = time.time()
post = json.loads(raw_data)
img_path = post['image']
@@ -34,8 +37,8 @@ def run(raw_data):
tensor = process_image(img_path, 160)
t = tf.reshape(tensor, [-1, 160, 160, 3])
o = model.predict(t, steps=1)#[0][0]
print(o)
o = model.predict(t, steps=1) # [0][0]
print o
o = o[0][0]
inference_time = datetime.timedelta(seconds=current_time - prev_time)
payload = {
@@ -44,28 +47,31 @@ def run(raw_data):
'scores': str(o)
}
print('Input ({}), Prediction ({})'.format(post['image'], payload))
print 'Input ({}), Prediction ({})'.format(post['image'], payload)
return payload
def process_image(path, image_size):
# Extract image (from web or path)
if(path.startswith('http')):
if path.startswith('http'):
response = requests.get(path)
img = np.array(Image.open(BytesIO(response.content)))
else:
img = np.array(Image.open(path))
img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
#tf.image.decode_jpeg(img_raw, channels=3)
# tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def info(msg, char = "#", width = 75):
print("")
print(char * width)
print(char + " %0*s" % ((-1*width)+5, msg) + char)
print(char * width)
def info(msg, char="#", width=75):
print ""
print char * width
print char + " %0*s" % ((-1 * width) + 5, msg) + char
print char * width
if __name__ == "__main__":
images = {
@@ -73,17 +79,17 @@ if __name__ == "__main__":
'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
}
init()
my_model = init()
for k, v in images.items():
print('{} => {}'.format(k, v))
print '{} => {}'.format(k, v)
info('Taco Test')
taco = json.dumps({ 'image': images['tacos'] })
print(taco)
run(taco)
taco = json.dumps({'image': images['tacos']})
print taco
run(taco, my_model)
info('Burrito Test')
burrito = json.dumps({ 'image': images['burrito'] })
print(burrito)
run(burrito)
burrito = json.dumps({'image': images['burrito']})
print burrito
run(burrito, my_model)

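Since run() reads a JSON body with an 'image' key and returns a payload containing 'scores', a hedged sketch of calling a deployed scoring endpoint with the same contract looks like this; the scoring URI is a placeholder, not something defined in the repo.
# Hedged sketch: POST the same JSON contract that run() expects to a deployed endpoint.
import json
import requests

scoring_uri = 'http://<your-service>.azurecontainer.io/score'  # placeholder URI
body = json.dumps({'image': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'})
response = requests.post(scoring_uri, data=body,
                         headers={'Content-Type': 'application/json'})
# the response body is expected to carry the 'scores' payload built in run()
print(response.text)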
View File

@@ -1,22 +1,22 @@
import json
from os.path import relpath
import azureml
import argparse
from pathlib2 import Path
import azureml
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.image import ContainerImage, Image
from azureml.core.webservice import Webservice, AciWebservice
from azureml.core.authentication import ServicePrincipalAuthentication
def info(msg, char = "#", width = 75):
print("")
print(char * width)
print(char + " %0*s" % ((-1*width)+5, msg) + char)
print(char * width)
def run(model_path, model_name, tenant_id, service_principal_id,
service_principal_password, subscription_id, resource_group, workspace, tags):
def info(msg, char="#", width=75):
print ""
print char * width
print char + " %0*s" % ((-1 * width) + 5, msg) + char
print char * width
def get_ws(tenant_id, service_principal_id,
service_principal_password, subscription_id, resource_group, workspace):
auth_args = {
'tenant_id': tenant_id,
'service_principal_id': service_principal_id,
@@ -28,18 +28,21 @@ def run(model_path, model_name, tenant_id, service_principal_id,
'subscription_id': subscription_id,
'resource_group': resource_group
}
ws = Workspace.get(workspace, **ws_args)
return ws
print(ws.get_details())
def run(mdl_path, model_name, ws, tgs):
print('\nSaving model {} to {}'.format(model_path, model_name))
print ws.get_details()
print '\nSaving model {} to {}'.format(mdl_path, model_name)
# Model Path needs to be relative
model_path = relpath(model_path, '.')
mdl_path = relpath(mdl_path, '.')
Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
print 'Done!'
model = Model.register(ws, model_name=model_name, model_path=model_path, tags=tags)
print('Done!')
if __name__ == "__main__":
# argparse stuff for model path and model name
@@ -54,14 +57,14 @@ if __name__ == "__main__":
parser.add_argument('-r', '--resource_group', help='resource_group')
parser.add_argument('-w', '--workspace', help='workspace')
args = parser.parse_args()
print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))
print 'Azure ML SDK Version: {}'.format(azureml.core.VERSION)
args.model = 'model/' + args.model
model_path = str(Path(args.base_path).resolve(strict=False).joinpath(args.model).resolve(strict=False))
params_path = str(Path(args.base_path).resolve(strict=False).joinpath('params.json').resolve(strict=False))
rgs = {
'model_path': model_path,
'model_name': args.model_name,
model_path = str(Path(args.base_path).resolve(
strict=False).joinpath(args.model).resolve(strict=False))
params_path = str(Path(args.base_path).resolve(
strict=False).joinpath('params.json').resolve(strict=False))
wsrgs = {
'tenant_id': args.tenant_id,
'service_principal_id': args.service_principal_id,
'service_principal_password': args.service_principal_password,
@@ -69,23 +72,29 @@ if __name__ == "__main__":
'resource_group': args.resource_group,
'workspace': args.workspace
}
rgs = {
'mdl_path': model_path,
'model_name': args.model_name
}
# printing out args for posterity
for i in rgs:
for i in wsrgs:
if i == 'service_principal_password':
print('{} => **********'.format(i))
print '{} => **********'.format(i)
else:
print('{} => {}'.format(i, rgs[i]))
print '{} => {}'.format(i, rgs[i])
with(open(str(params_path), 'r')) as f:
tags = json.load(f)
print('\n\nUsing the following tags:')
print '\n\nUsing the following tags:'
for tag in tags:
print('{} => {}'.format(tag, tags[tag]))
print '{} => {}'.format(tag, tags[tag])
rgs['tags'] = tags
workspc = get_ws(**wsrgs)
rgs['ws'] = workspc
run(**rgs)
# python register.py --model_path v --model_name c --tenant_id c

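Condensed, a hedged sketch of the same registration flow outside the pipeline, using the Azure ML SDK calls that get_ws() and run() wrap; every credential and resource value below is a placeholder.
# Hedged sketch: register a model directly with the Azure ML SDK.
# All identifiers are placeholders, not taken from the repo.
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.authentication import ServicePrincipalAuthentication

auth = ServicePrincipalAuthentication(
    tenant_id='<tenant-id>',
    service_principal_id='<service-principal-id>',
    service_principal_password='<service-principal-password>')
ws = Workspace.get('<workspace>', auth=auth,
                   subscription_id='<subscription-id>',
                   resource_group='<resource-group>')
Model.register(ws, model_name='tacosandburritos',
               model_path='model/latest.h5',
               tags={'source': 'local-sketch'})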
View File

@@ -5,51 +5,52 @@ import hmac
import json
import hashlib
import argparse
from random import shuffle
import numpy as np
import tensorflow as tf
from pathlib2 import Path
from random import shuffle
from datetime import datetime
from tensorflow.data import Dataset
from pathlib2 import Path
global image_size
def info(msg, char = "#", width = 75):
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1*width)+5, msg) + char)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
def check_dir(path, check=False):
if check:
assert os.path.exists(path), '{} does not exist!'.format(path)
else:
if not os.path.exists(path):
os.makedirs(path)
return Path(path).resolve(strict=False)
def process_image(path, label):
def check_dir(path):
if not os.path.exists(path):
os.makedirs(path)
return Path(path).resolve(strict=False)
def process_image(path, label, img_size):
img_raw = tf.io.read_file(path)
img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
return img_final, label
def load_dataset(base_path, dataset, split=[8, 1, 1]):
def load_dataset(base_path, dset, split=None):
# normalize splits
if split is None:
split = [8, 1, 1]
splits = np.array(split) / np.sum(np.array(split))
# find labels - parent folder names
labels = {}
for (_, dirs, _) in os.walk(base_path):
print('found {}'.format(dirs))
labels = { k: v for (v, k) in enumerate(dirs) }
labels = {k: v for (v, k) in enumerate(dirs)}
print('using {}'.format(labels))
break
# load all files along with idx label
print('loading dataset from {}'.format(dataset))
with open(dataset, 'r') as d:
data = [(str(Path(f.strip()).absolute()), labels[Path(f.strip()).parent.name]) for f in d.readlines()]
print('loading dataset from {}'.format(dset))
with open(dset, 'r') as d:
data = [(str(Path(line.strip()).absolute()),
labels[Path(line.strip()).parent.name]) for line in d.readlines()]
print('dataset size: {}\nshuffling data...'.format(len(data)))
@@ -59,25 +60,29 @@ def load_dataset(base_path, dataset, split=[8, 1, 1]):
print('splitting data...')
# split data
train_idx = int(len(data) * splits[0])
eval_idx = int(len(data) * splits[1])
return data[:train_idx], \
data[train_idx:train_idx + eval_idx], \
data[train_idx + eval_idx:], \
labels
return data[:train_idx]
#@print_info
def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
img_shape = (image_size, image_size, 3)
# @print_info
def run(
dpath,
img_size=160,
epochs=10,
batch_size=32,
learning_rate=0.0001,
output='model',
dset=None):
img_shape = (img_size, img_size, 3)
info('Loading Data Set')
# load dataset
train, test, val, labels = load_dataset(data_path, dataset)
train = load_dataset(dpath, dset)
# training data
train_data, train_labels = zip(*train)
train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
Dataset.from_tensor_slices(list(train_labels))))
Dataset.from_tensor_slices(list(train_labels)), img_size))
train_ds = train_ds.map(map_func=process_image,
num_parallel_calls=5)
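One hedged way (not from the commit) to thread img_size into the mapping step while keeping Dataset.zip limited to datasets is a closure over process_image, reusing train_data, train_labels and img_size from run() above and assuming the same tf.data API:
# Hedged sketch, within run(): bind img_size via a lambda so Dataset.zip
# only receives dataset objects.
train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
                        Dataset.from_tensor_slices(list(train_labels))))
train_ds = train_ds.map(
    map_func=lambda path, label: process_image(path, label, img_size),
    num_parallel_calls=5)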
@@ -91,8 +96,8 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.000
# model
info('Creating Model')
base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
include_top=False,
weights='imagenet')
base_model.trainable = True
model = tf.keras.Sequential([
@@ -102,15 +107,15 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.000
])
model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
loss='binary_crossentropy',
metrics=['accuracy'])
model.summary()
# training
info('Training')
steps_per_epoch = math.ceil(len(train)/batch_size)
history = model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
steps_per_epoch = math.ceil(len(train) / batch_size)
model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
# save model
info('Saving Model')
@@ -130,19 +135,20 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.000
return generate_hash(file_output, 'kf_pipeline')
def generate_hash(file, key):
print('Generating hash for {}'.format(file))
def generate_hash(dfile, key):
print('Generating hash for {}'.format(dfile))
m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
BUF_SIZE = 65536
with open(str(file), 'rb') as f:
with open(str(dfile), 'rb') as myfile:
while True:
data = f.read(BUF_SIZE)
data = myfile.read(BUF_SIZE)
if not data:
break
m.update(data)
return m.hexdigest()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='transfer learning for binary image task')
parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
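For reference, a short usage sketch of the hashing helper above; the file path is a placeholder.
# Hedged sketch: sign an artifact the same way the pipeline steps do.
signature = generate_hash('model/latest.h5', 'kf_pipeline')  # placeholder path
print('artifact signature: {}'.format(signature))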
@@ -165,13 +171,13 @@ if __name__ == "__main__":
params = Path(args.base_path).joinpath('params.json')
args = {
"data_path": str(data_path),
"image_size": image_size,
"epochs": args.epochs,
"batch_size": args.batch,
"learning_rate": args.lr,
"output": str(target_path),
"dataset": str(dataset)
"dpath": str(data_path),
"img_size": image_size,
"epochs": args.epochs,
"batch_size": args.batch,
"learning_rate": args.lr,
"output": str(target_path),
"dset": str(dataset)
}
dataset_signature = generate_hash(dataset, 'kf_pipeline')