fixed some lint errors

Rebecca McFadden 2019-08-16 01:04:15 -04:00
parent 153d9d87aa
commit d81d83512a
6 changed files with 301 additions and 275 deletions


@@ -1,31 +1,31 @@
 import json
 import time
+from io import BytesIO
+import datetime
 import requests
 import numpy as np
-import datetime
 from PIL import Image
-from io import BytesIO
 import tensorflow as tf
 from azureml.core.model import Model


 def init():
-    global model
-    try:
+    if Model.get_model_path('tacosandburritos'):
         model_path = Model.get_model_path('tacosandburritos')
-    except:
+    else:
         model_path = '/model/latest.h5'

-    print('Attempting to load model')
+    print 'Attempting to load model'
     model = tf.keras.models.load_model(model_path)
     model.summary()
-    print('Done!')
+    print 'Done!'

-    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
+    print 'Initialized model "{}" at {}'.format(model_path, datetime.datetime.now())
+    return model


-def run(raw_data):
-    global model
+def run(raw_data, model):
     prev_time = time.time()

     post = json.loads(raw_data)
@@ -35,8 +35,8 @@ def run(raw_data):
     tensor = process_image(img_path, 160)
     t = tf.reshape(tensor, [-1, 160, 160, 3])
-    o = model.predict(t, steps=1)#[0][0]
-    print(o)
+    o = model.predict(t, steps=1) # [0][0]
+    print o
     o = o[0][0]

     inference_time = datetime.timedelta(seconds=current_time - prev_time)
     payload = {
@@ -45,28 +45,31 @@ def run(raw_data):
         'scores': str(o)
     }
-    print('Input ({}), Prediction ({})'.format(post['image'], payload))
+    print 'Input ({}), Prediction ({})'.format(post['image'], payload)
     return payload


 def process_image(path, image_size):
     # Extract image (from web or path)
-    if(path.startswith('http')):
+    if path.startswith('http'):
         response = requests.get(path)
         img = np.array(Image.open(BytesIO(response.content)))
     else:
         img = np.array(Image.open(path))

     img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
-    #tf.image.decode_jpeg(img_raw, channels=3)
+    # tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final


-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width


 if __name__ == "__main__":
     images = {
@@ -74,17 +77,17 @@ if __name__ == "__main__":
         'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
     }

-    init()
+    my_model = init()

     for k, v in images.items():
-        print('{} => {}'.format(k, v))
+        print '{} => {}'.format(k, v)

     info('Taco Test')
-    taco = json.dumps({ 'image': images['tacos'] })
-    print(taco)
-    run(taco)
+    taco = json.dumps({'image': images['tacos']})
+    print taco
+    run(taco, my_model)

     info('Burrito Test')
-    burrito = json.dumps({ 'image': images['burrito'] })
-    print(burrito)
-    run(burrito)
+    burrito = json.dumps({'image': images['burrito']})
+    print burrito
+    run(burrito, my_model)


@@ -1,11 +1,11 @@
+"""Main pipeline file"""
+from kubernetes import client as k8s_client
 import kfp.dsl as dsl
 import kfp.compiler as compiler
-from kubernetes import client as k8s_client


 @dsl.pipeline(
     name='Tacos vs. Burritos',
-    description='Simple TF CNN for binary classifier between burritos and tacos'
+    description='Simple TF CNN'
 )
 def tacosandburritos_train(
     tenant_id,
@@ -13,15 +13,17 @@ def tacosandburritos_train(
     service_principal_password,
     subscription_id,
     resource_group,
-    workspace,
-    persistent_volume_path='/mnt/azure',
-    data_download='https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
-    epochs=5,
-    batch=32,
-    learning_rate=0.0001,
-    model_name='tacosandburritos',
-    profile_name='tacoprofile'
+    workspace
 ):
+    """Pipeline steps"""
+    persistent_volume_path = '/mnt/azure'
+    data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'
+    epochs = 5
+    batch = 32
+    learning_rate = 0.0001
+    model_name = 'tacosandburritos'
+    profile_name = 'tacoprofile'
+
     operations = {}
     image_size = 160
     training_folder = 'train'
@@ -123,19 +125,16 @@ def tacosandburritos_train(
         ]
     )

     operations['deploy'].after(operations['profile'])

-    for _, op in operations.items():
-        op.container.set_image_pull_policy("Always")
-        op.add_volume(
+    for _, op_1 in operations.items():
+        op_1.container.set_image_pull_policy("Always")
+        op_1.add_volume(
             k8s_client.V1Volume(
                 name='azure',
                 persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                     claim_name='azure-managed-disk')
             )
         ).add_volume_mount(k8s_client.V1VolumeMount(
-            mount_path='/mnt/azure',
-            name='azure')
-        )
+            mount_path='/mnt/azure', name='azure'))


 if __name__ == '__main__':
     compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')


@@ -1,69 +1,69 @@
 import os
 import shutil
-import wget
 import zipfile
 import argparse
-import numpy as np
+import wget
 import tensorflow as tf
 from pathlib2 import Path


-def check_dir(path, check=False):
-    if check:
-        assert os.path.exists(path), '{} does not exist!'.format(path)
-    else:
+def check_dir(path):
     if not os.path.exists(path):
         os.makedirs(path)
     return Path(path).resolve(strict=False)


 def download(source, target, force_clear=False):
     if force_clear and os.path.exists(target):
-        print('Removing {}...'.format(target))
+        print 'Removing {}...'.format(target)
         shutil.rmtree(target)

     check_dir(target)

     targt_file = str(Path(target).joinpath('data.zip'))
     if os.path.exists(targt_file) and not force_clear:
-        print('data already exists, skipping download')
+        print 'data already exists, skipping download'
         return

     if source.startswith('http'):
-        print("Downloading from {} to {}".format(source, target))
+        print "Downloading from {} to {}".format(source, target)
         wget.download(source, targt_file)
-        print("Done!")
+        print "Done!"
     else:
-        print("Copying from {} to {}".format(source, target))
+        print "Copying from {} to {}".format(source, target)
         shutil.copyfile(source, targt_file)

-    print('Unzipping {}'.format(targt_file))
+    print 'Unzipping {}'.format(targt_file)
     zipr = zipfile.ZipFile(targt_file)
     zipr.extractall(target)
     zipr.close()


 def process_image(path, image_size=160):
     img_raw = tf.io.read_file(path)
     img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final


 def walk_images(path, image_size=160):
-    images = []
-    print('Scanning {}'.format(path))
+    imgs = []
+    print 'Scanning {}'.format(path)
     # find subdirectories in base path
     # (they should be the labels)
     labels = []
     for (_, dirs, _) in os.walk(path):
-        print('Found {}'.format(dirs))
+        print 'Found {}'.format(dirs)
         labels = dirs
         break

     for d in labels:
         path = os.path.join(path, d)
-        print('Processing {}'.format(path))
+        print 'Processing {}'.format(path)
         # only care about files in directory
         for item in os.listdir(path):
             if not item.lower().endswith('.jpg'):
-                print('skipping {}'.format(item))
+                print 'skipping {}'.format(item)
                 continue

             image = os.path.join(path, item)
@@ -71,11 +71,12 @@ def walk_images(path, image_size=160):
                 img = process_image(image, image_size)
                 assert img.shape[2] == 3, "Invalid channel count"
                 # write out good images
-                images.append(image)
+                imgs.append(image)
             except Exception as e:
-                print('{}\n{}\n'.format(e, image))
+                print '{}\n{}\n'.format(e, image)

-    return images
+    return imgs


 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='data cleaning for binary image task')
@@ -84,31 +85,33 @@ if __name__ == "__main__":
     parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
     parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
     parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
-    parser.add_argument('-f', '--force', help='force clear all data', default=False, action='store_true')
+    parser.add_argument('-f', '--force',
+                        help='force clear all data', default=False, action='store_true')
     args = parser.parse_args()

-    print(args)
-    print('Using TensorFlow v.{}'.format(tf.__version__))
+    print args
+    print 'Using TensorFlow v.{}'.format(tf.__version__)

     base_path = Path(args.base_path).resolve(strict=False)
-    print('Base Path: {}'.format(base_path))
+    print 'Base Path: {}'.format(base_path)

     data_path = base_path.joinpath(args.data).resolve(strict=False)
-    print('Train Path: {}'.format(data_path))
+    print 'Train Path: {}'.format(data_path)

     target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
-    print('Train File: {}'.format(target_path))
+    print 'Train File: {}'.format(target_path)

     zip_path = args.zipfile

-    print('Acquiring data...')
-    download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip', str(base_path), args.force)
+    print 'Acquiring data...'
+    download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
+             str(base_path), args.force)

     if os.path.exists(str(target_path)):
-        print('dataset text file already exists, skipping check')
+        print 'dataset text file already exists, skipping check'
     else:
-        print('Testing images...')
+        print 'Testing images...'
         images = walk_images(str(data_path), args.img_size)

         # save file
-        print('writing dataset to {}'.format(target_path))
+        print 'writing dataset to {}'.format(target_path)
         with open(str(target_path), 'w+') as f:
             f.write('\n'.join(images))


@@ -1,30 +1,33 @@
 import json
 import time
+import datetime
+from io import BytesIO
 import requests
 import numpy as np
-import datetime
 from PIL import Image
-from io import BytesIO
 import tensorflow as tf
 from azureml.core.model import Model


 def init():
-    global model
-    try:
+    if Model.get_model_path('tacosandburritos'):
         model_path = Model.get_model_path('tacosandburritos')
-    except:
+    else:
         model_path = '/model/latest.h5'

-    print('Attempting to load model')
+    print 'Attempting to load model'
     model = tf.keras.models.load_model(model_path)
     model.summary()
-    print('Done!')
+    print 'Done!'

-    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
+    print 'Initialized model "{}" at {}'.format(model_path, datetime.datetime.now())
+    return model


-def run(raw_data):
+def run(raw_data, model):
     prev_time = time.time()

     post = json.loads(raw_data)
@@ -34,8 +37,8 @@ def run(raw_data):
     tensor = process_image(img_path, 160)
     t = tf.reshape(tensor, [-1, 160, 160, 3])
-    o = model.predict(t, steps=1)#[0][0]
-    print(o)
+    o = model.predict(t, steps=1) # [0][0]
+    print o
     o = o[0][0]

     inference_time = datetime.timedelta(seconds=current_time - prev_time)
     payload = {
@@ -44,28 +47,31 @@ def run(raw_data):
         'scores': str(o)
     }
-    print('Input ({}), Prediction ({})'.format(post['image'], payload))
+    print 'Input ({}), Prediction ({})'.format(post['image'], payload)
     return payload


 def process_image(path, image_size):
     # Extract image (from web or path)
-    if(path.startswith('http')):
+    if path.startswith('http'):
         response = requests.get(path)
         img = np.array(Image.open(BytesIO(response.content)))
     else:
         img = np.array(Image.open(path))

     img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
-    #tf.image.decode_jpeg(img_raw, channels=3)
+    # tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final


-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width


 if __name__ == "__main__":
     images = {
@@ -73,17 +79,17 @@ if __name__ == "__main__":
         'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
     }

-    init()
+    my_model = init()

     for k, v in images.items():
-        print('{} => {}'.format(k, v))
+        print '{} => {}'.format(k, v)

     info('Taco Test')
-    taco = json.dumps({ 'image': images['tacos'] })
-    print(taco)
-    run(taco)
+    taco = json.dumps({'image': images['tacos']})
+    print taco
+    run(taco, my_model)

     info('Burrito Test')
-    burrito = json.dumps({ 'image': images['burrito'] })
-    print(burrito)
-    run(burrito)
+    burrito = json.dumps({'image': images['burrito']})
+    print burrito
+    run(burrito, my_model)


@@ -1,22 +1,22 @@
 import json
 from os.path import relpath
-import azureml
 import argparse
 from pathlib2 import Path
+import azureml
 from azureml.core import Workspace
 from azureml.core.model import Model
-from azureml.core.image import ContainerImage, Image
-from azureml.core.webservice import Webservice, AciWebservice
 from azureml.core.authentication import ServicePrincipalAuthentication


-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width


-def run(model_path, model_name, tenant_id, service_principal_id,
-        service_principal_password, subscription_id, resource_group, workspace, tags):
+def get_ws(tenant_id, service_principal_id,
+           service_principal_password, subscription_id, resource_group, workspace):
     auth_args = {
         'tenant_id': tenant_id,
         'service_principal_id': service_principal_id,
@@ -28,18 +28,21 @@ def run(model_path, model_name, tenant_id, service_principal_id,
         'subscription_id': subscription_id,
         'resource_group': resource_group
     }

     ws = Workspace.get(workspace, **ws_args)
+    return ws

-    print(ws.get_details())

-    print('\nSaving model {} to {}'.format(model_path, model_name))
+def run(mdl_path, model_name, ws, tgs):
+    print ws.get_details()
+    print '\nSaving model {} to {}'.format(mdl_path, model_name)

     # Model Path needs to be relative
-    model_path = relpath(model_path, '.')
+    mdl_path = relpath(mdl_path, '.')

-    model = Model.register(ws, model_name=model_name, model_path=model_path, tags=tags)
-    print('Done!')
+    Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
+    print 'Done!'


 if __name__ == "__main__":
     # argparse stuff for model path and model name
@ -55,13 +58,13 @@ if __name__ == "__main__":
parser.add_argument('-w', '--workspace', help='workspace') parser.add_argument('-w', '--workspace', help='workspace')
args = parser.parse_args() args = parser.parse_args()
print('Azure ML SDK Version: {}'.format(azureml.core.VERSION)) print 'Azure ML SDK Version: {}'.format(azureml.core.VERSION)
args.model = 'model/' + args.model args.model = 'model/' + args.model
model_path = str(Path(args.base_path).resolve(strict=False).joinpath(args.model).resolve(strict=False)) model_path = str(Path(args.base_path).resolve(
params_path = str(Path(args.base_path).resolve(strict=False).joinpath('params.json').resolve(strict=False)) strict=False).joinpath(args.model).resolve(strict=False))
rgs = { params_path = str(Path(args.base_path).resolve(
'model_path': model_path, strict=False).joinpath('params.json').resolve(strict=False))
'model_name': args.model_name, wsrgs = {
'tenant_id': args.tenant_id, 'tenant_id': args.tenant_id,
'service_principal_id': args.service_principal_id, 'service_principal_id': args.service_principal_id,
'service_principal_password': args.service_principal_password, 'service_principal_password': args.service_principal_password,
@@ -69,23 +72,29 @@ if __name__ == "__main__":
         'resource_group': args.resource_group,
         'workspace': args.workspace
     }
+    rgs = {
+        'mdl_path': model_path,
+        'model_name': args.model_name
+    }

     # printing out args for posterity
-    for i in rgs:
+    for i in wsrgs:
         if i == 'service_principal_password':
-            print('{} => **********'.format(i))
+            print '{} => **********'.format(i)
         else:
-            print('{} => {}'.format(i, rgs[i]))
+            print '{} => {}'.format(i, rgs[i])

     with(open(str(params_path), 'r')) as f:
         tags = json.load(f)

-    print('\n\nUsing the following tags:')
+    print '\n\nUsing the following tags:'
     for tag in tags:
-        print('{} => {}'.format(tag, tags[tag]))
+        print '{} => {}'.format(tag, tags[tag])

     rgs['tags'] = tags
+
+    workspc = get_ws(**wsrgs)
+    rgs['ws'] = workspc

     run(**rgs)

     # python register.py --model_path v --model_name c --tenant_id c


@@ -5,51 +5,52 @@ import hmac
 import json
 import hashlib
 import argparse
+from random import shuffle
 import numpy as np
 import tensorflow as tf
-from pathlib2 import Path
-from random import shuffle
-from datetime import datetime
 from tensorflow.data import Dataset
+from pathlib2 import Path

-global image_size

-def info(msg, char = "#", width = 75):
+def info(msg, char="#", width=75):
     print("")
     print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
+    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
     print(char * width)


-def check_dir(path, check=False):
-    if check:
-        assert os.path.exists(path), '{} does not exist!'.format(path)
-    else:
+def check_dir(path):
     if not os.path.exists(path):
         os.makedirs(path)
     return Path(path).resolve(strict=False)


-def process_image(path, label):
+def process_image(path, label, img_size):
     img_raw = tf.io.read_file(path)
     img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
-    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
+    img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
     return img_final, label


-def load_dataset(base_path, dataset, split=[8, 1, 1]):
+def load_dataset(base_path, dset, split=None):
     # normalize splits
+    if split is None:
+        split = [8, 1, 1]
     splits = np.array(split) / np.sum(np.array(split))

     # find labels - parent folder names
     labels = {}
     for (_, dirs, _) in os.walk(base_path):
         print('found {}'.format(dirs))
-        labels = { k: v for (v, k) in enumerate(dirs) }
+        labels = {k: v for (v, k) in enumerate(dirs)}
         print('using {}'.format(labels))
         break

     # load all files along with idx label
-    print('loading dataset from {}'.format(dataset))
-    with open(dataset, 'r') as d:
-        data = [(str(Path(f.strip()).absolute()), labels[Path(f.strip()).parent.name]) for f in d.readlines()]
+    print('loading dataset from {}'.format(dset))
+    with open(dset, 'r') as d:
+        data = [(str(Path(line.strip()).absolute()),
+                 labels[Path(line.strip()).parent.name]) for line in d.readlines()]

     print('dataset size: {}\nsuffling data...'.format(len(data)))
@@ -59,25 +60,29 @@ def load_dataset(base_path, dataset, split=[8, 1, 1]):
     print('splitting data...')

     # split data
     train_idx = int(len(data) * splits[0])
-    eval_idx = int(len(data) * splits[1])

-    return data[:train_idx], \
-        data[train_idx:train_idx + eval_idx], \
-        data[train_idx + eval_idx:], \
-        labels
+    return data[:train_idx]


-#@print_info
-def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
-    img_shape = (image_size, image_size, 3)
+# @print_info
+def run(
+        dpath,
+        img_size=160,
+        epochs=10,
+        batch_size=32,
+        learning_rate=0.0001,
+        output='model',
+        dset=None):
+    img_shape = (img_size, img_size, 3)

     info('Loading Data Set')

     # load dataset
-    train, test, val, labels = load_dataset(data_path, dataset)
+    train = load_dataset(dpath, dset)

     # training data
     train_data, train_labels = zip(*train)
     train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
-                            Dataset.from_tensor_slices(list(train_labels))))
+                            Dataset.from_tensor_slices(list(train_labels)), img_size))

     train_ds = train_ds.map(map_func=process_image,
                             num_parallel_calls=5)
@@ -109,8 +114,8 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
     # training
     info('Training')

-    steps_per_epoch = math.ceil(len(train)/batch_size)
-    history = model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
+    steps_per_epoch = math.ceil(len(train) / batch_size)
+    model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)

     # save model
     info('Saving Model')
@@ -130,19 +135,20 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
     return generate_hash(file_output, 'kf_pipeline')


-def generate_hash(file, key):
-    print('Generating hash for {}'.format(file))
+def generate_hash(dfile, key):
+    print('Generating hash for {}'.format(dfile))
     m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
     BUF_SIZE = 65536
-    with open(str(file), 'rb') as f:
+    with open(str(dfile), 'rb') as myfile:
         while True:
-            data = f.read(BUF_SIZE)
+            data = myfile.read(BUF_SIZE)
             if not data:
                 break
             m.update(data)

     return m.hexdigest()


 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='transfer learning for binary image task')
     parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
@ -165,13 +171,13 @@ if __name__ == "__main__":
params = Path(args.base_path).joinpath('params.json') params = Path(args.base_path).joinpath('params.json')
args = { args = {
"data_path": str(data_path), "dpath": str(data_path),
"image_size": image_size, "img_size": image_size,
"epochs": args.epochs, "epochs": args.epochs,
"batch_size": args.batch, "batch_size": args.batch,
"learning_rate": args.lr, "learning_rate": args.lr,
"output": str(target_path), "output": str(target_path),
"dataset": str(dataset) "dset": str(dataset)
} }
dataset_signature = generate_hash(dataset, 'kf_pipeline') dataset_signature = generate_hash(dataset, 'kf_pipeline')