mirror of https://github.com/kubeflow/examples.git
fixed some lint errors
This commit is contained in:
parent 153d9d87aa
commit d81d83512a
@@ -1,31 +1,31 @@
 import json
 import time
+from io import BytesIO
+import datetime
 import requests
 import numpy as np
-import datetime
 from PIL import Image
-from io import BytesIO
 import tensorflow as tf
 
 from azureml.core.model import Model
 
-def init():
-    global model
 
-    try:
+def init():
+    if Model.get_model_path('tacosandburritos'):
         model_path = Model.get_model_path('tacosandburritos')
-    except:
+    else:
         model_path = '/model/latest.h5'
 
-    print('Attempting to load model')
+    print 'Attempting to load model'
     model = tf.keras.models.load_model(model_path)
     model.summary()
-    print('Done!')
+    print 'Done!'
 
-    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
+    print 'Initialized model "{}" at {}'.format(model_path, datetime.datetime.now())
+    return model
 
-def run(raw_data):
-    global model
+
+def run(raw_data, model):
     prev_time = time.time()
 
     post = json.loads(raw_data)
@@ -35,8 +35,8 @@ def run(raw_data):
 
     tensor = process_image(img_path, 160)
     t = tf.reshape(tensor, [-1, 160, 160, 3])
-    o = model.predict(t, steps=1)#[0][0]
-    print(o)
+    o = model.predict(t, steps=1) # [0][0]
+    print o
     o = o[0][0]
     inference_time = datetime.timedelta(seconds=current_time - prev_time)
     payload = {
@@ -45,28 +45,31 @@ def run(raw_data):
         'scores': str(o)
     }
 
-    print('Input ({}), Prediction ({})'.format(post['image'], payload))
+    print 'Input ({}), Prediction ({})'.format(post['image'], payload)
 
     return payload
 
 
 def process_image(path, image_size):
     # Extract image (from web or path)
-    if(path.startswith('http')):
+    if path.startswith('http'):
         response = requests.get(path)
         img = np.array(Image.open(BytesIO(response.content)))
     else:
         img = np.array(Image.open(path))
 
     img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
-    #tf.image.decode_jpeg(img_raw, channels=3)
+    # tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final
 
-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
+
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width
 
 
 if __name__ == "__main__":
     images = {
@@ -74,17 +77,17 @@ if __name__ == "__main__":
         'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
     }
 
-    init()
+    my_model = init()
 
     for k, v in images.items():
-        print('{} => {}'.format(k, v))
+        print '{} => {}'.format(k, v)
 
     info('Taco Test')
-    taco = json.dumps({ 'image': images['tacos'] })
-    print(taco)
-    run(taco)
+    taco = json.dumps({'image': images['tacos']})
+    print taco
+    run(taco, my_model)
 
     info('Burrito Test')
-    burrito = json.dumps({ 'image': images['burrito'] })
-    print(burrito)
-    run(burrito)
+    burrito = json.dumps({'image': images['burrito']})
+    print burrito
+    run(burrito, my_model)
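Note: the hunks above rework the scoring code so that init() returns the loaded Keras model instead of stashing it in a module-level global, and run() receives the model as an argument. Below is a minimal, framework-free sketch of that pattern; the stub model object, the default path and the example URL are illustrative and not part of the commit.

import json
import time
import datetime


def init(model_path='/model/latest.h5'):
    # Stand-in for the real loader, which resolves the path via
    # Model.get_model_path('tacosandburritos') and calls tf.keras.models.load_model.
    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
    return {'path': model_path}


def run(raw_data, model):
    # The model is an explicit argument instead of a module-level global.
    prev_time = time.time()
    post = json.loads(raw_data)
    score = 0.5  # stand-in for model.predict(...)[0][0]
    inference_time = datetime.timedelta(seconds=time.time() - prev_time)
    return {
        'image': post['image'],
        'scores': str(score),
        'time': str(inference_time)
    }


if __name__ == '__main__':
    my_model = init()
    print(run(json.dumps({'image': 'https://example.com/taco.jpg'}), my_model))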
@@ -1,11 +1,11 @@
+"""Main pipeline file"""
+from kubernetes import client as k8s_client
 import kfp.dsl as dsl
 import kfp.compiler as compiler
-from kubernetes import client as k8s_client
 
 
 @dsl.pipeline(
     name='Tacos vs. Burritos',
-    description='Simple TF CNN for binary classifier between burritos and tacos'
+    description='Simple TF CNN'
 )
 def tacosandburritos_train(
     tenant_id,
@@ -13,15 +13,17 @@ def tacosandburritos_train(
     service_principal_password,
     subscription_id,
     resource_group,
-    workspace,
-    persistent_volume_path='/mnt/azure',
-    data_download='https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
-    epochs=5,
-    batch=32,
-    learning_rate=0.0001,
-    model_name='tacosandburritos',
-    profile_name='tacoprofile'
+    workspace
 ):
+    """Pipeline steps"""
 
+    persistent_volume_path = '/mnt/azure'
+    data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'
+    epochs = 5
+    batch = 32
+    learning_rate = 0.0001
+    model_name = 'tacosandburritos'
+    profile_name = 'tacoprofile'
     operations = {}
     image_size = 160
     training_folder = 'train'
@@ -123,19 +125,16 @@ def tacosandburritos_train(
         ]
     )
     operations['deploy'].after(operations['profile'])
-    for _, op in operations.items():
-        op.container.set_image_pull_policy("Always")
-        op.add_volume(
+    for _, op_1 in operations.items():
+        op_1.container.set_image_pull_policy("Always")
+        op_1.add_volume(
             k8s_client.V1Volume(
                 name='azure',
                 persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                     claim_name='azure-managed-disk')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
-            mount_path='/mnt/azure',
-            name='azure')
-        )
+            mount_path='/mnt/azure', name='azure'))
 
 
 if __name__ == '__main__':
     compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')
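Note: the pipeline change renames the loop variable, moves the former pipeline parameters into in-function constants, and collapses the volume mount into a single chained call. The sketch below shows that per-op volume pattern in a self-contained pipeline, assuming the kfp and kubernetes Python packages are installed; the demo op, image name and pipeline name are illustrative, while the mount path and claim name are the ones used above.

"""Minimal sketch of the shared-volume pattern applied to every op."""
import kfp.dsl as dsl
import kfp.compiler as compiler
from kubernetes import client as k8s_client


@dsl.pipeline(name='Volume demo', description='Attach one PVC to every step')
def demo_pipeline():
    operations = {}
    operations['preprocess'] = dsl.ContainerOp(
        name='preprocess',
        image='busybox',
        command=['sh', '-c', 'ls /mnt/azure']
    )

    # Same loop as in the pipeline above: every op pulls a fresh image and
    # mounts the shared managed-disk claim at /mnt/azure.
    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-disk'))
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure', name='azure'))


if __name__ == '__main__':
    compiler.Compiler().compile(demo_pipeline, __file__ + '.tar.gz')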
@@ -1,69 +1,69 @@
 import os
 import shutil
-import wget
 import zipfile
 import argparse
-import numpy as np
+import wget
 import tensorflow as tf
 from pathlib2 import Path
 
-def check_dir(path, check=False):
-    if check:
-        assert os.path.exists(path), '{} does not exist!'.format(path)
-    else:
-        if not os.path.exists(path):
-            os.makedirs(path)
-        return Path(path).resolve(strict=False)
+
+def check_dir(path):
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return Path(path).resolve(strict=False)
 
 
 def download(source, target, force_clear=False):
     if force_clear and os.path.exists(target):
-        print('Removing {}...'.format(target))
+        print 'Removing {}...'.format(target)
         shutil.rmtree(target)
 
     check_dir(target)
 
     targt_file = str(Path(target).joinpath('data.zip'))
     if os.path.exists(targt_file) and not force_clear:
-        print('data already exists, skipping download')
+        print 'data already exists, skipping download'
         return
 
     if source.startswith('http'):
-        print("Downloading from {} to {}".format(source, target))
+        print "Downloading from {} to {}".format(source, target)
         wget.download(source, targt_file)
-        print("Done!")
+        print "Done!"
     else:
-        print("Copying from {} to {}".format(source, target))
+        print "Copying from {} to {}".format(source, target)
         shutil.copyfile(source, targt_file)
 
-    print('Unzipping {}'.format(targt_file))
+    print 'Unzipping {}'.format(targt_file)
     zipr = zipfile.ZipFile(targt_file)
     zipr.extractall(target)
     zipr.close()
 
 
 def process_image(path, image_size=160):
     img_raw = tf.io.read_file(path)
     img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final
 
 
 def walk_images(path, image_size=160):
-    images = []
-    print('Scanning {}'.format(path))
+    imgs = []
+    print 'Scanning {}'.format(path)
     # find subdirectories in base path
     # (they should be the labels)
     labels = []
     for (_, dirs, _) in os.walk(path):
-        print('Found {}'.format(dirs))
+        print 'Found {}'.format(dirs)
         labels = dirs
         break
 
     for d in labels:
         path = os.path.join(path, d)
-        print('Processing {}'.format(path))
+        print 'Processing {}'.format(path)
         # only care about files in directory
         for item in os.listdir(path):
             if not item.lower().endswith('.jpg'):
-                print('skipping {}'.format(item))
+                print 'skipping {}'.format(item)
                 continue
 
             image = os.path.join(path, item)
@@ -71,11 +71,12 @@ def walk_images(path, image_size=160):
                 img = process_image(image, image_size)
                 assert img.shape[2] == 3, "Invalid channel count"
                 # write out good images
-                images.append(image)
+                imgs.append(image)
             except Exception as e:
-                print('{}\n{}\n'.format(e, image))
+                print '{}\n{}\n'.format(e, image)
 
-    return images
+    return imgs
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='data cleaning for binary image task')
@@ -84,31 +85,33 @@ if __name__ == "__main__":
     parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
     parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
     parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
-    parser.add_argument('-f', '--force', help='force clear all data', default=False, action='store_true')
+    parser.add_argument('-f', '--force',
                        help='force clear all data', default=False, action='store_true')
     args = parser.parse_args()
-    print(args)
+    print args
 
-    print('Using TensorFlow v.{}'.format(tf.__version__))
+    print 'Using TensorFlow v.{}'.format(tf.__version__)
 
     base_path = Path(args.base_path).resolve(strict=False)
-    print('Base Path: {}'.format(base_path))
+    print 'Base Path: {}'.format(base_path)
     data_path = base_path.joinpath(args.data).resolve(strict=False)
-    print('Train Path: {}'.format(data_path))
+    print 'Train Path: {}'.format(data_path)
     target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
-    print('Train File: {}'.format(target_path))
+    print 'Train File: {}'.format(target_path)
     zip_path = args.zipfile
 
-    print('Acquiring data...')
-    download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip', str(base_path), args.force)
+    print 'Acquiring data...'
+    download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
             str(base_path), args.force)
 
     if os.path.exists(str(target_path)):
-        print('dataset text file already exists, skipping check')
+        print 'dataset text file already exists, skipping check'
     else:
-        print('Testing images...')
+        print 'Testing images...'
        images = walk_images(str(data_path), args.img_size)
 
        # save file
-        print('writing dataset to {}'.format(target_path))
+        print 'writing dataset to {}'.format(target_path)
        with open(str(target_path), 'w+') as f:
            f.write('\n'.join(images))
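Note: the data-preparation change drops the unused numpy import and the check flag on check_dir, renames images to imgs, and wraps the long argparse and download calls. Below is a small stand-alone sketch of the simplified directory helper and the .jpg walk; it uses the standard-library pathlib instead of pathlib2, skips the TensorFlow validation step, and the 'train' directory name is just an example.

import os
from pathlib import Path


def check_dir(path):
    # Simplified helper matching the new signature: create the directory if
    # needed and always return its resolved path.
    if not os.path.exists(path):
        os.makedirs(path)
    return Path(path).resolve()


def walk_images(path):
    # Collect .jpg files under each label subdirectory, mirroring walk_images
    # above (the per-image decode/validation is omitted in this sketch).
    imgs = []
    for label in next(os.walk(path))[1]:
        label_dir = os.path.join(path, label)
        for item in os.listdir(label_dir):
            if item.lower().endswith('.jpg'):
                imgs.append(os.path.join(label_dir, item))
    return imgs


if __name__ == '__main__':
    target = check_dir('train')
    print('created or found {}'.format(target))
    print('{} images found'.format(len(walk_images(str(target)))))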
@@ -1,30 +1,33 @@
 import json
 import time
+import datetime
+from io import BytesIO
 import requests
 import numpy as np
-import datetime
 from PIL import Image
-from io import BytesIO
 import tensorflow as tf
 
 from azureml.core.model import Model
 
-def init():
-    global model
 
-    try:
+def init():
+    if Model.get_model_path('tacosandburritos'):
         model_path = Model.get_model_path('tacosandburritos')
-    except:
+    else:
         model_path = '/model/latest.h5'
 
-    print('Attempting to load model')
+    print 'Attempting to load model'
     model = tf.keras.models.load_model(model_path)
     model.summary()
-    print('Done!')
+    print 'Done!'
 
-    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
+    print 'Initialized model "{}" at {}'.format(model_path, datetime.datetime.now())
+    return model
 
-def run(raw_data):
+
+def run(raw_data, model):
     prev_time = time.time()
 
     post = json.loads(raw_data)
@@ -34,8 +37,8 @@ def run(raw_data):
 
     tensor = process_image(img_path, 160)
     t = tf.reshape(tensor, [-1, 160, 160, 3])
-    o = model.predict(t, steps=1)#[0][0]
-    print(o)
+    o = model.predict(t, steps=1) # [0][0]
+    print o
     o = o[0][0]
     inference_time = datetime.timedelta(seconds=current_time - prev_time)
     payload = {
@@ -44,28 +47,31 @@ def run(raw_data):
         'scores': str(o)
     }
 
-    print('Input ({}), Prediction ({})'.format(post['image'], payload))
+    print 'Input ({}), Prediction ({})'.format(post['image'], payload)
 
     return payload
 
 
 def process_image(path, image_size):
     # Extract image (from web or path)
-    if(path.startswith('http')):
+    if path.startswith('http'):
         response = requests.get(path)
         img = np.array(Image.open(BytesIO(response.content)))
     else:
         img = np.array(Image.open(path))
 
     img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
-    #tf.image.decode_jpeg(img_raw, channels=3)
+    # tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final
 
-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
+
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width
 
 
 if __name__ == "__main__":
     images = {
@@ -73,17 +79,17 @@ if __name__ == "__main__":
         'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
     }
 
-    init()
+    my_model = init()
 
     for k, v in images.items():
-        print('{} => {}'.format(k, v))
+        print '{} => {}'.format(k, v)
 
     info('Taco Test')
-    taco = json.dumps({ 'image': images['tacos'] })
-    print(taco)
-    run(taco)
+    taco = json.dumps({'image': images['tacos']})
+    print taco
+    run(taco, my_model)
 
     info('Burrito Test')
-    burrito = json.dumps({ 'image': images['burrito'] })
-    print(burrito)
-    run(burrito)
+    burrito = json.dumps({'image': images['burrito']})
+    print burrito
+    run(burrito, my_model)
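Note: this second copy of the scoring code gets the same treatment as the first. For reference, the image preprocessing it relies on can be exercised on its own as sketched below, assuming requests, Pillow, numpy and TensorFlow are installed; the burrito URL is the one from the script's own test block, and network access is required to fetch it.

"""Stand-alone sketch of the preprocessing used by the scoring scripts."""
from io import BytesIO

import requests
import numpy as np
import tensorflow as tf
from PIL import Image


def process_image(path, image_size):
    # Load from the web or from local disk, then scale pixels to [0, 1].
    if path.startswith('http'):
        response = requests.get(path)
        img = np.array(Image.open(BytesIO(response.content)))
    else:
        img = np.array(Image.open(path))
    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
    return tf.image.resize(img_tensor, [image_size, image_size]) / 255


if __name__ == '__main__':
    tensor = process_image('https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg', 160)
    # Add the batch dimension the model expects: [-1, 160, 160, 3].
    batch = tf.reshape(tensor, [-1, 160, 160, 3])
    print(batch.shape)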
@@ -1,22 +1,22 @@
 import json
 from os.path import relpath
-import azureml
 import argparse
 from pathlib2 import Path
+import azureml
 from azureml.core import Workspace
 from azureml.core.model import Model
-from azureml.core.image import ContainerImage, Image
-from azureml.core.webservice import Webservice, AciWebservice
 from azureml.core.authentication import ServicePrincipalAuthentication
 
-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
 
-def run(model_path, model_name, tenant_id, service_principal_id,
-        service_principal_password, subscription_id, resource_group, workspace, tags):
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width
+
+
+def get_ws(tenant_id, service_principal_id,
           service_principal_password, subscription_id, resource_group, workspace):
     auth_args = {
         'tenant_id': tenant_id,
         'service_principal_id': service_principal_id,
@@ -28,18 +28,21 @@ def run(model_path, model_name, tenant_id, service_principal_id,
         'subscription_id': subscription_id,
         'resource_group': resource_group
     }
 
     ws = Workspace.get(workspace, **ws_args)
+    return ws
 
-    print(ws.get_details())
+
+def run(mdl_path, model_name, ws, tgs):
+    print ws.get_details()
 
-    print('\nSaving model {} to {}'.format(model_path, model_name))
+    print '\nSaving model {} to {}'.format(mdl_path, model_name)
 
     # Model Path needs to be relative
-    model_path = relpath(model_path, '.')
+    mdl_path = relpath(mdl_path, '.')
 
-    model = Model.register(ws, model_name=model_name, model_path=model_path, tags=tags)
-    print('Done!')
+    Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
+    print 'Done!'
 
 
 if __name__ == "__main__":
     # argparse stuff for model path and model name
@@ -55,13 +58,13 @@ if __name__ == "__main__":
     parser.add_argument('-w', '--workspace', help='workspace')
     args = parser.parse_args()
 
-    print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))
+    print 'Azure ML SDK Version: {}'.format(azureml.core.VERSION)
     args.model = 'model/' + args.model
-    model_path = str(Path(args.base_path).resolve(strict=False).joinpath(args.model).resolve(strict=False))
-    params_path = str(Path(args.base_path).resolve(strict=False).joinpath('params.json').resolve(strict=False))
-    rgs = {
-        'model_path': model_path,
-        'model_name': args.model_name,
+    model_path = str(Path(args.base_path).resolve(
        strict=False).joinpath(args.model).resolve(strict=False))
+    params_path = str(Path(args.base_path).resolve(
        strict=False).joinpath('params.json').resolve(strict=False))
+    wsrgs = {
         'tenant_id': args.tenant_id,
         'service_principal_id': args.service_principal_id,
         'service_principal_password': args.service_principal_password,
@@ -69,23 +72,29 @@ if __name__ == "__main__":
         'resource_group': args.resource_group,
         'workspace': args.workspace
     }
+    rgs = {
+        'mdl_path': model_path,
+        'model_name': args.model_name
+    }
 
     # printing out args for posterity
-    for i in rgs:
+    for i in wsrgs:
         if i == 'service_principal_password':
-            print('{} => **********'.format(i))
+            print '{} => **********'.format(i)
         else:
-            print('{} => {}'.format(i, rgs[i]))
+            print '{} => {}'.format(i, rgs[i])
 
     with(open(str(params_path), 'r')) as f:
         tags = json.load(f)
 
-    print('\n\nUsing the following tags:')
+    print '\n\nUsing the following tags:'
     for tag in tags:
-        print('{} => {}'.format(tag, tags[tag]))
+        print '{} => {}'.format(tag, tags[tag])
 
     rgs['tags'] = tags
 
+    workspc = get_ws(**wsrgs)
+    rgs['ws'] = workspc
     run(**rgs)
 
     # python register.py --model_path v --model_name c --tenant_id c
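Note: the registration script now separates workspace lookup (get_ws) from model registration (run) and drives both from keyword-argument dicts built in __main__. The sketch below shows only that wiring, with the Azure ML calls replaced by prints so it runs anywhere; every name and value in it is illustrative.

"""Sketch of the kwargs wiring used above: one dict for workspace lookup,
one for model registration."""


def get_ws(tenant_id, service_principal_id, service_principal_password,
           subscription_id, resource_group, workspace):
    # Stand-in for Workspace.get(workspace, auth=ServicePrincipalAuthentication(...), ...)
    print('looking up workspace {} in {} (tenant {})'.format(
        workspace, resource_group, tenant_id))
    return {'name': workspace, 'subscription': subscription_id}


def run(mdl_path, model_name, ws, tgs):
    # Stand-in for Model.register(ws, model_name=..., model_path=..., tags=...)
    print('registering {} from {} in {} with tags {}'.format(
        model_name, mdl_path, ws['name'], tgs))


if __name__ == '__main__':
    wsrgs = {
        'tenant_id': 't', 'service_principal_id': 'sp',
        'service_principal_password': 'secret', 'subscription_id': 'sub',
        'resource_group': 'rg', 'workspace': 'ws'
    }
    rgs = {'mdl_path': 'model/latest.h5', 'model_name': 'tacosandburritos'}
    rgs['tags'] = {'run': 'demo'}
    rgs['ws'] = get_ws(**wsrgs)
    run(**rgs)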
@@ -5,51 +5,52 @@ import hmac
 import json
 import hashlib
 import argparse
+from random import shuffle
 import numpy as np
 import tensorflow as tf
-from pathlib2 import Path
-from random import shuffle
-from datetime import datetime
 from tensorflow.data import Dataset
+from pathlib2 import Path
 
-global image_size
 
-def info(msg, char = "#", width = 75):
+def info(msg, char="#", width=75):
     print("")
     print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
+    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
     print(char * width)
 
-def check_dir(path, check=False):
-    if check:
-        assert os.path.exists(path), '{} does not exist!'.format(path)
-    else:
-        if not os.path.exists(path):
-            os.makedirs(path)
-        return Path(path).resolve(strict=False)
+
+def check_dir(path):
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return Path(path).resolve(strict=False)
 
-def process_image(path, label):
+
+def process_image(path, label, img_size):
     img_raw = tf.io.read_file(path)
     img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
-    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
+    img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
     return img_final, label
 
-def load_dataset(base_path, dataset, split=[8, 1, 1]):
+
+def load_dataset(base_path, dset, split=None):
     # normalize splits
+    if split is None:
+        split = [8, 1, 1]
     splits = np.array(split) / np.sum(np.array(split))
 
     # find labels - parent folder names
     labels = {}
     for (_, dirs, _) in os.walk(base_path):
         print('found {}'.format(dirs))
-        labels = { k: v for (v, k) in enumerate(dirs) }
+        labels = {k: v for (v, k) in enumerate(dirs)}
         print('using {}'.format(labels))
         break
 
     # load all files along with idx label
-    print('loading dataset from {}'.format(dataset))
-    with open(dataset, 'r') as d:
-        data = [(str(Path(f.strip()).absolute()), labels[Path(f.strip()).parent.name]) for f in d.readlines()]
+    print('loading dataset from {}'.format(dset))
+    with open(dset, 'r') as d:
+        data = [(str(Path(line.strip()).absolute()),
                 labels[Path(line.strip()).parent.name]) for line in d.readlines()]
 
     print('dataset size: {}\nsuffling data...'.format(len(data)))
 
@@ -59,25 +60,29 @@ def load_dataset(base_path, dataset, split=[8, 1, 1]):
     print('splitting data...')
     # split data
     train_idx = int(len(data) * splits[0])
-    eval_idx = int(len(data) * splits[1])
 
-    return data[:train_idx], \
-        data[train_idx:train_idx + eval_idx], \
-        data[train_idx + eval_idx:], \
-        labels
+    return data[:train_idx]
 
-#@print_info
-def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
-    img_shape = (image_size, image_size, 3)
+
+# @print_info
+def run(
        dpath,
        img_size=160,
        epochs=10,
        batch_size=32,
        learning_rate=0.0001,
        output='model',
        dset=None):
+    img_shape = (img_size, img_size, 3)
 
     info('Loading Data Set')
     # load dataset
-    train, test, val, labels = load_dataset(data_path, dataset)
+    train = load_dataset(dpath, dset)
 
     # training data
     train_data, train_labels = zip(*train)
     train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
-                            Dataset.from_tensor_slices(list(train_labels))))
+                            Dataset.from_tensor_slices(list(train_labels)), img_size))
 
     train_ds = train_ds.map(map_func=process_image,
                             num_parallel_calls=5)
@@ -109,8 +114,8 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.000
 
     # training
     info('Training')
-    steps_per_epoch = math.ceil(len(train)/batch_size)
-    history = model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
+    steps_per_epoch = math.ceil(len(train) / batch_size)
+    model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
 
     # save model
     info('Saving Model')
@@ -130,19 +135,20 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.000
     return generate_hash(file_output, 'kf_pipeline')
 
 
-def generate_hash(file, key):
-    print('Generating hash for {}'.format(file))
+def generate_hash(dfile, key):
+    print('Generating hash for {}'.format(dfile))
     m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
     BUF_SIZE = 65536
-    with open(str(file), 'rb') as f:
+    with open(str(dfile), 'rb') as myfile:
         while True:
-            data = f.read(BUF_SIZE)
+            data = myfile.read(BUF_SIZE)
             if not data:
                 break
             m.update(data)
 
     return m.hexdigest()
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='transfer learning for binary image task')
     parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
@@ -165,13 +171,13 @@ if __name__ == "__main__":
     params = Path(args.base_path).joinpath('params.json')
 
     args = {
-        "data_path": str(data_path),
-        "image_size": image_size,
+        "dpath": str(data_path),
+        "img_size": image_size,
         "epochs": args.epochs,
         "batch_size": args.batch,
         "learning_rate": args.lr,
         "output": str(target_path),
-        "dataset": str(dataset)
+        "dset": str(dataset)
     }
 
     dataset_signature = generate_hash(dataset, 'kf_pipeline')
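Note: besides the renames, the training script fixes the mutable default argument on load_dataset (split=[8, 1, 1] becomes split=None) and keeps the HMAC-based file hashing. Both generic patterns are sketched below, assuming numpy is installed; hashing this very file with the 'kf_pipeline' key is just an example.

import hmac
import hashlib

import numpy as np


def normalize_split(split=None):
    # A None default avoids the shared mutable list that pylint's
    # dangerous-default-value check flags on load_dataset above.
    if split is None:
        split = [8, 1, 1]
    return np.array(split) / np.sum(np.array(split))


def generate_hash(dfile, key):
    # Same streaming HMAC-SHA256 pattern as generate_hash in the training script.
    m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
    buf_size = 65536
    with open(str(dfile), 'rb') as myfile:
        while True:
            data = myfile.read(buf_size)
            if not data:
                break
            m.update(data)
    return m.hexdigest()


if __name__ == '__main__':
    print(normalize_split())        # -> [0.8 0.1 0.1]
    print(normalize_split([1, 1]))  # -> [0.5 0.5]
    print(generate_hash(__file__, 'kf_pipeline'))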