Merge pull request #599 from rem20806/master

Added azure pipeline example for Kubeflow
Michelle Casbon 2019-09-03 18:05:04 -07:00 committed by GitHub
commit e37a9d7acd
33 changed files with 1014 additions and 0 deletions

pipelines/azurepipeline/.gitignore

@ -0,0 +1,18 @@
# standard things
.vscode
.ipynb_checkpoints/
__pycache__
# Environment Variables
*.env
*.cfg
*-creds.yaml
# models and data
data/
model/
*.tar.gz
*.h5
*.zip
aml_config/


@ -0,0 +1,3 @@
# Kubeflow and Azure Pipelines Example
See the docs on the [Kubeflow website](https://www.kubeflow.org/docs/azure/deploy/) for instructions on deploying this machine learning pipeline example.
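At a glance, the flow is: build and push the container images referenced by the pipeline steps (the `image='insert your image here'` placeholders in the pipeline definition), make sure the `azure-managed-disk` persistent volume claim exists in the `kubeflow` namespace, compile the pipeline, then upload it and start a run with your Azure credentials as run parameters. A minimal sketch, assuming the pipeline definition file is named pipeline.py and the images are already pushed to a registry the cluster can pull from:

# compile the pipeline definition into an archive Kubeflow Pipelines can ingest
python pipeline.py            # writes pipeline.py.tar.gz next to the script
# upload pipeline.py.tar.gz through the Kubeflow Pipelines UI and start a run, supplying the
# tenant_id, service_principal_id, service_principal_password, subscription_id,
# resource_group and workspace run parameters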


@ -0,0 +1,9 @@
FROM mcr.microsoft.com/azure-cli
RUN az extension add -n azure-cli-ml
RUN pip install --upgrade pip
COPY deploymentconfig.json /scripts/deploymentconfig.json
COPY inferenceconfig.json /scripts/inferenceconfig.json
COPY deploy.sh /scripts/deploy.sh
COPY score.py /scripts/score.py
COPY environment.yml /scripts/environment.yml
CMD bash


@ -0,0 +1,4 @@
{
    "computeType": "aks",
    "ComputeTarget": "aks-cluster"
}


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/deploy
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,20 @@
#!/bin/sh
# az ml model deploy -n tacosandburritos -m tacosandburritos:1 --ic inferenceconfig.json --dc deploymentconfig.json --resource-group taco-rg --workspace-name taco-workspace --overwrite -v
while getopts "m:n:i:d:s:p:u:r:w:t:b:" option;
do
case "$option" in
m ) MODEL=${OPTARG};;
n ) MODEL_NAME=${OPTARG};;
i ) INFERENCE_CONFIG=${OPTARG};;
d ) DEPLOYMENTCONFIG=${OPTARG};;
s ) SERVICE_PRINCIPAL_ID=${OPTARG};;
p ) SERVICE_PRINCIPAL_PASSWORD=${OPTARG};;
u ) SUBSCRIPTION_ID=${OPTARG};;
r ) RESOURCE_GROUP=${OPTARG};;
w ) WORKSPACE=${OPTARG};;
t ) TENANT_ID=${OPTARG};;
b ) BASE_PATH=${OPTARG};;
esac
done
az login --service-principal --username ${SERVICE_PRINCIPAL_ID} --password ${SERVICE_PRINCIPAL_PASSWORD} -t $TENANT_ID
az ml model deploy -n $MODEL_NAME -m ${MODEL}:1 --ic $INFERENCE_CONFIG --pi ${BASE_PATH}/myprofileresult.json --dc $DEPLOYMENTCONFIG -w $WORKSPACE -g $RESOURCE_GROUP --overwrite -v
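For reference, a by-hand invocation mirroring the arguments the pipeline's deploy step passes might look like the following sketch (the credential variables are placeholders, not part of this example; the script assumes model version 1 and reads myprofileresult.json from the base path written by the profile step):

# -n service name, -m registered model name, -i/-d config files baked into the image,
# -b base path that holds myprofileresult.json from the profile step
sh /scripts/deploy.sh -n tacosandburritos -m tacosandburritos \
  -i /scripts/inferenceconfig.json -d /scripts/deploymentconfig.json \
  -t "$TENANT_ID" -s "$SP_ID" -p "$SP_PASSWORD" -u "$SUBSCRIPTION_ID" \
  -r "$RESOURCE_GROUP" -w "$WORKSPACE" -b /mnt/azure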


@ -0,0 +1,8 @@
{
    "containerResourceRequirements": {
        "cpu": 2,
        "memoryInGB": 4
    },
    "computeType": "ACI",
    "enableAppInsights": "True"
}


@ -0,0 +1,19 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.
# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
  - python=3.6.2
  - pip:
      # Required packages for AzureML execution, history, and data preparation.
      - azureml-defaults
      - numpy
      - tensorflow==2.0.0-alpha0
      - Pillow
      - requests


@ -0,0 +1,13 @@
#!/bin/bash
while getopts "n:w:g:" option;
do
case "$option" in
n ) DEPLOYMENT_NAME=${OPTARG};;
w ) WORKSPACE=${OPTARG};;
g ) RESOURCE_GROUP=${OPTARG};;
esac
done
echo "test the deployment with a burrito image"
az ml service run -n ${DEPLOYMENT_NAME} -d '{ "image": "https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg" }' -w ${WORKSPACE} -g ${RESOURCE_GROUP}
echo "test the deployment with a taco image"
az ml service run -n ${DEPLOYMENT_NAME} -d '{ "image": "https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg" }' -w ${WORKSPACE} -g ${RESOURCE_GROUP}


@ -0,0 +1,10 @@
{
    "entryScript": "/scripts/score.py",
    "runtime": "python",
    "condaFile": "/scripts/environment.yml",
    "extraDockerfileSteps": null,
    "sourceDirectory": null,
    "enableGpu": false,
    "baseImage": null,
    "baseImageRegistry": null
}


@ -0,0 +1,93 @@
import json
import time
from io import BytesIO
import datetime
import requests
import numpy as np
from PIL import Image
import tensorflow as tf
from azureml.core.model import Model
def init():
if Model.get_model_path('tacosandburritos'):
model_path = Model.get_model_path('tacosandburritos')
else:
model_path = '/model/latest.h5'
print('Attempting to load model')
model = tf.keras.models.load_model(model_path)
model.summary()
print('Done!')
print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
return model
def run(raw_data, model):
prev_time = time.time()
post = json.loads(raw_data)
img_path = post['image']
tensor = process_image(img_path, 160)
t = tf.reshape(tensor, [-1, 160, 160, 3])
o = model.predict(t, steps=1)  # [0][0]
print(o)
o = o[0][0]
current_time = time.time()  # capture after prediction so the reported time covers inference
inference_time = datetime.timedelta(seconds=current_time - prev_time)
payload = {
'time': inference_time.total_seconds(),
'prediction': 'burrito' if o > 0.5 else 'tacos',
'scores': str(o)
}
print('Input ({}), Prediction ({})'.format(post['image'], payload))
return payload
def process_image(path, image_size):
# Extract image (from web or path)
if path.startswith('http'):
response = requests.get(path)
img = np.array(Image.open(BytesIO(response.content)))
else:
img = np.array(Image.open(path))
img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
# tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
if __name__ == "__main__":
images = {
'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
}
my_model = init()
for k, v in images.items():
print('{} => {}'.format(k, v))
info('Taco Test')
taco = json.dumps({'image': images['tacos']})
print(taco)
run(taco, my_model)
info('Burrito Test')
burrito = json.dumps({'image': images['burrito']})
print(burrito)
run(burrito, my_model)


@ -0,0 +1,140 @@
"""Main pipeline file"""
from kubernetes import client as k8s_client
import kfp.dsl as dsl
import kfp.compiler as compiler
@dsl.pipeline(
name='Tacos vs. Burritos',
description='Simple TF CNN'
)
def tacosandburritos_train(
tenant_id,
service_principal_id,
service_principal_password,
subscription_id,
resource_group,
workspace
):
"""Pipeline steps"""
persistent_volume_path = '/mnt/azure'
data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'
epochs = 5
batch = 32
learning_rate = 0.0001
model_name = 'tacosandburritos'
profile_name = 'tacoprofile'
operations = {}
image_size = 160
training_folder = 'train'
training_dataset = 'train.txt'
model_folder = 'model'
# preprocess data
operations['preprocess'] = dsl.ContainerOp(
name='preprocess',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/data.py',
'--base_path', persistent_volume_path,
'--data', training_folder,
'--target', training_dataset,
'--img_size', image_size,
'--zipfile', data_download
]
)
# train
operations['training'] = dsl.ContainerOp(
name='training',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/train.py',
'--base_path', persistent_volume_path,
'--data', training_folder,
'--epochs', epochs,
'--batch', batch,
'--image_size', image_size,
'--lr', learning_rate,
'--outputs', model_folder,
'--dataset', training_dataset
]
)
operations['training'].after(operations['preprocess'])
# register model
operations['register'] = dsl.ContainerOp(
name='register',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/register.py',
'--base_path', persistent_volume_path,
'--model', 'latest.h5',
'--model_name', model_name,
'--tenant_id', tenant_id,
'--service_principal_id', service_principal_id,
'--service_principal_password', service_principal_password,
'--subscription_id', subscription_id,
'--resource_group', resource_group,
'--workspace', workspace
]
)
operations['register'].after(operations['training'])
operations['profile'] = dsl.ContainerOp(
name='profile',
image='insert your image here',
command=['sh'],
arguments=[
'/scripts/profile.sh',
'-n', profile_name,
'-m', model_name,
'-i', '/scripts/inferenceconfig.json',
'-d', '{"image":"https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg"}',
'-t', tenant_id,
'-r', resource_group,
'-w', workspace,
'-s', service_principal_id,
'-p', service_principal_password,
'-u', subscription_id,
'-b', persistent_volume_path
]
)
operations['profile'].after(operations['register'])
operations['deploy'] = dsl.ContainerOp(
name='deploy',
image='insert your image here',
command=['sh'],
arguments=[
'/scripts/deploy.sh',
'-n', model_name,
'-m', model_name,
'-i', '/scripts/inferenceconfig.json',
'-d', '/scripts/deploymentconfig.json',
'-t', tenant_id,
'-r', resource_group,
'-w', workspace,
'-s', service_principal_id,
'-p', service_principal_password,
'-u', subscription_id,
'-b', persistent_volume_path
]
)
operations['deploy'].after(operations['profile'])
for _, op_1 in operations.items():
op_1.container.set_image_pull_policy("Always")
op_1.add_volume(
k8s_client.V1Volume(
name='azure',
persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
claim_name='azure-managed-disk')
)
).add_volume_mount(k8s_client.V1VolumeMount(
mount_path='/mnt/azure', name='azure'))
if __name__ == '__main__':
compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')


@ -0,0 +1,10 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3
# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt
COPY data.py /scripts/data.py
# will be overwritten by kf pipeline
ENTRYPOINT [ "python", "/scripts/data.py" ]


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/preprocess
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,118 @@
import os
import shutil
import zipfile
import argparse
from pathlib2 import Path
import wget
import tensorflow as tf
def check_dir(path):
if not os.path.exists(path):
os.makedirs(path)
return Path(path).resolve(strict=False)
def download(source, target, force_clear=False):
if force_clear and os.path.exists(target):
print('Removing {}...'.format(target))
shutil.rmtree(target)
check_dir(target)
targt_file = str(Path(target).joinpath('data.zip'))
if os.path.exists(targt_file) and not force_clear:
print('data already exists, skipping download')
return
if source.startswith('http'):
print("Downloading from {} to {}".format(source, target))
wget.download(source, targt_file)
print("Done!")
else:
print("Copying from {} to {}".format(source, target))
shutil.copyfile(source, targt_file)
print('Unzipping {}'.format(targt_file))
zipr = zipfile.ZipFile(targt_file)
zipr.extractall(target)
zipr.close()
def process_image(path, image_size=160):
img_raw = tf.io.read_file(path)
img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def walk_images(path, image_size=160):
imgs = []
print('Scanning {}'.format(path))
# find subdirectories in base path
# (they should be the labels)
labels = []
for (_, dirs, _) in os.walk(path):
print('Found {}'.format(dirs))
labels = dirs
break
for d in labels:
label_path = os.path.join(path, d)  # keep the base path intact across labels
print('Processing {}'.format(label_path))
# only care about files in directory
for item in os.listdir(label_path):
if not item.lower().endswith('.jpg'):
print('skipping {}'.format(item))
continue
image = os.path.join(label_path, item)
try:
img = process_image(image, image_size)
assert img.shape[2] == 3, "Invalid channel count"
# write out good images
imgs.append(image)
except Exception:
# skip unreadable or non-RGB images, but log them
print('{}\n'.format(image))
return imgs
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='data cleaning for binary image task')
parser.add_argument('-b', '--base_path', help='directory to base data', default='../../data')
parser.add_argument('-d', '--data', help='directory to training data', default='train')
parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
parser.add_argument('-f', '--force',
help='force clear all data', default=False, action='store_true')
args = parser.parse_args()
print(args)
print('Using TensorFlow v.{}'.format(tf.__version__))
base_path = Path(args.base_path).resolve(strict=False)
print('Base Path: {}'.format(base_path))
data_path = base_path.joinpath(args.data).resolve(strict=False)
print('Train Path: {}'.format(data_path))
target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
print('Train File: {}'.format(target_path))
zip_path = args.zipfile
print('Acquiring data...')
download(zip_path, str(base_path), args.force)
if os.path.exists(str(target_path)):
print('dataset text file already exists, skipping check')
else:
print('Testing images...')
images = walk_images(str(data_path), args.img_size)
# save file
print('writing dataset to {}'.format(target_path))
with open(str(target_path), 'w+') as f:
f.write('\n'.join(images))
# python data.py -z https://aiadvocate.blob.core.windows.net/public/tacodata.zip -t train.txt


@ -0,0 +1,3 @@
pathlib2
requests
wget


@ -0,0 +1,12 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3
RUN pip install azure-cli
RUN az extension add -n azure-cli-ml
RUN pip install --upgrade pip
RUN pip install --upgrade pillow
RUN pip install azureml
RUN pip install azureml.core
COPY profile.sh /scripts/profile.sh
COPY inferenceconfig.json /scripts/inferenceconfig.json
COPY score.py /scripts/score.py
COPY environment.yml /scripts/environment.yml
ENTRYPOINT bash


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/profile
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,19 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.
# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
  - python=3.6.2
  - pip:
      # Required packages for AzureML execution, history, and data preparation.
      - azureml-defaults
      - numpy
      - tensorflow==2.0.0-alpha0
      - Pillow
      - requests


@ -0,0 +1,10 @@
{
    "entryScript": "/scripts/score.py",
    "runtime": "python",
    "condaFile": "/scripts/environment.yml",
    "extraDockerfileSteps": null,
    "sourceDirectory": null,
    "enableGpu": false,
    "baseImage": null,
    "baseImageRegistry": null
}


@ -0,0 +1,21 @@
#!/bin/sh
while getopts "m:n:i:d:s:p:u:r:w:t:b:" option;
do
case "$option" in
m ) MODEL=${OPTARG};;
n ) PROFILE_NAME=${OPTARG};;
i ) INFERENCE_CONFIG=${OPTARG};;
d ) DATA=${OPTARG};;
s ) SERVICE_PRINCIPAL_ID=${OPTARG};;
p ) SERVICE_PRINCIPAL_PASSWORD=${OPTARG};;
u ) SUBSCRIPTION_ID=${OPTARG};;
r ) RESOURCE_GROUP=${OPTARG};;
w ) WORKSPACE=${OPTARG};;
t ) TENANT_ID=${OPTARG};;
b ) BASE_PATH=${OPTARG};;
esac
done
az login --service-principal --username ${SERVICE_PRINCIPAL_ID} --password ${SERVICE_PRINCIPAL_PASSWORD} -t $TENANT_ID
az ml model profile -n $PROFILE_NAME -m ${MODEL}:1 --ic $INFERENCE_CONFIG -d $DATA -t myprofileresult.json -w $WORKSPACE -g $RESOURCE_GROUP
mv myprofileresult.json ${BASE_PATH}/myprofileresult.json
echo ${BASE_PATH}
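As with the deploy step, a by-hand invocation mirroring the arguments the pipeline's profile step passes might look like this sketch (the credential variables are placeholders; -d is the sample request sent to the profiler and -b is where myprofileresult.json is copied for the deploy step):

sh /scripts/profile.sh -n tacoprofile -m tacosandburritos \
  -i /scripts/inferenceconfig.json \
  -d '{"image":"https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg"}' \
  -t "$TENANT_ID" -s "$SP_ID" -p "$SP_PASSWORD" -u "$SUBSCRIPTION_ID" \
  -r "$RESOURCE_GROUP" -w "$WORKSPACE" -b /mnt/azure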


@ -0,0 +1,3 @@
requests
numpy
pillow


@ -0,0 +1,89 @@
import json
import time
import datetime
from io import BytesIO
import requests
import numpy as np
from PIL import Image
import tensorflow as tf
from azureml.core.model import Model
def init():
if Model.get_model_path('tacosandburritos'):
model_path = Model.get_model_path('tacosandburritos')
else:
model_path = '/model/latest.h5'
print('Attempting to load model')
model = tf.keras.models.load_model(model_path)
model.summary()
print('Done!')
print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
return model
def run(raw_data, model):
prev_time = time.time()
post = json.loads(raw_data)
img_path = post['image']
tensor = process_image(img_path, 160)
t = tf.reshape(tensor, [-1, 160, 160, 3])
o = model.predict(t, steps=1)  # [0][0]
print(o)
o = o[0][0]
current_time = time.time()  # capture after prediction so the reported time covers inference
inference_time = datetime.timedelta(seconds=current_time - prev_time)
payload = {
'time': inference_time.total_seconds(),
'prediction': 'burrito' if o > 0.5 else 'tacos',
'scores': str(o)
}
print('Input ({}), Prediction ({})'.format(post['image'], payload))
return payload
def process_image(path, image_size):
# Extract image (from web or path)
if path.startswith('http'):
response = requests.get(path)
img = np.array(Image.open(BytesIO(response.content)))
else:
img = np.array(Image.open(path))
img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
# tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
if __name__ == "__main__":
images = {
'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
}
my_model = init()
for k, v in images.items():
print('{} => {}'.format(k, v))
info('Taco Test')
taco = json.dumps({'image': images['tacos']})
print(taco)
run(taco, my_model)
info('Burrito Test')
burrito = json.dumps({'image': images['burrito']})
print(burrito)
run(burrito, my_model)


@ -0,0 +1,11 @@
FROM python:3.7-slim
# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt
# only for local testing
COPY register.py /scripts/register.py
# will be overwritten by kf pipeline
ENTRYPOINT [ "python", "/scripts/register.py" ]


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/register
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,102 @@
import json
from os.path import relpath
import argparse
from pathlib2 import Path
import azureml
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.authentication import ServicePrincipalAuthentication
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
def get_ws(tenant_id, service_principal_id,
service_principal_password, subscription_id, resource_group, workspace):
auth_args = {
'tenant_id': tenant_id,
'service_principal_id': service_principal_id,
'service_principal_password': service_principal_password
}
ws_args = {
'auth': ServicePrincipalAuthentication(**auth_args),
'subscription_id': subscription_id,
'resource_group': resource_group
}
ws = Workspace.get(workspace, **ws_args)
return ws
def run(mdl_path, model_name, ws, tgs):
print(ws.get_details())
print('\nSaving model {} to {}'.format(mdl_path, model_name))
# Model Path needs to be relative
mdl_path = relpath(mdl_path, '.')
Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
print('Done!')
if __name__ == "__main__":
# argparse stuff for model path and model name
parser = argparse.ArgumentParser(description='sanity check on model')
parser.add_argument('-b', '--base_path', help='directory to base folder', default='../../data')
parser.add_argument('-m', '--model', help='path to model file', default='/model/latest.h5')
parser.add_argument('-n', '--model_name', help='AML Model name', default='tacosandburritos')
parser.add_argument('-t', '--tenant_id', help='tenant_id')
parser.add_argument('-s', '--service_principal_id', help='service_principal_id')
parser.add_argument('-p', '--service_principal_password', help='service_principal_password')
parser.add_argument('-u', '--subscription_id', help='subscription_id')
parser.add_argument('-r', '--resource_group', help='resource_group')
parser.add_argument('-w', '--workspace', help='workspace')
args = parser.parse_args()
print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))
args.model = 'model/' + args.model
model_path = str(Path(args.base_path).resolve(
strict=False).joinpath(args.model).resolve(strict=False))
params_path = str(Path(args.base_path).resolve(
strict=False).joinpath('params.json').resolve(strict=False))
wsrgs = {
'tenant_id': args.tenant_id,
'service_principal_id': args.service_principal_id,
'service_principal_password': args.service_principal_password,
'subscription_id': args.subscription_id,
'resource_group': args.resource_group,
'workspace': args.workspace
}
rgs = {
'mdl_path': model_path,
'model_name': args.model_name
}
# printing out args for posterity
for i in wsrgs:
if i == 'service_principal_password':
print('{} => **********'.format(i))
else:
print('{} => {}'.format(i, wsrgs[i]))
with(open(str(params_path), 'r')) as f:
tags = json.load(f)
print('\n\nUsing the following tags:')
for tag in tags:
print('{} => {}'.format(tag, tags[tag]))
rgs['tgs'] = tags  # run() takes the tags under the 'tgs' keyword
workspc = get_ws(**wsrgs)
rgs['ws'] = workspc
run(**rgs)
# python register.py --model_path v --model_name c --tenant_id c
# --service_principal_id v --service_principal_password v
# --subscription_id v --resource_group x --workspace c


@ -0,0 +1,3 @@
pathlib2
requests
azureml-sdk


@ -0,0 +1,18 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3
# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt
COPY train.py /scripts/train.py
# python train.py -d data/PetImages -e 1 -b 32 -l 0.0001 -o model -f dataset.txt
# will be overwritten by kf pipeline
ENTRYPOINT [ "python", \
"/scripts/train.py", \
"-d", "data/train", \
"-e", "10", \
"-b", "32", \
"-l", "0.0001", \
"-o", "model", \
"-f", "train.txt" ]


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/training
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,2 @@
Pillow
pathlib2


@ -0,0 +1,199 @@
from __future__ import absolute_import, division, print_function
import os
import math
import hmac
import json
import hashlib
import argparse
from random import shuffle
from pathlib2 import Path
import numpy as np
import tensorflow as tf
from tensorflow.data import Dataset
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
def check_dir(path):
if not os.path.exists(path):
os.makedirs(path)
return Path(path).resolve(strict=False)
def process_image(path, label, img_size):
img_raw = tf.io.read_file(path)
img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
return img_final, label
def load_dataset(base_path, dset, split=None):
# normalize splits
if split is None:
split = [8, 1, 1]
splits = np.array(split) / np.sum(np.array(split))
# find labels - parent folder names
labels = {}
for (_, dirs, _) in os.walk(base_path):
print('found {}'.format(dirs))
labels = {k: v for (v, k) in enumerate(dirs)}
print('using {}'.format(labels))
break
# load all files along with idx label
print('loading dataset from {}'.format(dset))
with open(dset, 'r') as d:
data = [(str(Path(line.strip()).absolute()),
labels[Path(line.strip()).parent.name]) for line in d.readlines()]
print('dataset size: {}\nshuffling data...'.format(len(data)))
# shuffle data
shuffle(data)
print('splitting data...')
# split data
train_idx = int(len(data) * splits[0])
return data[:train_idx]
# @print_info
def run(
dpath,
img_size=160,
epochs=10,
batch_size=32,
learning_rate=0.0001,
output='model',
dset=None):
img_shape = (img_size, img_size, 3)
info('Loading Data Set')
# load dataset
train = load_dataset(dpath, dset)
# training data
train_data, train_labels = zip(*train)
# zip paths, labels and a repeated image size so process_image receives all three arguments
train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
Dataset.from_tensor_slices(list(train_labels)),
Dataset.from_tensor_slices([img_size] * len(train_data))))
train_ds = train_ds.map(map_func=process_image,
num_parallel_calls=5)
train_ds = train_ds.apply(tf.data.experimental.ignore_errors())
train_ds = train_ds.batch(batch_size)
train_ds = train_ds.prefetch(buffer_size=5)
train_ds = train_ds.repeat()
# model
info('Creating Model')
base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
include_top=False,
weights='imagenet')
base_model.trainable = True
model = tf.keras.Sequential([
base_model,
tf.keras.layers.GlobalAveragePooling2D(),
tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
loss='binary_crossentropy',
metrics=['accuracy'])
model.summary()
# training
info('Training')
steps_per_epoch = math.ceil(len(train) / batch_size)
model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
# save model
info('Saving Model')
# check existence of base model folder
output = check_dir(output)
print('Serializing into saved_model format')
tf.saved_model.save(model, str(output))
print('Done!')
# add time prefix folder
file_output = str(Path(output).joinpath('latest.h5'))
print('Serializing h5 model to:\n{}'.format(file_output))
model.save(file_output)
return generate_hash(file_output, 'kf_pipeline')
def generate_hash(dfile, key):
print('Generating hash for {}'.format(dfile))
m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
BUF_SIZE = 65536
with open(str(dfile), 'rb') as myfile:
while True:
data = myfile.read(BUF_SIZE)
if not data:
break
m.update(data)
return m.hexdigest()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='transfer learning for binary image task')
parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
parser.add_argument('-d', '--data', help='directory to training and test data', default='train')
parser.add_argument('-e', '--epochs', help='number of epochs', default=10, type=int)
parser.add_argument('-b', '--batch', help='batch size', default=32, type=int)
parser.add_argument('-i', '--image_size', help='image size', default=160, type=int)
parser.add_argument('-l', '--lr', help='learning rate', default=0.0001, type=float)
parser.add_argument('-o', '--outputs', help='output directory', default='model')
parser.add_argument('-f', '--dataset', help='cleaned data listing')
args = parser.parse_args()
info('Using TensorFlow v.{}'.format(tf.__version__))
data_path = Path(args.base_path).joinpath(args.data).resolve(strict=False)
target_path = Path(args.base_path).resolve(strict=False).joinpath(args.outputs)
dataset = Path(args.base_path).joinpath(args.dataset)
image_size = args.image_size
params = Path(args.base_path).joinpath('params.json')
args = {
"dpath": str(data_path),
"img_size": image_size,
"epochs": args.epochs,
"batch_size": args.batch,
"learning_rate": args.lr,
"output": str(target_path),
"dset": str(dataset)
}
dataset_signature = generate_hash(dataset, 'kf_pipeline')
# printing out args for posterity
for i in args:
print('{} => {}'.format(i, args[i]))
model_signature = run(**args)
args['dataset_signature'] = dataset_signature.upper()
args['model_signature'] = model_signature.upper()
args['model_type'] = 'tfv2-MobileNetV2'
print('Writing out params...', end='')
with open(str(params), 'w') as f:
json.dump(args, f)
print(' Saved to {}'.format(str(params)))
# python train.py -d train -e 3 -b 32 -l 0.0001 -o model -f train.txt


@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: azure-managed-disk
  namespace: kubeflow
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: managed-premium
  resources:
    requests:
      storage: 300Gi
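The claim must exist before the pipeline runs, since every step mounts it at /mnt/azure. A minimal sketch, assuming the manifest above is saved as azure-pvc.yaml:

kubectl apply -f azure-pvc.yaml
kubectl get pvc azure-managed-disk -n kubeflow   # wait for STATUS to show Bound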