mirror of https://github.com/kubeflow/examples.git

Merge pull request #599 from rem20806/master

Added Azure pipeline example for Kubeflow

commit e37a9d7acd
@@ -0,0 +1,18 @@
# standard things
.vscode
.ipynb_checkpoints/
__pycache__

# Environment Variables
*.env
*.cfg
*-creds.yaml

# models and data
data/
model/
*.tar.gz
*.h5
*.zip

aml_config/
@@ -0,0 +1,3 @@
# Kubeflow and Azure Pipelines Example

See the docs on the [Kubeflow website](https://www.kubeflow.org/docs/azure/deploy/) for information on how to deploy this Machine Learning pipeline example.
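A rough sketch of the local workflow, assuming an Azure Container Registry named `myregistry`, one working directory per component, and that the pipeline definition further down is saved as `pipeline.py` (these names are illustrative, not taken from this change). The per-component `build.sh` scripts below perform essentially the build step, taking the registry name via `-r`:

```bash
# Build and push one image per pipeline component, then compile the pipeline
# definition into a package for Kubeflow Pipelines. The 'insert your image here'
# placeholders in the pipeline need to be pointed at the pushed images.
az acr login --name myregistry
for component in preprocess training register profile deploy; do
    docker build -t "myregistry.azurecr.io/${component}" "$component" \
        && docker push "myregistry.azurecr.io/${component}"
done
python pipeline.py   # the __main__ block writes pipeline.py.tar.gz via the KFP compiler
```

The compiled package can then be uploaded through the Kubeflow Pipelines UI, with the Azure credentials supplied as run parameters.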
@@ -0,0 +1,9 @@
FROM mcr.microsoft.com/azure-cli
RUN az extension add -n azure-cli-ml
RUN pip install --upgrade pip
COPY deploymentconfig.json /scripts/deploymentconfig.json
COPY inferenceconfig.json /scripts/inferenceconfig.json
COPY deploy.sh /scripts/deploy.sh
COPY score.py /scripts/score.py
COPY environment.yml /scripts/environment.yml
CMD bash
@@ -0,0 +1,4 @@
{
    "computeType": "aks",
    "ComputeTarget": "aks-cluster"
}
@@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
    case "$option" in
        r ) REGISTRY_NAME=${OPTARG};;
    esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/deploy
docker build -t $IMAGE . && docker run -it $IMAGE
@ -0,0 +1,20 @@
|
|||
# az ml model deploy -n tacosandburritos -m tacosandburritos:1 --ic inferenceconfig.json --dc deploymentconfig.json --resource-group taco-rg --workspace-name taco-workspace --overwrite -v
|
||||
#!/bin/sh
|
||||
while getopts "m:n:i:d:s:p:u:r:w:t:b:" option;
|
||||
do
|
||||
case "$option" in
|
||||
m ) MODEL=${OPTARG};;
|
||||
n ) MODEL_NAME=${OPTARG};;
|
||||
i ) INFERENCE_CONFIG=${OPTARG};;
|
||||
d ) DEPLOYMENTCONFIG=${OPTARG};;
|
||||
s ) SERVICE_PRINCIPAL_ID=${OPTARG};;
|
||||
p ) SERVICE_PRINCIPAL_PASSWORD=${OPTARG};;
|
||||
u ) SUBSCRIPTION_ID=${OPTARG};;
|
||||
r ) RESOURCE_GROUP=${OPTARG};;
|
||||
w ) WORKSPACE=${OPTARG};;
|
||||
t ) TENANT_ID=${OPTARG};;
|
||||
b ) BASE_PATH=${OPTARG};;
|
||||
esac
|
||||
done
|
||||
az login --service-principal --username ${SERVICE_PRINCIPAL_ID} --password ${SERVICE_PRINCIPAL_PASSWORD} -t $TENANT_ID
|
||||
az ml model deploy -n $MODEL_NAME -m ${MODEL}:1 --ic $INFERENCE_CONFIG --pi ${BASE_PATH}/myprofileresult.json --dc $DEPLOYMENTCONFIG -w $WORKSPACE -g $RESOURCE_GROUP --overwrite -v
|
||||
|
|
@@ -0,0 +1,8 @@
{
    "containerResourceRequirements": {
        "cpu": 2,
        "memoryInGB": 4
    },
    "computeType": "ACI",
    "enableAppInsights": "True"
}
@@ -0,0 +1,19 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults
  - numpy
  - tensorflow==2.0.0-alpha0
  - Pillow
  - requests
@@ -0,0 +1,13 @@
#!/bin/bash
while getopts "n:w:g:" option;
do
    case "$option" in
        n ) DEPLOYMENT_NAME=${OPTARG};;
        w ) WORKSPACE=${OPTARG};;
        g ) RESOURCE_GROUP=${OPTARG};;
    esac
done
echo "test the deployment with a burrito image"
az ml service run -n ${DEPLOYMENT_NAME} -d '{ "image": "https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg" }' -w ${WORKSPACE} -g ${RESOURCE_GROUP}
echo "test the deployment with a taco image"
az ml service run -n ${DEPLOYMENT_NAME} -d '{ "image": "https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg" }' -w ${WORKSPACE} -g ${RESOURCE_GROUP}
@@ -0,0 +1,10 @@
{
    "entryScript": "/scripts/score.py",
    "runtime": "python",
    "condaFile": "/scripts/environment.yml",
    "extraDockerfileSteps": null,
    "sourceDirectory": null,
    "enableGpu": false,
    "baseImage": null,
    "baseImageRegistry": null
}
@@ -0,0 +1,93 @@
import json
import time
from io import BytesIO
import datetime
import requests
import numpy as np
from PIL import Image
import tensorflow as tf

from azureml.core.model import Model


def init():
    # use the registered model when running as an AML service; fall back to the
    # local file when the model is not registered (e.g. local testing)
    try:
        model_path = Model.get_model_path('tacosandburritos')
    except Exception:
        model_path = '/model/latest.h5'

    print('Attempting to load model')
    model = tf.keras.models.load_model(model_path)
    model.summary()
    print('Done!')

    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
    return model


def run(raw_data, model):
    prev_time = time.time()

    post = json.loads(raw_data)
    img_path = post['image']

    tensor = process_image(img_path, 160)
    t = tf.reshape(tensor, [-1, 160, 160, 3])
    o = model.predict(t, steps=1)  # [0][0]
    print(o)
    o = o[0][0]

    current_time = time.time()
    inference_time = datetime.timedelta(seconds=current_time - prev_time)
    payload = {
        'time': inference_time.total_seconds(),
        'prediction': 'burrito' if o > 0.5 else 'tacos',
        'scores': str(o)
    }

    print('Input ({}), Prediction ({})'.format(post['image'], payload))

    return payload


def process_image(path, image_size):
    # Extract image (from web or path)
    if path.startswith('http'):
        response = requests.get(path)
        img = np.array(Image.open(BytesIO(response.content)))
    else:
        img = np.array(Image.open(path))

    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
    # tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final


def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)


if __name__ == "__main__":
    images = {
        'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
        'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
    }

    my_model = init()

    for k, v in images.items():
        print('{} => {}'.format(k, v))

    info('Taco Test')
    taco = json.dumps({'image': images['tacos']})
    print(taco)
    run(taco, my_model)

    info('Burrito Test')
    burrito = json.dumps({'image': images['burrito']})
    print(burrito)
    run(burrito, my_model)
@@ -0,0 +1,140 @@
"""Main pipeline file"""
from kubernetes import client as k8s_client
import kfp.dsl as dsl
import kfp.compiler as compiler


@dsl.pipeline(
    name='Tacos vs. Burritos',
    description='Simple TF CNN'
)
def tacosandburritos_train(
    tenant_id,
    service_principal_id,
    service_principal_password,
    subscription_id,
    resource_group,
    workspace
):
    """Pipeline steps"""

    persistent_volume_path = '/mnt/azure'
    data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'
    epochs = 5
    batch = 32
    learning_rate = 0.0001
    model_name = 'tacosandburritos'
    profile_name = 'tacoprofile'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'

    # preprocess data
    operations['preprocess'] = dsl.ContainerOp(
        name='preprocess',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/data.py',
            '--base_path', persistent_volume_path,
            '--data', training_folder,
            '--target', training_dataset,
            '--img_size', image_size,
            '--zipfile', data_download
        ]
    )

    # train
    operations['training'] = dsl.ContainerOp(
        name='training',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/train.py',
            '--base_path', persistent_volume_path,
            '--data', training_folder,
            '--epochs', epochs,
            '--batch', batch,
            '--image_size', image_size,
            '--lr', learning_rate,
            '--outputs', model_folder,
            '--dataset', training_dataset
        ]
    )
    operations['training'].after(operations['preprocess'])

    # register model
    operations['register'] = dsl.ContainerOp(
        name='register',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/register.py',
            '--base_path', persistent_volume_path,
            '--model', 'latest.h5',
            '--model_name', model_name,
            '--tenant_id', tenant_id,
            '--service_principal_id', service_principal_id,
            '--service_principal_password', service_principal_password,
            '--subscription_id', subscription_id,
            '--resource_group', resource_group,
            '--workspace', workspace
        ]
    )
    operations['register'].after(operations['training'])

    operations['profile'] = dsl.ContainerOp(
        name='profile',
        image='insert your image here',
        command=['sh'],
        arguments=[
            '/scripts/profile.sh',
            '-n', profile_name,
            '-m', model_name,
            '-i', '/scripts/inferenceconfig.json',
            '-d', '{"image":"https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg"}',
            '-t', tenant_id,
            '-r', resource_group,
            '-w', workspace,
            '-s', service_principal_id,
            '-p', service_principal_password,
            '-u', subscription_id,
            '-b', persistent_volume_path
        ]
    )
    operations['profile'].after(operations['register'])

    operations['deploy'] = dsl.ContainerOp(
        name='deploy',
        image='insert your image here',
        command=['sh'],
        arguments=[
            '/scripts/deploy.sh',
            '-n', model_name,
            '-m', model_name,
            '-i', '/scripts/inferenceconfig.json',
            '-d', '/scripts/deploymentconfig.json',
            '-t', tenant_id,
            '-r', resource_group,
            '-w', workspace,
            '-s', service_principal_id,
            '-p', service_principal_password,
            '-u', subscription_id,
            '-b', persistent_volume_path
        ]
    )
    operations['deploy'].after(operations['profile'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-disk')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure', name='azure'))


if __name__ == '__main__':
    compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')
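The run parameters above (tenant_id, service_principal_id, service_principal_password, subscription_id, resource_group, workspace) identify an Azure service principal with access to the AML workspace. A minimal sketch of creating one with the Azure CLI; the principal name, role, and scope below are placeholders, not part of this change:

```bash
# Create a service principal scoped to the resource group that holds the AML
# workspace, then map its output onto the pipeline parameters:
#   appId -> service_principal_id, password -> service_principal_password, tenant -> tenant_id
az ad sp create-for-rbac --name kubeflow-taco-example \
    --role Contributor \
    --scopes "/subscriptions/<subscription-id>/resourceGroups/<resource-group>"
az account show --query id -o tsv   # subscription_id
```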
@@ -0,0 +1,10 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3

# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt

COPY data.py /scripts/data.py

# will be overwritten by kf pipeline
ENTRYPOINT [ "python", "/scripts/data.py" ]
@@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
    case "$option" in
        r ) REGISTRY_NAME=${OPTARG};;
    esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/preprocess
docker build -t $IMAGE . && docker run -it $IMAGE
@@ -0,0 +1,118 @@
import os
import shutil
import zipfile
import argparse
from pathlib2 import Path
import wget
import tensorflow as tf


def check_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)
    return Path(path).resolve(strict=False)


def download(source, target, force_clear=False):
    if force_clear and os.path.exists(target):
        print('Removing {}...'.format(target))
        shutil.rmtree(target)

    check_dir(target)

    target_file = str(Path(target).joinpath('data.zip'))
    if os.path.exists(target_file) and not force_clear:
        print('data already exists, skipping download')
        return

    if source.startswith('http'):
        print("Downloading from {} to {}".format(source, target))
        wget.download(source, target_file)
        print("Done!")
    else:
        print("Copying from {} to {}".format(source, target))
        shutil.copyfile(source, target_file)

    print('Unzipping {}'.format(target_file))
    zipr = zipfile.ZipFile(target_file)
    zipr.extractall(target)
    zipr.close()


def process_image(path, image_size=160):
    img_raw = tf.io.read_file(path)
    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final


def walk_images(path, image_size=160):
    imgs = []
    print('Scanning {}'.format(path))
    # find subdirectories in base path
    # (they should be the labels)
    labels = []
    for (_, dirs, _) in os.walk(path):
        print('Found {}'.format(dirs))
        labels = dirs
        break

    for d in labels:
        label_path = os.path.join(path, d)
        print('Processing {}'.format(label_path))
        # only care about files in directory
        for item in os.listdir(label_path):
            if not item.lower().endswith('.jpg'):
                print('skipping {}'.format(item))
                continue

            image = os.path.join(label_path, item)
            try:
                img = process_image(image, image_size)
                assert img.shape[2] == 3, "Invalid channel count"
                # write out good images
                imgs.append(image)
            except Exception:  # skip images that fail to decode or have the wrong channel count
                print('{}\n'.format(image))

    return imgs


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='data cleaning for binary image task')
    parser.add_argument('-b', '--base_path', help='directory to base data', default='../../data')
    parser.add_argument('-d', '--data', help='directory to training data', default='train')
    parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
    parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
    parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
    parser.add_argument('-f', '--force',
                        help='force clear all data', default=False, action='store_true')
    args = parser.parse_args()
    print(args)

    print('Using TensorFlow v.{}'.format(tf.__version__))

    base_path = Path(args.base_path).resolve(strict=False)
    print('Base Path: {}'.format(base_path))
    data_path = base_path.joinpath(args.data).resolve(strict=False)
    print('Train Path: {}'.format(data_path))
    target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
    print('Train File: {}'.format(target_path))
    zip_path = args.zipfile

    print('Acquiring data...')
    download(zip_path,
             str(base_path), args.force)

    if os.path.exists(str(target_path)):
        print('dataset text file already exists, skipping check')
    else:
        print('Testing images...')
        images = walk_images(str(data_path), args.img_size)

        # save file
        print('writing dataset to {}'.format(target_path))
        with open(str(target_path), 'w+') as f:
            f.write('\n'.join(images))

# python data.py -z https://aiadvocate.blob.core.windows.net/public/tacodata.zip -t train.txt
@@ -0,0 +1,3 @@
pathlib2
requests
wget
@@ -0,0 +1,12 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3
RUN pip install azure-cli
RUN az extension add -n azure-cli-ml
RUN pip install --upgrade pip
RUN pip install --upgrade pillow
RUN pip install azureml
RUN pip install azureml.core
COPY profile.sh /scripts/profile.sh
COPY inferenceconfig.json /scripts/inferenceconfig.json
COPY score.py /scripts/score.py
COPY environment.yml /scripts/environment.yml
ENTRYPOINT bash
@@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
    case "$option" in
        r ) REGISTRY_NAME=${OPTARG};;
    esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/profile
docker build -t $IMAGE . && docker run -it $IMAGE
@@ -0,0 +1,19 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults
  - numpy
  - tensorflow==2.0.0-alpha0
  - Pillow
  - requests
@@ -0,0 +1,10 @@
{
    "entryScript": "/scripts/score.py",
    "runtime": "python",
    "condaFile": "/scripts/environment.yml",
    "extraDockerfileSteps": null,
    "sourceDirectory": null,
    "enableGpu": false,
    "baseImage": null,
    "baseImageRegistry": null
}
@@ -0,0 +1,21 @@
#!/bin/sh
while getopts "m:n:i:d:s:p:u:r:w:t:b:" option;
do
    case "$option" in
        m ) MODEL=${OPTARG};;
        n ) PROFILE_NAME=${OPTARG};;
        i ) INFERENCE_CONFIG=${OPTARG};;
        d ) DATA=${OPTARG};;
        s ) SERVICE_PRINCIPAL_ID=${OPTARG};;
        p ) SERVICE_PRINCIPAL_PASSWORD=${OPTARG};;
        u ) SUBSCRIPTION_ID=${OPTARG};;
        r ) RESOURCE_GROUP=${OPTARG};;
        w ) WORKSPACE=${OPTARG};;
        t ) TENANT_ID=${OPTARG};;
        b ) BASE_PATH=${OPTARG};;
    esac
done
az login --service-principal --username ${SERVICE_PRINCIPAL_ID} --password ${SERVICE_PRINCIPAL_PASSWORD} -t $TENANT_ID
az ml model profile -n $PROFILE_NAME -m ${MODEL}:1 --ic $INFERENCE_CONFIG -d $DATA -t myprofileresult.json -w $WORKSPACE -g $RESOURCE_GROUP
mv myprofileresult.json ${BASE_PATH}/myprofileresult.json
echo ${BASE_PATH}
@@ -0,0 +1,3 @@
requests
numpy
pillow
@@ -0,0 +1,89 @@
import json
import time
import datetime
from io import BytesIO
import requests
import numpy as np
from PIL import Image
import tensorflow as tf
from azureml.core.model import Model


def init():
    # use the registered model when running as an AML service; fall back to the
    # local file when the model is not registered (e.g. local testing)
    try:
        model_path = Model.get_model_path('tacosandburritos')
    except Exception:
        model_path = '/model/latest.h5'
    print('Attempting to load model')
    model = tf.keras.models.load_model(model_path)
    model.summary()
    print('Done!')
    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
    return model


def run(raw_data, model):
    prev_time = time.time()

    post = json.loads(raw_data)
    img_path = post['image']

    tensor = process_image(img_path, 160)
    t = tf.reshape(tensor, [-1, 160, 160, 3])
    o = model.predict(t, steps=1)  # [0][0]
    print(o)
    o = o[0][0]

    current_time = time.time()
    inference_time = datetime.timedelta(seconds=current_time - prev_time)
    payload = {
        'time': inference_time.total_seconds(),
        'prediction': 'burrito' if o > 0.5 else 'tacos',
        'scores': str(o)
    }

    print('Input ({}), Prediction ({})'.format(post['image'], payload))

    return payload


def process_image(path, image_size):
    # Extract image (from web or path)
    if path.startswith('http'):
        response = requests.get(path)
        img = np.array(Image.open(BytesIO(response.content)))
    else:
        img = np.array(Image.open(path))

    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
    # tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final


def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)


if __name__ == "__main__":
    images = {
        'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
        'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
    }

    my_model = init()

    for k, v in images.items():
        print('{} => {}'.format(k, v))

    info('Taco Test')
    taco = json.dumps({'image': images['tacos']})
    print(taco)
    run(taco, my_model)

    info('Burrito Test')
    burrito = json.dumps({'image': images['burrito']})
    print(burrito)
    run(burrito, my_model)
@@ -0,0 +1,11 @@
FROM python:3.7-slim

# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt

# only for local testing
COPY register.py /scripts/register.py

# will be overwritten by kf pipeline
ENTRYPOINT [ "python", "/scripts/register.py" ]
@@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
    case "$option" in
        r ) REGISTRY_NAME=${OPTARG};;
    esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/register
docker build -t $IMAGE . && docker run -it $IMAGE
@@ -0,0 +1,102 @@
import json
from os.path import relpath
import argparse
from pathlib2 import Path
import azureml
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.authentication import ServicePrincipalAuthentication


def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)


def get_ws(tenant_id, service_principal_id,
           service_principal_password, subscription_id, resource_group, workspace):
    auth_args = {
        'tenant_id': tenant_id,
        'service_principal_id': service_principal_id,
        'service_principal_password': service_principal_password
    }

    ws_args = {
        'auth': ServicePrincipalAuthentication(**auth_args),
        'subscription_id': subscription_id,
        'resource_group': resource_group
    }
    ws = Workspace.get(workspace, **ws_args)
    return ws


def run(mdl_path, model_name, ws, tgs):
    print(ws.get_details())

    print('\nSaving model {} to {}'.format(mdl_path, model_name))

    # Model Path needs to be relative
    mdl_path = relpath(mdl_path, '.')

    Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
    print('Done!')


if __name__ == "__main__":
    # argparse stuff for model path and model name
    parser = argparse.ArgumentParser(description='sanity check on model')
    parser.add_argument('-b', '--base_path', help='directory to base folder', default='../../data')
    parser.add_argument('-m', '--model', help='path to model file', default='/model/latest.h5')
    parser.add_argument('-n', '--model_name', help='AML Model name', default='tacosandburritos')
    parser.add_argument('-t', '--tenant_id', help='tenant_id')
    parser.add_argument('-s', '--service_principal_id', help='service_principal_id')
    parser.add_argument('-p', '--service_principal_password', help='service_principal_password')
    parser.add_argument('-u', '--subscription_id', help='subscription_id')
    parser.add_argument('-r', '--resource_group', help='resource_group')
    parser.add_argument('-w', '--workspace', help='workspace')
    args = parser.parse_args()

    print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))
    args.model = 'model/' + args.model
    model_path = str(Path(args.base_path).resolve(
        strict=False).joinpath(args.model).resolve(strict=False))
    params_path = str(Path(args.base_path).resolve(
        strict=False).joinpath('params.json').resolve(strict=False))
    wsrgs = {
        'tenant_id': args.tenant_id,
        'service_principal_id': args.service_principal_id,
        'service_principal_password': args.service_principal_password,
        'subscription_id': args.subscription_id,
        'resource_group': args.resource_group,
        'workspace': args.workspace
    }
    rgs = {
        'mdl_path': model_path,
        'model_name': args.model_name
    }

    # printing out args for posterity
    for i in wsrgs:
        if i == 'service_principal_password':
            print('{} => **********'.format(i))
        else:
            print('{} => {}'.format(i, wsrgs[i]))

    with open(str(params_path), 'r') as f:
        tags = json.load(f)

    print('\n\nUsing the following tags:')
    for tag in tags:
        print('{} => {}'.format(tag, tags[tag]))

    rgs['tags'] = tags

    workspc = get_ws(**wsrgs)
    rgs['ws'] = workspc
    run(**rgs)

# python register.py --model v --model_name c --tenant_id c
# --service_principal_id v --service_principal_password v
# --subscription_id v --resource_group x --workspace c
@@ -0,0 +1,3 @@
pathlib2
requests
azureml-sdk
@@ -0,0 +1,18 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3

# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt

COPY train.py /scripts/train.py

# python train.py -d train -e 1 -b 32 -l 0.0001 -o model -f train.txt
# will be overwritten by kf pipeline
ENTRYPOINT [ "python", \
             "/scripts/train.py", \
             "-d", "data/train", \
             "-e", "10", \
             "-b", "32", \
             "-l", "0.0001", \
             "-o", "model", \
             "-f", "train.txt" ]
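The entrypoint arguments above are defaults that the pipeline overrides at run time. For a quick local smoke test one might run the image directly against a host folder; the registry name and host path below are assumptions, and `train/` plus `train.txt` are the outputs of the preprocessing step:

```bash
# Mount a local data folder over the path the pipeline normally provides
# (/mnt/azure) and override the baked-in defaults with explicit flags.
docker run --rm -v "$PWD/data:/mnt/azure" \
    --entrypoint python myregistry.azurecr.io/training \
    /scripts/train.py -s /mnt/azure -d train -e 1 -b 32 -l 0.0001 -o model -f train.txt
```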
@@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
    case "$option" in
        r ) REGISTRY_NAME=${OPTARG};;
    esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/training
docker build -t $IMAGE . && docker run -it $IMAGE
@@ -0,0 +1,2 @@
Pillow
pathlib2
@@ -0,0 +1,199 @@
from __future__ import absolute_import, division, print_function
import os
import math
import hmac
import json
import hashlib
import argparse
from random import shuffle
from pathlib2 import Path
import numpy as np
import tensorflow as tf
from tensorflow.data import Dataset


def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)


def check_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)
    return Path(path).resolve(strict=False)


def process_image(path, label, img_size):
    img_raw = tf.io.read_file(path)
    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
    return img_final, label


def load_dataset(base_path, dset, split=None):
    # normalize splits
    if split is None:
        split = [8, 1, 1]
    splits = np.array(split) / np.sum(np.array(split))

    # find labels - parent folder names
    labels = {}
    for (_, dirs, _) in os.walk(base_path):
        print('found {}'.format(dirs))
        labels = {k: v for (v, k) in enumerate(dirs)}
        print('using {}'.format(labels))
        break

    # load all files along with idx label
    print('loading dataset from {}'.format(dset))
    with open(dset, 'r') as d:
        data = [(str(Path(line.strip()).absolute()),
                 labels[Path(line.strip()).parent.name]) for line in d.readlines()]

    print('dataset size: {}\nshuffling data...'.format(len(data)))

    # shuffle data
    shuffle(data)

    print('splitting data...')
    # split data
    train_idx = int(len(data) * splits[0])

    return data[:train_idx]


# @print_info
def run(
        dpath,
        img_size=160,
        epochs=10,
        batch_size=32,
        learning_rate=0.0001,
        output='model',
        dset=None):
    img_shape = (img_size, img_size, 3)

    info('Loading Data Set')
    # load dataset
    train = load_dataset(dpath, dset)

    # training data: pair each image path with its label;
    # the image size is bound via the closure in the map below
    train_data, train_labels = zip(*train)
    train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
                            Dataset.from_tensor_slices(list(train_labels))))

    train_ds = train_ds.map(map_func=lambda path, label: process_image(path, label, img_size),
                            num_parallel_calls=5)

    train_ds = train_ds.apply(tf.data.experimental.ignore_errors())

    train_ds = train_ds.batch(batch_size)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat()

    # model
    info('Creating Model')
    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
                                                   include_top=False,
                                                   weights='imagenet')
    base_model.trainable = True

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # training
    info('Training')
    steps_per_epoch = math.ceil(len(train) / batch_size)
    model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)

    # save model
    info('Saving Model')

    # check existence of base model folder
    output = check_dir(output)

    print('Serializing into saved_model format')
    tf.saved_model.save(model, str(output))
    print('Done!')

    # add time prefix folder
    file_output = str(Path(output).joinpath('latest.h5'))
    print('Serializing h5 model to:\n{}'.format(file_output))
    model.save(file_output)

    return generate_hash(file_output, 'kf_pipeline')


def generate_hash(dfile, key):
    print('Generating hash for {}'.format(dfile))
    m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
    BUF_SIZE = 65536
    with open(str(dfile), 'rb') as myfile:
        while True:
            data = myfile.read(BUF_SIZE)
            if not data:
                break
            m.update(data)

    return m.hexdigest()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='transfer learning for binary image task')
    parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
    parser.add_argument('-d', '--data', help='directory to training and test data', default='train')
    parser.add_argument('-e', '--epochs', help='number of epochs', default=10, type=int)
    parser.add_argument('-b', '--batch', help='batch size', default=32, type=int)
    parser.add_argument('-i', '--image_size', help='image size', default=160, type=int)
    parser.add_argument('-l', '--lr', help='learning rate', default=0.0001, type=float)
    parser.add_argument('-o', '--outputs', help='output directory', default='model')
    parser.add_argument('-f', '--dataset', help='cleaned data listing')
    args = parser.parse_args()

    info('Using TensorFlow v.{}'.format(tf.__version__))

    data_path = Path(args.base_path).joinpath(args.data).resolve(strict=False)
    target_path = Path(args.base_path).resolve(strict=False).joinpath(args.outputs)
    dataset = Path(args.base_path).joinpath(args.dataset)
    image_size = args.image_size

    params = Path(args.base_path).joinpath('params.json')

    args = {
        "dpath": str(data_path),
        "img_size": image_size,
        "epochs": args.epochs,
        "batch_size": args.batch,
        "learning_rate": args.lr,
        "output": str(target_path),
        "dset": str(dataset)
    }

    dataset_signature = generate_hash(dataset, 'kf_pipeline')
    # printing out args for posterity
    for i in args:
        print('{} => {}'.format(i, args[i]))

    model_signature = run(**args)

    args['dataset_signature'] = dataset_signature.upper()
    args['model_signature'] = model_signature.upper()
    args['model_type'] = 'tfv2-MobileNetV2'
    print('Writing out params...', end='')
    with open(str(params), 'w') as f:
        json.dump(args, f)

    print(' Saved to {}'.format(str(params)))

# python train.py -d train -e 3 -b 32 -l 0.0001 -o model -f train.txt
@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: azure-managed-disk
  namespace: kubeflow
spec:
  accessModes:
  - ReadWriteOnce
  storageClassName: managed-premium
  resources:
    requests:
      storage: 300Gi
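Every step in the pipeline mounts this claim at /mnt/azure, so the PVC has to exist in the kubeflow namespace before a run is started. A minimal sketch, assuming the manifest above is saved as pvc.yaml:

```bash
kubectl apply -f pvc.yaml
kubectl get pvc azure-managed-disk -n kubeflow   # wait for STATUS to show Bound
```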