Merge pull request #599 from rem20806/master

Added azure pipeline example for Kubeflow
Michelle Casbon 2019-09-03 18:05:04 -07:00 committed by GitHub
commit e37a9d7acd
33 changed files with 1014 additions and 0 deletions

pipelines/azurepipeline/.gitignore

@ -0,0 +1,18 @@
# standard things
.vscode
.ipynb_checkpoints/
__pycache__
# Environment Variables
*.env
*.cfg
*-creds.yaml
# models and data
data/
model/
*.tar.gz
*.h5
*.zip
aml_config/


@ -0,0 +1,3 @@
# Kubeflow and Azure Pipelines Example
See the docs on the [Kubeflow website](https://www.kubeflow.org/docs/azure/deploy/) for instructions on deploying this machine learning pipeline example.
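At a glance, the flow is: build and push the container images referenced by the pipeline steps (the `image='insert your image here'` placeholders in the pipeline definition), make sure the `azure-managed-disk` persistent volume claim exists in the `kubeflow` namespace, compile the pipeline, then upload it and start a run with your Azure credentials as run parameters. A minimal sketch, assuming the pipeline definition file is named pipeline.py and the images are already pushed to a registry the cluster can pull from:

# compile the pipeline definition into an archive Kubeflow Pipelines can ingest
python pipeline.py            # writes pipeline.py.tar.gz next to the script
# upload pipeline.py.tar.gz through the Kubeflow Pipelines UI and start a run, supplying the
# tenant_id, service_principal_id, service_principal_password, subscription_id,
# resource_group and workspace run parameters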


@ -0,0 +1,9 @@
FROM mcr.microsoft.com/azure-cli
RUN az extension add -n azure-cli-ml
RUN pip install --upgrade pip
COPY deploymentconfig.json /scripts/deploymentconfig.json
COPY inferenceconfig.json /scripts/inferenceconfig.json
COPY deploy.sh /scripts/deploy.sh
COPY score.py /scripts/score.py
COPY environment.yml /scripts/environment.yml
CMD bash


@ -0,0 +1,4 @@
{
    "computeType": "aks",
    "ComputeTarget": "aks-cluster"
}


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/deploy
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,20 @@
#!/bin/sh
# az ml model deploy -n tacosandburritos -m tacosandburritos:1 --ic inferenceconfig.json --dc deploymentconfig.json --resource-group taco-rg --workspace-name taco-workspace --overwrite -v
while getopts "m:n:i:d:s:p:u:r:w:t:b:" option;
do
case "$option" in
m ) MODEL=${OPTARG};;
n ) MODEL_NAME=${OPTARG};;
i ) INFERENCE_CONFIG=${OPTARG};;
d ) DEPLOYMENTCONFIG=${OPTARG};;
s ) SERVICE_PRINCIPAL_ID=${OPTARG};;
p ) SERVICE_PRINCIPAL_PASSWORD=${OPTARG};;
u ) SUBSCRIPTION_ID=${OPTARG};;
r ) RESOURCE_GROUP=${OPTARG};;
w ) WORKSPACE=${OPTARG};;
t ) TENANT_ID=${OPTARG};;
b ) BASE_PATH=${OPTARG};;
esac
done
az login --service-principal --username ${SERVICE_PRINCIPAL_ID} --password ${SERVICE_PRINCIPAL_PASSWORD} -t $TENANT_ID
az ml model deploy -n $MODEL_NAME -m ${MODEL}:1 --ic $INFERENCE_CONFIG --pi ${BASE_PATH}/myprofileresult.json --dc $DEPLOYMENTCONFIG -w $WORKSPACE -g $RESOURCE_GROUP --overwrite -v
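For reference, a by-hand invocation mirroring the arguments the pipeline's deploy step passes might look like the following sketch (the credential variables are placeholders, not part of this example; the script assumes model version 1 and reads myprofileresult.json from the base path written by the profile step):

# -n service name, -m registered model name, -i/-d config files baked into the image,
# -b base path that holds myprofileresult.json from the profile step
sh /scripts/deploy.sh -n tacosandburritos -m tacosandburritos \
  -i /scripts/inferenceconfig.json -d /scripts/deploymentconfig.json \
  -t "$TENANT_ID" -s "$SP_ID" -p "$SP_PASSWORD" -u "$SUBSCRIPTION_ID" \
  -r "$RESOURCE_GROUP" -w "$WORKSPACE" -b /mnt/azure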


@ -0,0 +1,8 @@
{
    "containerResourceRequirements": {
        "cpu": 2,
        "memoryInGB": 4
    },
    "computeType": "ACI",
    "enableAppInsights": "True"
}


@ -0,0 +1,19 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.
# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
  - python=3.6.2
  - pip:
      # Required packages for AzureML execution, history, and data preparation.
      - azureml-defaults
      - numpy
      - tensorflow==2.0.0-alpha0
      - Pillow
      - requests


@ -0,0 +1,13 @@
#!/bin/bash
while getopts "n:w:g:" option;
do
case "$option" in
n ) DEPLOYMENT_NAME=${OPTARG};;
w ) WORKSPACE=${OPTARG};;
g ) RESOURCE_GROUP=${OPTARG};;
esac
done
echo "test the deployment with a burrito image"
az ml service run -n ${DEPLOYMENT_NAME} -d '{ "image": "https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg" }' -w ${WORKSPACE} -g ${RESOURCE_GROUP}
echo "test the deployment with a taco image"
az ml service run -n ${DEPLOYMENT_NAME} -d '{ "image": "https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg" }' -w ${WORKSPACE} -g ${RESOURCE_GROUP}


@ -0,0 +1,10 @@
{
    "entryScript": "/scripts/score.py",
    "runtime": "python",
    "condaFile": "/scripts/environment.yml",
    "extraDockerfileSteps": null,
    "sourceDirectory": null,
    "enableGpu": false,
    "baseImage": null,
    "baseImageRegistry": null
}


@ -0,0 +1,93 @@
import json
import time
from io import BytesIO
import datetime
import requests
import numpy as np
from PIL import Image
import tensorflow as tf
from azureml.core.model import Model
def init():
if Model.get_model_path('tacosandburritos'):
model_path = Model.get_model_path('tacosandburritos')
else:
model_path = '/model/latest.h5'
print('Attempting to load model')
model = tf.keras.models.load_model(model_path)
model.summary()
print('Done!')
print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
return model
def run(raw_data, model):
prev_time = time.time()
post = json.loads(raw_data)
img_path = post['image']
tensor = process_image(img_path, 160)
t = tf.reshape(tensor, [-1, 160, 160, 3])
o = model.predict(t, steps=1)  # [0][0]
print(o)
o = o[0][0]
current_time = time.time()  # capture after prediction so the reported time covers inference
inference_time = datetime.timedelta(seconds=current_time - prev_time)
payload = {
'time': inference_time.total_seconds(),
'prediction': 'burrito' if o > 0.5 else 'tacos',
'scores': str(o)
}
print('Input ({}), Prediction ({})'.format(post['image'], payload))
return payload
def process_image(path, image_size):
# Extract image (from web or path)
if path.startswith('http'):
response = requests.get(path)
img = np.array(Image.open(BytesIO(response.content)))
else:
img = np.array(Image.open(path))
img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
# tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
if __name__ == "__main__":
images = {
'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
}
my_model = init()
for k, v in images.items():
print('{} => {}'.format(k, v))
info('Taco Test')
taco = json.dumps({'image': images['tacos']})
print(taco)
run(taco, my_model)
info('Burrito Test')
burrito = json.dumps({'image': images['burrito']})
print(burrito)
run(burrito, my_model)


@ -0,0 +1,140 @@
"""Main pipeline file"""
from kubernetes import client as k8s_client
import kfp.dsl as dsl
import kfp.compiler as compiler
@dsl.pipeline(
name='Tacos vs. Burritos',
description='Simple TF CNN'
)
def tacosandburritos_train(
tenant_id,
service_principal_id,
service_principal_password,
subscription_id,
resource_group,
workspace
):
"""Pipeline steps"""
persistent_volume_path = '/mnt/azure'
data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'
epochs = 5
batch = 32
learning_rate = 0.0001
model_name = 'tacosandburritos'
profile_name = 'tacoprofile'
operations = {}
image_size = 160
training_folder = 'train'
training_dataset = 'train.txt'
model_folder = 'model'
# preprocess data
operations['preprocess'] = dsl.ContainerOp(
name='preprocess',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/data.py',
'--base_path', persistent_volume_path,
'--data', training_folder,
'--target', training_dataset,
'--img_size', image_size,
'--zipfile', data_download
]
)
# train
operations['training'] = dsl.ContainerOp(
name='training',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/train.py',
'--base_path', persistent_volume_path,
'--data', training_folder,
'--epochs', epochs,
'--batch', batch,
'--image_size', image_size,
'--lr', learning_rate,
'--outputs', model_folder,
'--dataset', training_dataset
]
)
operations['training'].after(operations['preprocess'])
# register model
operations['register'] = dsl.ContainerOp(
name='register',
image='insert your image here',
command=['python'],
arguments=[
'/scripts/register.py',
'--base_path', persistent_volume_path,
'--model', 'latest.h5',
'--model_name', model_name,
'--tenant_id', tenant_id,
'--service_principal_id', service_principal_id,
'--service_principal_password', service_principal_password,
'--subscription_id', subscription_id,
'--resource_group', resource_group,
'--workspace', workspace
]
)
operations['register'].after(operations['training'])
operations['profile'] = dsl.ContainerOp(
name='profile',
image='insert your image here',
command=['sh'],
arguments=[
'/scripts/profile.sh',
'-n', profile_name,
'-m', model_name,
'-i', '/scripts/inferenceconfig.json',
'-d', '{"image":"https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg"}',
'-t', tenant_id,
'-r', resource_group,
'-w', workspace,
'-s', service_principal_id,
'-p', service_principal_password,
'-u', subscription_id,
'-b', persistent_volume_path
]
)
operations['profile'].after(operations['register'])
operations['deploy'] = dsl.ContainerOp(
name='deploy',
image='insert your image here',
command=['sh'],
arguments=[
'/scripts/deploy.sh',
'-n', model_name,
'-m', model_name,
'-i', '/scripts/inferenceconfig.json',
'-d', '/scripts/deploymentconfig.json',
'-t', tenant_id,
'-r', resource_group,
'-w', workspace,
'-s', service_principal_id,
'-p', service_principal_password,
'-u', subscription_id,
'-b', persistent_volume_path
]
)
operations['deploy'].after(operations['profile'])
for _, op_1 in operations.items():
op_1.container.set_image_pull_policy("Always")
op_1.add_volume(
k8s_client.V1Volume(
name='azure',
persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
claim_name='azure-managed-disk')
)
).add_volume_mount(k8s_client.V1VolumeMount(
mount_path='/mnt/azure', name='azure'))
if __name__ == '__main__':
compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')


@ -0,0 +1,10 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3
# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt
COPY data.py /scripts/data.py
# will be overwritten by kf pipeline
ENTRYPOINT [ "python", "/scripts/data.py" ]


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/preprocess
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,118 @@
import os
import shutil
import zipfile
import argparse
from pathlib2 import Path
import wget
import tensorflow as tf
def check_dir(path):
if not os.path.exists(path):
os.makedirs(path)
return Path(path).resolve(strict=False)
def download(source, target, force_clear=False):
if force_clear and os.path.exists(target):
print('Removing {}...'.format(target))
shutil.rmtree(target)
check_dir(target)
targt_file = str(Path(target).joinpath('data.zip'))
if os.path.exists(targt_file) and not force_clear:
print('data already exists, skipping download')
return
if source.startswith('http'):
print("Downloading from {} to {}".format(source, target))
wget.download(source, targt_file)
print("Done!")
else:
print("Copying from {} to {}".format(source, target))
shutil.copyfile(source, targt_file)
print('Unzipping {}'.format(targt_file))
zipr = zipfile.ZipFile(targt_file)
zipr.extractall(target)
zipr.close()
def process_image(path, image_size=160):
img_raw = tf.io.read_file(path)
img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def walk_images(path, image_size=160):
imgs = []
print('Scanning {}'.format(path))
# find subdirectories in base path
# (they should be the labels)
labels = []
for (_, dirs, _) in os.walk(path):
print('Found {}'.format(dirs))
labels = dirs
break
for d in labels:
label_path = os.path.join(path, d)  # keep the base path intact across labels
print('Processing {}'.format(label_path))
# only care about files in directory
for item in os.listdir(label_path):
if not item.lower().endswith('.jpg'):
print('skipping {}'.format(item))
continue
image = os.path.join(label_path, item)
try:
img = process_image(image, image_size)
assert img.shape[2] == 3, "Invalid channel count"
# write out good images
imgs.append(image)
except Exception:
# skip unreadable or non-RGB images, but log them
print('{}\n'.format(image))
return imgs
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='data cleaning for binary image task')
parser.add_argument('-b', '--base_path', help='directory to base data', default='../../data')
parser.add_argument('-d', '--data', help='directory to training data', default='train')
parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
parser.add_argument('-f', '--force',
help='force clear all data', default=False, action='store_true')
args = parser.parse_args()
print(args)
print('Using TensorFlow v.{}'.format(tf.__version__))
base_path = Path(args.base_path).resolve(strict=False)
print('Base Path: {}'.format(base_path))
data_path = base_path.joinpath(args.data).resolve(strict=False)
print('Train Path: {}'.format(data_path))
target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
print('Train File: {}'.format(target_path))
zip_path = args.zipfile
print('Acquiring data...')
download(zip_path, str(base_path), args.force)
if os.path.exists(str(target_path)):
print('dataset text file already exists, skipping check')
else:
print('Testing images...')
images = walk_images(str(data_path), args.img_size)
# save file
print('writing dataset to {}'.format(target_path))
with open(str(target_path), 'w+') as f:
f.write('\n'.join(images))
# python data.py -z https://aiadvocate.blob.core.windows.net/public/tacodata.zip -t train.txt


@ -0,0 +1,3 @@
pathlib2
requests
wget


@ -0,0 +1,12 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3
RUN pip install azure-cli
RUN az extension add -n azure-cli-ml
RUN pip install --upgrade pip
RUN pip install --upgrade pillow
RUN pip install azureml
RUN pip install azureml.core
COPY profile.sh /scripts/profile.sh
COPY inferenceconfig.json /scripts/inferenceconfig.json
COPY score.py /scripts/score.py
COPY environment.yml /scripts/environment.yml
ENTRYPOINT bash


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/profile
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,19 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.
# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
  - python=3.6.2
  - pip:
      # Required packages for AzureML execution, history, and data preparation.
      - azureml-defaults
      - numpy
      - tensorflow==2.0.0-alpha0
      - Pillow
      - requests


@ -0,0 +1,10 @@
{
    "entryScript": "/scripts/score.py",
    "runtime": "python",
    "condaFile": "/scripts/environment.yml",
    "extraDockerfileSteps": null,
    "sourceDirectory": null,
    "enableGpu": false,
    "baseImage": null,
    "baseImageRegistry": null
}


@ -0,0 +1,21 @@
#!/bin/sh
while getopts "m:n:i:d:s:p:u:r:w:t:b:" option;
do
case "$option" in
m ) MODEL=${OPTARG};;
n ) PROFILE_NAME=${OPTARG};;
i ) INFERENCE_CONFIG=${OPTARG};;
d ) DATA=${OPTARG};;
s ) SERVICE_PRINCIPAL_ID=${OPTARG};;
p ) SERVICE_PRINCIPAL_PASSWORD=${OPTARG};;
u ) SUBSCRIPTION_ID=${OPTARG};;
r ) RESOURCE_GROUP=${OPTARG};;
w ) WORKSPACE=${OPTARG};;
t ) TENANT_ID=${OPTARG};;
b ) BASE_PATH=${OPTARG};;
esac
done
az login --service-principal --username ${SERVICE_PRINCIPAL_ID} --password ${SERVICE_PRINCIPAL_PASSWORD} -t $TENANT_ID
az ml model profile -n $PROFILE_NAME -m ${MODEL}:1 --ic $INFERENCE_CONFIG -d $DATA -t myprofileresult.json -w $WORKSPACE -g $RESOURCE_GROUP
mv myprofileresult.json ${BASE_PATH}/myprofileresult.json
echo ${BASE_PATH}
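As with the deploy step, a by-hand invocation mirroring the arguments the pipeline's profile step passes might look like this sketch (the credential variables are placeholders; -d is the sample request sent to the profiler and -b is where myprofileresult.json is copied for the deploy step):

sh /scripts/profile.sh -n tacoprofile -m tacosandburritos \
  -i /scripts/inferenceconfig.json \
  -d '{"image":"https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg"}' \
  -t "$TENANT_ID" -s "$SP_ID" -p "$SP_PASSWORD" -u "$SUBSCRIPTION_ID" \
  -r "$RESOURCE_GROUP" -w "$WORKSPACE" -b /mnt/azure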


@ -0,0 +1,3 @@
requests
numpy
pillow


@ -0,0 +1,89 @@
import json
import time
import datetime
from io import BytesIO
import requests
import numpy as np
from PIL import Image
import tensorflow as tf
from azureml.core.model import Model
def init():
if Model.get_model_path('tacosandburritos'):
model_path = Model.get_model_path('tacosandburritos')
else:
model_path = '/model/latest.h5'
print('Attempting to load model')
model = tf.keras.models.load_model(model_path)
model.summary()
print('Done!')
print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
return model
def run(raw_data, model):
prev_time = time.time()
post = json.loads(raw_data)
img_path = post['image']
tensor = process_image(img_path, 160)
t = tf.reshape(tensor, [-1, 160, 160, 3])
o = model.predict(t, steps=1)  # [0][0]
print(o)
o = o[0][0]
current_time = time.time()  # capture after prediction so the reported time covers inference
inference_time = datetime.timedelta(seconds=current_time - prev_time)
payload = {
'time': inference_time.total_seconds(),
'prediction': 'burrito' if o > 0.5 else 'tacos',
'scores': str(o)
}
print('Input ({}), Prediction ({})'.format(post['image'], payload))
return payload
def process_image(path, image_size):
# Extract image (from web or path)
if path.startswith('http'):
response = requests.get(path)
img = np.array(Image.open(BytesIO(response.content)))
else:
img = np.array(Image.open(path))
img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
# tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
return img_final
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
if __name__ == "__main__":
images = {
'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
}
my_model = init()
for k, v in images.items():
print('{} => {}'.format(k, v))
info('Taco Test')
taco = json.dumps({'image': images['tacos']})
print(taco)
run(taco, my_model)
info('Burrito Test')
burrito = json.dumps({'image': images['burrito']})
print(burrito)
run(burrito, my_model)


@ -0,0 +1,11 @@
FROM python:3.7-slim
# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt
# only for local testing
COPY register.py /scripts/register.py
# will be overwritten by kf pipeline
ENTRYPOINT [ "python", "/scripts/register.py" ]


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/register
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,102 @@
import json
from os.path import relpath
import argparse
from pathlib2 import Path
import azureml
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.authentication import ServicePrincipalAuthentication
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
def get_ws(tenant_id, service_principal_id,
service_principal_password, subscription_id, resource_group, workspace):
auth_args = {
'tenant_id': tenant_id,
'service_principal_id': service_principal_id,
'service_principal_password': service_principal_password
}
ws_args = {
'auth': ServicePrincipalAuthentication(**auth_args),
'subscription_id': subscription_id,
'resource_group': resource_group
}
ws = Workspace.get(workspace, **ws_args)
return ws
def run(mdl_path, model_name, ws, tgs):
print(ws.get_details())
print('\nSaving model {} to {}'.format(mdl_path, model_name))
# Model Path needs to be relative
mdl_path = relpath(mdl_path, '.')
Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
print('Done!')
if __name__ == "__main__":
# argparse stuff for model path and model name
parser = argparse.ArgumentParser(description='sanity check on model')
parser.add_argument('-b', '--base_path', help='directory to base folder', default='../../data')
parser.add_argument('-m', '--model', help='path to model file', default='/model/latest.h5')
parser.add_argument('-n', '--model_name', help='AML Model name', default='tacosandburritos')
parser.add_argument('-t', '--tenant_id', help='tenant_id')
parser.add_argument('-s', '--service_principal_id', help='service_principal_id')
parser.add_argument('-p', '--service_principal_password', help='service_principal_password')
parser.add_argument('-u', '--subscription_id', help='subscription_id')
parser.add_argument('-r', '--resource_group', help='resource_group')
parser.add_argument('-w', '--workspace', help='workspace')
args = parser.parse_args()
print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))
args.model = 'model/' + args.model
model_path = str(Path(args.base_path).resolve(
strict=False).joinpath(args.model).resolve(strict=False))
params_path = str(Path(args.base_path).resolve(
strict=False).joinpath('params.json').resolve(strict=False))
wsrgs = {
'tenant_id': args.tenant_id,
'service_principal_id': args.service_principal_id,
'service_principal_password': args.service_principal_password,
'subscription_id': args.subscription_id,
'resource_group': args.resource_group,
'workspace': args.workspace
}
rgs = {
'mdl_path': model_path,
'model_name': args.model_name
}
# printing out args for posterity
for i in wsrgs:
if i == 'service_principal_password':
print('{} => **********'.format(i))
else:
print('{} => {}'.format(i, wsrgs[i]))
with(open(str(params_path), 'r')) as f:
tags = json.load(f)
print('\n\nUsing the following tags:')
for tag in tags:
print('{} => {}'.format(tag, tags[tag]))
rgs['tgs'] = tags  # run() takes the tags under the 'tgs' keyword
workspc = get_ws(**wsrgs)
rgs['ws'] = workspc
run(**rgs)
# python register.py --model_path v --model_name c --tenant_id c
# --service_principal_id v --service_principal_password v
# --subscription_id v --resource_group x --workspace c


@ -0,0 +1,3 @@
pathlib2
requests
azureml-sdk


@ -0,0 +1,18 @@
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3
# pip install
COPY requirements.txt /scripts/requirements.txt
RUN pip install -r /scripts/requirements.txt
COPY train.py /scripts/train.py
# python train.py -d data/PetImages -e 1 -b 32 -l 0.0001 -o model -f dataset.txt
# will be overwritten by kf pipeline
ENTRYPOINT [ "python", \
"/scripts/train.py", \
"-d", "data/train", \
"-e", "10", \
"-b", "32", \
"-l", "0.0001", \
"-o", "model", \
"-f", "train.txt" ]


@ -0,0 +1,9 @@
#!/bin/bash
while getopts "r:" option;
do
case "$option" in
r ) REGISTRY_NAME=${OPTARG};;
esac
done
IMAGE=${REGISTRY_NAME}.azurecr.io/training
docker build -t $IMAGE . && docker run -it $IMAGE


@ -0,0 +1,2 @@
Pillow
pathlib2


@ -0,0 +1,199 @@
from __future__ import absolute_import, division, print_function
import os
import math
import hmac
import json
import hashlib
import argparse
from random import shuffle
from pathlib2 import Path
import numpy as np
import tensorflow as tf
from tensorflow.data import Dataset
def info(msg, char="#", width=75):
print("")
print(char * width)
print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
print(char * width)
def check_dir(path):
if not os.path.exists(path):
os.makedirs(path)
return Path(path).resolve(strict=False)
def process_image(path, label, img_size):
img_raw = tf.io.read_file(path)
img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
return img_final, label
def load_dataset(base_path, dset, split=None):
# normalize splits
if split is None:
split = [8, 1, 1]
splits = np.array(split) / np.sum(np.array(split))
# find labels - parent folder names
labels = {}
for (_, dirs, _) in os.walk(base_path):
print('found {}'.format(dirs))
labels = {k: v for (v, k) in enumerate(dirs)}
print('using {}'.format(labels))
break
# load all files along with idx label
print('loading dataset from {}'.format(dset))
with open(dset, 'r') as d:
data = [(str(Path(line.strip()).absolute()),
labels[Path(line.strip()).parent.name]) for line in d.readlines()]
print('dataset size: {}\nshuffling data...'.format(len(data)))
# shuffle data
shuffle(data)
print('splitting data...')
# split data
train_idx = int(len(data) * splits[0])
return data[:train_idx]
# @print_info
def run(
dpath,
img_size=160,
epochs=10,
batch_size=32,
learning_rate=0.0001,
output='model',
dset=None):
img_shape = (img_size, img_size, 3)
info('Loading Data Set')
# load dataset
train = load_dataset(dpath, dset)
# training data
train_data, train_labels = zip(*train)
# zip paths, labels and a repeated image size so process_image receives all three arguments
train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
Dataset.from_tensor_slices(list(train_labels)),
Dataset.from_tensor_slices([img_size] * len(train_data))))
train_ds = train_ds.map(map_func=process_image,
num_parallel_calls=5)
train_ds = train_ds.apply(tf.data.experimental.ignore_errors())
train_ds = train_ds.batch(batch_size)
train_ds = train_ds.prefetch(buffer_size=5)
train_ds = train_ds.repeat()
# model
info('Creating Model')
base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
include_top=False,
weights='imagenet')
base_model.trainable = True
model = tf.keras.Sequential([
base_model,
tf.keras.layers.GlobalAveragePooling2D(),
tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
loss='binary_crossentropy',
metrics=['accuracy'])
model.summary()
# training
info('Training')
steps_per_epoch = math.ceil(len(train) / batch_size)
model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
# save model
info('Saving Model')
# check existence of base model folder
output = check_dir(output)
print('Serializing into saved_model format')
tf.saved_model.save(model, str(output))
print('Done!')
# add time prefix folder
file_output = str(Path(output).joinpath('latest.h5'))
print('Serializing h5 model to:\n{}'.format(file_output))
model.save(file_output)
return generate_hash(file_output, 'kf_pipeline')
def generate_hash(dfile, key):
print('Generating hash for {}'.format(dfile))
m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
BUF_SIZE = 65536
with open(str(dfile), 'rb') as myfile:
while True:
data = myfile.read(BUF_SIZE)
if not data:
break
m.update(data)
return m.hexdigest()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='transfer learning for binary image task')
parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
parser.add_argument('-d', '--data', help='directory to training and test data', default='train')
parser.add_argument('-e', '--epochs', help='number of epochs', default=10, type=int)
parser.add_argument('-b', '--batch', help='batch size', default=32, type=int)
parser.add_argument('-i', '--image_size', help='image size', default=160, type=int)
parser.add_argument('-l', '--lr', help='learning rate', default=0.0001, type=float)
parser.add_argument('-o', '--outputs', help='output directory', default='model')
parser.add_argument('-f', '--dataset', help='cleaned data listing')
args = parser.parse_args()
info('Using TensorFlow v.{}'.format(tf.__version__))
data_path = Path(args.base_path).joinpath(args.data).resolve(strict=False)
target_path = Path(args.base_path).resolve(strict=False).joinpath(args.outputs)
dataset = Path(args.base_path).joinpath(args.dataset)
image_size = args.image_size
params = Path(args.base_path).joinpath('params.json')
args = {
"dpath": str(data_path),
"img_size": image_size,
"epochs": args.epochs,
"batch_size": args.batch,
"learning_rate": args.lr,
"output": str(target_path),
"dset": str(dataset)
}
dataset_signature = generate_hash(dataset, 'kf_pipeline')
# printing out args for posterity
for i in args:
print('{} => {}'.format(i, args[i]))
model_signature = run(**args)
args['dataset_signature'] = dataset_signature.upper()
args['model_signature'] = model_signature.upper()
args['model_type'] = 'tfv2-MobileNetV2'
print('Writing out params...', end='')
with open(str(params), 'w') as f:
json.dump(args, f)
print(' Saved to {}'.format(str(params)))
# python train.py -d train -e 3 -b 32 -l 0.0001 -o model -f train.txt


@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: azure-managed-disk
  namespace: kubeflow
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: managed-premium
  resources:
    requests:
      storage: 300Gi
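The claim must exist before the pipeline runs, since every step mounts it at /mnt/azure. A minimal sketch, assuming the manifest above is saved as azure-pvc.yaml:

kubectl apply -f azure-pvc.yaml
kubectl get pvc azure-managed-disk -n kubeflow   # wait for STATUS to show Bound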