fix indentation

Rebecca McFadden 2019-08-02 15:18:30 -07:00
parent 733923410c
commit 10f3a1fc32
6 changed files with 495 additions and 502 deletions

View File

@ -1,8 +1,8 @@
import json
import time
import requests
import datetime
import numpy as np
from PIL import Image
from io import BytesIO
import tensorflow as tf
@ -10,81 +10,81 @@ import tensorflow as tf
from azureml.core.model import Model
def init():
    global model

    try:
        model_path = Model.get_model_path('tacosandburritos')
    except:
        model_path = '/model/latest.h5'

    print('Attempting to load model')
    model = tf.keras.models.load_model(model_path)
    model.summary()
    print('Done!')

    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))

def run(raw_data):
    global model
    prev_time = time.time()

    post = json.loads(raw_data)
    img_path = post['image']

    tensor = process_image(img_path, 160)
    t = tf.reshape(tensor, [-1, 160, 160, 3])
    o = model.predict(t, steps=1)  # [0][0]
    print(o)
    o = o[0][0]

    # take the timestamp after the prediction so inference_time covers
    # preprocessing and predict rather than just the JSON parse
    current_time = time.time()
    inference_time = datetime.timedelta(seconds=current_time - prev_time)
    payload = {
        'time': inference_time.total_seconds(),
        'prediction': 'burrito' if o > 0.5 else 'tacos',
        'scores': str(o)
    }

    print('Input ({}), Prediction ({})'.format(post['image'], payload))

    return payload

def process_image(path, image_size):
    # Extract image (from web or path)
    if path.startswith('http'):
        response = requests.get(path)
        img = np.array(Image.open(BytesIO(response.content)))
    else:
        img = np.array(Image.open(path))

    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
    # tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final


def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)

if __name__ == "__main__":
    images = {
        'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
        'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
    }

    init()

    for k, v in images.items():
        print('{} => {}'.format(k, v))

    info('Taco Test')
    taco = json.dumps({'image': images['tacos']})
    print(taco)
    run(taco)

    info('Burrito Test')
    burrito = json.dumps({'image': images['burrito']})
    print(burrito)
    run(burrito)
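
Once the pipeline's deploy step has published this scoring script behind an Azure Container Instances web service, the same JSON contract that run() expects applies over HTTP. A minimal sketch, assuming a hypothetical scoring URI (the real one comes from the deployed service, and an authorization header may also be required):

import json
import requests

scoring_uri = 'http://<your-aci-service>.azurecontainer.io/score'  # placeholder, not from this commit
headers = {'Content-Type': 'application/json'}
body = json.dumps({'image': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'})

resp = requests.post(scoring_uri, data=body, headers=headers)
print(resp.json())  # e.g. {'time': ..., 'prediction': 'burrito', 'scores': '...'}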

View File

@ -1,75 +1,73 @@
import kfp.dsl as dsl
import kfp.compiler as compiler
from kubernetes import client as k8s_client
@dsl.pipeline(
    name='Tacos vs. Burritos',
    description='Simple TF CNN for binary classifier between burritos and tacos'
)
def tacosandburritos_train(
    tenant_id,
    service_principal_id,
    service_principal_password,
    subscription_id,
    resource_group,
    workspace,
    persistent_volume_path='/mnt/azure',
    data_download='https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
    epochs=5,
    batch=32,
    learning_rate=0.0001,
    model_name='tacosandburritos',
    profile_name='tacoprofile'
):
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'

    # preprocess data
    operations['preprocess'] = dsl.ContainerOp(
        name='preprocess',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/data.py',
            '--base_path', persistent_volume_path,
            '--data', training_folder,
            '--target', training_dataset,
            '--img_size', image_size,
            '--zipfile', data_download
        ]
    )

    # train
    operations['training'] = dsl.ContainerOp(
        name='training',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/train.py',
            '--base_path', persistent_volume_path,
            '--data', training_folder,
            '--epochs', epochs,
            '--batch', batch,
            '--image_size', image_size,
            '--lr', learning_rate,
            '--outputs', model_folder,
            '--dataset', training_dataset
        ]
    )
    operations['training'].after(operations['preprocess'])

    # register model
    operations['register'] = dsl.ContainerOp(
        name='register',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/register.py',
            '--base_path', persistent_volume_path,
            '--model', 'latest.h5',
@ -80,15 +78,15 @@ def tacosandburritos_train(
            '--subscription_id', subscription_id,
            '--resource_group', resource_group,
            '--workspace', workspace
        ]
    )
    operations['register'].after(operations['training'])

    operations['profile'] = dsl.ContainerOp(
        name='profile',
        image='insert your image here',
        command=['sh'],
        arguments=[
            '/scripts/profile.sh',
            '-n', profile_name,
            '-m', model_name,
@ -101,45 +99,43 @@ def tacosandburritos_train(
            '-p', service_principal_password,
            '-u', subscription_id,
            '-b', persistent_volume_path
        ]
    )
    operations['profile'].after(operations['register'])

    operations['deploy'] = dsl.ContainerOp(
        name='deploy',
        image='insert your image here',
        command=['sh'],
        arguments=[
            '/scripts/deploy.sh',
            '-n', model_name,
            '-m', model_name,
            '-i', '/scripts/inferenceconfig.json',
            '-d', '/scripts/deploymentconfig.json',
            '-t', tenant_id,
            '-r', resource_group,
            '-w', workspace,
            '-s', service_principal_id,
            '-p', service_principal_password,
            '-u', subscription_id,
            '-b', persistent_volume_path
        ]
    )
    operations['deploy'].after(operations['profile'])

    for _, op in operations.items():
        op.container.set_image_pull_policy("Always")
        op.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-disk')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure',
            name='azure')
        )


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')
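
Besides compiling to a .tar.gz as above, the same pipeline function can be submitted directly to a Kubeflow Pipelines endpoint with the kfp client. A rough sketch, assuming a reachable KFP host and that the 'insert your image here' placeholders have been filled in; the host URL and argument values below are placeholders, not values from this commit:

import kfp

client = kfp.Client(host='http://<kfp-host>/pipeline')  # placeholder host
client.create_run_from_pipeline_func(
    tacosandburritos_train,
    arguments={
        'tenant_id': '<tenant-id>',
        'service_principal_id': '<sp-id>',
        'service_principal_password': '<sp-password>',
        'subscription_id': '<subscription-id>',
        'resource_group': '<resource-group>',
        'workspace': '<aml-workspace>'
    })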

View File

@ -1,5 +1,4 @@
import os
import shutil
import wget
import zipfile
@ -9,108 +8,108 @@ import tensorflow as tf
from pathlib2 import Path
def check_dir(path, check=False):
    if check:
        assert os.path.exists(path), '{} does not exist!'.format(path)
    else:
        if not os.path.exists(path):
            os.makedirs(path)
    return Path(path).resolve(strict=False)


def download(source, target, force_clear=False):
    if force_clear and os.path.exists(target):
        print('Removing {}...'.format(target))
        shutil.rmtree(target)

    check_dir(target)

    target_file = str(Path(target).joinpath('data.zip'))
    if os.path.exists(target_file) and not force_clear:
        print('data already exists, skipping download')
        return

    if source.startswith('http'):
        print("Downloading from {} to {}".format(source, target))
        wget.download(source, target_file)
        print("Done!")
    else:
        print("Copying from {} to {}".format(source, target))
        shutil.copyfile(source, target_file)

    print('Unzipping {}'.format(target_file))
    zipr = zipfile.ZipFile(target_file)
    zipr.extractall(target)
    zipr.close()

def process_image(path, image_size=160):
    img_raw = tf.io.read_file(path)
    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final

def walk_images(path, image_size=160):
    images = []
    print('Scanning {}'.format(path))
    # find subdirectories in base path
    # (they should be the labels)
    labels = []
    for (_, dirs, _) in os.walk(path):
        print('Found {}'.format(dirs))
        labels = dirs
        break

    for d in labels:
        # keep the base path intact while iterating over label folders
        label_path = os.path.join(path, d)
        print('Processing {}'.format(label_path))
        # only care about files in directory
        for item in os.listdir(label_path):
            if not item.lower().endswith('.jpg'):
                print('skipping {}'.format(item))
                continue

            image = os.path.join(label_path, item)
            try:
                img = process_image(image, image_size)
                assert img.shape[2] == 3, "Invalid channel count"
                # write out good images
                images.append(image)
            except Exception as e:
                print('{}\n{}\n'.format(e, image))

    return images

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='data cleaning for binary image task')
    parser.add_argument('-b', '--base_path', help='directory to base data', default='../../data')
    parser.add_argument('-d', '--data', help='directory to training data', default='train')
    parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
    parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
    parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
    parser.add_argument('-f', '--force', help='force clear all data', default=False, action='store_true')
    args = parser.parse_args()
    print(args)

    print('Using TensorFlow v.{}'.format(tf.__version__))

    base_path = Path(args.base_path).resolve(strict=False)
    print('Base Path: {}'.format(base_path))
    data_path = base_path.joinpath(args.data).resolve(strict=False)
    print('Train Path: {}'.format(data_path))
    target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
    print('Train File: {}'.format(target_path))
    zip_path = args.zipfile

    print('Acquiring data...')
    # use the -z/--zipfile argument rather than a hard-coded URL
    download(zip_path, str(base_path), args.force)

    if os.path.exists(str(target_path)):
        print('dataset text file already exists, skipping check')
    else:
        print('Testing images...')
        images = walk_images(str(data_path), args.img_size)

        # save file
        print('writing dataset to {}'.format(target_path))
        with open(str(target_path), 'w+') as f:
            f.write('\n'.join(images))

# python data.py -z https://aiadvocate.blob.core.windows.net/public/tacodata.zip -t train.txt
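
The train.txt written above is just a newline-separated list of image paths; the label is recovered downstream (in train.py's load_dataset) from each file's parent folder name. A simplified sketch of that convention, using hypothetical paths:

from pathlib import Path

# hypothetical entries as they might appear in train.txt
listing = [
    '/mnt/azure/train/tacos/0001.jpg',
    '/mnt/azure/train/burrito/0002.jpg',
]
labels = {name: idx for idx, name in enumerate(sorted({Path(p).parent.name for p in listing}))}
pairs = [(p, labels[Path(p).parent.name]) for p in listing]
print(labels)  # e.g. {'burrito': 0, 'tacos': 1}
print(pairs)   # [(path, label), ...] as consumed by the training step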

View File

@ -1,8 +1,8 @@
import json
import time
import requests
import datetime
import numpy as np
from PIL import Image
from io import BytesIO
import tensorflow as tf
@ -10,81 +10,80 @@ import tensorflow as tf
from azureml.core.model import Model
def init():
    global model

    try:
        model_path = Model.get_model_path('tacosandburritos')
    except:
        model_path = '/model/latest.h5'

    print('Attempting to load model')
    model = tf.keras.models.load_model(model_path)
    model.summary()
    print('Done!')

    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))

def run(raw_data):
    prev_time = time.time()

    post = json.loads(raw_data)
    img_path = post['image']

    tensor = process_image(img_path, 160)
    t = tf.reshape(tensor, [-1, 160, 160, 3])
    o = model.predict(t, steps=1)  # [0][0]
    print(o)
    o = o[0][0]

    # take the timestamp after the prediction so inference_time covers
    # preprocessing and predict rather than just the JSON parse
    current_time = time.time()
    inference_time = datetime.timedelta(seconds=current_time - prev_time)
    payload = {
        'time': inference_time.total_seconds(),
        'prediction': 'burrito' if o > 0.5 else 'tacos',
        'scores': str(o)
    }

    print('Input ({}), Prediction ({})'.format(post['image'], payload))

    return payload

def process_image(path, image_size):
    # Extract image (from web or path)
    if path.startswith('http'):
        response = requests.get(path)
        img = np.array(Image.open(BytesIO(response.content)))
    else:
        img = np.array(Image.open(path))

    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
    # tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final


def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)

if __name__ == "__main__":
    images = {
        'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
        'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
    }

    init()

    for k, v in images.items():
        print('{} => {}'.format(k, v))

    info('Taco Test')
    taco = json.dumps({'image': images['tacos']})
    print(taco)
    run(taco)

    info('Burrito Test')
    burrito = json.dumps({'image': images['burrito']})
    print(burrito)
    run(burrito)

View File

@ -1,4 +1,3 @@
import json
from os.path import relpath
import azureml
@ -11,84 +10,84 @@ from azureml.core.webservice import Webservice, AciWebservice
from azureml.core.authentication import ServicePrincipalAuthentication
def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)


def run(model_path, model_name, tenant_id, service_principal_id,
        service_principal_password, subscription_id, resource_group, workspace, tags):
    auth_args = {
        'tenant_id': tenant_id,
        'service_principal_id': service_principal_id,
        'service_principal_password': service_principal_password
    }

    ws_args = {
        'auth': ServicePrincipalAuthentication(**auth_args),
        'subscription_id': subscription_id,
        'resource_group': resource_group
    }

    ws = Workspace.get(workspace, **ws_args)

    print(ws.get_details())

    print('\nSaving model {} to {}'.format(model_path, model_name))

    # Model path needs to be relative
    model_path = relpath(model_path, '.')

    model = Model.register(ws, model_name=model_name, model_path=model_path, tags=tags)
    print('Done!')

if __name__ == "__main__":
    # argparse stuff for model path and model name
    parser = argparse.ArgumentParser(description='sanity check on model')
    parser.add_argument('-b', '--base_path', help='directory to base folder', default='../../data')
    parser.add_argument('-m', '--model', help='path to model file', default='/model/latest.h5')
    parser.add_argument('-n', '--model_name', help='AML Model name', default='tacosandburritos')
    parser.add_argument('-t', '--tenant_id', help='tenant_id')
    parser.add_argument('-s', '--service_principal_id', help='service_principal_id')
    parser.add_argument('-p', '--service_principal_password', help='service_principal_password')
    parser.add_argument('-u', '--subscription_id', help='subscription_id')
    parser.add_argument('-r', '--resource_group', help='resource_group')
    parser.add_argument('-w', '--workspace', help='workspace')
    args = parser.parse_args()

    print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))

    args.model = 'model/' + args.model
    model_path = str(Path(args.base_path).resolve(strict=False).joinpath(args.model).resolve(strict=False))
    params_path = str(Path(args.base_path).resolve(strict=False).joinpath('params.json').resolve(strict=False))

    rgs = {
        'model_path': model_path,
        'model_name': args.model_name,
        'tenant_id': args.tenant_id,
        'service_principal_id': args.service_principal_id,
        'service_principal_password': args.service_principal_password,
        'subscription_id': args.subscription_id,
        'resource_group': args.resource_group,
        'workspace': args.workspace
    }

    # printing out args for posterity
    for i in rgs:
        if i == 'service_principal_password':
            print('{} => **********'.format(i))
        else:
            print('{} => {}'.format(i, rgs[i]))

    with open(str(params_path), 'r') as f:
        tags = json.load(f)

    print('\n\nUsing the following tags:')
    for tag in tags:
        print('{} => {}'.format(tag, tags[tag]))

    rgs['tags'] = tags

    run(**rgs)

# python register.py --model_path v --model_name c --tenant_id c
# --service_principal_id v --service_principal_password v
# --subscription_id v --resource_group x --workspace c
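
The params.json loaded above as tags is the file that train.py (next file) writes under base_path after training. An illustrative sketch of its shape, with hypothetical values; the keys mirror what train.py serializes:

example_tags = {
    "data_path": "/mnt/azure/train",          # hypothetical value
    "image_size": 160,
    "epochs": 5,
    "batch_size": 32,
    "learning_rate": 0.0001,
    "output": "/mnt/azure/model",
    "dataset": "/mnt/azure/train.txt",
    "dataset_signature": "<HMAC-SHA256 of train.txt, uppercased>",
    "model_signature": "<HMAC-SHA256 of latest.h5, uppercased>",
    "model_type": "tfv2-MobileNetV2"
}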

View File

@ -15,179 +15,179 @@ from tensorflow.data import Dataset
global image_size
def info(msg, char="#", width=75):
    print("")
    print(char * width)
    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
    print(char * width)


def check_dir(path, check=False):
    if check:
        assert os.path.exists(path), '{} does not exist!'.format(path)
    else:
        if not os.path.exists(path):
            os.makedirs(path)
    return Path(path).resolve(strict=False)

def process_image(path, label):
    img_raw = tf.io.read_file(path)
    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
    return img_final, label


def load_dataset(base_path, dataset, split=[8, 1, 1]):
    # normalize splits
    splits = np.array(split) / np.sum(np.array(split))

    # find labels - parent folder names
    labels = {}
    for (_, dirs, _) in os.walk(base_path):
        print('found {}'.format(dirs))
        labels = {k: v for (v, k) in enumerate(dirs)}
        print('using {}'.format(labels))
        break

    # load all files along with idx label
    print('loading dataset from {}'.format(dataset))
    with open(dataset, 'r') as d:
        data = [(str(Path(f.strip()).absolute()), labels[Path(f.strip()).parent.name]) for f in d.readlines()]

    print('dataset size: {}\nshuffling data...'.format(len(data)))
    # shuffle data
    shuffle(data)

    print('splitting data...')
    # split data
    train_idx = int(len(data) * splits[0])
    eval_idx = int(len(data) * splits[1])

    return data[:train_idx], \
        data[train_idx:train_idx + eval_idx], \
        data[train_idx + eval_idx:], \
        labels

#@print_info
def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
    img_shape = (image_size, image_size, 3)

    info('Loading Data Set')
    # load dataset
    train, test, val, labels = load_dataset(data_path, dataset)

    # training data
    train_data, train_labels = zip(*train)
    train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
                            Dataset.from_tensor_slices(list(train_labels))))

    train_ds = train_ds.map(map_func=process_image,
                            num_parallel_calls=5)

    train_ds = train_ds.apply(tf.data.experimental.ignore_errors())

    train_ds = train_ds.batch(batch_size)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat()

    # model
    info('Creating Model')
    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
                                                   include_top=False,
                                                   weights='imagenet')
    base_model.trainable = True

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # training
    info('Training')
    steps_per_epoch = math.ceil(len(train) / batch_size)
    history = model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)

    # save model
    info('Saving Model')

    # check existence of base model folder
    output = check_dir(output)

    print('Serializing into saved_model format')
    tf.saved_model.save(model, str(output))
    print('Done!')

    # add time prefix folder
    file_output = str(Path(output).joinpath('latest.h5'))
    print('Serializing h5 model to:\n{}'.format(file_output))
    model.save(file_output)

    return generate_hash(file_output, 'kf_pipeline')

def generate_hash(file, key):
    print('Generating hash for {}'.format(file))
    m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
    BUF_SIZE = 65536
    with open(str(file), 'rb') as f:
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            m.update(data)

    return m.hexdigest()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='transfer learning for binary image task')
    parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
    parser.add_argument('-d', '--data', help='directory to training and test data', default='train')
    parser.add_argument('-e', '--epochs', help='number of epochs', default=10, type=int)
    parser.add_argument('-b', '--batch', help='batch size', default=32, type=int)
    parser.add_argument('-i', '--image_size', help='image size', default=160, type=int)
    parser.add_argument('-l', '--lr', help='learning rate', default=0.0001, type=float)
    parser.add_argument('-o', '--outputs', help='output directory', default='model')
    parser.add_argument('-f', '--dataset', help='cleaned data listing')
    args = parser.parse_args()

    info('Using TensorFlow v.{}'.format(tf.__version__))

    params = Path(args.base_path).joinpath('params.json')
    data_path = Path(args.base_path).joinpath(args.data).resolve(strict=False)
    target_path = Path(args.base_path).resolve(strict=False).joinpath(args.outputs)
    dataset = Path(args.base_path).joinpath(args.dataset)
    image_size = args.image_size

    args = {
        "data_path": str(data_path),
        "image_size": image_size,
        "epochs": args.epochs,
        "batch_size": args.batch,
        "learning_rate": args.lr,
        "output": str(target_path),
        "dataset": str(dataset)
    }

    dataset_signature = generate_hash(dataset, 'kf_pipeline')

    # printing out args for posterity
    for i in args:
        print('{} => {}'.format(i, args[i]))

    model_signature = run(**args)

    args['dataset_signature'] = dataset_signature.upper()
    args['model_signature'] = model_signature.upper()
    args['model_type'] = 'tfv2-MobileNetV2'
    print('Writing out params...', end='')
    with open(str(params), 'w') as f:
        json.dump(args, f)

    print(' Saved to {}'.format(str(params)))

# python train.py -d train -e 3 -b 32 -l 0.0001 -o model -f train.txt