fixed some lint errors

Rebecca McFadden 2019-08-16 01:04:15 -04:00
parent 153d9d87aa
commit d81d83512a
6 changed files with 301 additions and 275 deletions


@@ -1,31 +1,31 @@
 import json
 import time
+from io import BytesIO
+import datetime
 import requests
 import numpy as np
-import datetime
 from PIL import Image
-from io import BytesIO
 import tensorflow as tf
 from azureml.core.model import Model


 def init():
-    global model
-    try:
+    if Model.get_model_path('tacosandburritos'):
         model_path = Model.get_model_path('tacosandburritos')
-    except:
+    else:
         model_path = '/model/latest.h5'

-    print('Attempting to load model')
+    print 'Attempting to load model'
     model = tf.keras.models.load_model(model_path)
     model.summary()
-    print('Done!')
+    print 'Done!'

-    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
+    print 'Initialized model "{}" at {}'.format(model_path, datetime.datetime.now())
+    return model


-def run(raw_data):
-    global model
+def run(raw_data, model):
     prev_time = time.time()

     post = json.loads(raw_data)
@@ -35,8 +35,8 @@ def run(raw_data):
     tensor = process_image(img_path, 160)
     t = tf.reshape(tensor, [-1, 160, 160, 3])
-    o = model.predict(t, steps=1)#[0][0]
-    print(o)
+    o = model.predict(t, steps=1) # [0][0]
+    print o
     o = o[0][0]

     inference_time = datetime.timedelta(seconds=current_time - prev_time)
     payload = {
@@ -45,28 +45,31 @@ def run(raw_data):
         'scores': str(o)
     }
-    print('Input ({}), Prediction ({})'.format(post['image'], payload))
+    print 'Input ({}), Prediction ({})'.format(post['image'], payload)
     return payload


 def process_image(path, image_size):
     # Extract image (from web or path)
-    if(path.startswith('http')):
+    if path.startswith('http'):
         response = requests.get(path)
         img = np.array(Image.open(BytesIO(response.content)))
     else:
         img = np.array(Image.open(path))

     img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
-    #tf.image.decode_jpeg(img_raw, channels=3)
+    # tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final


-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width


 if __name__ == "__main__":
     images = {
@@ -74,17 +77,17 @@ if __name__ == "__main__":
         'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
     }

-    init()
+    my_model = init()

     for k, v in images.items():
-        print('{} => {}'.format(k, v))
+        print '{} => {}'.format(k, v)

     info('Taco Test')
-    taco = json.dumps({ 'image': images['tacos'] })
-    print(taco)
-    run(taco)
+    taco = json.dumps({'image': images['tacos']})
+    print taco
+    run(taco, my_model)

     info('Burrito Test')
-    burrito = json.dumps({ 'image': images['burrito'] })
-    print(burrito)
-    run(burrito)
+    burrito = json.dumps({'image': images['burrito']})
+    print burrito
+    run(burrito, my_model)


@@ -1,11 +1,11 @@
+"""Main pipeline file"""
+from kubernetes import client as k8s_client
 import kfp.dsl as dsl
 import kfp.compiler as compiler
-from kubernetes import client as k8s_client


 @dsl.pipeline(
     name='Tacos vs. Burritos',
-    description='Simple TF CNN for binary classifier between burritos and tacos'
+    description='Simple TF CNN'
 )
 def tacosandburritos_train(
     tenant_id,
@@ -13,15 +13,17 @@ def tacosandburritos_train(
     service_principal_password,
     subscription_id,
     resource_group,
-    workspace,
-    persistent_volume_path='/mnt/azure',
-    data_download='https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
-    epochs=5,
-    batch=32,
-    learning_rate=0.0001,
-    model_name='tacosandburritos',
-    profile_name='tacoprofile'
+    workspace
 ):
+    """Pipeline steps"""
+    persistent_volume_path = '/mnt/azure'
+    data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'
+    epochs = 5
+    batch = 32
+    learning_rate = 0.0001
+    model_name = 'tacosandburritos'
+    profile_name = 'tacoprofile'
+
     operations = {}
     image_size = 160
     training_folder = 'train'
@@ -123,19 +125,16 @@ def tacosandburritos_train(
         ]
     )

     operations['deploy'].after(operations['profile'])

-    for _, op in operations.items():
-        op.container.set_image_pull_policy("Always")
-        op.add_volume(
+    for _, op_1 in operations.items():
+        op_1.container.set_image_pull_policy("Always")
+        op_1.add_volume(
             k8s_client.V1Volume(
                 name='azure',
                 persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                     claim_name='azure-managed-disk')
             )
         ).add_volume_mount(k8s_client.V1VolumeMount(
-            mount_path='/mnt/azure',
-            name='azure')
-        )
+            mount_path='/mnt/azure', name='azure'))


 if __name__ == '__main__':
     compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')


@@ -1,69 +1,69 @@
 import os
 import shutil
-import wget
 import zipfile
 import argparse
-import numpy as np
+import wget
 import tensorflow as tf
 from pathlib2 import Path


-def check_dir(path, check=False):
-    if check:
-        assert os.path.exists(path), '{} does not exist!'.format(path)
-    else:
+def check_dir(path):
     if not os.path.exists(path):
         os.makedirs(path)
     return Path(path).resolve(strict=False)


 def download(source, target, force_clear=False):
     if force_clear and os.path.exists(target):
-        print('Removing {}...'.format(target))
+        print 'Removing {}...'.format(target)
         shutil.rmtree(target)

     check_dir(target)

     targt_file = str(Path(target).joinpath('data.zip'))
     if os.path.exists(targt_file) and not force_clear:
-        print('data already exists, skipping download')
+        print 'data already exists, skipping download'
         return

     if source.startswith('http'):
-        print("Downloading from {} to {}".format(source, target))
+        print "Downloading from {} to {}".format(source, target)
         wget.download(source, targt_file)
-        print("Done!")
+        print "Done!"
     else:
-        print("Copying from {} to {}".format(source, target))
+        print "Copying from {} to {}".format(source, target)
         shutil.copyfile(source, targt_file)

-    print('Unzipping {}'.format(targt_file))
+    print 'Unzipping {}'.format(targt_file)
     zipr = zipfile.ZipFile(targt_file)
     zipr.extractall(target)
     zipr.close()


 def process_image(path, image_size=160):
     img_raw = tf.io.read_file(path)
     img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final


 def walk_images(path, image_size=160):
-    images = []
-    print('Scanning {}'.format(path))
+    imgs = []
+    print 'Scanning {}'.format(path)
     # find subdirectories in base path
     # (they should be the labels)
     labels = []
     for (_, dirs, _) in os.walk(path):
-        print('Found {}'.format(dirs))
+        print 'Found {}'.format(dirs)
         labels = dirs
         break

     for d in labels:
         path = os.path.join(path, d)
-        print('Processing {}'.format(path))
+        print 'Processing {}'.format(path)
         # only care about files in directory
         for item in os.listdir(path):
             if not item.lower().endswith('.jpg'):
-                print('skipping {}'.format(item))
+                print 'skipping {}'.format(item)
                 continue

             image = os.path.join(path, item)
@@ -71,11 +71,12 @@ def walk_images(path, image_size=160):
                 img = process_image(image, image_size)
                 assert img.shape[2] == 3, "Invalid channel count"
                 # write out good images
-                images.append(image)
+                imgs.append(image)
             except Exception as e:
-                print('{}\n{}\n'.format(e, image))
+                print '{}\n{}\n'.format(e, image)

-    return images
+    return imgs


 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='data cleaning for binary image task')
@@ -84,31 +85,33 @@ if __name__ == "__main__":
     parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
     parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
     parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
-    parser.add_argument('-f', '--force', help='force clear all data', default=False, action='store_true')
+    parser.add_argument('-f', '--force',
+                        help='force clear all data', default=False, action='store_true')
     args = parser.parse_args()

-    print(args)
-    print('Using TensorFlow v.{}'.format(tf.__version__))
+    print args
+    print 'Using TensorFlow v.{}'.format(tf.__version__)

     base_path = Path(args.base_path).resolve(strict=False)
-    print('Base Path: {}'.format(base_path))
+    print 'Base Path: {}'.format(base_path)

     data_path = base_path.joinpath(args.data).resolve(strict=False)
-    print('Train Path: {}'.format(data_path))
+    print 'Train Path: {}'.format(data_path)

     target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
-    print('Train File: {}'.format(target_path))
+    print 'Train File: {}'.format(target_path)

     zip_path = args.zipfile

-    print('Acquiring data...')
-    download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip', str(base_path), args.force)
+    print 'Acquiring data...'
+    download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
+             str(base_path), args.force)

     if os.path.exists(str(target_path)):
-        print('dataset text file already exists, skipping check')
+        print 'dataset text file already exists, skipping check'
     else:
-        print('Testing images...')
+        print 'Testing images...'
         images = walk_images(str(data_path), args.img_size)

         # save file
-        print('writing dataset to {}'.format(target_path))
+        print 'writing dataset to {}'.format(target_path)
         with open(str(target_path), 'w+') as f:
             f.write('\n'.join(images))


@@ -1,30 +1,33 @@
 import json
 import time
+import datetime
+from io import BytesIO
 import requests
 import numpy as np
-import datetime
 from PIL import Image
-from io import BytesIO
 import tensorflow as tf
 from azureml.core.model import Model


 def init():
-    global model
-    try:
+    if Model.get_model_path('tacosandburritos'):
         model_path = Model.get_model_path('tacosandburritos')
-    except:
+    else:
         model_path = '/model/latest.h5'

-    print('Attempting to load model')
+    print 'Attempting to load model'
     model = tf.keras.models.load_model(model_path)
     model.summary()
-    print('Done!')
+    print 'Done!'

-    print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
+    print 'Initialized model "{}" at {}'.format(model_path, datetime.datetime.now())
+    return model


-def run(raw_data):
+def run(raw_data, model):
     prev_time = time.time()

     post = json.loads(raw_data)
@@ -34,8 +37,8 @@ def run(raw_data):
     tensor = process_image(img_path, 160)
     t = tf.reshape(tensor, [-1, 160, 160, 3])
-    o = model.predict(t, steps=1)#[0][0]
-    print(o)
+    o = model.predict(t, steps=1) # [0][0]
+    print o
     o = o[0][0]

     inference_time = datetime.timedelta(seconds=current_time - prev_time)
     payload = {
@@ -44,28 +47,31 @@ def run(raw_data):
         'scores': str(o)
     }
-    print('Input ({}), Prediction ({})'.format(post['image'], payload))
+    print 'Input ({}), Prediction ({})'.format(post['image'], payload)
     return payload


 def process_image(path, image_size):
     # Extract image (from web or path)
-    if(path.startswith('http')):
+    if path.startswith('http'):
         response = requests.get(path)
         img = np.array(Image.open(BytesIO(response.content)))
     else:
         img = np.array(Image.open(path))

     img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
-    #tf.image.decode_jpeg(img_raw, channels=3)
+    # tf.image.decode_jpeg(img_raw, channels=3)
     img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
     return img_final


-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width


 if __name__ == "__main__":
     images = {
@@ -73,17 +79,17 @@ if __name__ == "__main__":
         'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
     }

-    init()
+    my_model = init()

     for k, v in images.items():
-        print('{} => {}'.format(k, v))
+        print '{} => {}'.format(k, v)

     info('Taco Test')
-    taco = json.dumps({ 'image': images['tacos'] })
-    print(taco)
-    run(taco)
+    taco = json.dumps({'image': images['tacos']})
+    print taco
+    run(taco, my_model)

     info('Burrito Test')
-    burrito = json.dumps({ 'image': images['burrito'] })
-    print(burrito)
-    run(burrito)
+    burrito = json.dumps({'image': images['burrito']})
+    print burrito
+    run(burrito, my_model)


@@ -1,22 +1,22 @@
 import json
 from os.path import relpath
-import azureml
 import argparse
 from pathlib2 import Path
+import azureml
 from azureml.core import Workspace
 from azureml.core.model import Model
-from azureml.core.image import ContainerImage, Image
-from azureml.core.webservice import Webservice, AciWebservice
 from azureml.core.authentication import ServicePrincipalAuthentication


-def info(msg, char = "#", width = 75):
-    print("")
-    print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
-    print(char * width)
+def info(msg, char="#", width=75):
+    print ""
+    print char * width
+    print char + " %0*s" % ((-1 * width) + 5, msg) + char
+    print char * width


-def run(model_path, model_name, tenant_id, service_principal_id,
-        service_principal_password, subscription_id, resource_group, workspace, tags):
+def get_ws(tenant_id, service_principal_id,
+           service_principal_password, subscription_id, resource_group, workspace):
     auth_args = {
         'tenant_id': tenant_id,
         'service_principal_id': service_principal_id,
@@ -28,18 +28,21 @@ def run(model_path, model_name, tenant_id, service_principal_id,
         'subscription_id': subscription_id,
         'resource_group': resource_group
     }

     ws = Workspace.get(workspace, **ws_args)
+    return ws

-    print(ws.get_details())

-    print('\nSaving model {} to {}'.format(model_path, model_name))
+def run(mdl_path, model_name, ws, tgs):
+    print ws.get_details()
+    print '\nSaving model {} to {}'.format(mdl_path, model_name)

     # Model Path needs to be relative
-    model_path = relpath(model_path, '.')
+    mdl_path = relpath(mdl_path, '.')

-    model = Model.register(ws, model_name=model_name, model_path=model_path, tags=tags)
-    print('Done!')
+    Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
+    print 'Done!'


 if __name__ == "__main__":
     # argparse stuff for model path and model name
@ -55,13 +58,13 @@ if __name__ == "__main__":
parser.add_argument('-w', '--workspace', help='workspace') parser.add_argument('-w', '--workspace', help='workspace')
args = parser.parse_args() args = parser.parse_args()
print('Azure ML SDK Version: {}'.format(azureml.core.VERSION)) print 'Azure ML SDK Version: {}'.format(azureml.core.VERSION)
args.model = 'model/' + args.model args.model = 'model/' + args.model
model_path = str(Path(args.base_path).resolve(strict=False).joinpath(args.model).resolve(strict=False)) model_path = str(Path(args.base_path).resolve(
params_path = str(Path(args.base_path).resolve(strict=False).joinpath('params.json').resolve(strict=False)) strict=False).joinpath(args.model).resolve(strict=False))
rgs = { params_path = str(Path(args.base_path).resolve(
'model_path': model_path, strict=False).joinpath('params.json').resolve(strict=False))
'model_name': args.model_name, wsrgs = {
'tenant_id': args.tenant_id, 'tenant_id': args.tenant_id,
'service_principal_id': args.service_principal_id, 'service_principal_id': args.service_principal_id,
'service_principal_password': args.service_principal_password, 'service_principal_password': args.service_principal_password,
@@ -69,23 +72,29 @@ if __name__ == "__main__":
         'resource_group': args.resource_group,
         'workspace': args.workspace
     }
+    rgs = {
+        'mdl_path': model_path,
+        'model_name': args.model_name
+    }

     # printing out args for posterity
-    for i in rgs:
+    for i in wsrgs:
         if i == 'service_principal_password':
-            print('{} => **********'.format(i))
+            print '{} => **********'.format(i)
         else:
-            print('{} => {}'.format(i, rgs[i]))
+            print '{} => {}'.format(i, rgs[i])

     with(open(str(params_path), 'r')) as f:
         tags = json.load(f)

-    print('\n\nUsing the following tags:')
+    print '\n\nUsing the following tags:'
     for tag in tags:
-        print('{} => {}'.format(tag, tags[tag]))
+        print '{} => {}'.format(tag, tags[tag])

     rgs['tags'] = tags
+
+    workspc = get_ws(**wsrgs)
+    rgs['ws'] = workspc

     run(**rgs)

     # python register.py --model_path v --model_name c --tenant_id c


@@ -5,51 +5,52 @@ import hmac
 import json
 import hashlib
 import argparse
+from random import shuffle
 import numpy as np
 import tensorflow as tf
-from pathlib2 import Path
-from random import shuffle
-from datetime import datetime
 from tensorflow.data import Dataset
+from pathlib2 import Path

-global image_size

-def info(msg, char = "#", width = 75):
+def info(msg, char="#", width=75):
     print("")
     print(char * width)
-    print(char + " %0*s" % ((-1*width)+5, msg) + char)
+    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
     print(char * width)


-def check_dir(path, check=False):
-    if check:
-        assert os.path.exists(path), '{} does not exist!'.format(path)
-    else:
+def check_dir(path):
     if not os.path.exists(path):
         os.makedirs(path)
     return Path(path).resolve(strict=False)


-def process_image(path, label):
+def process_image(path, label, img_size):
     img_raw = tf.io.read_file(path)
     img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
-    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
+    img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
     return img_final, label


-def load_dataset(base_path, dataset, split=[8, 1, 1]):
+def load_dataset(base_path, dset, split=None):
     # normalize splits
+    if split is None:
+        split = [8, 1, 1]
     splits = np.array(split) / np.sum(np.array(split))

     # find labels - parent folder names
     labels = {}
     for (_, dirs, _) in os.walk(base_path):
         print('found {}'.format(dirs))
-        labels = { k: v for (v, k) in enumerate(dirs) }
+        labels = {k: v for (v, k) in enumerate(dirs)}
         print('using {}'.format(labels))
         break

     # load all files along with idx label
-    print('loading dataset from {}'.format(dataset))
-    with open(dataset, 'r') as d:
-        data = [(str(Path(f.strip()).absolute()), labels[Path(f.strip()).parent.name]) for f in d.readlines()]
+    print('loading dataset from {}'.format(dset))
+    with open(dset, 'r') as d:
+        data = [(str(Path(line.strip()).absolute()),
+                 labels[Path(line.strip()).parent.name]) for line in d.readlines()]

     print('dataset size: {}\nsuffling data...'.format(len(data)))
@@ -59,25 +60,29 @@ def load_dataset(base_path, dataset, split=[8, 1, 1]):
     print('splitting data...')

     # split data
     train_idx = int(len(data) * splits[0])
-    eval_idx = int(len(data) * splits[1])

-    return data[:train_idx], \
-        data[train_idx:train_idx + eval_idx], \
-        data[train_idx + eval_idx:], \
-        labels
+    return data[:train_idx]


-#@print_info
-def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
-    img_shape = (image_size, image_size, 3)
+# @print_info
+def run(
+        dpath,
+        img_size=160,
+        epochs=10,
+        batch_size=32,
+        learning_rate=0.0001,
+        output='model',
+        dset=None):
+    img_shape = (img_size, img_size, 3)

     info('Loading Data Set')

     # load dataset
-    train, test, val, labels = load_dataset(data_path, dataset)
+    train = load_dataset(dpath, dset)

     # training data
     train_data, train_labels = zip(*train)
     train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
-                            Dataset.from_tensor_slices(list(train_labels))))
+                            Dataset.from_tensor_slices(list(train_labels)), img_size))

     train_ds = train_ds.map(map_func=process_image,
                             num_parallel_calls=5)
@@ -109,8 +114,8 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
     # training
     info('Training')

-    steps_per_epoch = math.ceil(len(train)/batch_size)
-    history = model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
+    steps_per_epoch = math.ceil(len(train) / batch_size)
+    model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)

     # save model
     info('Saving Model')
@@ -130,19 +135,20 @@ def run(data_path, image_size=160, epochs=10, batch_size=32, learning_rate=0.0001, output='model', dataset=None):
     return generate_hash(file_output, 'kf_pipeline')


-def generate_hash(file, key):
-    print('Generating hash for {}'.format(file))
+def generate_hash(dfile, key):
+    print('Generating hash for {}'.format(dfile))
     m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
     BUF_SIZE = 65536
-    with open(str(file), 'rb') as f:
+    with open(str(dfile), 'rb') as myfile:
         while True:
-            data = f.read(BUF_SIZE)
+            data = myfile.read(BUF_SIZE)
             if not data:
                 break
             m.update(data)

     return m.hexdigest()


 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='transfer learning for binary image task')
     parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
@ -165,13 +171,13 @@ if __name__ == "__main__":
params = Path(args.base_path).joinpath('params.json') params = Path(args.base_path).joinpath('params.json')
args = { args = {
"data_path": str(data_path), "dpath": str(data_path),
"image_size": image_size, "img_size": image_size,
"epochs": args.epochs, "epochs": args.epochs,
"batch_size": args.batch, "batch_size": args.batch,
"learning_rate": args.lr, "learning_rate": args.lr,
"output": str(target_path), "output": str(target_path),
"dataset": str(dataset) "dset": str(dataset)
} }
dataset_signature = generate_hash(dataset, 'kf_pipeline') dataset_signature = generate_hash(dataset, 'kf_pipeline')