diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..697316db --- /dev/null +++ b/.pylintrc @@ -0,0 +1,399 @@ +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=third_party + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=no + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=4 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,missing-docstring,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,relative-import,invalid-name,bad-continuation,no-member,locally-disabled,fixme,import-error,too-many-locals + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. 
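+# Example invocation (from the repository root, where this file lives, pylint
+# picks it up automatically; it can also be passed explicitly):
+#   pylint --rcfile=.pylintrc agents/trainer/task.py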
+output-format=text + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". This option is deprecated +# and it will be removed in Pylint 2.0. +files-output=no + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match function or class names that do +# not require a docstring. 
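+# For example, "^_" below exempts underscore-prefixed helpers (a hypothetical
+# _load_config, say) from the docstring requirement; note that the
+# missing-docstring message is also disabled globally in MESSAGES CONTROL above.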
+no-docstring-rgx=^_ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=100 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +# Use 2 spaces consistent with TensorFlow style. +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,future.builtins + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[SIMILARITIES] + +# Minimum lines number of a similarity. 
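+# For example, with the value 4 below, two functions must share at least four
+# similar lines before pylint reports them as duplicate-code (R0801).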
+min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=7 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=0 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. 
Defaults to +# "Exception" +overgeneral-exceptions=Exception \ No newline at end of file diff --git a/agents/trainer/task.py b/agents/trainer/task.py index 4c82954e..fee5beb4 100644 --- a/agents/trainer/task.py +++ b/agents/trainer/task.py @@ -12,18 +12,18 @@ """Provides an entrypoint for the training task.""" +#pylint: disable=unused-import + from __future__ import absolute_import, division, print_function -import argparse import datetime import logging import os import pprint import uuid -import pip -import tensorflow as tf from google.cloud import storage +import tensorflow as tf import agents import pybullet_envs # To make AntBulletEnv-v0 available. @@ -113,39 +113,39 @@ def hparams_base(): """Base hparams tf/Agents PPO """ # General - algorithm = agents.ppo.PPOAlgorithm - num_agents = 30 - eval_episodes = 30 - use_gpu = False +# algorithm = agents.ppo.PPOAlgorithm +# num_agents = 30 +# eval_episodes = 30 +# use_gpu = False # Environment - env = 'KukaBulletEnv-v0' - normalize_ranges = True - max_length = 1000 +# env = 'KukaBulletEnv-v0' +# normalize_ranges = True +# max_length = 1000 # Network - network = agents.scripts.networks.feed_forward_gaussian - weight_summaries = dict( - all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*') - policy_layers = 200, 100 - value_layers = 200, 100 - init_output_factor = 0.1 - init_logstd = -1 - init_std = 0.35 +# network = agents.scripts.networks.feed_forward_gaussian +# weight_summaries = dict( +# all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*') +# policy_layers = 200, 100 +# value_layers = 200, 100 +# init_output_factor = 0.1 +# init_logstd = -1 +# init_std = 0.35 # Optimization - update_every = 60 - update_epochs = 25 - optimizer = tf.train.AdamOptimizer - learning_rate = 1e-4 - steps = 3e7 # 30M +# update_every = 60 +# update_epochs = 25 +# optimizer = tf.train.AdamOptimizer +# learning_rate = 1e-4 +# steps = 3e7 # 30M # Losses - discount = 0.995 - kl_target = 1e-2 - kl_cutoff_factor = 2 - kl_cutoff_coef = 1000 - kl_init_penalty = 1 +# discount = 0.995 +# kl_target = 1e-2 +# kl_cutoff_factor = 2 +# kl_cutoff_coef = 1000 +# kl_init_penalty = 1 return locals() @@ -158,9 +158,9 @@ def _object_import_from_string(name): return mod -def _realize_import_attrs(d, filter): +def _realize_import_attrs(d, hparam_filter): for k, v in d.items(): - if k in filter: + if k in hparam_filter: imported = _object_import_from_string(v) # TODO: Provide an appropriately informative error if the import fails # except ImportError as e: @@ -170,7 +170,7 @@ def _realize_import_attrs(d, filter): return d -def _get_agents_configuration(hparam_set_name, log_dir=None, is_chief=False): +def _get_agents_configuration(log_dir=None): """Load hyperparameter config.""" try: # Try to resume training. 
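+    # Illustrative sketch only (hypothetical call): resuming means re-reading
+    # the config that a previous run saved under log_dir, along the lines of
+    #   config = agents.scripts.utility.load_config(log_dir)
+    # with a fallback to the hparams defaults when nothing has been saved yet.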
@@ -243,23 +243,20 @@ def gcs_upload(local_dir, gcs_out_dir):
     blob.upload_from_filename(local_file_path)


-def main(unused_argv):
+def main(_):
   """Run training."""
   tf.logging.set_verbosity(tf.logging.INFO)
   if FLAGS.debug:
     tf.logging.set_verbosity(tf.logging.DEBUG)

-  run_config = tf.contrib.learn.RunConfig()
-  log_dir = FLAGS.logdir
-  agents_config = _get_agents_configuration(
-      FLAGS.hparam_set_id, log_dir, run_config.is_chief)
+  agents_config = _get_agents_configuration(FLAGS.logdir)

   if FLAGS.run_mode == 'train':
     for score in agents.scripts.train.train(agents_config, env_processes=True):
-      logging.info('Score {}.'.format(score))
+      logging.info('Score %s.', score)
   if FLAGS.run_mode == 'render':
     now = datetime.datetime.now()
     subdir = now.strftime("%m%d-%H%M") + "-" + uuid.uuid4().hex[0:4]
diff --git a/github_issue_summarization/docker/flask_web/app.py b/github_issue_summarization/docker/flask_web/app.py
index 993ece2c..4296dad6 100644
--- a/github_issue_summarization/docker/flask_web/app.py
+++ b/github_issue_summarization/docker/flask_web/app.py
@@ -2,38 +2,47 @@
 Simple app that parses predictions from a trained model and displays them.
 """
-from flask import Flask, json, render_template, request
 import requests

-app = Flask(__name__)
+from flask import Flask, json, render_template, request

+APP = Flask(__name__)

-@app.route("/")
+@APP.route("/")
 def index():
+  """Default route.
+
+  Serves the landing page template.
+  """
   return render_template("index.html")

-@app.route("/summary", methods=['GET', 'POST'])
+@APP.route("/summary", methods=['GET', 'POST'])
 def summary():
+  """Main prediction route.
+
+  Provides a machine-generated summary of the given text. Sends a request to a live
+  model trained on GitHub issues.
+  """
   if request.method == 'POST':
     issue_text = request.form["issue_text"]
     url = "http://ambassador:80/seldon/issue-summarization/api/v0.1/predictions"
-    headers = { 'content-type': 'application/json' }
+    headers = {'content-type': 'application/json'}
     json_data = {
-      "data" : {
-        "ndarray" : [[ issue_text ]]
-      }
+        "data" : {
+            "ndarray" : [[issue_text]]
+        }
     }
-    r = requests.post(url = url,
-                      headers = headers,
-                      data = json.dumps(json_data))
+    response = requests.post(url=url,
+                             headers=headers,
+                             data=json.dumps(json_data))

-    rjs = json.loads(r.text)
-    summary = rjs["data"]["ndarray"][0][0]
+    response_json = json.loads(response.text)
+    issue_summary = response_json["data"]["ndarray"][0][0]

-    return render_template("summary.html",
-                           issue_text = issue_text,
-                           summary = summary)
+    return render_template("issue_summary.html",
+                           issue_text=issue_text,
+                           issue_summary=issue_summary)
+  return ('', 204)

 if __name__ == '__main__':
-  app.run(debug = True, host = '0.0.0.0', port = 80)
-
+  APP.run(debug=True, host='0.0.0.0', port=80)
diff --git a/github_issue_summarization/notebooks/IssueSummarization.py b/github_issue_summarization/notebooks/IssueSummarization.py
deleted file mode 100644
index 4dc9bc04..00000000
--- a/github_issue_summarization/notebooks/IssueSummarization.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from __future__ import print_function
-
-import dill as dpickle
-import numpy as np
-from keras.models import load_model
-
-from seq2seq_utils import Seq2Seq_Inference
-
-
-class IssueSummarization(object):
-
-  def __init__(self):
-    with open('body_pp.dpkl', 'rb') as f:
-      body_pp = dpickle.load(f)
-    with open('title_pp.dpkl', 'rb') as f:
-      title_pp = dpickle.load(f)
-    self.model = Seq2Seq_Inference(encoder_preprocessor=body_pp,
-                                   decoder_preprocessor=title_pp,
-                                   seq2seq_model=load_model('seq2seq_model_tutorial.h5'))
-
-  def predict(self, X, feature_names):
-    return np.asarray([[self.model.generate_issue_title(body[0])[1]] for body in X])
diff --git a/github_issue_summarization/notebooks/issue_summarization.py b/github_issue_summarization/notebooks/issue_summarization.py
new file mode 100644
index 00000000..2286d444
--- /dev/null
+++ b/github_issue_summarization/notebooks/issue_summarization.py
@@ -0,0 +1,25 @@
+"""Generates predictions using a stored model.
+
+Uses trained model files to generate a prediction.
+"""
+
+from __future__ import print_function
+
+import numpy as np
+import dill as dpickle
+from keras.models import load_model
+from seq2seq_utils import Seq2Seq_Inference
+
+class IssueSummarization(object):
+
+  def __init__(self):
+    with open('body_pp.dpkl', 'rb') as body_file:
+      body_pp = dpickle.load(body_file)
+    with open('title_pp.dpkl', 'rb') as title_file:
+      title_pp = dpickle.load(title_file)
+    self.model = Seq2Seq_Inference(encoder_preprocessor=body_pp,
+                                   decoder_preprocessor=title_pp,
+                                   seq2seq_model=load_model('seq2seq_model_tutorial.h5'))
+
+  def predict(self, input_text):
+    return np.asarray([[self.model.generate_issue_title(body[0])[1]] for body in input_text])
diff --git a/github_issue_summarization/notebooks/seq2seq_utils.py b/github_issue_summarization/notebooks/seq2seq_utils.py
index 122ecc30..0ddaebfa 100644
--- a/github_issue_summarization/notebooks/seq2seq_utils.py
+++ b/github_issue_summarization/notebooks/seq2seq_utils.py
@@ -1,429 +1,432 @@
+import logging
+import dill as dpickle
+import numpy as np
 from matplotlib import pyplot as plt
 import tensorflow as tf
+from IPython.display import SVG, display
 from keras import backend as K
 from keras.layers import Input
 from keras.models import Model
-from IPython.display import SVG, display
 from keras.utils.vis_utils import model_to_dot
-import logging
-import numpy as np
-import dill as dpickle
 from annoy import AnnoyIndex
 from tqdm import tqdm, tqdm_notebook
-from random import random
 from nltk.translate.bleu_score import corpus_bleu

 def load_text_processor(fname='title_pp.dpkl'):
-    """
-    Load preprocessors from disk.
+  """
+  Load preprocessors from disk.
- Parameters - ---------- - fname: str - file name of ktext.proccessor object + Parameters + ---------- + fname: str + file name of ktext.proccessor object - Returns - ------- - num_tokens : int - size of vocabulary loaded into ktext.processor - pp : ktext.processor - the processor you are trying to load + Returns + ------- + num_tokens : int + size of vocabulary loaded into ktext.processor + pp : ktext.processor + the processor you are trying to load - Typical Usage: - ------------- + Typical Usage: + ------------- - num_decoder_tokens, title_pp = load_text_processor(fname='title_pp.dpkl') - num_encoder_tokens, body_pp = load_text_processor(fname='body_pp.dpkl') + num_decoder_tokens, title_pp = load_text_processor(fname='title_pp.dpkl') + num_encoder_tokens, body_pp = load_text_processor(fname='body_pp.dpkl') - """ - # Load files from disk - with open(fname, 'rb') as f: - pp = dpickle.load(f) + """ + # Load files from disk + with open(fname, 'rb') as f: + pp = dpickle.load(f) - num_tokens = max(pp.id2token.keys()) + 1 - print(f'Size of vocabulary for {fname}: {num_tokens:,}') - return num_tokens, pp + num_tokens = max(pp.id2token.keys()) + 1 + print('Size of vocabulary for {}: {}'.format(fname, num_tokens)) + return num_tokens, pp def load_decoder_inputs(decoder_np_vecs='train_title_vecs.npy'): - """ - Load decoder inputs. + """ + Load decoder inputs. - Parameters - ---------- - decoder_np_vecs : str - filename of serialized numpy.array of decoder input (issue title) + Parameters + ---------- + decoder_np_vecs : str + filename of serialized numpy.array of decoder input (issue title) - Returns - ------- - decoder_input_data : numpy.array - The data fed to the decoder as input during training for teacher forcing. - This is the same as `decoder_np_vecs` except the last position. - decoder_target_data : numpy.array - The data that the decoder data is trained to generate (issue title). - Calculated by sliding `decoder_np_vecs` one position forward. + Returns + ------- + decoder_input_data : numpy.array + The data fed to the decoder as input during training for teacher forcing. + This is the same as `decoder_np_vecs` except the last position. + decoder_target_data : numpy.array + The data that the decoder data is trained to generate (issue title). + Calculated by sliding `decoder_np_vecs` one position forward. - """ - vectorized_title = np.load(decoder_np_vecs) - # For Decoder Input, you don't need the last word as that is only for prediction - # when we are training using Teacher Forcing. - decoder_input_data = vectorized_title[:, :-1] + """ + vectorized_title = np.load(decoder_np_vecs) + # For Decoder Input, you don't need the last word as that is only for prediction + # when we are training using Teacher Forcing. 
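+  # Illustrative example (hypothetical 4-token row): if a row of
+  # vectorized_title is [_start_, w1, w2, _end_], decoder_input_data below is
+  # [_start_, w1, w2] and decoder_target_data is [w1, w2, _end_].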
+ decoder_input_data = vectorized_title[:, :-1] - # Decoder Target Data Is Ahead By 1 Time Step From Decoder Input Data (Teacher Forcing) - decoder_target_data = vectorized_title[:, 1:] + # Decoder Target Data Is Ahead By 1 Time Step From Decoder Input Data (Teacher Forcing) + decoder_target_data = vectorized_title[:, 1:] - print(f'Shape of decoder input: {decoder_input_data.shape}') - print(f'Shape of decoder target: {decoder_target_data.shape}') - return decoder_input_data, decoder_target_data + print('Shape of decoder input: {}'.format(decoder_input_data.shape)) + print('Shape of decoder target: {}'.format(decoder_target_data.shape)) + return decoder_input_data, decoder_target_data def load_encoder_inputs(encoder_np_vecs='train_body_vecs.npy'): - """ - Load variables & data that are inputs to encoder. + """ + Load variables & data that are inputs to encoder. - Parameters - ---------- - encoder_np_vecs : str - filename of serialized numpy.array of encoder input (issue title) + Parameters + ---------- + encoder_np_vecs : str + filename of serialized numpy.array of encoder input (issue title) - Returns - ------- - encoder_input_data : numpy.array - The issue body - doc_length : int - The standard document length of the input for the encoder after padding - the shape of this array will be (num_examples, doc_length) + Returns + ------- + encoder_input_data : numpy.array + The issue body + doc_length : int + The standard document length of the input for the encoder after padding + the shape of this array will be (num_examples, doc_length) - """ - vectorized_body = np.load(encoder_np_vecs) - # Encoder input is simply the body of the issue text - encoder_input_data = vectorized_body - doc_length = encoder_input_data.shape[1] - print(f'Shape of encoder input: {encoder_input_data.shape}') - return encoder_input_data, doc_length + """ + vectorized_body = np.load(encoder_np_vecs) + # Encoder input is simply the body of the issue text + encoder_input_data = vectorized_body + doc_length = encoder_input_data.shape[1] + print('Shape of encoder input: {}'.format(encoder_input_data.shape)) + return encoder_input_data, doc_length def viz_model_architecture(model): - """Visualize model architecture in Jupyter notebook.""" - display(SVG(model_to_dot(model).create(prog='dot', format='svg'))) + """Visualize model architecture in Jupyter notebook.""" + display(SVG(model_to_dot(model).create(prog='dot', format='svg'))) def free_gpu_mem(): - """Attempt to free gpu memory.""" - K.get_session().close() - cfg = K.tf.ConfigProto() - cfg.gpu_options.allow_growth = True - K.set_session(K.tf.Session(config=cfg)) + """Attempt to free gpu memory.""" + K.get_session().close() + cfg = K.tf.ConfigProto() + cfg.gpu_options.allow_growth = True + K.set_session(K.tf.Session(config=cfg)) def test_gpu(): - """Run a toy computation task in tensorflow to test GPU.""" - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - session = tf.Session(config=config) - hello = tf.constant('Hello, TensorFlow!') - print(session.run(hello)) + """Run a toy computation task in tensorflow to test GPU.""" + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + session = tf.Session(config=config) + hello = tf.constant('Hello, TensorFlow!') + print(session.run(hello)) def plot_model_training_history(history_object): - """Plots model train vs. 
validation loss.""" - plt.title('model accuracy') - plt.ylabel('accuracy') - plt.xlabel('epoch') - plt.plot(history_object.history['loss']) - plt.plot(history_object.history['val_loss']) - plt.legend(['train', 'test'], loc='upper left') - plt.show() + """Plots model train vs. validation loss.""" + plt.title('model accuracy') + plt.ylabel('accuracy') + plt.xlabel('epoch') + plt.plot(history_object.history['loss']) + plt.plot(history_object.history['val_loss']) + plt.legend(['train', 'test'], loc='upper left') + plt.show() def extract_encoder_model(model): - """ - Extract the encoder from the original Sequence to Sequence Model. + """ + Extract the encoder from the original Sequence to Sequence Model. - Returns a keras model object that has one input (body of issue) and one - output (encoding of issue, which is the last hidden state). + Returns a keras model object that has one input (body of issue) and one + output (encoding of issue, which is the last hidden state). - Input: - ----- - model: keras model object + Input: + ----- + model: keras model object - Returns: - ----- - keras model object + Returns: + ----- + keras model object - """ - encoder_model = model.get_layer('Encoder-Model') - return encoder_model + """ + encoder_model = model.get_layer('Encoder-Model') + return encoder_model def extract_decoder_model(model): - """ - Extract the decoder from the original model. + """ + Extract the decoder from the original model. - Inputs: - ------ - model: keras model object + Inputs: + ------ + model: keras model object - Returns: - ------- - A Keras model object with the following inputs and outputs: + Returns: + ------- + A Keras model object with the following inputs and outputs: - Inputs of Keras Model That Is Returned: - 1: the embedding index for the last predicted word or the indicator - 2: the last hidden state, or in the case of the first word the hidden state from the encoder + Inputs of Keras Model That Is Returned: + 1: the embedding index for the last predicted word or the indicator + 2: the last hidden state, or in the case of the first word the hidden state from the encoder - Outputs of Keras Model That Is Returned: - 1. Prediction (class probabilities) for the next word - 2. The hidden state of the decoder, to be fed back into the decoder at the next time step + Outputs of Keras Model That Is Returned: + 1. Prediction (class probabilities) for the next word + 2. The hidden state of the decoder, to be fed back into the decoder at the next time step - Implementation Notes: - ---------------------- - Must extract relevant layers and reconstruct part of the computation graph - to allow for different inputs as we are not going to use teacher forcing at - inference time. + Implementation Notes: + ---------------------- + Must extract relevant layers and reconstruct part of the computation graph + to allow for different inputs as we are not going to use teacher forcing at + inference time. 
-    """
-    # the latent dimension is the same throughout the architecture so we are going to
-    # cheat and grab the latent dimension of the embedding because that is the same as what is
-    # output from the decoder
-    latent_dim = model.get_layer('Decoder-Word-Embedding').output_shape[-1]
+  """
+  # the latent dimension is the same throughout the architecture so we are going to
+  # cheat and grab the latent dimension of the embedding because that is the same as what is
+  # output from the decoder
+  latent_dim = model.get_layer('Decoder-Word-Embedding').output_shape[-1]

-    # Reconstruct the input into the decoder
-    decoder_inputs = model.get_layer('Decoder-Input').input
-    dec_emb = model.get_layer('Decoder-Word-Embedding')(decoder_inputs)
-    dec_bn = model.get_layer('Decoder-Batchnorm-1')(dec_emb)
+  # Reconstruct the input into the decoder
+  decoder_inputs = model.get_layer('Decoder-Input').input
+  dec_emb = model.get_layer('Decoder-Word-Embedding')(decoder_inputs)
+  dec_bn = model.get_layer('Decoder-Batchnorm-1')(dec_emb)

-    # Instead of setting the intial state from the encoder and forgetting about it, during inference
-    # we are not doing teacher forcing, so we will have to have a feedback loop from predictions back into
-    # the GRU, thus we define this input layer for the state so we can add this capability
-    gru_inference_state_input = Input(shape=(latent_dim,), name='hidden_state_input')
+  # Instead of setting the initial state from the encoder and forgetting about it, during inference
+  # we are not doing teacher forcing, so we will have to have a feedback loop from predictions back
+  # into the GRU, thus we define this input layer for the state so we can add this capability
+  gru_inference_state_input = Input(shape=(latent_dim,), name='hidden_state_input')

-    # we need to reuse the weights that is why we are getting this
-    # If you inspect the decoder GRU that we created for training, it will take as input
-    # 2 tensors -> (1) is the embedding layer output for the teacher forcing
-    #              (which will now be the last step's prediction, and will be _start_ on the first time step)
-    #              (2) is the state, which we will initialize with the encoder on the first time step, but then
-    #              grab the state after the first prediction and feed that back in again.
-    gru_out, gru_state_out = model.get_layer('Decoder-GRU')([dec_bn, gru_inference_state_input])
+  # we need to reuse the trained weights; that is why we fetch this layer
+  # If you inspect the decoder GRU that we created for training, it will take as input
+  # 2 tensors -> (1) is the embedding layer output for the teacher forcing
+  #              (which will now be the last step's prediction, and will be _start_ on the
+  #              first time step)
+  #              (2) is the state, which we will initialize with the encoder on the first time step
+  #              but then grab the state after the first prediction and feed that back in again.
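+  # Resulting call pattern at inference time (sketch; variable names as used in
+  # generate_issue_title below):
+  #   step 1: preds, st = decoder_model.predict([start_token_id, body_encoding])
+  #   step t: preds, st = decoder_model.predict([last_pred_id, st])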
+ gru_out, gru_state_out = model.get_layer('Decoder-GRU')([dec_bn, gru_inference_state_input]) - # Reconstruct dense layers - dec_bn2 = model.get_layer('Decoder-Batchnorm-2')(gru_out) - dense_out = model.get_layer('Final-Output-Dense')(dec_bn2) - decoder_model = Model([decoder_inputs, gru_inference_state_input], - [dense_out, gru_state_out]) - return decoder_model + # Reconstruct dense layers + dec_bn2 = model.get_layer('Decoder-Batchnorm-2')(gru_out) + dense_out = model.get_layer('Final-Output-Dense')(dec_bn2) + decoder_model = Model([decoder_inputs, gru_inference_state_input], + [dense_out, gru_state_out]) + return decoder_model class Seq2Seq_Inference(object): - def __init__(self, - encoder_preprocessor, - decoder_preprocessor, - seq2seq_model): - self.pp_body = encoder_preprocessor - self.pp_title = decoder_preprocessor - self.seq2seq_model = seq2seq_model - self.encoder_model = extract_encoder_model(seq2seq_model) - self.decoder_model = extract_decoder_model(seq2seq_model) - self.default_max_len_title = self.pp_title.padding_maxlen - self.nn = None - self.rec_df = None + # pylint: disable=too-many-instance-attributes - def generate_issue_title(self, - raw_input_text, - max_len_title=None): - """ - Use the seq2seq model to generate a title given the body of an issue. + def __init__(self, + encoder_preprocessor, + decoder_preprocessor, + seq2seq_model): - Inputs - ------ - raw_input: str - The body of the issue text as an input string + self.pp_body = encoder_preprocessor + self.pp_title = decoder_preprocessor + self.seq2seq_model = seq2seq_model + self.encoder_model = extract_encoder_model(seq2seq_model) + self.decoder_model = extract_decoder_model(seq2seq_model) + self.default_max_len_title = self.pp_title.padding_maxlen + self.nn = None + self.rec_df = None - max_len_title: int (optional) - The maximum length of the title the model will generate + def generate_issue_title(self, + raw_input_text, + max_len_title=None): + """ + Use the seq2seq model to generate a title given the body of an issue. - """ - if max_len_title is None: - max_len_title = self.default_max_len_title - # get the encoder's features for the decoder - raw_tokenized = self.pp_body.transform([raw_input_text]) - body_encoding = self.encoder_model.predict(raw_tokenized) - # we want to save the encoder's embedding before its updated by decoder - # because we can use that as an embedding for other tasks. - original_body_encoding = body_encoding - state_value = np.array(self.pp_title.token2id['_start_']).reshape(1, 1) + Inputs + ------ + raw_input: str + The body of the issue text as an input string - decoded_sentence = [] - stop_condition = False - while not stop_condition: - preds, st = self.decoder_model.predict([state_value, body_encoding]) + max_len_title: int (optional) + The maximum length of the title the model will generate - # We are going to ignore indices 0 (padding) and indices 1 (unknown) - # Argmax will return the integer index corresponding to the - # prediction + 2 b/c we chopped off first two - pred_idx = np.argmax(preds[:, :, 2:]) + 2 + """ + if max_len_title is None: + max_len_title = self.default_max_len_title + # get the encoder's features for the decoder + raw_tokenized = self.pp_body.transform([raw_input_text]) + body_encoding = self.encoder_model.predict(raw_tokenized) + # we want to save the encoder's embedding before its updated by decoder + # because we can use that as an embedding for other tasks. 
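+    # e.g. print_example() below hands this saved encoding to the annoy index
+    # (nn.get_nns_by_vector) to look up similar issues.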
+ original_body_encoding = body_encoding + state_value = np.array(self.pp_title.token2id['_start_']).reshape(1, 1) - # retrieve word from index prediction - pred_word_str = self.pp_title.id2token[pred_idx] + decoded_sentence = [] + stop_condition = False + while not stop_condition: + preds, st = self.decoder_model.predict([state_value, body_encoding]) - if pred_word_str == '_end_' or len(decoded_sentence) >= max_len_title: - stop_condition = True - break - decoded_sentence.append(pred_word_str) + # We are going to ignore indices 0 (padding) and indices 1 (unknown) + # Argmax will return the integer index corresponding to the + # prediction + 2 b/c we chopped off first two + pred_idx = np.argmax(preds[:, :, 2:]) + 2 - # update the decoder for the next word - body_encoding = st - state_value = np.array(pred_idx).reshape(1, 1) + # retrieve word from index prediction + pred_word_str = self.pp_title.id2token[pred_idx] - return original_body_encoding, ' '.join(decoded_sentence) + if pred_word_str == '_end_' or len(decoded_sentence) >= max_len_title: + stop_condition = True + break + decoded_sentence.append(pred_word_str) + + # update the decoder for the next word + body_encoding = st + state_value = np.array(pred_idx).reshape(1, 1) + + return original_body_encoding, ' '.join(decoded_sentence) - def print_example(self, - i, - body_text, - title_text, - url, - threshold): - """ - Prints an example of the model's prediction for manual inspection. - """ - if i: - print('\n\n==============================================') - print(f'============== Example # {i} =================\n') + def print_example(self, + i, + body_text, + title_text, + url, + threshold): + """ + Prints an example of the model's prediction for manual inspection. + """ + if i: + print('\n\n==============================================') + print('============== Example # {} =================\n'.format(i)) - if url: - print(url) + if url: + print(url) - print(f"Issue Body:\n {body_text} \n") + print("Issue Body:\n {} \n".format(body_text)) - if title_text: - print(f"Original Title:\n {title_text}") + if title_text: + print("Original Title:\n {}".format(title_text)) - emb, gen_title = self.generate_issue_title(body_text) - print(f"\n****** Machine Generated Title (Prediction) ******:\n {gen_title}") + emb, gen_title = self.generate_issue_title(body_text) + print("\n****** Machine Generated Title (Prediction) ******:\n {}".format(gen_title)) - if self.nn: - # return neighbors and distances - n, d = self.nn.get_nns_by_vector(emb.flatten(), n=4, - include_distances=True) - neighbors = n[1:] - dist = d[1:] + if self.nn: + # return neighbors and distances + n, d = self.nn.get_nns_by_vector(emb.flatten(), n=4, + include_distances=True) + neighbors = n[1:] + dist = d[1:] - if min(dist) <= threshold: - cols = ['issue_url', 'issue_title', 'body'] - dfcopy = self.rec_df.iloc[neighbors][cols].copy(deep=True) - dfcopy['dist'] = dist - similar_issues_df = dfcopy.query(f'dist <= {threshold}') + if min(dist) <= threshold: + cols = ['issue_url', 'issue_title', 'body'] + dfcopy = self.rec_df.iloc[neighbors][cols].copy(deep=True) + dfcopy['dist'] = dist + similar_issues_df = dfcopy.query('dist <= {}'.format(threshold)) - print("\n**** Similar Issues (using encoder embedding) ****:\n") - display(similar_issues_df) + print("\n**** Similar Issues (using encoder embedding) ****:\n") + display(similar_issues_df) - def demo_model_predictions(self, - n, - issue_df, - threshold=1): - """ - Pick n random Issues and display predictions. 
+ def demo_model_predictions(self, + n, + issue_df, + threshold=1): + """ + Pick n random Issues and display predictions. - Input: - ------ - n : int - Number of issues to display from issue_df - issue_df : pandas DataFrame - DataFrame that contains two columns: `body` and `issue_title`. - threshold : float - distance threshold for recommendation of similar issues. + Input: + ------ + n : int + Number of issues to display from issue_df + issue_df : pandas DataFrame + DataFrame that contains two columns: `body` and `issue_title`. + threshold : float + distance threshold for recommendation of similar issues. - Returns: - -------- - None - Prints the original issue body and the model's prediction. - """ - # Extract body and title from DF - body_text = issue_df.body.tolist() - title_text = issue_df.issue_title.tolist() - url = issue_df.issue_url.tolist() + Returns: + -------- + None + Prints the original issue body and the model's prediction. + """ + # Extract body and title from DF + body_text = issue_df.body.tolist() + title_text = issue_df.issue_title.tolist() + url = issue_df.issue_url.tolist() - demo_list = np.random.randint(low=1, high=len(body_text), size=n) - for i in demo_list: - self.print_example(i, - body_text=body_text[i], - title_text=title_text[i], - url=url[i], - threshold=threshold) + demo_list = np.random.randint(low=1, high=len(body_text), size=n) + for i in demo_list: + self.print_example(i, + body_text=body_text[i], + title_text=title_text[i], + url=url[i], + threshold=threshold) - def prepare_recommender(self, vectorized_array, original_df): - """ - Use the annoy library to build recommender + def prepare_recommender(self, vectorized_array, original_df): + """ + Use the annoy library to build recommender - Parameters - ---------- - vectorized_array : List[List[int]] - This is the list of list of integers that represents your corpus - that is fed into the seq2seq model for training. - original_df : pandas.DataFrame - This is the original dataframe that has the columns - ['issue_url', 'issue_title', 'body'] + Parameters + ---------- + vectorized_array : List[List[int]] + This is the list of list of integers that represents your corpus + that is fed into the seq2seq model for training. 
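+      (for example, the padded integer array produced by body_pp.fit_transform
+      during preprocessing; name used for illustration only)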
+ original_df : pandas.DataFrame + This is the original dataframe that has the columns + ['issue_url', 'issue_title', 'body'] - Returns - ------- - annoy.AnnoyIndex object (see https://github.com/spotify/annoy) - """ - self.rec_df = original_df - emb = self.encoder_model.predict(x=vectorized_array, - batch_size=vectorized_array.shape[0]//200) + Returns + ------- + annoy.AnnoyIndex object (see https://github.com/spotify/annoy) + """ + self.rec_df = original_df + emb = self.encoder_model.predict(x=vectorized_array, + batch_size=vectorized_array.shape[0]//200) - f = emb.shape[1] - self.nn = AnnoyIndex(f) - logging.warning('Adding embeddings') - for i in tqdm(range(len(emb))): - self.nn.add_item(i, emb[i]) - logging.warning('Building trees for similarity lookup.') - self.nn.build(50) - return self.nn + f = emb.shape[1] + self.nn = AnnoyIndex(f) + logging.warning('Adding embeddings') + for i in tqdm(range(len(emb))): + self.nn.add_item(i, emb[i]) + logging.warning('Building trees for similarity lookup.') + self.nn.build(50) + return self.nn - def set_recsys_data(self, original_df): - self.rec_df = original_df + def set_recsys_data(self, original_df): + self.rec_df = original_df - def set_recsys_annoyobj(self, annoyobj): - self.nn = annoyobj + def set_recsys_annoyobj(self, annoyobj): + self.nn = annoyobj - def evaluate_model(self, holdout_bodies, holdout_titles): - """ - Method for calculating BLEU Score. + def evaluate_model(self, holdout_bodies, holdout_titles): + """ + Method for calculating BLEU Score. - Parameters - ---------- - holdout_bodies : List[str] - These are the issue bodies that we want to summarize - holdout_titles : List[str] - This is the ground truth we are trying to predict --> issue titles + Parameters + ---------- + holdout_bodies : List[str] + These are the issue bodies that we want to summarize + holdout_titles : List[str] + This is the ground truth we are trying to predict --> issue titles - Returns - ------- - bleu : float - The BLEU Score + Returns + ------- + bleu : float + The BLEU Score - """ - actual, predicted = list(), list() - assert len(holdout_bodies) == len(holdout_titles) - num_examples = len(holdout_bodies) + """ + actual, predicted = list(), list() + assert len(holdout_bodies) == len(holdout_titles) + num_examples = len(holdout_bodies) - logging.warning('Generating predictions.') - # step over the whole set TODO: parallelize this - for i in tqdm_notebook(range(num_examples)): - _, yhat = self.generate_issue_title(holdout_bodies[i]) + logging.warning('Generating predictions.') + # step over the whole set TODO: parallelize this + for i in tqdm_notebook(range(num_examples)): + _, yhat = self.generate_issue_title(holdout_bodies[i]) - actual.append(self.pp_title.process_text([holdout_titles[i]])[0]) - predicted.append(self.pp_title.process_text([yhat])[0]) - - # calculate BLEU score - logging.warning('Calculating BLEU.') - #must be careful with nltk api for corpus_bleu!, - # expects List[List[List[str]]] for ground truth, using List[List[str]] will give you - # erroneous results. - bleu = corpus_bleu([[a] for a in actual], predicted) - return bleu + actual.append(self.pp_title.process_text([holdout_titles[i]])[0]) + predicted.append(self.pp_title.process_text([yhat])[0]) + + # calculate BLEU score + logging.warning('Calculating BLEU.') + #must be careful with nltk api for corpus_bleu!, + # expects List[List[List[str]]] for ground truth, using List[List[str]] will give you + # erroneous results. 
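+    # Illustrative toy call (hypothetical tokens), one perfectly predicted title:
+    #   corpus_bleu([[['fix', 'the', 'broken', 'build']]],
+    #               [['fix', 'the', 'broken', 'build']])  # -> ~1.0
+    # Each reference is wrapped in its own list: one List[List[str]] per prediction.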
+ bleu = corpus_bleu([[a] for a in actual], predicted) + return bleu diff --git a/github_issue_summarization/workflow/workspace/src/prediction.py b/github_issue_summarization/workflow/workspace/src/prediction.py index 5539f6c0..3f11813c 100644 --- a/github_issue_summarization/workflow/workspace/src/prediction.py +++ b/github_issue_summarization/workflow/workspace/src/prediction.py @@ -1,8 +1,6 @@ import argparse import keras import pandas as pd -from seq2seq_utils import load_decoder_inputs -from seq2seq_utils import load_encoder_inputs from seq2seq_utils import load_text_processor from seq2seq_utils import Seq2Seq_Inference @@ -29,5 +27,5 @@ seq2seq_inf = Seq2Seq_Inference(encoder_preprocessor=body_pp, decoder_preprocessor=title_pp, seq2seq_model=seq2seq_Model) -# Output predictions for n random rows in the test set. +# Output predictions for n random rows in the test set. seq2seq_inf.demo_model_predictions(n=args.input_prediction_count, issue_df=testdf) diff --git a/github_issue_summarization/workflow/workspace/src/preprocess_data_for_deep_learning.py b/github_issue_summarization/workflow/workspace/src/preprocess_data_for_deep_learning.py index 125717e0..4928d4a6 100644 --- a/github_issue_summarization/workflow/workspace/src/preprocess_data_for_deep_learning.py +++ b/github_issue_summarization/workflow/workspace/src/preprocess_data_for_deep_learning.py @@ -1,7 +1,7 @@ import argparse import dill as dpickle -from ktext.preprocess import processor import numpy as np +from ktext.preprocess import processor import pandas as pd # Parsing flags. @@ -30,7 +30,7 @@ print('Example body after pre-processing:', train_body_vecs[0]) # Instantiate a text processor for the titles, with some different parameters. title_pp = processor(append_indicators=True, keep_n=4500, - padding_maxlen=12, padding ='post') + padding_maxlen=12, padding='post') # process the title data train_title_vecs = title_pp.fit_transform(train_title_raw) @@ -40,10 +40,10 @@ print('Example title after pre-processing:', train_title_vecs[0]) # Save the preprocessor. with open(args.output_body_preprocessor_dpkl, 'wb') as f: - dpickle.dump(body_pp, f) + dpickle.dump(body_pp, f) with open(args.output_title_preprocessor_dpkl, 'wb') as f: - dpickle.dump(title_pp, f) + dpickle.dump(title_pp, f) # Save the processed data. np.save(args.output_train_title_vecs_npy, train_title_vecs) diff --git a/github_issue_summarization/workflow/workspace/src/process_data.py b/github_issue_summarization/workflow/workspace/src/process_data.py index d6b27cf4..6b258a1f 100644 --- a/github_issue_summarization/workflow/workspace/src/process_data.py +++ b/github_issue_summarization/workflow/workspace/src/process_data.py @@ -1,6 +1,4 @@ import argparse -import glob -import logging import pandas as pd from sklearn.model_selection import train_test_split @@ -20,8 +18,8 @@ traindf, testdf = train_test_split(pd.read_csv(args.input_csv).sample(n=args.sam test_size=.10) # Print stats about the shape of the data. -print(f'Train: {traindf.shape[0]:,} rows {traindf.shape[1]:,} columns') -print(f'Test: {testdf.shape[0]:,} rows {testdf.shape[1]:,} columns') +print('Train: {:,} rows {:,} columns'.format(traindf.shape[0], traindf.shape[1])) +print('Test: {:,} rows {:,} columns'.format(testdf.shape[0], testdf.shape[1])) # Store output as CSV. 
traindf.to_csv(args.output_traindf_csv) diff --git a/github_issue_summarization/workflow/workspace/src/recommend.py b/github_issue_summarization/workflow/workspace/src/recommend.py index f755bb4f..03de81ae 100644 --- a/github_issue_summarization/workflow/workspace/src/recommend.py +++ b/github_issue_summarization/workflow/workspace/src/recommend.py @@ -1,8 +1,6 @@ import argparse import keras import pandas as pd -from seq2seq_utils import load_decoder_inputs -from seq2seq_utils import load_encoder_inputs from seq2seq_utils import load_text_processor from seq2seq_utils import Seq2Seq_Inference diff --git a/github_issue_summarization/workflow/workspace/src/seq2seq_utils.py b/github_issue_summarization/workflow/workspace/src/seq2seq_utils.py deleted file mode 100644 index c278dfdb..00000000 --- a/github_issue_summarization/workflow/workspace/src/seq2seq_utils.py +++ /dev/null @@ -1,393 +0,0 @@ -from matplotlib import pyplot as plt -import tensorflow as tf -from keras import backend as K -from keras.layers import Input -from keras.models import Model -from IPython.display import SVG, display -from keras.utils.vis_utils import model_to_dot -import logging -import numpy as np -import dill as dpickle -from annoy import AnnoyIndex -from tqdm import tqdm, tqdm_notebook -from random import random -from nltk.translate.bleu_score import corpus_bleu - -def load_text_processor(fname='title_pp.dpkl'): - """ - Load preprocessors from disk. - Parameters - ---------- - fname: str - file name of ktext.proccessor object - Returns - ------- - num_tokens : int - size of vocabulary loaded into ktext.processor - pp : ktext.processor - the processor you are trying to load - Typical Usage: - ------------- - num_decoder_tokens, title_pp = load_text_processor(fname='title_pp.dpkl') - num_encoder_tokens, body_pp = load_text_processor(fname='body_pp.dpkl') - """ - # Load files from disk - with open(fname, 'rb') as f: - pp = dpickle.load(f) - - num_tokens = max(pp.id2token.keys()) + 1 - print(f'Size of vocabulary for {fname}: {num_tokens:,}') - return num_tokens, pp - - -def load_decoder_inputs(decoder_np_vecs='train_title_vecs.npy'): - """ - Load decoder inputs. - Parameters - ---------- - decoder_np_vecs : str - filename of serialized numpy.array of decoder input (issue title) - Returns - ------- - decoder_input_data : numpy.array - The data fed to the decoder as input during training for teacher forcing. - This is the same as `decoder_np_vecs` except the last position. - decoder_target_data : numpy.array - The data that the decoder data is trained to generate (issue title). - Calculated by sliding `decoder_np_vecs` one position forward. - """ - vectorized_title = np.load(decoder_np_vecs) - # For Decoder Input, you don't need the last word as that is only for prediction - # when we are training using Teacher Forcing. - decoder_input_data = vectorized_title[:, :-1] - - # Decoder Target Data Is Ahead By 1 Time Step From Decoder Input Data (Teacher Forcing) - decoder_target_data = vectorized_title[:, 1:] - - print(f'Shape of decoder input: {decoder_input_data.shape}') - print(f'Shape of decoder target: {decoder_target_data.shape}') - return decoder_input_data, decoder_target_data - - -def load_encoder_inputs(encoder_np_vecs='train_body_vecs.npy'): - """ - Load variables & data that are inputs to encoder. 
- Parameters - ---------- - encoder_np_vecs : str - filename of serialized numpy.array of encoder input (issue title) - Returns - ------- - encoder_input_data : numpy.array - The issue body - doc_length : int - The standard document length of the input for the encoder after padding - the shape of this array will be (num_examples, doc_length) - """ - vectorized_body = np.load(encoder_np_vecs) - # Encoder input is simply the body of the issue text - encoder_input_data = vectorized_body - doc_length = encoder_input_data.shape[1] - print(f'Shape of encoder input: {encoder_input_data.shape}') - return encoder_input_data, doc_length - - -def viz_model_architecture(model): - """Visualize model architecture in Jupyter notebook.""" - display(SVG(model_to_dot(model).create(prog='dot', format='svg'))) - - -def free_gpu_mem(): - """Attempt to free gpu memory.""" - K.get_session().close() - cfg = K.tf.ConfigProto() - cfg.gpu_options.allow_growth = True - K.set_session(K.tf.Session(config=cfg)) - - -def test_gpu(): - """Run a toy computation task in tensorflow to test GPU.""" - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - session = tf.Session(config=config) - hello = tf.constant('Hello, TensorFlow!') - print(session.run(hello)) - - -def plot_model_training_history(history_object): - """Plots model train vs. validation loss.""" - plt.title('model accuracy') - plt.ylabel('accuracy') - plt.xlabel('epoch') - plt.plot(history_object.history['loss']) - plt.plot(history_object.history['val_loss']) - plt.legend(['train', 'test'], loc='upper left') - plt.show() - - -def extract_encoder_model(model): - """ - Extract the encoder from the original Sequence to Sequence Model. - Returns a keras model object that has one input (body of issue) and one - output (encoding of issue, which is the last hidden state). - Input: - ----- - model: keras model object - Returns: - ----- - keras model object - """ - encoder_model = model.get_layer('Encoder-Model') - return encoder_model - - -def extract_decoder_model(model): - """ - Extract the decoder from the original model. - Inputs: - ------ - model: keras model object - Returns: - ------- - A Keras model object with the following inputs and outputs: - Inputs of Keras Model That Is Returned: - 1: the embedding index for the last predicted word or the indicator - 2: the last hidden state, or in the case of the first word the hidden state from the encoder - Outputs of Keras Model That Is Returned: - 1. Prediction (class probabilities) for the next word - 2. The hidden state of the decoder, to be fed back into the decoder at the next time step - Implementation Notes: - ---------------------- - Must extract relevant layers and reconstruct part of the computation graph - to allow for different inputs as we are not going to use teacher forcing at - inference time. 
- """ - # the latent dimension is the same throughout the architecture so we are going to - # cheat and grab the latent dimension of the embedding because that is the same as what is - # output from the decoder - latent_dim = model.get_layer('Decoder-Word-Embedding').output_shape[-1] - - # Reconstruct the input into the decoder - decoder_inputs = model.get_layer('Decoder-Input').input - dec_emb = model.get_layer('Decoder-Word-Embedding')(decoder_inputs) - dec_bn = model.get_layer('Decoder-Batchnorm-1')(dec_emb) - - # Instead of setting the intial state from the encoder and forgetting about it, during inference - # we are not doing teacher forcing, so we will have to have a feedback loop from predictions back into - # the GRU, thus we define this input layer for the state so we can add this capability - gru_inference_state_input = Input(shape=(latent_dim,), name='hidden_state_input') - - # we need to reuse the weights that is why we are getting this - # If you inspect the decoder GRU that we created for training, it will take as input - # 2 tensors -> (1) is the embedding layer output for the teacher forcing - # (which will now be the last step's prediction, and will be _start_ on the first time step) - # (2) is the state, which we will initialize with the encoder on the first time step, but then - # grab the state after the first prediction and feed that back in again. - gru_out, gru_state_out = model.get_layer('Decoder-GRU')([dec_bn, gru_inference_state_input]) - - # Reconstruct dense layers - dec_bn2 = model.get_layer('Decoder-Batchnorm-2')(gru_out) - dense_out = model.get_layer('Final-Output-Dense')(dec_bn2) - decoder_model = Model([decoder_inputs, gru_inference_state_input], - [dense_out, gru_state_out]) - return decoder_model - - -class Seq2Seq_Inference(object): - def __init__(self, - encoder_preprocessor, - decoder_preprocessor, - seq2seq_model): - - self.pp_body = encoder_preprocessor - self.pp_title = decoder_preprocessor - self.seq2seq_model = seq2seq_model - self.encoder_model = extract_encoder_model(seq2seq_model) - self.decoder_model = extract_decoder_model(seq2seq_model) - self.default_max_len_title = self.pp_title.padding_maxlen - self.nn = None - self.rec_df = None - - def generate_issue_title(self, - raw_input_text, - max_len_title=None): - """ - Use the seq2seq model to generate a title given the body of an issue. - Inputs - ------ - raw_input: str - The body of the issue text as an input string - max_len_title: int (optional) - The maximum length of the title the model will generate - """ - if max_len_title is None: - max_len_title = self.default_max_len_title - # get the encoder's features for the decoder - raw_tokenized = self.pp_body.transform([raw_input_text]) - body_encoding = self.encoder_model.predict(raw_tokenized) - # we want to save the encoder's embedding before its updated by decoder - # because we can use that as an embedding for other tasks. 
-class Seq2Seq_Inference(object):
-    """Performs inference with a trained sequence to sequence model."""
-    def __init__(self,
-                 encoder_preprocessor,
-                 decoder_preprocessor,
-                 seq2seq_model):
-
-        self.pp_body = encoder_preprocessor
-        self.pp_title = decoder_preprocessor
-        self.seq2seq_model = seq2seq_model
-        self.encoder_model = extract_encoder_model(seq2seq_model)
-        self.decoder_model = extract_decoder_model(seq2seq_model)
-        self.default_max_len_title = self.pp_title.padding_maxlen
-        self.nn = None
-        self.rec_df = None
-
-    def generate_issue_title(self,
-                             raw_input_text,
-                             max_len_title=None):
-        """
-        Use the seq2seq model to generate a title given the body of an issue.
-        Inputs
-        ------
-        raw_input_text: str
-            The body of the issue text as an input string
-        max_len_title: int (optional)
-            The maximum length of the title the model will generate
-        """
-        if max_len_title is None:
-            max_len_title = self.default_max_len_title
-        # Get the encoder's features for the decoder.
-        raw_tokenized = self.pp_body.transform([raw_input_text])
-        body_encoding = self.encoder_model.predict(raw_tokenized)
-        # Save the encoder's embedding before it is updated by the decoder,
-        # because we can reuse it as an embedding for other tasks.
-        original_body_encoding = body_encoding
-        state_value = np.array(self.pp_title.token2id['_start_']).reshape(1, 1)
-
-        decoded_sentence = []
-        stop_condition = False
-        while not stop_condition:
-            preds, st = self.decoder_model.predict([state_value, body_encoding])
-
-            # Ignore index 0 (padding) and index 1 (unknown). Because argmax is
-            # taken over the array sliced from index 2, we add 2 back to
-            # recover the original token index.
-            pred_idx = np.argmax(preds[:, :, 2:]) + 2
-
-            # Retrieve the word for the predicted index.
-            pred_word_str = self.pp_title.id2token[pred_idx]
-
-            if pred_word_str == '_end_' or len(decoded_sentence) >= max_len_title:
-                stop_condition = True
-                break
-            decoded_sentence.append(pred_word_str)
-
-            # Update the decoder inputs for the next word.
-            body_encoding = st
-            state_value = np.array(pred_idx).reshape(1, 1)
-
-        return original_body_encoding, ' '.join(decoded_sentence)
-
-    def print_example(self,
-                      i,
-                      body_text,
-                      title_text,
-                      url,
-                      threshold):
-        """Print an example of the model's prediction for manual inspection."""
-        if i:
-            print('\n\n==============================================')
-            print(f'============== Example # {i} =================\n')
-
-        if url:
-            print(url)
-
-        print(f"Issue Body:\n {body_text} \n")
-
-        if title_text:
-            print(f"Original Title:\n {title_text}")
-
-        emb, gen_title = self.generate_issue_title(body_text)
-        print(f"\n****** Machine Generated Title (Prediction) ******:\n {gen_title}")
-
-        if self.nn:
-            # Return nearest neighbors and distances; the first hit is the
-            # query issue itself, so it is dropped.
-            n, d = self.nn.get_nns_by_vector(emb.flatten(), n=4,
-                                             include_distances=True)
-            neighbors = n[1:]
-            dist = d[1:]
-
-            if min(dist) <= threshold:
-                cols = ['issue_url', 'issue_title', 'body']
-                dfcopy = self.rec_df.iloc[neighbors][cols].copy(deep=True)
-                dfcopy['dist'] = dist
-                similar_issues_df = dfcopy.query(f'dist <= {threshold}')
-
-                print("\n**** Similar Issues (using encoder embedding) ****:\n")
-                display(similar_issues_df)
-
-    def demo_model_predictions(self,
-                               n,
-                               issue_df,
-                               threshold=1):
-        """
-        Pick n random issues and display predictions.
-        Input:
-        ------
-        n : int
-            Number of issues to display from issue_df
-        issue_df : pandas DataFrame
-            DataFrame that contains the columns `body`, `issue_title` and
-            `issue_url`.
-        threshold : float
-            Distance threshold for recommendation of similar issues.
-        Returns:
-        --------
-        None
-            Prints the original issue body and the model's prediction.
-        """
-        # Extract the body, title and url from the DataFrame.
-        body_text = issue_df.body.tolist()
-        title_text = issue_df.issue_title.tolist()
-        url = issue_df.issue_url.tolist()
-
-        demo_list = np.random.randint(low=1, high=len(body_text), size=n)
-        for i in demo_list:
-            self.print_example(i,
-                               body_text=body_text[i],
-                               title_text=title_text[i],
-                               url=url[i],
-                               threshold=threshold)
-
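# --- Editor's note: a hedged sketch (not part of the original diff) of wiring
# up the inference class. `body_pp` and `title_pp` are hypothetical fitted text
# processors (e.g. ktext objects loaded with dill), and `seq2seq_Model` is the
# trained Keras model produced by train.py.
seq2seq_inf = Seq2Seq_Inference(encoder_preprocessor=body_pp,
                                decoder_preprocessor=title_pp,
                                seq2seq_model=seq2seq_Model)
_, generated_title = seq2seq_inf.generate_issue_title(
    'the app crashes with a stack trace whenever I click the save button')
print(generated_title)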
-    def prepare_recommender(self, vectorized_array, original_df):
-        """
-        Use the annoy library to build a recommender.
-        Parameters
-        ----------
-        vectorized_array : numpy.ndarray
-            The vectorized corpus, shape (num_examples, doc_length), that is
-            fed into the seq2seq model for training.
-        original_df : pandas.DataFrame
-            The original DataFrame that has the columns
-            ['issue_url', 'issue_title', 'body'].
-        Returns
-        -------
-        annoy.AnnoyIndex object (see https://github.com/spotify/annoy)
-        """
-        self.rec_df = original_df
-        # Predict in roughly 200 batches.
-        emb = self.encoder_model.predict(x=vectorized_array,
-                                         batch_size=vectorized_array.shape[0]//200)
-
-        f = emb.shape[1]
-        self.nn = AnnoyIndex(f)
-        logging.warning('Adding embeddings')
-        for i in tqdm(range(len(emb))):
-            self.nn.add_item(i, emb[i])
-        logging.warning('Building trees for similarity lookup.')
-        self.nn.build(50)
-        return self.nn
-
-    def set_recsys_data(self, original_df):
-        self.rec_df = original_df
-
-    def set_recsys_annoyobj(self, annoyobj):
-        self.nn = annoyobj
-
-    def evaluate_model(self, holdout_bodies, holdout_titles):
-        """
-        Calculate the BLEU score on a holdout set.
-        Parameters
-        ----------
-        holdout_bodies : List[str]
-            The issue bodies that we want to summarize.
-        holdout_titles : List[str]
-            The ground truth we are trying to predict (issue titles).
-        Returns
-        -------
-        bleu : float
-            The BLEU score.
-        """
-        actual, predicted = list(), list()
-        assert len(holdout_bodies) == len(holdout_titles)
-        num_examples = len(holdout_bodies)
-
-        logging.warning('Generating predictions.')
-        # Step over the whole set. TODO: parallelize this.
-        for i in tqdm_notebook(range(num_examples)):
-            _, yhat = self.generate_issue_title(holdout_bodies[i])
-
-            # corpus_bleu expects a list of reference token-lists for each
-            # hypothesis, so the single reference is wrapped in a list.
-            actual.append([self.pp_title.process_text([holdout_titles[i]])[0]])
-            predicted.append(self.pp_title.process_text([yhat])[0])
-        # Calculate the BLEU score.
-        logging.warning('Calculating BLEU.')
-        bleu = corpus_bleu(actual, predicted)
-        return bleu
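# --- Editor's note: a hedged sketch (not part of the original diff) of building
# the recommender and scoring a holdout set. `train_body_vecs` (a numpy array),
# `train_df`, `holdout_bodies` and `holdout_titles` are hypothetical and match
# the signatures above.
seq2seq_inf.prepare_recommender(train_body_vecs, train_df)
seq2seq_inf.demo_model_predictions(n=5, issue_df=train_df, threshold=1)
bleu = seq2seq_inf.evaluate_model(holdout_bodies, holdout_titles)
print(f'Corpus BLEU: {bleu:.4f}')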
diff --git a/github_issue_summarization/workflow/workspace/src/train.py b/github_issue_summarization/workflow/workspace/src/train.py
index 0969019a..87dc89ca 100644
--- a/github_issue_summarization/workflow/workspace/src/train.py
+++ b/github_issue_summarization/workflow/workspace/src/train.py
@@ -1,11 +1,10 @@
 import argparse
+import numpy as np
 from keras.callbacks import CSVLogger, ModelCheckpoint
-from keras.layers import Input, LSTM, GRU, Dense, Embedding, Bidirectional, BatchNormalization
+from keras.layers import Input, GRU, Dense, Embedding, BatchNormalization
 from keras.models import Model
 from keras import optimizers
-import numpy as np
 from seq2seq_utils import load_decoder_inputs, load_encoder_inputs, load_text_processor
-from seq2seq_utils import viz_model_architecture
 
 # Parsing flags.
 parser = argparse.ArgumentParser()
@@ -18,7 +17,7 @@ parser.add_argument("--learning_rate", default="0.001")
 args = parser.parse_args()
 print(args)
 
-learning_rate=float(args.learning_rate)
+learning_rate = float(args.learning_rate)
 
 encoder_input_data, doc_length = load_encoder_inputs(args.input_train_body_vecs_npy)
 decoder_input_data, decoder_target_data = load_decoder_inputs(args.input_train_title_vecs_npy)
@@ -35,7 +34,10 @@ latent_dim = 300
 encoder_inputs = Input(shape=(doc_length,), name='Encoder-Input')
 
 # Word embedding for encoder (ex: Issue Body)
-x = Embedding(num_encoder_tokens, latent_dim, name='Body-Word-Embedding', mask_zero=False)(encoder_inputs)
+x = Embedding(num_encoder_tokens,
+              latent_dim,
+              name='Body-Word-Embedding',
+              mask_zero=False)(encoder_inputs)
 x = BatchNormalization(name='Encoder-Batchnorm-1')(x)
 
 # We do not need the `encoder_output`, just the hidden state.
@@ -53,7 +55,10 @@ seq2seq_encoder_out = encoder_model(encoder_inputs)
 decoder_inputs = Input(shape=(None,), name='Decoder-Input')  # for teacher forcing
 
 # Word Embedding For Decoder (ex: Issue Titles)
-dec_emb = Embedding(num_decoder_tokens, latent_dim, name='Decoder-Word-Embedding', mask_zero=False)(decoder_inputs)
+dec_emb = Embedding(num_decoder_tokens,
+                    latent_dim,
+                    name='Decoder-Word-Embedding',
+                    mask_zero=False)(decoder_inputs)
 dec_bn = BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)
 
 # Set up the decoder, using `decoder_state_input` as initial state.
@@ -71,21 +76,24 @@ decoder_outputs = decoder_dense(x)
 
 seq2seq_Model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
 
-seq2seq_Model.compile(optimizer=optimizers.Nadam(lr=learning_rate), loss='sparse_categorical_crossentropy')
+seq2seq_Model.compile(optimizer=optimizers.Nadam(lr=learning_rate),
+                      loss='sparse_categorical_crossentropy')
 
 seq2seq_Model.summary()
 
 script_name_base = 'tutorial_seq2seq'
 csv_logger = CSVLogger('{:}.log'.format(script_name_base))
 
-model_checkpoint = ModelCheckpoint('{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base),
-                                   save_best_only=True)
+model_checkpoint = ModelCheckpoint(
+    '{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base), save_best_only=True)
 
 batch_size = 1200
 epochs = 7
-history = seq2seq_Model.fit([encoder_input_data, decoder_input_data], np.expand_dims(decoder_target_data, -1),
-                            batch_size=batch_size,
-                            epochs=epochs,
-                            validation_split=0.12, callbacks=[csv_logger, model_checkpoint])
+history = seq2seq_Model.fit([encoder_input_data, decoder_input_data],
+                            np.expand_dims(decoder_target_data, -1),
+                            batch_size=batch_size,
+                            epochs=epochs,
+                            validation_split=0.12,
+                            callbacks=[csv_logger, model_checkpoint])
 
 #############
 # Save model.
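# --- Editor's note: the hunk above ends where the "Save model" section begins.
# A plausible continuation (hypothetical; the actual file may differ) is simply:
seq2seq_Model.save('seq2seq_model_tutorial.h5')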