From e94d0838987597e69199ec24e00a8ed9ef07b0ce Mon Sep 17 00:00:00 2001
From: Xu Xing
Date: Wed, 19 Oct 2022 03:55:54 +0800
Subject: [PATCH] [e2e] Support dump mode (#6850)

FEATURE
PERF

* [e2e] Support dump mode

Dump mode enhances model debug mode in three ways:
1. Support whole-model dump at different dumpLevel values: 0, dump big
   diffs; 1, dump any diffs; 2, dump all.
2. Support ops dump.
3. Support dumpLength: -1 means dump all differing tensors.

Dump works in two steps:
1. Dump tensors into files according to dumpLevel. These dump files start
   with "dumpmodel_".
2. When tensor diffs are spotted, for each op related to a differing
   tensor: use the reference results as inputs, run the op again on the
   predict backend, then dump all the results into files. These dump files
   start with "dumpops_".

Example URL parameters to turn on dump mode:
KEEP_INTERMEDIATE_TENSORS=true&dumpLevel=1&dumpLength=-1

Bug = https://github.com/tensorflow/tfjs/issues/6860

* Clean model config
* Clean
* Dump ops when diff occurs
* Fix comments
* Refine compare and nit
* Fix comments
* Clean unused var
* Fix comments
* Nit
* Fix comments

Co-authored-by: Ping Yu <4018+pyu10055@users.noreply.github.com>
---
 e2e/benchmarks/local-benchmark/dump.js    | 228 ++++++++++++++++++++++
 e2e/benchmarks/local-benchmark/index.html |  53 ++---
 e2e/benchmarks/local-benchmark/loader.js  |   1 +
 e2e/benchmarks/local-benchmark/util.js    |  22 ++-
 e2e/benchmarks/model_config.js            |  16 +-
 5 files changed, 279 insertions(+), 41 deletions(-)
 create mode 100644 e2e/benchmarks/local-benchmark/dump.js
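Example invocation (the host, port and serving path below are assumptions;
they depend on how the local benchmark page is served):

  http://localhost:8080/e2e/benchmarks/local-benchmark/index.html?KEEP_INTERMEDIATE_TENSORS=true&dumpLevel=1&dumpLength=-1

With dumpLevel=1, every intermediate tensor that differs at all between the
two backends is dumped; dumpLength=-1 keeps dumping until all differing
tensors are written. Each dumped file is a JSON object keyed by node name,
with {value, shape, dtype} entries per tensor, and its name is built from
the dump prefix, dump level and backend (see saveObjectsToFile below).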
diff --git a/e2e/benchmarks/local-benchmark/dump.js b/e2e/benchmarks/local-benchmark/dump.js
new file mode 100644
index 000000000..eb6dfb17c
--- /dev/null
+++ b/e2e/benchmarks/local-benchmark/dump.js
@@ -0,0 +1,228 @@
+/**
+ * @license
+ * Copyright 2022 Google LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+/**
+ * DUMP_LEVEL.BIGDIFF: dumping when the difference is greater than the default
+ * epsilon. DUMP_LEVEL.ANYDIFF: dumping when the difference is greater than 0.
+ */
+const DUMP_LEVEL = {
+  BIGDIFF: 0,
+  ANYDIFF: 1,
+};
+
+function compareData(data1, data2, level = DUMP_LEVEL.BIGDIFF) {
+  let epsilon = level == DUMP_LEVEL.ANYDIFF ? 0 : -1;
+  let match = true;
+  try {
+    expectObjectsClose(data1, data2, epsilon);
+  } catch (e) {
+    match = false;
+  }
+  return match;
+}
+
+function getGraphModel(model) {
+  if (model instanceof tf.GraphModel) {
+    return model;
+  } else if (model.model instanceof tf.GraphModel) {
+    return model.model;
+  } else if (
+      model.baseModel && model.baseModel.model instanceof tf.GraphModel) {
+    return model.baseModel.model;
+  } else {
+    console.warn(`Model doesn't support dump!`);
+    return null;
+  }
+}
+
+async function getIntermediateTensorInfo(tensorsMap) {
+  if (!tensorsMap) {
+    return;
+  }
+  const jsonObject = {};
+  const keysOfTensors = Object.keys(tensorsMap);
+  for (let i = 0; i < keysOfTensors.length; i++) {
+    const key = keysOfTensors[i];
+    jsonObject[key] = [];
+    for (let j = 0; j < tensorsMap[key].length; j++) {
+      if (tensorsMap[key][j] == null) {
+        continue;
+      }
+      // For universal-sentence-encoder, its inputs are disposed by the model.
+      try {
+        const data = await (tensorsMap[key][j]).data();
+        jsonObject[key].push({
+          value: data,
+          shape: tensorsMap[key][j].shape,
+          dtype: tensorsMap[key][j].dtype
+        });
+      } catch (e) {
+        console.error(`${keysOfTensors[i]} ` + e.message);
+      }
+    }
+  }
+  return jsonObject;
+}
+
+async function saveObjectsToFile(jsonObjects, prefix) {
+  let newPrefix = '';
+  if (prefix !== '') {
+    newPrefix = `${prefix.replace(/\//g, '-')}_`;
+  }
+  const backends = Object.keys(jsonObjects);
+  if (Object.keys(jsonObjects[backends[0]]).length == 0) {
+    return;
+  }
+  for (let i = 0; i < backends.length; i++) {
+    const object = jsonObjects[backends[i]];
+    const fileName = `${newPrefix}${backends[i]}.json`;
+    const a = document.createElement('a');
+    const file = new Blob([JSON.stringify(object)], {type: 'application/json'});
+    a.href = URL.createObjectURL(file);
+    a.download = fileName;
+    a.click();
+    // This log informs tools that the file has been saved.
+    console.log(fileName);
+  }
+}
+
+/**
+ * Create a NamedTensorMap from an output node name.
+ * @param outputNodeName Output node name.
+ * @param modelJson The parsed model.json.
+ * @param dumpedJson The dumped tensor information (including shape, dtype,
+ *     value).
+ *
+ * @returns A NamedTensorMap.
+ */
+async function createNamedTensorMap(outputNodeName, modelJson, dumpedJson) {
+  const modelNodes = modelJson['modelTopology']['node'];
+  let inputs = [];
+  for (let i = 0; i < modelNodes.length; i++) {
+    if (outputNodeName === modelNodes[i].name && modelNodes[i].input) {
+      inputs = modelNodes[i].input;
+      break;
+    }
+  }
+  // In
+  // https://storage.googleapis.com/tfhub-tfjs-modules/mediapipe/tfjs-model/face_landmarks_detection/attention_mesh/1/model.json,
+  // some inputs are prefixed with '^'.
+  if (!inputs || inputs.length == 0 || inputs[0].startsWith('^')) {
+    return null;
+  }
+
+  let tensorMap = {};
+  for (let i = 0; i < inputs.length; i++) {
+    const key = inputs[i].split(':')[0];
+    if (dumpedJson[key] == null || dumpedJson[key][0] == null) {
+      console.warn('Tensor ' + key + ' is null!');
+      return null;
+    }
+    const tensorInfo = dumpedJson[key][0];
+    const tensor = tf.tensor(
+        Object.values(tensorInfo.value), tensorInfo.shape, tensorInfo.dtype);
+    tensorMap[key] = tensor;
+  }
+
+  return tensorMap;
+}
+
+async function predictOp(
+    model, modelJson, dumpedJson, outputNodeName, backend) {
+  await tf.setBackend(backend);
+  const tensorMap =
+      await createNamedTensorMap(outputNodeName, modelJson, dumpedJson);
+  if (tensorMap == null) {
+    return null;
+  }
+  let prediction;
+  let savedKeepIntermediateTensors;
+  try {
+    savedKeepIntermediateTensors =
+        tf.env().getBool('KEEP_INTERMEDIATE_TENSORS');
+    tf.env().set('KEEP_INTERMEDIATE_TENSORS', false);
+  } catch (e) {
+    console.warn(e.message);
+  }
+  try {
+    // TODO(#6861): Support tensor with type conversion.
+    prediction = await model.executeAsync(tensorMap, outputNodeName);
+  } catch (e) {
+    tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
+    console.warn(e.message);
+    return null;
+  }
+
+  const predictOpObject = await getPredictionData(prediction, true);
+  tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
+  return predictOpObject;
+}
+
+/**
+ * Dump the predict results of two backends and save diffs to files.
+ * @param model The loaded model.
+ * @param input The actual and expected results from different backends.
+ * @param prefix Used for generating the dump file name.
+ * @param level 0, dump big diffs. 1, dump any diffs.
+ * @param length Used for controlling how many tensors will be dumped. -1
+ *     dumps all.
+ */
+async function dump(
+    model, input, prefix = '', level = DUMP_LEVEL.BIGDIFF, length = 1) {
+  const graphModel = getGraphModel(model);
+  if (graphModel == null || length == 0) {
+    return;
+  }
+  const backends = Object.keys(input);
+  const actualObject = input[backends[0]];
+  const expectedObject = input[backends[1]];
+  const dumpActualObject = {};
+  const dumpExpectedObject = {};
+  const keys = Object.keys(actualObject);
+  prefix = `dump_${prefix}_${level}`;
+  let dumpCount = 0;
+  const modelJson = graphModel.artifacts;
+  for (let i = 0; i < keys.length; i++) {
+    const key = keys[i];
+    if (compareData(actualObject[key], expectedObject[key], level)) {
+      continue;
+    }
+    const predictOpObject = await predictOp(
+        graphModel, modelJson, expectedObject, key, backends[0]);
+    const [actualOpObject, expectedOpObject] = predictOpObject ?
+        [{...predictOpObject, i}, {...expectedObject[key], i}] :
+        [null, null];
+    if (compareData(actualOpObject, expectedOpObject, level)) {
+      continue;
+    }
+    if (actualOpObject && expectedOpObject) {
+      dumpActualObject[key] = actualOpObject;
+      dumpExpectedObject[key] = expectedOpObject;
+      dumpCount++;
+    }
+    // Break when the diff count equals dumpLength to avoid downloading a
+    // large file.
+    if (length != -1 && dumpCount == length) {
+      break;
+    }
+  }
+  const dumpData =
+      {[backends[0]]: dumpActualObject, [backends[1]]: dumpExpectedObject};
+  await saveObjectsToFile(dumpData, prefix);
+  if (dumpCount) {
+    console.log(`Dumped ${dumpCount} item(s) in total.`);
+  }
+}
diff --git a/e2e/benchmarks/local-benchmark/index.html b/e2e/benchmarks/local-benchmark/index.html
index 2c4d7dfa6..10d6b71dc 100644
--- a/e2e/benchmarks/local-benchmark/index.html
+++ b/e2e/benchmarks/local-benchmark/index.html
@@ -171,27 +171,18 @@ limitations under the License.
       }
     }
 
-    async function printTensors(tensorsMap) {
-      if (!tensorsMap) {
-        return;
-      }
-      const keysOfTensors = Object.keys(tensorsMap);
-      for (let i = 0; i < keysOfTensors.length; i++) {
-        console.warn(keysOfTensors[i]);
-        for (let j = 0; j < tensorsMap[keysOfTensors[i]].length; j++) {
-          console.warn(await (tensorsMap[keysOfTensors[i]][j]).data());
+    async function predictAndGetData(predict, model, inferenceInput, enableDump) {
+      const prediction = await predict(model, inferenceInput);
+      let intermediateData = {};
+      if (enableDump) {
+        const graphModel = getGraphModel(model);
+        if (graphModel) {
+          intermediateData = await getIntermediateTensorInfo(graphModel.getIntermediateTensors());
+          graphModel.disposeIntermediateTensors();
         }
       }
-    }
-
-    async function predictAndGetPredictionData(predict, model, inferenceInput, debug) {
-      const prediction = await predict(model, inferenceInput);
-      if (debug) {
-        await printTensors(model.getIntermediateTensors());
-        model.disposeIntermediateTensors();
-      }
       const predictionData = await getPredictionData(prediction);
-      return predictionData;
+      return {data: predictionData, intermediateData};
     }
 
     const state = {
@@ -218,19 +209,20 @@ limitations under the License.
         await showGpuInfo();
       }
 
-      let match, predictionData, referenceData;
+      let match, actualData, expectedData;
       await cleanUpTable();
 
      // load model and run inference
      try {
-        tf.setBackend('cpu');
+        const expectedBackend = 'cpu';
+        tf.setBackend(expectedBackend);
        await loadModelAndRecordTime();
        await showMsg('Testing correctness');
        await showInputs();
        await showCorrectnessTestParameters();
 
        let inferenceInput;
-        await showMsg('Runing on cpu');
+        await showMsg(`Running on ${expectedBackend}`);
        if (state.benchmark === 'custom') {
          inferenceInput = generateInputFromDef(
              state.inputs, model instanceof tf.GraphModel);
@@ -243,8 +235,9 @@ limitations under the License.
          console.warn(e.message);
        }
 
-        const debug = keepIntermediateTensors & (benchmarks[state.benchmark].supportDebug !== false);
-        referenceData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
+        const enableDump = keepIntermediateTensors && (benchmarks[state.benchmark].supportDump !== false);
+        const expectedResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
+        expectedData = expectedResult['data'];
 
        await tf.setBackend(state.backend);
        await showMsg(`Runing on ${state.backend}`);
@@ -257,8 +250,18 @@ limitations under the License.
          tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', true);
        }
 
-        predictionData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
+        const actualResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
+        actualData = actualResult['data'];
 
+        if (enableDump) {
+          const actualIntermediateObject = actualResult['intermediateData'];
+          const expectedIntermediateObject = expectedResult['intermediateData'];
+          const dumpLevel = urlState.has('dumpLevel') ? Number(urlState.get('dumpLevel')) : 0;
+          const dumpLength = urlState.has('dumpLength') ? Number(urlState.get('dumpLength')) : 1;
+          const dumpPrefix = state.benchmark + '_' + state.architecture + '_' + state.inputType + '_' + state.inputSize;
+          const dumpInput = {[state.backend]: actualIntermediateObject, [expectedBackend]: expectedIntermediateObject};
+          await dump(model, dumpInput, dumpPrefix, dumpLevel, dumpLength);
+        }
        if (state.backend === 'webgl' || state.backend === 'webgpu') {
          tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', savedWillReadFrequently);
        }
@@ -270,7 +273,7 @@ limitations under the License.
      // compare results
      try {
        await showMsg(null);
-        expectObjectsClose(predictionData, referenceData);
+        expectObjectsClose(actualData, expectedData);
        match = true;
      } catch (e) {
        match = false;
diff --git a/e2e/benchmarks/local-benchmark/loader.js b/e2e/benchmarks/local-benchmark/loader.js
index 6697d38e9..e2e132bd5 100644
--- a/e2e/benchmarks/local-benchmark/loader.js
+++ b/e2e/benchmarks/local-benchmark/loader.js
@@ -68,6 +68,7 @@ async function loadTFJS(localBuild) {
     '../benchmark_util.js',
     './util.js',
     './index.js',
+    './dump.js',
   ]);
 
   for (let url of urls) {
diff --git a/e2e/benchmarks/local-benchmark/util.js b/e2e/benchmarks/local-benchmark/util.js
index f4ff6b39a..b6eae0e67 100644
--- a/e2e/benchmarks/local-benchmark/util.js
+++ b/e2e/benchmarks/local-benchmark/util.js
@@ -15,29 +15,35 @@
  * =============================================================================
  */
 
-async function convertTensorToData(tensor) {
+
+async function convertTensorToData(tensor, needInfo = false) {
   const data = await tensor.data();
+  tensor.dispose();
+  if (needInfo) {
+    return {value: data, shape: tensor.shape, dtype: tensor.dtype};
+  }
   return data;
 }
 
-async function getPredictionData(output) {
+async function getPredictionData(output, needInfo = false) {
   if (output instanceof Promise) {
     output = await output;
   }
   if (output instanceof tf.Tensor) {
-    output = await convertTensorToData(output);
+    output = [await convertTensorToData(output, needInfo)];
   } else if (Array.isArray(output)) {
     for (let i = 0; i < output.length; i++) {
       if (output[i] instanceof tf.Tensor) {
-        output[i] = await convertTensorToData(output[i]);
+        output[i] = await convertTensorToData(output[i], needInfo);
       }
     }
   } else if (output != null && typeof output === 'object') {
     for (const property in output) {
       if (output[property] instanceof tf.Tensor) {
-        output[property] = await convertTensorToData(output[property]);
+        output[property] =
+            await convertTensorToData(output[property], needInfo);
       }
     }
   }
@@ -117,8 +123,8 @@ function expectObjectsPredicate(actual, expected, epsilon, predicate) {
   return true;
 }
 
-function expectObjectsClose(actual, expected, epsilon) {
-  if (epsilon == null) {
+function expectObjectsClose(actual, expected, epsilon = -1) {
+  if (epsilon === -1) {
     epsilon = tf.test_util.testEpsilon();
   }
   expectObjectsPredicate(
@@ -159,7 +165,7 @@ function expectArraysPredicateFuzzy(actual, expected, predicate, errorRate) {
 
 // TODO: support relative comparison for array.
 function expectArraysClose(actual, expected, epsilon, key) {
-  if (epsilon == null) {
+  if (epsilon === -1) {
     epsilon = tf.test_util.testEpsilon();
   }
 
diff --git a/e2e/benchmarks/model_config.js b/e2e/benchmarks/model_config.js
index 06b3d22b8..7853b3437 100644
--- a/e2e/benchmarks/model_config.js
+++ b/e2e/benchmarks/model_config.js
@@ -231,7 +231,7 @@ const benchmarks = {
   'Coco-SSD': {
     type: 'GraphModel',
     // The model has has the dynamic ops, so it is supposed to use executeAsync.
-    supportDebug: false,
+    supportDump: false,
     architectures: ['MobileNetV2', 'MobileNetV1', 'liteMobileNetV2'],
     load: async (inputResolution = 227, modelArchitecture = 'MobileNetV2') => {
       const tfliteBased = modelArchitecture.split('MobileNetV')[0];
@@ -327,7 +327,7 @@ const benchmarks = {
   },
   'AutoML Image': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
           'https://storage.googleapis.com/tfjs-testing/tfjs-automl/img_classification/model.json';
@@ -340,7 +340,7 @@ const benchmarks = {
   },
   'AutoML Object': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
          'https://storage.googleapis.com/tfjs-testing/tfjs-automl/object_detection/model.json';
@@ -355,7 +355,7 @@ const benchmarks = {
   },
   'USE - batchsize 30': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       return use.load();
     },
@@ -369,7 +369,7 @@ const benchmarks = {
   },
   'USE - batchsize 1': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       return use.load();
     },
@@ -384,7 +384,7 @@ const benchmarks = {
   'TextToxicity': {
     type: 'GraphModel',
     // The model has has the dynamic ops, so it is supposed to use executeAsync.
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
          'https://storage.googleapis.com/tfhub-tfjs-modules/tensorflow/tfjs-model/toxicity/1/default/1/model.json';
@@ -425,7 +425,7 @@ const benchmarks = {
     inputSizes: [128, 256, 512, 1024],
     architectures: ['MobileNetV1', 'ResNet50'],
     inputTypes: ['image', 'tensor'],
-    supportDebug: false,
+    supportDump: false,
     load: async (
         inputResolution = 128, modelArchitecture = 'MobileNetV1',
         inputType = 'image') => {
@@ -461,7 +461,7 @@ const benchmarks = {
   },
   'bodypix': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     // The ratio to the default camera size [480, 640].
     inputSizes: [0.25, 0.5, 0.75, 1.0],
     architectures: ['ResNet50'],