[e2e] Support dump mode (#6850)

FEATURE
PERF
* [e2e] Support dump mode

Dump mode enhances model debug mode in three ways:
1. Support whole-model dump at different dumpLevel values: 0, dump diffs larger
than the default epsilon. 1, dump any diffs. 2, dump all.
2. Support per-op dump.
3. Support dumpLength: -1 means dump all.

Dump works in two steps:
1. Dump tensors into files according to dumpLevel. These dump files start with
"dumpmodel_".
2. When tensor diffs are spotted, each op related to a differing tensor is
re-run on the predict backend with the reference tensors as its inputs, and
the results are dumped into files. These dump files start with "dumpops_".
A sketch of a dump file's layout follows.
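Roughly, each dump file is a JSON object keyed by node name, and every entry
records the tensor data read back from one backend (a minimal sketch of the
layout; the node name and values are made up, and the exact serialization of
the typed arrays may differ):

{
  "model/conv1/BiasAdd": [
    {"value": [0.12, -0.034], "shape": [1, 2], "dtype": "float32"}
  ]
}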

Example URL parameters to turn on dump mode:
KEEP_INTERMEDIATE_TENSORS=true&dumpLevel=1&dumpLength=-1
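As a rough sketch, the benchmark page reads these parameters from the query
string along the lines of the following (URLSearchParams-based; the real
harness goes through its own urlState helper, so treat names and defaults as
illustrative):

// Parse dump-mode options from the page URL.
const params = new URLSearchParams(location.search);
const keepTensors = params.get('KEEP_INTERMEDIATE_TENSORS') === 'true';
// 0: only diffs above the default epsilon, 1: any diff.
const dumpLevel = params.has('dumpLevel') ? Number(params.get('dumpLevel')) : 0;
// How many differing tensors to dump; -1 means no limit.
const dumpLength = params.has('dumpLength') ? Number(params.get('dumpLength')) : 1;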

Bug = https://github.com/tensorflow/tfjs/issues/6860

* Clean model config

* Clean

* Dump ops when diff occurs

* Fix comments

* Refine compare and nit

* Fix comments

* Clean unused var

* Fix comments

* Nit

* Fix comments

Co-authored-by: Ping Yu <4018+pyu10055@users.noreply.github.com>
Xu Xing 2022-10-19 03:55:54 +08:00 committed by GitHub
parent 99f6da4a1e
commit e94d083898
5 changed files with 279 additions and 41 deletions

View File

@ -0,0 +1,228 @@
/**
* @license
* Copyright 2022 Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
/**
* DUMP_LEVEL.BIGDIFF: dumping when difference is greater than the default
* epsilon. DUMP_LEVEL.ANYDIFF: dumping when difference is greater than 0.
*/
const DUMP_LEVEL = {
BIGDIFF: 0,
ANYDIFF: 1,
};
function compareData(data1, data2, level = DUMP_LEVEL.BIGDIFF) {
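// ANYDIFF compares with epsilon 0; -1 makes expectObjectsClose fall back to
// the default test epsilon.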
let epsilon = level == DUMP_LEVEL.ANYDIFF ? 0 : -1;
let match = true;
try {
expectObjectsClose(data1, data2, epsilon);
} catch (e) {
match = false;
}
return match;
}
function getGraphModel(model) {
if (model instanceof tf.GraphModel) {
return model;
} else if (model.model instanceof tf.GraphModel) {
return model.model;
} else if (
model.baseModel && model.baseModel.model instanceof tf.GraphModel) {
return model.baseModel.model;
} else {
console.warn(`Model doesn't support dump!`);
return null;
}
}
async function getIntermediateTensorInfo(tensorsMap) {
if (!tensorsMap) {
return;
}
const jsonObject = {};
const keysOfTensors = Object.keys(tensorsMap);
for (let i = 0; i < keysOfTensors.length; i++) {
const key = keysOfTensors[i];
jsonObject[key] = [];
for (let j = 0; j < tensorsMap[key].length; j++) {
if (tensorsMap[key][j] == null) {
continue;
}
// For universal-sentence-encoder, its inputs are disposed by the model.
try {
const data = await (tensorsMap[key][j]).data();
jsonObject[key].push({
value: data,
shape: tensorsMap[key][j].shape,
dtype: tensorsMap[key][j].dtype
});
} catch (e) {
console.error(`${keysOfTensors[i]} ` + e.message);
}
}
}
return jsonObject;
}
async function saveObjectsToFile(jsonObjects, prefix) {
let newPrefix = '';
if (prefix !== '') {
newPrefix = `${prefix.replace(/\//g, '-')}_`;
}
const backends = Object.keys(jsonObjects);
if (Object.keys(jsonObjects[backends[0]]).length == 0) {
return;
}
for (let i = 0; i < backends.length; i++) {
const object = jsonObjects[backends[i]];
const fileName = `${newPrefix}${backends[i]}.json`;
const a = document.createElement('a');
const file = new Blob([JSON.stringify(object)], {type: 'application/json'});
a.href = URL.createObjectURL(file);
a.download = fileName;
a.click();
// This log informs tools that the file has been saved.
console.log(fileName);
}
}
/**
* Create a NamedTensorMap from an output node name.
* @param outputNodeName Output node name.
* @param modelJson The parsed model.json.
* @param dumpedJson The dumped tensor information (including shape, dtype,
* value).
*
* @returns A NamedTensorMap.
*/
async function createNamedTensorMap(outputNodeName, modelJson, dumpedJson) {
const modelNodes = modelJson['modelTopology']['node'];
let inputs = [];
for (let i = 0; i < modelNodes.length; i++) {
if (outputNodeName === modelNodes[i].name && modelNodes[i].input) {
inputs = modelNodes[i].input;
break;
}
}
// In
// https://storage.googleapis.com/tfhub-tfjs-modules/mediapipe/tfjs-model/face_landmarks_detection/attention_mesh/1/model.json,
// some inputs are prefixed with '^'.
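// Inputs prefixed with '^' are control dependencies and carry no tensor data.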
if (!inputs || inputs.length == 0 || inputs[0].startsWith('^')) {
return null;
}
let tensorMap = {};
for (let i = 0; i < inputs.length; i++) {
const key = inputs[i].split(':')[0];
if (dumpedJson[key] == null || dumpedJson[key][0] == null) {
console.warn('Tensor ' + key + ' is null!');
return null;
}
const tensorInfo = dumpedJson[key][0];
const tensor = tf.tensor(
Object.values(tensorInfo.value), tensorInfo.shape, tensorInfo.dtype);
tensorMap[key] = tensor;
}
return tensorMap;
}
async function predictOp(
model, modelJson, dumpedJson, outputNodeName, backend) {
await tf.setBackend(backend);
const tensorMap =
await createNamedTensorMap(outputNodeName, modelJson, dumpedJson);
if (tensorMap == null) {
return null;
}
let prediction;
let savedKeepIntermediateTensors;
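// Temporarily turn off intermediate-tensor collection for this single-op
// re-run; the saved flag is restored after prediction.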
try {
savedKeepIntermediateTensors =
tf.env().getBool('KEEP_INTERMEDIATE_TENSORS');
tf.env().set('KEEP_INTERMEDIATE_TENSORS', false);
} catch (e) {
console.warn(e.message);
}
try {
// TODO(#6861): Support tensor with type conversion.
prediction = await model.executeAsync(tensorMap, outputNodeName);
} catch (e) {
tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
console.warn(e.message);
return null;
}
const predictOpObject = await getPredictionData(prediction, true);
tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
return predictOpObject;
}
/**
* Dump the predict results of two backends and save diffs to files.
* @param model The loaded model.
* @param input The actual and expected results from different backends.
* @param prefix Used for generating dump file name.
* @param level 0, dump big diffs. 1, dump any diffs.
* @param length Controls how many tensors will be dumped. -1 dumps all.
*/
async function dump(
model, input, prefix = '', level = DUMP_LEVEL.BIGDIFF, length = 1) {
const graphModel = getGraphModel(model);
if (graphModel == null || length == 0) {
return;
}
const backends = Object.keys(input);
const actualObject = input[backends[0]];
const expectedObject = input[backends[1]];
const dumpActualObject = {};
const dumpExpectedObject = {};
const keys = Object.keys(actualObject);
prefix = `dump_${prefix}_${level}`;
let dumpCount = 0;
const modelJson = graphModel.artifacts;
for (let i = 0; i < keys.length; i++) {
const key = keys[i];
if (compareData(actualObject[key], expectedObject[key], level)) {
continue;
}
const predictOpObject = await predictOp(
graphModel, modelJson, expectedObject, key, backends[0]);
const [actualOpObject, expectedOpObject] = predictOpObject ?
[{...predictOpObject, i}, {...expectedObject[key], i}] :
[null, null];
if (compareData(actualOpObject, expectedOpObject, level)) {
continue;
}
if (actualOpObject && expectedOpObject) {
dumpActualObject[key] = actualOpObject;
dumpExpectedObject[key] = expectedOpObject;
dumpCount++;
}
// Break when the diff count equals dumpLength to avoid downloading a large file.
if (length != -1 && dumpCount == length) {
break;
}
}
const dumpData =
{[backends[0]]: dumpActualObject, [backends[1]]: dumpExpectedObject};
await saveObjectsToFile(dumpData, prefix);
if (dumpCount) {
console.log(`Total dumped ${dumpCount} item(s).`);
}
}

View File

@ -171,27 +171,18 @@ limitations under the License.
}
}
async function printTensors(tensorsMap) {
if (!tensorsMap) {
return;
}
const keysOfTensors = Object.keys(tensorsMap);
for (let i = 0; i < keysOfTensors.length; i++) {
console.warn(keysOfTensors[i]);
for (let j = 0; j < tensorsMap[keysOfTensors[i]].length; j++) {
console.warn(await (tensorsMap[keysOfTensors[i]][j]).data());
async function predictAndGetData(predict, model, inferenceInput, enableDump) {
const prediction = await predict(model, inferenceInput);
let intermediateData = {};
if (enableDump) {
const graphModel = getGraphModel(model);
if (graphModel) {
intermediateData = await getIntermediateTensorInfo(graphModel.getIntermediateTensors());
graphModel.disposeIntermediateTensors();
}
}
}
async function predictAndGetPredictionData(predict, model, inferenceInput, debug) {
const prediction = await predict(model, inferenceInput);
if (debug) {
await printTensors(model.getIntermediateTensors());
model.disposeIntermediateTensors();
}
const predictionData = await getPredictionData(prediction);
return predictionData;
return {data: predictionData, intermediateData};
}
const state = {
@ -218,19 +209,20 @@ limitations under the License.
await showGpuInfo();
}
let match, predictionData, referenceData;
let match, actualData, expectedData;
await cleanUpTable();
// load model and run inference
try {
tf.setBackend('cpu');
const expectedBackend = 'cpu';
tf.setBackend(expectedBackend);
await loadModelAndRecordTime();
await showMsg('Testing correctness');
await showInputs();
await showCorrectnessTestParameters();
let inferenceInput;
await showMsg('Running on cpu');
await showMsg(`Running on ${expectedBackend}`);
if (state.benchmark === 'custom') {
inferenceInput = generateInputFromDef(
state.inputs, model instanceof tf.GraphModel);
@ -243,8 +235,9 @@ limitations under the License.
console.warn(e.message);
}
const debug = keepIntermediateTensors & (benchmarks[state.benchmark].supportDebug !== false);
referenceData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
const enableDump = keepIntermediateTensors & (benchmarks[state.benchmark].supportDump !== false);
const expectedResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
expectedData = expectedResult['data'];
await tf.setBackend(state.backend);
await showMsg(`Running on ${state.backend}`);
@ -257,8 +250,18 @@ limitations under the License.
tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', true);
}
predictionData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
const actualResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
actualData = actualResult['data'];
if (enableDump) {
const actualIntermediateObject = actualResult['intermediateData'];
const expectedIntermediateObject = expectedResult['intermediateData'];
const dumpLevel = urlState.has('dumpLevel') ? Number(urlState.get('dumpLevel')) : 0;
const dumpLength = urlState.has('dumpLength') ? Number(urlState.get('dumpLength')) : 1;
const dumpPrefix = state.benchmark + '_'+ state.architecture + '_' + state.inputType + '_' + state.inputSize;
const dumpInput = {[state.backend] : actualIntermediateObject, [expectedBackend] : expectedIntermediateObject};
await dump(model, dumpInput, dumpPrefix, dumpLevel, dumpLength);
}
if (state.backend === 'webgl' || state.backend === 'webgpu') {
tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', savedWillReadFrequently);
}
@ -270,7 +273,7 @@ limitations under the License.
// compare results
try {
await showMsg(null);
expectObjectsClose(predictionData, referenceData);
expectObjectsClose(actualData, expectedData);
match = true;
} catch (e) {
match = false;

View File

@ -68,6 +68,7 @@ async function loadTFJS(localBuild) {
'../benchmark_util.js',
'./util.js',
'./index.js',
'./dump.js',
]);
for (let url of urls) {

View File

@ -15,29 +15,35 @@
* =============================================================================
*/
async function convertTensorToData(tensor) {
async function convertTensorToData(tensor, needInfo = false) {
const data = await tensor.data();
tensor.dispose();
if (needInfo) {
return {value: data, shape: tensor.shape, dtype: tensor.dtype};
}
return data;
}
async function getPredictionData(output) {
async function getPredictionData(output, needInfo = false) {
if (output instanceof Promise) {
output = await output;
}
if (output instanceof tf.Tensor) {
output = await convertTensorToData(output);
output = [await convertTensorToData(output, needInfo)];
} else if (Array.isArray(output)) {
for (let i = 0; i < output.length; i++) {
if (output[i] instanceof tf.Tensor) {
output[i] = await convertTensorToData(output[i]);
output[i] = await convertTensorToData(output[i], needInfo);
}
}
} else if (output != null && typeof output === 'object') {
for (const property in output) {
if (output[property] instanceof tf.Tensor) {
output[property] = await convertTensorToData(output[property]);
output[property] =
await convertTensorToData(output[property], needInfo);
}
}
}
@ -117,8 +123,8 @@ function expectObjectsPredicate(actual, expected, epsilon, predicate) {
return true;
}
function expectObjectsClose(actual, expected, epsilon) {
if (epsilon == null) {
function expectObjectsClose(actual, expected, epsilon = -1) {
if (epsilon === -1) {
epsilon = tf.test_util.testEpsilon();
}
expectObjectsPredicate(
@ -159,7 +165,7 @@ function expectArraysPredicateFuzzy(actual, expected, predicate, errorRate) {
// TODO: support relative comparison for array.
function expectArraysClose(actual, expected, epsilon, key) {
if (epsilon == null) {
if (epsilon === -1) {
epsilon = tf.test_util.testEpsilon();
}

View File

@ -231,7 +231,7 @@ const benchmarks = {
'Coco-SSD': {
type: 'GraphModel',
// The model has dynamic ops, so it is supposed to use executeAsync.
supportDebug: false,
supportDump: false,
architectures: ['MobileNetV2', 'MobileNetV1', 'liteMobileNetV2'],
load: async (inputResolution = 227, modelArchitecture = 'MobileNetV2') => {
const tfliteBased = modelArchitecture.split('MobileNetV')[0];
@ -327,7 +327,7 @@ const benchmarks = {
},
'AutoML Image': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
load: async () => {
const url =
'https://storage.googleapis.com/tfjs-testing/tfjs-automl/img_classification/model.json';
@ -340,7 +340,7 @@ const benchmarks = {
},
'AutoML Object': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
load: async () => {
const url =
'https://storage.googleapis.com/tfjs-testing/tfjs-automl/object_detection/model.json';
@ -355,7 +355,7 @@ const benchmarks = {
},
'USE - batchsize 30': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
load: async () => {
return use.load();
},
@ -369,7 +369,7 @@ const benchmarks = {
},
'USE - batchsize 1': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
load: async () => {
return use.load();
},
@ -384,7 +384,7 @@ const benchmarks = {
'TextToxicity': {
type: 'GraphModel',
// The model has dynamic ops, so it is supposed to use executeAsync.
supportDebug: false,
supportDump: false,
load: async () => {
const url =
'https://storage.googleapis.com/tfhub-tfjs-modules/tensorflow/tfjs-model/toxicity/1/default/1/model.json';
@ -425,7 +425,7 @@ const benchmarks = {
inputSizes: [128, 256, 512, 1024],
architectures: ['MobileNetV1', 'ResNet50'],
inputTypes: ['image', 'tensor'],
supportDebug: false,
supportDump: false,
load: async (
inputResolution = 128, modelArchitecture = 'MobileNetV1',
inputType = 'image') => {
@ -461,7 +461,7 @@ const benchmarks = {
},
'bodypix': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
// The ratio to the default camera size [480, 640].
inputSizes: [0.25, 0.5, 0.75, 1.0],
architectures: ['ResNet50'],