From e94d0838987597e69199ec24e00a8ed9ef07b0ce Mon Sep 17 00:00:00 2001
From: Xu Xing
Date: Wed, 19 Oct 2022 03:55:54 +0800
Subject: [PATCH] [e2e] Support dump mode (#6850)

FEATURE
PERF

* [e2e] Support dump mode

Dump mode enhances model debug mode in three ways:
1. Support whole-model dump at different dumpLevel values: 0, dump big
   diffs; 1, dump any diffs; 2, dump all.
2. Support ops dump.
3. Support dumpLength: -1 means dump all differing tensors.

Dump works in two steps:
1. Dump tensors into files according to dumpLevel. These dump files start
   with "dumpmodel_".
2. When tensor diffs are spotted, for each op related to a differing
   tensor: use the reference results as inputs, run the op again on the
   predict backend, then dump all the results into files. These dump files
   start with "dumpops_".

Example URL parameters to turn on dump mode:
KEEP_INTERMEDIATE_TENSORS=true&dumpLevel=1&dumpLength=-1

Bug = https://github.com/tensorflow/tfjs/issues/6860

* Clean model config
* Clean
* Dump ops when diff occurs
* Fix comments
* Refine compare and nit
* Fix comments
* Clean unused var
* Fix comments
* Nit
* Fix comments

Co-authored-by: Ping Yu <4018+pyu10055@users.noreply.github.com>
---
 e2e/benchmarks/local-benchmark/dump.js    | 228 ++++++++++++++++++++++
 e2e/benchmarks/local-benchmark/index.html |  53 ++---
 e2e/benchmarks/local-benchmark/loader.js  |   1 +
 e2e/benchmarks/local-benchmark/util.js    |  22 ++-
 e2e/benchmarks/model_config.js            |  16 +-
 5 files changed, 279 insertions(+), 41 deletions(-)
 create mode 100644 e2e/benchmarks/local-benchmark/dump.js
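Example invocation (the host, port and serving path below are assumptions;
they depend on how the local benchmark page is served):

  http://localhost:8080/e2e/benchmarks/local-benchmark/index.html?KEEP_INTERMEDIATE_TENSORS=true&dumpLevel=1&dumpLength=-1

With dumpLevel=1, every intermediate tensor that differs at all between the
two backends is dumped; dumpLength=-1 keeps dumping until all differing
tensors are written. Each dumped file is a JSON object keyed by node name,
with {value, shape, dtype} entries per tensor, and its name is built from
the dump prefix, dump level and backend (see saveObjectsToFile below).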
diff --git a/e2e/benchmarks/local-benchmark/dump.js b/e2e/benchmarks/local-benchmark/dump.js
new file mode 100644
index 000000000..eb6dfb17c
--- /dev/null
+++ b/e2e/benchmarks/local-benchmark/dump.js
@@ -0,0 +1,228 @@
+/**
+ * @license
+ * Copyright 2022 Google LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+/**
+ * DUMP_LEVEL.BIGDIFF: dumping when the difference is greater than the default
+ * epsilon. DUMP_LEVEL.ANYDIFF: dumping when the difference is greater than 0.
+ */
+const DUMP_LEVEL = {
+  BIGDIFF: 0,
+  ANYDIFF: 1,
+};
+
+function compareData(data1, data2, level = DUMP_LEVEL.BIGDIFF) {
+  let epsilon = level == DUMP_LEVEL.ANYDIFF ? 0 : -1;
+  let match = true;
+  try {
+    expectObjectsClose(data1, data2, epsilon);
+  } catch (e) {
+    match = false;
+  }
+  return match;
+}
+
+function getGraphModel(model) {
+  if (model instanceof tf.GraphModel) {
+    return model;
+  } else if (model.model instanceof tf.GraphModel) {
+    return model.model;
+  } else if (
+      model.baseModel && model.baseModel.model instanceof tf.GraphModel) {
+    return model.baseModel.model;
+  } else {
+    console.warn(`Model doesn't support dump!`);
+    return null;
+  }
+}
+
+async function getIntermediateTensorInfo(tensorsMap) {
+  if (!tensorsMap) {
+    return;
+  }
+  const jsonObject = {};
+  const keysOfTensors = Object.keys(tensorsMap);
+  for (let i = 0; i < keysOfTensors.length; i++) {
+    const key = keysOfTensors[i];
+    jsonObject[key] = [];
+    for (let j = 0; j < tensorsMap[key].length; j++) {
+      if (tensorsMap[key][j] == null) {
+        continue;
+      }
+      // For universal-sentence-encoder, its inputs are disposed by the model.
+      try {
+        const data = await (tensorsMap[key][j]).data();
+        jsonObject[key].push({
+          value: data,
+          shape: tensorsMap[key][j].shape,
+          dtype: tensorsMap[key][j].dtype
+        });
+      } catch (e) {
+        console.error(`${keysOfTensors[i]} ` + e.message);
+      }
+    }
+  }
+  return jsonObject;
+}
+
+async function saveObjectsToFile(jsonObjects, prefix) {
+  let newPrefix = '';
+  if (prefix !== '') {
+    newPrefix = `${prefix.replace(/\//g, '-')}_`;
+  }
+  const backends = Object.keys(jsonObjects);
+  if (Object.keys(jsonObjects[backends[0]]).length == 0) {
+    return;
+  }
+  for (let i = 0; i < backends.length; i++) {
+    const object = jsonObjects[backends[i]];
+    const fileName = `${newPrefix}${backends[i]}.json`;
+    const a = document.createElement('a');
+    const file = new Blob([JSON.stringify(object)], {type: 'application/json'});
+    a.href = URL.createObjectURL(file);
+    a.download = fileName;
+    a.click();
+    // This log informs tools that the file has been saved.
+    console.log(fileName);
+  }
+}
+
+/**
+ * Create a NamedTensorMap from an output node name.
+ * @param outputNodeName Output node name.
+ * @param modelJson The parsed model.json.
+ * @param dumpedJson The dumped tensor information (including shape, dtype,
+ *     value).
+ *
+ * @returns A NamedTensorMap.
+ */
+async function createNamedTensorMap(outputNodeName, modelJson, dumpedJson) {
+  const modelNodes = modelJson['modelTopology']['node'];
+  let inputs = [];
+  for (let i = 0; i < modelNodes.length; i++) {
+    if (outputNodeName === modelNodes[i].name && modelNodes[i].input) {
+      inputs = modelNodes[i].input;
+      break;
+    }
+  }
+  // In
+  // https://storage.googleapis.com/tfhub-tfjs-modules/mediapipe/tfjs-model/face_landmarks_detection/attention_mesh/1/model.json,
+  // some inputs are prefixed with '^'.
+  if (!inputs || inputs.length == 0 || inputs[0].startsWith('^')) {
+    return null;
+  }
+
+  let tensorMap = {};
+  for (let i = 0; i < inputs.length; i++) {
+    const key = inputs[i].split(':')[0];
+    if (dumpedJson[key] == null || dumpedJson[key][0] == null) {
+      console.warn('Tensor ' + key + ' is null!');
+      return null;
+    }
+    const tensorInfo = dumpedJson[key][0];
+    const tensor = tf.tensor(
+        Object.values(tensorInfo.value), tensorInfo.shape, tensorInfo.dtype);
+    tensorMap[key] = tensor;
+  }
+
+  return tensorMap;
+}
+
+async function predictOp(
+    model, modelJson, dumpedJson, outputNodeName, backend) {
+  await tf.setBackend(backend);
+  const tensorMap =
+      await createNamedTensorMap(outputNodeName, modelJson, dumpedJson);
+  if (tensorMap == null) {
+    return null;
+  }
+  let prediction;
+  let savedKeepIntermediateTensors;
+  try {
+    savedKeepIntermediateTensors =
+        tf.env().getBool('KEEP_INTERMEDIATE_TENSORS');
+    tf.env().set('KEEP_INTERMEDIATE_TENSORS', false);
+  } catch (e) {
+    console.warn(e.message);
+  }
+  try {
+    // TODO(#6861): Support tensor with type conversion.
+    prediction = await model.executeAsync(tensorMap, outputNodeName);
+  } catch (e) {
+    tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
+    console.warn(e.message);
+    return null;
+  }
+
+  const predictOpObject = await getPredictionData(prediction, true);
+  tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
+  return predictOpObject;
+}
+
+/**
+ * Dump the predict results of two backends and save diffs to files.
+ * @param model The loaded model.
+ * @param input The actual and expected results from different backends.
+ * @param prefix Used for generating the dump file name.
+ * @param level 0, dump big diffs. 1, dump any diffs.
+ * @param length Used for controlling how many tensors will be dumped. -1
+ *     dumps all.
+ */
+async function dump(
+    model, input, prefix = '', level = DUMP_LEVEL.BIGDIFF, length = 1) {
+  const graphModel = getGraphModel(model);
+  if (graphModel == null || length == 0) {
+    return;
+  }
+  const backends = Object.keys(input);
+  const actualObject = input[backends[0]];
+  const expectedObject = input[backends[1]];
+  const dumpActualObject = {};
+  const dumpExpectedObject = {};
+  const keys = Object.keys(actualObject);
+  prefix = `dump_${prefix}_${level}`;
+  let dumpCount = 0;
+  const modelJson = graphModel.artifacts;
+  for (let i = 0; i < keys.length; i++) {
+    const key = keys[i];
+    if (compareData(actualObject[key], expectedObject[key], level)) {
+      continue;
+    }
+    const predictOpObject = await predictOp(
+        graphModel, modelJson, expectedObject, key, backends[0]);
+    const [actualOpObject, expectedOpObject] = predictOpObject ?
+        [{...predictOpObject, i}, {...expectedObject[key], i}] :
+        [null, null];
+    if (compareData(actualOpObject, expectedOpObject, level)) {
+      continue;
+    }
+    if (actualOpObject && expectedOpObject) {
+      dumpActualObject[key] = actualOpObject;
+      dumpExpectedObject[key] = expectedOpObject;
+      dumpCount++;
+    }
+    // Break when the diff count equals dumpLength to avoid downloading a
+    // large file.
+    if (length != -1 && dumpCount == length) {
+      break;
+    }
+  }
+  const dumpData =
+      {[backends[0]]: dumpActualObject, [backends[1]]: dumpExpectedObject};
+  await saveObjectsToFile(dumpData, prefix);
+  if (dumpCount) {
+    console.log(`Dumped ${dumpCount} item(s) in total.`);
+  }
+}
diff --git a/e2e/benchmarks/local-benchmark/index.html b/e2e/benchmarks/local-benchmark/index.html
index 2c4d7dfa6..10d6b71dc 100644
--- a/e2e/benchmarks/local-benchmark/index.html
+++ b/e2e/benchmarks/local-benchmark/index.html
@@ -171,27 +171,18 @@ limitations under the License.
       }
     }
 
-    async function printTensors(tensorsMap) {
-      if (!tensorsMap) {
-        return;
-      }
-      const keysOfTensors = Object.keys(tensorsMap);
-      for (let i = 0; i < keysOfTensors.length; i++) {
-        console.warn(keysOfTensors[i]);
-        for (let j = 0; j < tensorsMap[keysOfTensors[i]].length; j++) {
-          console.warn(await (tensorsMap[keysOfTensors[i]][j]).data());
+    async function predictAndGetData(predict, model, inferenceInput, enableDump) {
+      const prediction = await predict(model, inferenceInput);
+      let intermediateData = {};
+      if (enableDump) {
+        const graphModel = getGraphModel(model);
+        if (graphModel) {
+          intermediateData = await getIntermediateTensorInfo(graphModel.getIntermediateTensors());
+          graphModel.disposeIntermediateTensors();
         }
       }
-    }
-
-    async function predictAndGetPredictionData(predict, model, inferenceInput, debug) {
-      const prediction = await predict(model, inferenceInput);
-      if (debug) {
-        await printTensors(model.getIntermediateTensors());
-        model.disposeIntermediateTensors();
-      }
       const predictionData = await getPredictionData(prediction);
-      return predictionData;
+      return {data: predictionData, intermediateData};
     }
 
     const state = {
@@ -218,19 +209,20 @@ limitations under the License.
         await showGpuInfo();
       }
 
-      let match, predictionData, referenceData;
+      let match, actualData, expectedData;
       await cleanUpTable();
 
      // load model and run inference
      try {
-        tf.setBackend('cpu');
+        const expectedBackend = 'cpu';
+        tf.setBackend(expectedBackend);
        await loadModelAndRecordTime();
        await showMsg('Testing correctness');
        await showInputs();
        await showCorrectnessTestParameters();
 
        let inferenceInput;
-        await showMsg('Runing on cpu');
+        await showMsg(`Running on ${expectedBackend}`);
        if (state.benchmark === 'custom') {
          inferenceInput = generateInputFromDef(
              state.inputs, model instanceof tf.GraphModel);
@@ -243,8 +235,9 @@ limitations under the License.
          console.warn(e.message);
        }
 
-        const debug = keepIntermediateTensors & (benchmarks[state.benchmark].supportDebug !== false);
-        referenceData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
+        const enableDump = keepIntermediateTensors && (benchmarks[state.benchmark].supportDump !== false);
+        const expectedResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
+        expectedData = expectedResult['data'];
 
        await tf.setBackend(state.backend);
        await showMsg(`Runing on ${state.backend}`);
@@ -257,8 +250,18 @@ limitations under the License.
          tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', true);
        }
 
-        predictionData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
+        const actualResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
+        actualData = actualResult['data'];
 
+        if (enableDump) {
+          const actualIntermediateObject = actualResult['intermediateData'];
+          const expectedIntermediateObject = expectedResult['intermediateData'];
+          const dumpLevel = urlState.has('dumpLevel') ? Number(urlState.get('dumpLevel')) : 0;
+          const dumpLength = urlState.has('dumpLength') ? Number(urlState.get('dumpLength')) : 1;
+          const dumpPrefix = state.benchmark + '_' + state.architecture + '_' + state.inputType + '_' + state.inputSize;
+          const dumpInput = {[state.backend]: actualIntermediateObject, [expectedBackend]: expectedIntermediateObject};
+          await dump(model, dumpInput, dumpPrefix, dumpLevel, dumpLength);
+        }
        if (state.backend === 'webgl' || state.backend === 'webgpu') {
          tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', savedWillReadFrequently);
        }
@@ -270,7 +273,7 @@ limitations under the License.
      // compare results
      try {
        await showMsg(null);
-        expectObjectsClose(predictionData, referenceData);
+        expectObjectsClose(actualData, expectedData);
        match = true;
      } catch (e) {
        match = false;
diff --git a/e2e/benchmarks/local-benchmark/loader.js b/e2e/benchmarks/local-benchmark/loader.js
index 6697d38e9..e2e132bd5 100644
--- a/e2e/benchmarks/local-benchmark/loader.js
+++ b/e2e/benchmarks/local-benchmark/loader.js
@@ -68,6 +68,7 @@ async function loadTFJS(localBuild) {
     '../benchmark_util.js',
     './util.js',
     './index.js',
+    './dump.js',
   ]);
 
   for (let url of urls) {
diff --git a/e2e/benchmarks/local-benchmark/util.js b/e2e/benchmarks/local-benchmark/util.js
index f4ff6b39a..b6eae0e67 100644
--- a/e2e/benchmarks/local-benchmark/util.js
+++ b/e2e/benchmarks/local-benchmark/util.js
@@ -15,29 +15,35 @@
  * =============================================================================
  */
 
-async function convertTensorToData(tensor) {
+
+async function convertTensorToData(tensor, needInfo = false) {
   const data = await tensor.data();
+  tensor.dispose();
+  if (needInfo) {
+    return {value: data, shape: tensor.shape, dtype: tensor.dtype};
+  }
   return data;
 }
 
-async function getPredictionData(output) {
+async function getPredictionData(output, needInfo = false) {
   if (output instanceof Promise) {
     output = await output;
   }
   if (output instanceof tf.Tensor) {
-    output = await convertTensorToData(output);
+    output = [await convertTensorToData(output, needInfo)];
   } else if (Array.isArray(output)) {
     for (let i = 0; i < output.length; i++) {
       if (output[i] instanceof tf.Tensor) {
-        output[i] = await convertTensorToData(output[i]);
+        output[i] = await convertTensorToData(output[i], needInfo);
       }
     }
   } else if (output != null && typeof output === 'object') {
     for (const property in output) {
       if (output[property] instanceof tf.Tensor) {
-        output[property] = await convertTensorToData(output[property]);
+        output[property] =
+            await convertTensorToData(output[property], needInfo);
       }
     }
   }
@@ -117,8 +123,8 @@ function expectObjectsPredicate(actual, expected, epsilon, predicate) {
   return true;
 }
 
-function expectObjectsClose(actual, expected, epsilon) {
-  if (epsilon == null) {
+function expectObjectsClose(actual, expected, epsilon = -1) {
+  if (epsilon === -1) {
     epsilon = tf.test_util.testEpsilon();
   }
   expectObjectsPredicate(
@@ -159,7 +165,7 @@ function expectArraysPredicateFuzzy(actual, expected, predicate, errorRate) {
 
 // TODO: support relative comparison for array.
 function expectArraysClose(actual, expected, epsilon, key) {
-  if (epsilon == null) {
+  if (epsilon === -1) {
     epsilon = tf.test_util.testEpsilon();
   }
 
diff --git a/e2e/benchmarks/model_config.js b/e2e/benchmarks/model_config.js
index 06b3d22b8..7853b3437 100644
--- a/e2e/benchmarks/model_config.js
+++ b/e2e/benchmarks/model_config.js
@@ -231,7 +231,7 @@ const benchmarks = {
   'Coco-SSD': {
     type: 'GraphModel',
     // The model has has the dynamic ops, so it is supposed to use executeAsync.
-    supportDebug: false,
+    supportDump: false,
     architectures: ['MobileNetV2', 'MobileNetV1', 'liteMobileNetV2'],
     load: async (inputResolution = 227, modelArchitecture = 'MobileNetV2') => {
       const tfliteBased = modelArchitecture.split('MobileNetV')[0];
@@ -327,7 +327,7 @@ const benchmarks = {
   },
   'AutoML Image': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
           'https://storage.googleapis.com/tfjs-testing/tfjs-automl/img_classification/model.json';
@@ -340,7 +340,7 @@ const benchmarks = {
   },
   'AutoML Object': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
          'https://storage.googleapis.com/tfjs-testing/tfjs-automl/object_detection/model.json';
@@ -355,7 +355,7 @@ const benchmarks = {
   },
   'USE - batchsize 30': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       return use.load();
     },
@@ -369,7 +369,7 @@ const benchmarks = {
   },
   'USE - batchsize 1': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       return use.load();
     },
@@ -384,7 +384,7 @@ const benchmarks = {
   'TextToxicity': {
     type: 'GraphModel',
     // The model has has the dynamic ops, so it is supposed to use executeAsync.
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
          'https://storage.googleapis.com/tfhub-tfjs-modules/tensorflow/tfjs-model/toxicity/1/default/1/model.json';
@@ -425,7 +425,7 @@ const benchmarks = {
     inputSizes: [128, 256, 512, 1024],
     architectures: ['MobileNetV1', 'ResNet50'],
     inputTypes: ['image', 'tensor'],
-    supportDebug: false,
+    supportDump: false,
     load: async (
         inputResolution = 128, modelArchitecture = 'MobileNetV1',
         inputType = 'image') => {
@@ -461,7 +461,7 @@ const benchmarks = {
   },
   'bodypix': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     // The ratio to the default camera size [480, 640].
     inputSizes: [0.25, 0.5, 0.75, 1.0],
     architectures: ['ResNet50'],