[e2e] Support dump mode (#6850)

FEATURE
PERF
* [e2e] Support dump mode

Dump mode enhances model debug mode in three ways:
1. Support whole-model dump at different dumpLevel values: 0, dump diffs larger
than the default epsilon. 1, dump any diffs. 2, dump all.
2. Support per-op dump.
3. Support dumpLength: -1 means dump all.

Dump works in two steps:
1. Dump tensors into files according to dumpLevel. These dump files start with
"dumpmodel_".
2. When tensor diffs are spotted, each op related to a differing tensor is
re-run on the predict backend with the reference tensors as its inputs, and
the results are dumped into files. These dump files start with "dumpops_".
A sketch of a dump file's layout follows.
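Roughly, each dump file is a JSON object keyed by node name, and every entry
records the tensor data read back from one backend (a minimal sketch of the
layout; the node name and values are made up, and the exact serialization of
the typed arrays may differ):

{
  "model/conv1/BiasAdd": [
    {"value": [0.12, -0.034], "shape": [1, 2], "dtype": "float32"}
  ]
}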

Example URL parameters to turn on dump mode:
KEEP_INTERMEDIATE_TENSORS=true&dumpLevel=1&dumpLength=-1
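As a rough sketch, the benchmark page reads these parameters from the query
string along the lines of the following (URLSearchParams-based; the real
harness goes through its own urlState helper, so treat names and defaults as
illustrative):

// Parse dump-mode options from the page URL.
const params = new URLSearchParams(location.search);
const keepTensors = params.get('KEEP_INTERMEDIATE_TENSORS') === 'true';
// 0: only diffs above the default epsilon, 1: any diff.
const dumpLevel = params.has('dumpLevel') ? Number(params.get('dumpLevel')) : 0;
// How many differing tensors to dump; -1 means no limit.
const dumpLength = params.has('dumpLength') ? Number(params.get('dumpLength')) : 1;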

Bug = https://github.com/tensorflow/tfjs/issues/6860

* Clean model config

* Clean

* Dump ops when diff occurs

* Fix comments

* Refine compare and nit

* Fix comments

* Clean unused var

* Fix comments

* Nit

* Fix comments

Co-authored-by: Ping Yu <4018+pyu10055@users.noreply.github.com>
Xu Xing 2022-10-19 03:55:54 +08:00 committed by GitHub
parent 99f6da4a1e
commit e94d083898
5 changed files with 279 additions and 41 deletions

View File

@ -0,0 +1,228 @@
/**
* @license
* Copyright 2022 Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
/**
* DUMP_LEVEL.BIGDIFF: dumping when difference is greater than the default
* epsilon. DUMP_LEVEL.ANYDIFF: dumping when difference is greater than 0.
*/
const DUMP_LEVEL = {
BIGDIFF: 0,
ANYDIFF: 1,
};
function compareData(data1, data2, level = DUMP_LEVEL.BIGDIFF) {
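// ANYDIFF compares with epsilon 0; -1 makes expectObjectsClose fall back to
// the default test epsilon.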
let epsilon = level == DUMP_LEVEL.ANYDIFF ? 0 : -1;
let match = true;
try {
expectObjectsClose(data1, data2, epsilon);
} catch (e) {
match = false;
}
return match;
}
function getGraphModel(model) {
if (model instanceof tf.GraphModel) {
return model;
} else if (model.model instanceof tf.GraphModel) {
return model.model;
} else if (
model.baseModel && model.baseModel.model instanceof tf.GraphModel) {
return model.baseModel.model;
} else {
console.warn(`Model doesn't support dump!`);
return null;
}
}
async function getIntermediateTensorInfo(tensorsMap) {
if (!tensorsMap) {
return;
}
const jsonObject = {};
const keysOfTensors = Object.keys(tensorsMap);
for (let i = 0; i < keysOfTensors.length; i++) {
const key = keysOfTensors[i];
jsonObject[key] = [];
for (let j = 0; j < tensorsMap[key].length; j++) {
if (tensorsMap[key][j] == null) {
continue;
}
// For universal-sentence-encoder, its inputs are disposed by the model.
try {
const data = await (tensorsMap[key][j]).data();
jsonObject[key].push({
value: data,
shape: tensorsMap[key][j].shape,
dtype: tensorsMap[key][j].dtype
});
} catch (e) {
console.error(`${keysOfTensors[i]} ` + e.message);
}
}
}
return jsonObject;
}
async function saveObjectsToFile(jsonObjects, prefix) {
let newPrefix = '';
if (prefix !== '') {
newPrefix = `${prefix.replace(/\//g, '-')}_`;
}
const backends = Object.keys(jsonObjects);
if (Object.keys(jsonObjects[backends[0]]).length == 0) {
return;
}
for (let i = 0; i < backends.length; i++) {
const object = jsonObjects[backends[i]];
const fileName = `${newPrefix}${backends[i]}.json`;
const a = document.createElement('a');
const file = new Blob([JSON.stringify(object)], {type: 'application/json'});
a.href = URL.createObjectURL(file);
a.download = fileName;
a.click();
// This log informs tools that the file has been saved.
console.log(fileName);
}
}
/**
* Create a NamedTensorMap from an output node name.
* @param outputNodeName Output node name.
* @param modelJson The parsed model.json.
* @param dumpedJson The dumped tensor information (including shape, dtype,
* value).
*
* @returns A NamedTensorMap.
*/
async function createNamedTensorMap(outputNodeName, modelJson, dumpedJson) {
const modelNodes = modelJson['modelTopology']['node'];
let inputs = [];
for (let i = 0; i < modelNodes.length; i++) {
if (outputNodeName === modelNodes[i].name && modelNodes[i].input) {
inputs = modelNodes[i].input;
break;
}
}
// In
// https://storage.googleapis.com/tfhub-tfjs-modules/mediapipe/tfjs-model/face_landmarks_detection/attention_mesh/1/model.json,
// some inputs are prefixed with '^'.
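// Inputs prefixed with '^' are control dependencies and carry no tensor data.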
if (!inputs || inputs.length == 0 || inputs[0].startsWith('^')) {
return null;
}
let tensorMap = {};
for (let i = 0; i < inputs.length; i++) {
const key = inputs[i].split(':')[0];
if (dumpedJson[key] == null || dumpedJson[key][0] == null) {
console.warn('Tensor ' + key + ' is null!');
return null;
}
const tensorInfo = dumpedJson[key][0];
const tensor = tf.tensor(
Object.values(tensorInfo.value), tensorInfo.shape, tensorInfo.dtype);
tensorMap[key] = tensor;
}
return tensorMap;
}
async function predictOp(
model, modelJson, dumpedJson, outputNodeName, backend) {
await tf.setBackend(backend);
const tensorMap =
await createNamedTensorMap(outputNodeName, modelJson, dumpedJson);
if (tensorMap == null) {
return null;
}
let prediction;
let savedKeepIntermediateTensors;
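// Temporarily turn off intermediate-tensor collection for this single-op
// re-run; the saved flag is restored after prediction.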
try {
savedKeepIntermediateTensors =
tf.env().getBool('KEEP_INTERMEDIATE_TENSORS');
tf.env().set('KEEP_INTERMEDIATE_TENSORS', false);
} catch (e) {
console.warn(e.message);
}
try {
// TODO(#6861): Support tensor with type conversion.
prediction = await model.executeAsync(tensorMap, outputNodeName);
} catch (e) {
tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
console.warn(e.message);
return null;
}
const predictOpObject = await getPredictionData(prediction, true);
tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
return predictOpObject;
}
/**
* Dump the predict results of two backends and save diffs to files.
* @param model The loaded model.
* @param input The actual and expected results from different backends.
* @param prefix Used for generating dump file name.
* @param level 0, dump big diffs. 1, dump any diffs.
* @param length Controls how many tensors will be dumped. -1 dumps all.
*/
async function dump(
model, input, prefix = '', level = DUMP_LEVEL.BIGDIFF, length = 1) {
const graphModel = getGraphModel(model);
if (graphModel == null || length == 0) {
return;
}
const backends = Object.keys(input);
const actualObject = input[backends[0]];
const expectedObject = input[backends[1]];
const dumpActualObject = {};
const dumpExpectedObject = {};
const keys = Object.keys(actualObject);
prefix = `dump_${prefix}_${level}`;
let dumpCount = 0;
const modelJson = graphModel.artifacts;
for (let i = 0; i < keys.length; i++) {
const key = keys[i];
if (compareData(actualObject[key], expectedObject[key], level)) {
continue;
}
const predictOpObject = await predictOp(
graphModel, modelJson, expectedObject, key, backends[0]);
const [actualOpObject, expectedOpObject] = predictOpObject ?
[{...predictOpObject, i}, {...expectedObject[key], i}] :
[null, null];
if (compareData(actualOpObject, expectedOpObject, level)) {
continue;
}
if (actualOpObject && expectedOpObject) {
dumpActualObject[key] = actualOpObject;
dumpExpectedObject[key] = expectedOpObject;
dumpCount++;
}
// Break when the diff count equals dumpLength to avoid downloading a large file.
if (length != -1 && dumpCount == length) {
break;
}
}
const dumpData =
{[backends[0]]: dumpActualObject, [backends[1]]: dumpExpectedObject};
await saveObjectsToFile(dumpData, prefix);
if (dumpCount) {
console.log(`Total dumped ${dumpCount} item(s).`);
}
}

View File

@ -171,27 +171,18 @@ limitations under the License.
}
}
async function printTensors(tensorsMap) {
if (!tensorsMap) {
return;
}
const keysOfTensors = Object.keys(tensorsMap);
for (let i = 0; i < keysOfTensors.length; i++) {
console.warn(keysOfTensors[i]);
for (let j = 0; j < tensorsMap[keysOfTensors[i]].length; j++) {
console.warn(await (tensorsMap[keysOfTensors[i]][j]).data());
async function predictAndGetData(predict, model, inferenceInput, enableDump) {
const prediction = await predict(model, inferenceInput);
let intermediateData = {};
if (enableDump) {
const graphModel = getGraphModel(model);
if (graphModel) {
intermediateData = await getIntermediateTensorInfo(graphModel.getIntermediateTensors());
graphModel.disposeIntermediateTensors();
}
}
}
async function predictAndGetPredictionData(predict, model, inferenceInput, debug) {
const prediction = await predict(model, inferenceInput);
if (debug) {
await printTensors(model.getIntermediateTensors());
model.disposeIntermediateTensors();
}
const predictionData = await getPredictionData(prediction);
return predictionData;
return {data: predictionData, intermediateData};
}
const state = {
@ -218,19 +209,20 @@ limitations under the License.
await showGpuInfo();
}
let match, predictionData, referenceData;
let match, actualData, expectedData;
await cleanUpTable();
// load model and run inference
try {
tf.setBackend('cpu');
const expectedBackend = 'cpu';
tf.setBackend(expectedBackend);
await loadModelAndRecordTime();
await showMsg('Testing correctness');
await showInputs();
await showCorrectnessTestParameters();
let inferenceInput;
await showMsg('Running on cpu');
await showMsg(`Running on ${expectedBackend}`);
if (state.benchmark === 'custom') {
inferenceInput = generateInputFromDef(
state.inputs, model instanceof tf.GraphModel);
@ -243,8 +235,9 @@ limitations under the License.
console.warn(e.message);
}
const debug = keepIntermediateTensors & (benchmarks[state.benchmark].supportDebug !== false);
referenceData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
const enableDump = keepIntermediateTensors & (benchmarks[state.benchmark].supportDump !== false);
const expectedResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
expectedData = expectedResult['data'];
await tf.setBackend(state.backend);
await showMsg(`Running on ${state.backend}`);
@ -257,8 +250,18 @@ limitations under the License.
tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', true);
}
predictionData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
const actualResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
actualData = actualResult['data'];
if (enableDump) {
const actualIntermediateObject = actualResult['intermediateData'];
const expectedIntermediateObject = expectedResult['intermediateData'];
const dumpLevel = urlState.has('dumpLevel') ? Number(urlState.get('dumpLevel')) : 0;
const dumpLength = urlState.has('dumpLength') ? Number(urlState.get('dumpLength')) : 1;
const dumpPrefix = state.benchmark + '_'+ state.architecture + '_' + state.inputType + '_' + state.inputSize;
const dumpInput = {[state.backend] : actualIntermediateObject, [expectedBackend] : expectedIntermediateObject};
await dump(model, dumpInput, dumpPrefix, dumpLevel, dumpLength);
}
if (state.backend === 'webgl' || state.backend === 'webgpu') {
tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', savedWillReadFrequently);
}
@ -270,7 +273,7 @@ limitations under the License.
// compare results
try {
await showMsg(null);
expectObjectsClose(predictionData, referenceData);
expectObjectsClose(actualData, expectedData);
match = true;
} catch (e) {
match = false;

View File

@ -68,6 +68,7 @@ async function loadTFJS(localBuild) {
'../benchmark_util.js',
'./util.js',
'./index.js',
'./dump.js',
]);
for (let url of urls) {

View File

@ -15,29 +15,35 @@
* =============================================================================
*/
async function convertTensorToData(tensor) {
async function convertTensorToData(tensor, needInfo = false) {
const data = await tensor.data();
tensor.dispose();
if (needInfo) {
return {value: data, shape: tensor.shape, dtype: tensor.dtype};
}
return data;
}
async function getPredictionData(output) {
async function getPredictionData(output, needInfo = false) {
if (output instanceof Promise) {
output = await output;
}
if (output instanceof tf.Tensor) {
output = await convertTensorToData(output);
output = [await convertTensorToData(output, needInfo)];
} else if (Array.isArray(output)) {
for (let i = 0; i < output.length; i++) {
if (output[i] instanceof tf.Tensor) {
output[i] = await convertTensorToData(output[i]);
output[i] = await convertTensorToData(output[i], needInfo);
}
}
} else if (output != null && typeof output === 'object') {
for (const property in output) {
if (output[property] instanceof tf.Tensor) {
output[property] = await convertTensorToData(output[property]);
output[property] =
await convertTensorToData(output[property], needInfo);
}
}
}
@ -117,8 +123,8 @@ function expectObjectsPredicate(actual, expected, epsilon, predicate) {
return true;
}
function expectObjectsClose(actual, expected, epsilon) {
if (epsilon == null) {
function expectObjectsClose(actual, expected, epsilon = -1) {
if (epsilon === -1) {
epsilon = tf.test_util.testEpsilon();
}
expectObjectsPredicate(
@ -159,7 +165,7 @@ function expectArraysPredicateFuzzy(actual, expected, predicate, errorRate) {
// TODO: support relative comparison for array.
function expectArraysClose(actual, expected, epsilon, key) {
if (epsilon == null) {
if (epsilon === -1) {
epsilon = tf.test_util.testEpsilon();
}

View File

@ -231,7 +231,7 @@ const benchmarks = {
'Coco-SSD': {
type: 'GraphModel',
// The model has dynamic ops, so it is supposed to use executeAsync.
supportDebug: false,
supportDump: false,
architectures: ['MobileNetV2', 'MobileNetV1', 'liteMobileNetV2'],
load: async (inputResolution = 227, modelArchitecture = 'MobileNetV2') => {
const tfliteBased = modelArchitecture.split('MobileNetV')[0];
@ -327,7 +327,7 @@ const benchmarks = {
},
'AutoML Image': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
load: async () => {
const url =
'https://storage.googleapis.com/tfjs-testing/tfjs-automl/img_classification/model.json';
@ -340,7 +340,7 @@ const benchmarks = {
},
'AutoML Object': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
load: async () => {
const url =
'https://storage.googleapis.com/tfjs-testing/tfjs-automl/object_detection/model.json';
@ -355,7 +355,7 @@ const benchmarks = {
},
'USE - batchsize 30': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
load: async () => {
return use.load();
},
@ -369,7 +369,7 @@ const benchmarks = {
},
'USE - batchsize 1': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
load: async () => {
return use.load();
},
@ -384,7 +384,7 @@ const benchmarks = {
'TextToxicity': {
type: 'GraphModel',
// The model has dynamic ops, so it is supposed to use executeAsync.
supportDebug: false,
supportDump: false,
load: async () => {
const url =
'https://storage.googleapis.com/tfhub-tfjs-modules/tensorflow/tfjs-model/toxicity/1/default/1/model.json';
@ -425,7 +425,7 @@ const benchmarks = {
inputSizes: [128, 256, 512, 1024],
architectures: ['MobileNetV1', 'ResNet50'],
inputTypes: ['image', 'tensor'],
supportDebug: false,
supportDump: false,
load: async (
inputResolution = 128, modelArchitecture = 'MobileNetV1',
inputType = 'image') => {
@ -461,7 +461,7 @@ const benchmarks = {
},
'bodypix': {
type: 'GraphModel',
supportDebug: false,
supportDump: false,
// The ratio to the default camera size [480, 640].
inputSizes: [0.25, 0.5, 0.75, 1.0],
architectures: ['ResNet50'],