mirror of https://github.com/tensorflow/tfjs.git
[e2e] Support dump mode (#6850)
FEATURE PERF

* [e2e] Support dump mode

Dump mode enhances model debug mode in three ways:
1. Support whole-model dump at different dumpLevel values: 0 dumps big
   diffs (beyond the default epsilon), 1 dumps any diffs, 2 dumps all.
2. Support ops dump.
3. Support dumpLength: -1 means dump all.

Dump works in two steps:
1. Dump tensors into files according to dumpLevel. These dump files start
   with "dumpmodel_".
2. When tensor diffs are spotted, apply the following to each op related to
   a diffing tensor: use the reference tensors as inputs, run the op again
   under the predict backend, then dump all the results into files. These
   dump files start with "dumpops_".

Example url parameters to turn on dump mode:
KEEP_INTERMEDIATE_TENSORS=true&dumpLevel=1&dumpLength=-1

Bug = https://github.com/tensorflow/tfjs/issues/6860

* Clean model config
* Clean
* Dump ops when diff occurs
* Fix comments
* Refine compare and nit
* Fix comments
* Clean unused var
* Fix comments
* Nit
* Fix comments

Co-authored-by: Ping Yu <4018+pyu10055@users.noreply.github.com>
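For example, with a local build of the benchmark page, dump mode could be enabled with a URL like the following (the host and page path here are illustrative, not part of this change):

    http://localhost:8080/e2e/benchmarks/local-benchmark/?KEEP_INTERMEDIATE_TENSORS=true&dumpLevel=1&dumpLength=-1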
This commit is contained in:
parent 99f6da4a1e
commit e94d083898
@@ -0,0 +1,228 @@
/**
 * @license
 * Copyright 2022 Google LLC.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */

/**
 * DUMP_LEVEL.BIGDIFF: dump when the difference is greater than the default
 * epsilon. DUMP_LEVEL.ANYDIFF: dump when the difference is greater than 0.
 */
const DUMP_LEVEL = {
  BIGDIFF: 0,
  ANYDIFF: 1,
};

function compareData(data1, data2, level = DUMP_LEVEL.BIGDIFF) {
  // Epsilon -1 tells expectObjectsClose to fall back to the default test
  // epsilon; 0 means any difference counts as a mismatch.
  const epsilon = level === DUMP_LEVEL.ANYDIFF ? 0 : -1;
  let match = true;
  try {
    expectObjectsClose(data1, data2, epsilon);
  } catch (e) {
    match = false;
  }
  return match;
}
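A minimal usage sketch of compareData (made-up data; assumes expectObjectsClose accepts bare typed arrays and that the default epsilon exceeds 1e-4):

    const a = new Float32Array([1, 2, 3]);
    const b = new Float32Array([1, 2, 3.0001]);
    compareData(a, b, DUMP_LEVEL.BIGDIFF);  // true: 1e-4 is within the default epsilon.
    compareData(a, b, DUMP_LEVEL.ANYDIFF);  // false: ANYDIFF compares with epsilon 0.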

function getGraphModel(model) {
  if (model instanceof tf.GraphModel) {
    return model;
  } else if (model.model instanceof tf.GraphModel) {
    return model.model;
  } else if (
      model.baseModel && model.baseModel.model instanceof tf.GraphModel) {
    return model.baseModel.model;
  } else {
    console.warn(`Model doesn't support dump!`);
    return null;
  }
}

async function getIntermediateTensorInfo(tensorsMap) {
  if (!tensorsMap) {
    return;
  }
  const jsonObject = {};
  const keysOfTensors = Object.keys(tensorsMap);
  for (let i = 0; i < keysOfTensors.length; i++) {
    const key = keysOfTensors[i];
    jsonObject[key] = [];
    for (let j = 0; j < tensorsMap[key].length; j++) {
      if (tensorsMap[key][j] == null) {
        continue;
      }
      // For universal-sentence-encoder, its inputs are disposed by the
      // model, so reading the data may throw.
      try {
        const data = await (tensorsMap[key][j]).data();
        jsonObject[key].push({
          value: data,
          shape: tensorsMap[key][j].shape,
          dtype: tensorsMap[key][j].dtype
        });
      } catch (e) {
        console.error(`${keysOfTensors[i]} ` + e.message);
      }
    }
  }
  return jsonObject;
}
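The returned object maps each graph-node name to a list of tensor snapshots, roughly like this (node name and values invented for illustration):

    {
      'MobilenetV2/Conv/Relu6': [
        {value: /* Float32Array */, shape: [1, 112, 112, 32], dtype: 'float32'}
      ]
    }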

async function saveObjectsToFile(jsonObjects, prefix) {
  let newPrefix = '';
  if (prefix !== '') {
    newPrefix = `${prefix.replace(/\//g, '-')}_`;
  }
  const backends = Object.keys(jsonObjects);
  if (Object.keys(jsonObjects[backends[0]]).length === 0) {
    return;
  }
  for (let i = 0; i < backends.length; i++) {
    const object = jsonObjects[backends[i]];
    const fileName = `${newPrefix}${backends[i]}.json`;
    const a = document.createElement('a');
    const file =
        new Blob([JSON.stringify(object)], {type: 'application/json'});
    a.href = URL.createObjectURL(file);
    a.download = fileName;
    a.click();
    // This log informs tools that the file has been saved.
    console.log(fileName);
  }
}
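Because the download goes through a synthetic anchor click, this only works in a browser. A hypothetical call (object names invented):

    // webgpuTensors/cpuTensors are getIntermediateTensorInfo() results.
    await saveObjectsToFile({webgpu: webgpuTensors, cpu: cpuTensors}, 'dumpmodel_mobilenet');
    // Saves dumpmodel_mobilenet_webgpu.json and dumpmodel_mobilenet_cpu.json.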

/**
 * Create a NamedTensorMap from an output node name.
 * @param outputNodeName Output node name.
 * @param modelJson The parsed model.json.
 * @param dumpedJson The dumped tensor information (including shape, dtype,
 *     value).
 *
 * @returns A NamedTensorMap.
 */
async function createNamedTensorMap(outputNodeName, modelJson, dumpedJson) {
  const modelNodes = modelJson['modelTopology']['node'];
  let inputs = [];
  for (let i = 0; i < modelNodes.length; i++) {
    if (outputNodeName === modelNodes[i].name && modelNodes[i].input) {
      inputs = modelNodes[i].input;
      break;
    }
  }
  // In
  // https://storage.googleapis.com/tfhub-tfjs-modules/mediapipe/tfjs-model/face_landmarks_detection/attention_mesh/1/model.json,
  // some inputs are prefixed with '^'.
  if (!inputs || inputs.length === 0 || inputs[0].startsWith('^')) {
    return null;
  }

  const tensorMap = {};
  for (let i = 0; i < inputs.length; i++) {
    const key = inputs[i].split(':')[0];
    if (dumpedJson[key] == null || dumpedJson[key][0] == null) {
      console.warn('Tensor ' + key + ' is null!');
      return null;
    }
    const tensorInfo = dumpedJson[key][0];
    const tensor = tf.tensor(
        Object.values(tensorInfo.value), tensorInfo.shape, tensorInfo.dtype);
    tensorMap[key] = tensor;
  }

  return tensorMap;
}
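For instance, if the model.json node for 'MobilenetV2/Conv/Conv2D' (name invented) lists inputs ['input', 'weights'], the call below would rebuild those two tensors from their dumped snapshots:

    const tensorMap = await createNamedTensorMap('MobilenetV2/Conv/Conv2D', modelJson, dumpedJson);
    // tensorMap => {input: tf.Tensor, weights: tf.Tensor}, or null if any
    // input is missing or is a control-dependency ('^') input.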

/**
 * Re-run a single op under `backend`, feeding it the reference inputs
 * recorded in `dumpedJson`, and return the prediction data.
 */
async function predictOp(
    model, modelJson, dumpedJson, outputNodeName, backend) {
  await tf.setBackend(backend);
  const tensorMap =
      await createNamedTensorMap(outputNodeName, modelJson, dumpedJson);
  if (tensorMap == null) {
    return null;
  }
  let prediction;
  let savedKeepIntermediateTensors;
  try {
    savedKeepIntermediateTensors =
        tf.env().getBool('KEEP_INTERMEDIATE_TENSORS');
    tf.env().set('KEEP_INTERMEDIATE_TENSORS', false);
  } catch (e) {
    console.warn(e.message);
  }
  try {
    // TODO(#6861): Support tensor with type conversion.
    prediction = await model.executeAsync(tensorMap, outputNodeName);
  } catch (e) {
    tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
    console.warn(e.message);
    return null;
  }

  const predictOpObject = await getPredictionData(prediction, true);
  tf.env().set('KEEP_INTERMEDIATE_TENSORS', savedKeepIntermediateTensors);
  return predictOpObject;
}
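A sketch of a standalone predictOp call (node and backend names are placeholders):

    const opResult = await predictOp(
        graphModel, graphModel.artifacts, expectedIntermediateObject,
        'MobilenetV2/Conv/Relu6', 'webgpu');
    // opResult holds {value, shape, dtype} records for the re-run op, or
    // null when its inputs cannot be reconstructed.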

/**
 * Dump the predict results of two backends and save diffs to files.
 * @param model The loaded model.
 * @param input The actual and expected results from different backends.
 * @param prefix Used for generating the dump file name.
 * @param level 0, dump big diffs. 1, dump any diffs.
 * @param length Controls how many tensors will be dumped. -1 dumps all.
 */
async function dump(
    model, input, prefix = '', level = DUMP_LEVEL.BIGDIFF, length = 1) {
  const graphModel = getGraphModel(model);
  if (graphModel == null || length === 0) {
    return;
  }
  const backends = Object.keys(input);
  const actualObject = input[backends[0]];
  const expectedObject = input[backends[1]];
  const dumpActualObject = {};
  const dumpExpectedObject = {};
  const keys = Object.keys(actualObject);
  prefix = `dump_${prefix}_${level}`;
  let dumpCount = 0;
  const modelJson = graphModel.artifacts;
  for (let i = 0; i < keys.length; i++) {
    const key = keys[i];
    if (compareData(actualObject[key], expectedObject[key], level)) {
      continue;
    }
    // Re-run the diffing op under the predict backend, feeding it the
    // reference inputs, and compare again.
    const predictOpObject = await predictOp(
        graphModel, modelJson, expectedObject, key, backends[0]);
    const [actualOpObject, expectedOpObject] = predictOpObject ?
        [{...predictOpObject, i}, {...expectedObject[key], i}] :
        [null, null];
    if (compareData(actualOpObject, expectedOpObject, level)) {
      continue;
    }
    if (actualOpObject && expectedOpObject) {
      dumpActualObject[key] = actualOpObject;
      dumpExpectedObject[key] = expectedOpObject;
      dumpCount++;
    }
    // Break when the diff count reaches dumpLength to avoid downloading a
    // large file.
    if (length !== -1 && dumpCount === length) {
      break;
    }
  }
  const dumpData =
      {[backends[0]]: dumpActualObject, [backends[1]]: dumpExpectedObject};
  await saveObjectsToFile(dumpData, prefix);
  if (dumpCount) {
    console.log(`Total dumped ${dumpCount} item(s).`);
  }
}
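End to end, the pieces compose roughly as follows (backend names and intermediate objects are placeholders for the maps collected on each backend):

    const dumpInput = {
      webgpu: actualIntermediateObject,  // backend under test, listed first
      cpu: expectedIntermediateObject,   // reference backend
    };
    await dump(model, dumpInput, 'mobilenet', DUMP_LEVEL.ANYDIFF, -1);
    // On diffs, saves dump_mobilenet_1_webgpu.json and dump_mobilenet_1_cpu.json.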

@@ -171,27 +171,18 @@ limitations under the License.
 }
 }

-async function printTensors(tensorsMap) {
-  if (!tensorsMap) {
-    return;
-  }
-  const keysOfTensors = Object.keys(tensorsMap);
-  for (let i = 0; i < keysOfTensors.length; i++) {
-    console.warn(keysOfTensors[i]);
-    for (let j = 0; j < tensorsMap[keysOfTensors[i]].length; j++) {
-      console.warn(await (tensorsMap[keysOfTensors[i]][j]).data());
-    }
-  }
-}
-
-async function predictAndGetPredictionData(predict, model, inferenceInput, debug) {
+async function predictAndGetData(predict, model, inferenceInput, enableDump) {
   const prediction = await predict(model, inferenceInput);
-  if (debug) {
-    await printTensors(model.getIntermediateTensors());
-    model.disposeIntermediateTensors();
+  let intermediateData = {};
+  if (enableDump) {
+    const graphModel = getGraphModel(model);
+    if (graphModel) {
+      intermediateData = await getIntermediateTensorInfo(graphModel.getIntermediateTensors());
+      graphModel.disposeIntermediateTensors();
+    }
   }
   const predictionData = await getPredictionData(prediction);
-  return predictionData;
+  return {data: predictionData, intermediateData};
 }

 const state = {
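The reworked helper now returns both the prediction data and, when dumping is enabled, the intermediate tensors. A sketch of the shape of the result:

    const result = await predictAndGetData(predict, model, inferenceInput, true);
    // result.data             -> getPredictionData(prediction) output
    // result.intermediateData -> getIntermediateTensorInfo() output, or {}
    //                            when dumping is disabled or unsupported.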

@@ -218,19 +209,20 @@ limitations under the License.
     await showGpuInfo();
   }

-  let match, predictionData, referenceData;
+  let match, actualData, expectedData;
   await cleanUpTable();

   // load model and run inference
   try {
-    tf.setBackend('cpu');
+    const expectedBackend = 'cpu';
+    tf.setBackend(expectedBackend);
     await loadModelAndRecordTime();
     await showMsg('Testing correctness');
     await showInputs();
     await showCorrectnessTestParameters();

     let inferenceInput;
-    await showMsg('Running on cpu');
+    await showMsg(`Running on ${expectedBackend}`);
     if (state.benchmark === 'custom') {
       inferenceInput = generateInputFromDef(
           state.inputs, model instanceof tf.GraphModel);

@@ -243,8 +235,9 @@ limitations under the License.
       console.warn(e.message);
     }

-    const debug = keepIntermediateTensors && (benchmarks[state.benchmark].supportDebug !== false);
-    referenceData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
+    const enableDump = keepIntermediateTensors && (benchmarks[state.benchmark].supportDump !== false);
+    const expectedResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
+    expectedData = expectedResult['data'];

     await tf.setBackend(state.backend);
     await showMsg(`Running on ${state.backend}`);

@@ -257,8 +250,18 @@ limitations under the License.
       tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', true);
     }

-    predictionData = await predictAndGetPredictionData(predict, model, inferenceInput, debug);
+    const actualResult = await predictAndGetData(predict, model, inferenceInput, enableDump);
+    actualData = actualResult['data'];
+    if (enableDump) {
+      const actualIntermediateObject = actualResult['intermediateData'];
+      const expectedIntermediateObject = expectedResult['intermediateData'];
+
+      const dumpLevel = urlState.has('dumpLevel') ? Number(urlState.get('dumpLevel')) : 0;
+      const dumpLength = urlState.has('dumpLength') ? Number(urlState.get('dumpLength')) : 1;
+      const dumpPrefix = state.benchmark + '_' + state.architecture + '_' + state.inputType + '_' + state.inputSize;
+      const dumpInput = {[state.backend]: actualIntermediateObject, [expectedBackend]: expectedIntermediateObject};
+      await dump(model, dumpInput, dumpPrefix, dumpLevel, dumpLength);
+    }
     if (state.backend === 'webgl' || state.backend === 'webgpu') {
       tf.env().set('CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU', savedWillReadFrequently);
     }

@@ -270,7 +273,7 @@ limitations under the License.
     // compare results
     try {
       await showMsg(null);
-      expectObjectsClose(predictionData, referenceData);
+      expectObjectsClose(actualData, expectedData);
       match = true;
     } catch (e) {
       match = false;

@@ -68,6 +68,7 @@ async function loadTFJS(localBuild) {
     '../benchmark_util.js',
     './util.js',
     './index.js',
+    './dump.js',
   ]);

   for (let url of urls) {

@@ -15,29 +15,35 @@
  * =============================================================================
  */

-async function convertTensorToData(tensor) {
+async function convertTensorToData(tensor, needInfo = false) {
   const data = await tensor.data();
   tensor.dispose();
+  if (needInfo) {
+    return {value: data, shape: tensor.shape, dtype: tensor.dtype};
+  }
   return data;
 }

-async function getPredictionData(output) {
+async function getPredictionData(output, needInfo = false) {
   if (output instanceof Promise) {
     output = await output;
   }

   if (output instanceof tf.Tensor) {
-    output = await convertTensorToData(output);
+    output = [await convertTensorToData(output, needInfo)];
   } else if (Array.isArray(output)) {
     for (let i = 0; i < output.length; i++) {
       if (output[i] instanceof tf.Tensor) {
-        output[i] = await convertTensorToData(output[i]);
+        output[i] = await convertTensorToData(output[i], needInfo);
       }
     }
   } else if (output != null && typeof output === 'object') {
     for (const property in output) {
       if (output[property] instanceof tf.Tensor) {
-        output[property] = await convertTensorToData(output[property]);
+        output[property] =
+            await convertTensorToData(output[property], needInfo);
       }
     }
   }
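With needInfo = true, each tensor is converted to an info record instead of a bare TypedArray, e.g. (a sketch):

    const out = await getPredictionData(tf.tensor([1, 2]), /* needInfo= */ true);
    // out => [{value: Float32Array([1, 2]), shape: [2], dtype: 'float32'}]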

@@ -117,8 +123,8 @@ function expectObjectsPredicate(actual, expected, epsilon, predicate) {
   return true;
 }

-function expectObjectsClose(actual, expected, epsilon) {
-  if (epsilon == null) {
+function expectObjectsClose(actual, expected, epsilon = -1) {
+  if (epsilon === -1) {
     epsilon = tf.test_util.testEpsilon();
   }
   expectObjectsPredicate(

@@ -159,7 +165,7 @@ function expectArraysPredicateFuzzy(actual, expected, predicate, errorRate) {

 // TODO: support relative comparison for array.
 function expectArraysClose(actual, expected, epsilon, key) {
-  if (epsilon == null) {
+  if (epsilon === -1) {
     epsilon = tf.test_util.testEpsilon();
   }
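The -1 default acts as a sentinel for "use the framework default epsilon", which is what lets compareData in dump.js pass 0 for ANYDIFF and -1 for BIGDIFF (actual/expected below are placeholders):

    expectObjectsClose(actual, expected);     // epsilon -1 -> tf.test_util.testEpsilon()
    expectObjectsClose(actual, expected, 0);  // any difference fails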

@@ -231,7 +231,7 @@ const benchmarks = {
   'Coco-SSD': {
     type: 'GraphModel',
     // The model has dynamic ops, so it is supposed to use executeAsync.
-    supportDebug: false,
+    supportDump: false,
     architectures: ['MobileNetV2', 'MobileNetV1', 'liteMobileNetV2'],
     load: async (inputResolution = 227, modelArchitecture = 'MobileNetV2') => {
       const tfliteBased = modelArchitecture.split('MobileNetV')[0];

@@ -327,7 +327,7 @@ const benchmarks = {
   },
   'AutoML Image': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
           'https://storage.googleapis.com/tfjs-testing/tfjs-automl/img_classification/model.json';

@@ -340,7 +340,7 @@ const benchmarks = {
   },
   'AutoML Object': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
           'https://storage.googleapis.com/tfjs-testing/tfjs-automl/object_detection/model.json';

@@ -355,7 +355,7 @@ const benchmarks = {
   },
   'USE - batchsize 30': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       return use.load();
     },

@@ -369,7 +369,7 @@ const benchmarks = {
   },
   'USE - batchsize 1': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       return use.load();
     },

@@ -384,7 +384,7 @@ const benchmarks = {
   'TextToxicity': {
     type: 'GraphModel',
     // The model has dynamic ops, so it is supposed to use executeAsync.
-    supportDebug: false,
+    supportDump: false,
     load: async () => {
       const url =
           'https://storage.googleapis.com/tfhub-tfjs-modules/tensorflow/tfjs-model/toxicity/1/default/1/model.json';

@@ -425,7 +425,7 @@ const benchmarks = {
     inputSizes: [128, 256, 512, 1024],
     architectures: ['MobileNetV1', 'ResNet50'],
     inputTypes: ['image', 'tensor'],
-    supportDebug: false,
+    supportDump: false,
     load: async (
         inputResolution = 128, modelArchitecture = 'MobileNetV1',
         inputType = 'image') => {

@@ -461,7 +461,7 @@ const benchmarks = {
   },
   'bodypix': {
     type: 'GraphModel',
-    supportDebug: false,
+    supportDump: false,
     // The ratio to the default camera size [480, 640].
     inputSizes: [0.25, 0.5, 0.75, 1.0],
     architectures: ['ResNet50'],