mirror of https://github.com/nodejs/node.git
esm: use Undici/`fetch` `data:` URL parser
Fixes: https://github.com/nodejs/node/issues/53775 PR-URL: https://github.com/nodejs/node/pull/54748 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
parent
dcc2ed944f
commit
6c85d40593
|
@ -0,0 +1,352 @@
|
|||
'use strict';
|
||||
|
||||
const {
|
||||
RegExpPrototypeExec,
|
||||
RegExpPrototypeSymbolReplace,
|
||||
StringFromCharCodeApply,
|
||||
StringPrototypeCharCodeAt,
|
||||
StringPrototypeIndexOf,
|
||||
StringPrototypeSlice,
|
||||
TypedArrayPrototypeSubarray,
|
||||
Uint8Array,
|
||||
} = primordials;
|
||||
|
||||
const assert = require('internal/assert');
|
||||
const { Buffer } = require('buffer');
|
||||
const { MIMEType } = require('internal/mime');
|
||||
|
||||
let encoder;
|
||||
function lazyEncoder() {
|
||||
if (encoder === undefined) {
|
||||
const { TextEncoder } = require('internal/encoding');
|
||||
encoder = new TextEncoder();
|
||||
}
|
||||
|
||||
return encoder;
|
||||
}
|
||||
|
||||
// Matches each ASCII whitespace code point (TAB, LF, FF, CR, SPACE). The
// global flag makes a replace remove every occurrence, not only the first.
const ASCII_WHITESPACE_REPLACE_REGEX = /[\u0009\u000A\u000C\u000D\u0020]/g // eslint-disable-line
|
||||
|
||||
// https://fetch.spec.whatwg.org/#data-url-processor
/**
 * Implements the Fetch "data: URL processor": splits a `data:` URL into its
 * MIME type and its decoded body bytes.
 * @param {URL} dataURL URL object whose protocol must be `data:`
 * @returns {'failure' | { mimeType: MIMEType, body: Uint8Array }}
 *   the string 'failure' when there is no comma or the base64 payload is
 *   invalid, otherwise the parsed MIME type and the body.
 */
function dataURLProcessor(dataURL) {
  // Step 1: only data: URLs may be handed to this function.
  assert(dataURL.protocol === 'data:');

  // Steps 2-3: serialize without the fragment, then drop the
  // five characters of the leading "data:".
  const input = StringPrototypeSlice(URLSerializer(dataURL, true), 5);

  // Steps 4-5: starting at the beginning of input, collect everything
  // in front of the first U+002C (,) as the raw MIME type.
  const cursor = { position: 0 };
  const rawMimeType = collectASequenceOfCodePointsFast(',', input, cursor);

  // Step 6: strip leading and trailing ASCII whitespace. The untrimmed
  // string is kept around because its length is what locates the body
  // in step 9.
  let mimeType = removeASCIIWhitespace(rawMimeType, true, true);

  // Step 7: no comma was found, so there is no body at all.
  if (cursor.position >= input.length) {
    return 'failure';
  }

  // Step 8: move past the comma.
  cursor.position += 1;

  // Step 9: the encoded body is everything after the comma.
  const encodedBody = StringPrototypeSlice(input, rawMimeType.length + 1);

  // Step 10: percent-decode the body into bytes.
  let body = stringPercentDecode(encodedBody);

  // Step 11: a MIME type ending in ";", optional U+0020 SPACEs and a
  // case-insensitive "base64" marks a base64-encoded body.
  const hasBase64Marker =
    RegExpPrototypeExec(/;(\u0020){0,}base64$/i, mimeType) !== null;

  if (hasBase64Marker) {
    // 11.1-11.2: isomorphic decode the bytes and run forgiving-base64
    //            decode on the resulting string.
    body = forgivingBase64(isomorphicDecode(body));

    // 11.3: propagate a decoding failure.
    if (body === 'failure') {
      return 'failure';
    }

    // 11.4: drop the six code points of "base64".
    mimeType = StringPrototypeSlice(mimeType, 0, -6);

    // 11.5: drop any U+0020 SPACE now trailing the MIME type.
    mimeType = RegExpPrototypeSymbolReplace(/(\u0020)+$/, mimeType, '');

    // 11.6: drop the U+003B (;) that introduced the marker.
    mimeType = StringPrototypeSlice(mimeType, 0, -1);
  }

  // Step 12: a MIME type made only of parameters defaults to text/plain.
  if (mimeType[0] === ';') {
    mimeType = `text/plain${mimeType}`;
  }

  // Steps 13-14: parse the MIME type; when parsing throws, fall back to
  // text/plain;charset=US-ASCII.
  let mimeTypeRecord;
  try {
    mimeTypeRecord = new MIMEType(mimeType);
  } catch {
    mimeTypeRecord = new MIMEType('text/plain;charset=US-ASCII');
  }

  // Step 15: return the data: URL struct.
  // https://fetch.spec.whatwg.org/#data-url-struct
  return { mimeType: mimeTypeRecord, body };
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-url-serializer
/**
 * Serializes a URL object, optionally leaving out its fragment.
 * @param {URL} url
 * @param {boolean} excludeFragment when true, the fragment (and its "#") is
 *   removed from the returned string
 * @returns {string}
 */
function URLSerializer(url, excludeFragment = false) {
  const fullHref = url.href;

  if (!excludeFragment) {
    return fullHref;
  }

  const fragmentLength = url.hash.length;

  // A non-empty `hash` (which includes its "#") is cut off the end of href.
  if (fragmentLength !== 0) {
    return StringPrototypeSlice(fullHref, 0, fullHref.length - fragmentLength);
  }

  // `hash` is empty but href still ends in "#": drop that lone "#".
  if (fullHref[fullHref.length - 1] === '#') {
    return StringPrototypeSlice(fullHref, 0, -1);
  }

  return fullHref;
}
|
||||
|
||||
/**
 * Single-character variant of "collect a sequence of code points": returns
 * the part of `input` between the current position and the next occurrence
 * of `char` (or the end of `input` when `char` does not occur again), and
 * moves `position.position` to where collection stopped.
 * @param {string} char
 * @param {string} input
 * @param {{ position: number }} position
 * @returns {string}
 */
function collectASequenceOfCodePointsFast(char, input, position) {
  const from = position.position;
  const hit = StringPrototypeIndexOf(input, char, from);
  const to = hit === -1 ? input.length : hit;

  position.position = to;
  return StringPrototypeSlice(input, from, to);
}
|
||||
|
||||
// https://url.spec.whatwg.org/#string-percent-decode
/**
 * Percent-decodes a string: the input is first encoded with the shared
 * TextEncoder, then the resulting bytes are percent-decoded.
 * @param {string} input
 * @returns {Uint8Array}
 */
function stringPercentDecode(input) {
  const utf8Bytes = lazyEncoder().encode(input);
  return percentDecode(utf8Bytes);
}
|
||||
|
||||
/**
 * Tells whether a byte is an ASCII hexadecimal digit (0-9, A-F or a-f).
 * @param {number} byte
 * @returns {boolean}
 */
function isHexCharByte(byte) {
  // '0' to '9'
  if (byte >= 0x30 && byte <= 0x39) {
    return true;
  }
  // 'A' to 'F'
  if (byte >= 0x41 && byte <= 0x46) {
    return true;
  }
  // 'a' to 'f'
  return byte >= 0x61 && byte <= 0x66;
}
|
||||
|
||||
/**
 * Converts the byte of an ASCII hexadecimal digit to its numeric value.
 * The result is only meaningful for bytes accepted by isHexCharByte().
 * @param {number} byte
 * @returns {number}
 */
function hexByteToNumber(byte) {
  const isDecimalDigit = byte >= 0x30 && byte <= 0x39;
  if (isDecimalDigit) {
    // '0' is 0x30, so subtracting it yields 0-9.
    return byte - 0x30;
  }

  // Clearing bit 0x20 folds 'a'-'f' onto 'A'-'F'; 'A' (65) must map to 10,
  // hence the offset of 55.
  return (byte & 0xDF) - 55;
}
|
||||
|
||||
// https://url.spec.whatwg.org/#percent-decode
/**
 * Percent-decodes a byte sequence. Each "%" followed by two ASCII hex digits
 * becomes the single byte those digits denote; every other byte, including a
 * "%" that is not followed by two hex digits, is copied through unchanged.
 * @param {Uint8Array} input
 * @returns {Uint8Array}
 */
function percentDecode(input) {
  const total = input.length;
  // The decoded form is never longer than the input.
  const decoded = new Uint8Array(total);
  let written = 0;
  let read = 0;

  while (read < total) {
    const current = input[read];
    const isEscape =
      current === 0x25 &&
      isHexCharByte(input[read + 1]) &&
      isHexCharByte(input[read + 2]);

    if (isEscape) {
      // Combine the two hex digits into one byte and step over all three.
      const high = hexByteToNumber(input[read + 1]);
      const low = hexByteToNumber(input[read + 2]);
      decoded[written++] = (high << 4) | low;
      read += 3;
    } else {
      decoded[written++] = current;
      read += 1;
    }
  }

  // Hand back only the part that was filled in.
  if (written === total) {
    return decoded;
  }
  return TypedArrayPrototypeSubarray(decoded, 0, written);
}
|
||||
|
||||
// https://infra.spec.whatwg.org/#forgiving-base64-decode
/**
 * Forgiving-base64 decodes a string.
 * @param {string} data
 * @returns {Uint8Array | 'failure'} the decoded bytes, or the string
 *   'failure' when the input is not acceptable base64.
 */
function forgivingBase64(data) {
  // Step 1: ASCII whitespace is ignored entirely.
  const compact = RegExpPrototypeSymbolReplace(ASCII_WHITESPACE_REPLACE_REGEX, data, '');

  // Step 2: when the length is a multiple of four, up to two trailing
  // U+003D (=) padding characters are left out of the validated payload.
  let payloadLength = compact.length;
  if (payloadLength % 4 === 0) {
    for (let removed = 0; removed < 2 && compact[payloadLength - 1] === '='; removed++) {
      payloadLength--;
    }
  }

  // Step 3: a remainder of one can never be valid base64.
  if (payloadLength % 4 === 1) {
    return 'failure';
  }

  // Step 4: only "+", "/" and ASCII alphanumerics may remain.
  const payload = payloadLength === compact.length ?
    compact :
    StringPrototypeSlice(compact, 0, payloadLength);
  if (RegExpPrototypeExec(/[^+/0-9A-Za-z]/, payload) !== null) {
    return 'failure';
  }

  // Decode with Buffer and expose the same memory as a plain Uint8Array.
  const decoded = Buffer.from(compact, 'base64');
  return new Uint8Array(decoded.buffer, decoded.byteOffset, decoded.byteLength);
}
|
||||
|
||||
/**
 * Tells whether a code unit is ASCII whitespace: TAB, LF, FF, CR or SPACE.
 * @see https://infra.spec.whatwg.org/#ascii-whitespace
 * @param {number} char
 * @returns {boolean}
 */
function isASCIIWhitespace(char) {
  switch (char) {
    case 0x009: // "\t"
    case 0x00a: // "\n"
    case 0x00c: // "\f"
    case 0x00d: // "\r"
    case 0x020: // " "
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Strips ASCII whitespace from the start and/or the end of a string by
 * delegating to removeChars() with isASCIIWhitespace as the predicate.
 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip at the start
 * @param {boolean} [trailing=true] strip at the end
 */
function removeASCIIWhitespace(str, leading = true, trailing = true) {
  const stripped = removeChars(str, leading, trailing, isASCIIWhitespace);
  return stripped;
}
|
||||
|
||||
/**
 * Removes the code units accepted by `predicate` from the start and/or the
 * end of `str`.
 * @param {string} str
 * @param {boolean} leading strip matching code units from the start
 * @param {boolean} trailing strip matching code units from the end
 * @param {(charCode: number) => boolean} predicate
 * @returns {string} `str` itself when nothing was removed, otherwise the
 *   remaining slice (possibly the empty string).
 */
function removeChars(str, leading, trailing, predicate) {
  let lead = 0;
  let trail = str.length - 1;

  if (leading) {
    while (lead < str.length && predicate(StringPrototypeCharCodeAt(str, lead))) lead++;
  }

  if (trailing) {
    // Scan down to `lead` inclusive. Stopping at index 1 would leave index 0
    // unexamined, so a trailing-only strip of a string made up entirely of
    // removable characters would wrongly keep its first character.
    while (trail >= lead && predicate(StringPrototypeCharCodeAt(str, trail))) trail--;
  }

  return lead === 0 && trail === str.length - 1 ? str : StringPrototypeSlice(str, lead, trail + 1);
}
|
||||
|
||||
/**
 * Isomorphic decodes a byte sequence: the result has one code unit per input
 * byte, each with the same numeric value as that byte, in the same order.
 * @see https://infra.spec.whatwg.org/#isomorphic-decode
 * @param {Uint8Array} input
 * @returns {string}
 */
function isomorphicDecode(input) {
  // Largest number of bytes converted by a single StringFromCharCodeApply call.
  const chunkSize = (2 << 15) - 1;
  const total = input.length;

  // Short inputs are converted in one call.
  if (total < chunkSize) {
    return StringFromCharCodeApply(input);
  }

  // Longer inputs are converted chunk by chunk and concatenated.
  let decoded = '';
  for (let offset = 0; offset < total; offset += chunkSize) {
    const end = offset + chunkSize > total ? total : offset + chunkSize;
    decoded += StringFromCharCodeApply(TypedArrayPrototypeSubarray(input, offset, end));
  }
  return decoded;
}
|
||||
|
||||
module.exports = {
|
||||
dataURLProcessor,
|
||||
};
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
const {
|
||||
RegExpPrototypeExec,
|
||||
decodeURIComponent,
|
||||
} = primordials;
|
||||
const { kEmptyObject } = require('internal/util');
|
||||
|
||||
|
@ -27,7 +26,9 @@ const {
|
|||
ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING,
|
||||
} = require('internal/errors').codes;
|
||||
|
||||
const DATA_URL_PATTERN = /^[^/]+\/[^,;]+(?:[^,]*?)(;base64)?,([\s\S]*)$/;
|
||||
const {
|
||||
dataURLProcessor,
|
||||
} = require('internal/data_url');
|
||||
|
||||
/**
|
||||
* @param {URL} url URL to the module
|
||||
|
@ -42,12 +43,11 @@ async function getSource(url, context) {
|
|||
const { readFile: readFileAsync } = require('internal/fs/promises').exports;
|
||||
source = await readFileAsync(url);
|
||||
} else if (protocol === 'data:') {
|
||||
const match = RegExpPrototypeExec(DATA_URL_PATTERN, url.pathname);
|
||||
if (!match) {
|
||||
throw new ERR_INVALID_URL(responseURL);
|
||||
const result = dataURLProcessor(url);
|
||||
if (result === 'failure') {
|
||||
throw new ERR_INVALID_URL(responseURL, null);
|
||||
}
|
||||
const { 1: base64, 2: body } = match;
|
||||
source = BufferFrom(decodeURIComponent(body), base64 ? 'base64' : 'utf8');
|
||||
source = BufferFrom(result.body);
|
||||
} else {
|
||||
const supportedSchemes = ['file', 'data'];
|
||||
throw new ERR_UNSUPPORTED_ESM_URL_SCHEME(url, supportedSchemes);
|
||||
|
@ -67,12 +67,11 @@ function getSourceSync(url, context) {
|
|||
if (protocol === 'file:') {
|
||||
source = readFileSync(url);
|
||||
} else if (protocol === 'data:') {
|
||||
const match = RegExpPrototypeExec(DATA_URL_PATTERN, url.pathname);
|
||||
if (!match) {
|
||||
const result = dataURLProcessor(url);
|
||||
if (result === 'failure') {
|
||||
throw new ERR_INVALID_URL(responseURL);
|
||||
}
|
||||
const { 1: base64, 2: body } = match;
|
||||
source = BufferFrom(decodeURIComponent(body), base64 ? 'base64' : 'utf8');
|
||||
source = BufferFrom(result.body);
|
||||
} else {
|
||||
const supportedSchemes = ['file', 'data'];
|
||||
throw new ERR_UNSUPPORTED_ESM_URL_SCHEME(url, supportedSchemes);
|
||||
|
|
|
@ -96,12 +96,7 @@ function createBase64URL(mime, body) {
|
|||
{
|
||||
const body = 'null';
|
||||
const plainESMURL = createURL('invalid', body);
|
||||
try {
|
||||
await import(plainESMURL);
|
||||
common.mustNotCall()();
|
||||
} catch (e) {
|
||||
assert.strictEqual(e.code, 'ERR_INVALID_URL');
|
||||
}
|
||||
await assert.rejects(import(plainESMURL), { code: 'ERR_UNKNOWN_MODULE_FORMAT' });
|
||||
}
|
||||
{
|
||||
const plainESMURL = 'data:text/javascript,export%20default%202';
|
||||
|
@ -112,4 +107,8 @@ function createBase64URL(mime, body) {
|
|||
const plainESMURL = `data:text/javascript,${encodeURIComponent(`import ${JSON.stringify(fixtures.fileURL('es-module-url', 'empty.js'))}`)}`;
|
||||
await import(plainESMURL);
|
||||
}
|
||||
{
|
||||
const plainESMURL = 'data:text/javascript,var x = "hello world?"';
|
||||
await import(plainESMURL);
|
||||
}
|
||||
})().then(common.mustCall());
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
'use strict';
|
||||
// Flags: --expose-internals
|
||||
|
||||
require('../common');
|
||||
const fixtures = require('../common/fixtures');
|
||||
const assert = require('node:assert');
|
||||
const { test } = require('node:test');
|
||||
const { dataURLProcessor } = require('internal/data_url');
|
||||
|
||||
// https://github.com/web-platform-tests/wpt/blob/7c79d998ff42e52de90290cb847d1b515b3b58f7/fetch/data-urls/processing.any.js
|
||||
// Runs dataURLProcessor() over every entry of the WPT data-urls.json fixture.
// Entries are read as [input, expectedMimeType, expectedBodyBytes]; an
// expectedMimeType of null means the input must be rejected, either by the
// URL parser or by dataURLProcessor() returning 'failure'.
test('parsing data URLs', () => {
  const tests = require(fixtures.path('wpt/fetch/data-urls/resources/data-urls.json'));

  for (const [input, expectedMimeType, expectedBytes] of tests) {
    if (!URL.canParse(input)) {
      // Unparsable URLs never reach the processor; the fixture must agree.
      assert.strictEqual(expectedMimeType, null);
      continue;
    }

    if (expectedMimeType === null) {
      assert.strictEqual(dataURLProcessor(URL.parse(input)), 'failure');
      continue;
    }

    const { mimeType, body } = dataURLProcessor(new URL(input));

    // assert's signature is (actual, expected): keep that order so failure
    // output labels the two sides correctly.
    assert.deepStrictEqual(body, new Uint8Array(expectedBytes));
    assert.deepStrictEqual(mimeType.toString(), expectedMimeType);
  }
});
|
Loading…
Reference in New Issue