mirror of https://github.com/nodejs/node.git
util: add fast path for Latin1 decoding
PR-URL: https://github.com/nodejs/node/pull/55275 Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Daniel Lemire <daniel@lemire.me>
This commit is contained in:
parent
52dfe5af4b
commit
9eccd7dba9
|
@ -3,7 +3,7 @@
|
|||
const common = require('../common.js');
|
||||
|
||||
const bench = common.createBenchmark(main, {
|
||||
encoding: ['utf-8', 'latin1', 'iso-8859-3'],
|
||||
encoding: ['utf-8', 'windows-1252', 'iso-8859-3'],
|
||||
ignoreBOM: [0, 1],
|
||||
fatal: [0, 1],
|
||||
len: [256, 1024 * 16, 1024 * 128],
|
||||
|
|
|
@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder');
|
|||
const kEncoder = Symbol('encoder');
|
||||
const kFatal = Symbol('kFatal');
|
||||
const kUTF8FastPath = Symbol('kUTF8FastPath');
|
||||
const kLatin1FastPath = Symbol('kLatin1FastPath');
|
||||
const kIgnoreBOM = Symbol('kIgnoreBOM');
|
||||
|
||||
const {
|
||||
|
@ -55,6 +56,7 @@ const {
|
|||
encodeIntoResults,
|
||||
encodeUtf8String,
|
||||
decodeUTF8,
|
||||
decodeLatin1,
|
||||
} = binding;
|
||||
|
||||
const { Buffer } = require('buffer');
|
||||
|
@ -419,9 +421,10 @@ function makeTextDecoderICU() {
|
|||
this[kFatal] = Boolean(options?.fatal);
|
||||
// Fast paths exist only for UTF-8 and for windows-1252 (decoded as Latin1).
|
||||
this[kUTF8FastPath] = enc === 'utf-8';
|
||||
this[kLatin1FastPath] = enc === 'windows-1252';
|
||||
this[kHandle] = undefined;
|
||||
|
||||
if (!this[kUTF8FastPath]) {
|
||||
if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
|
||||
this.#prepareConverter();
|
||||
}
|
||||
}
|
||||
|
@ -438,11 +441,16 @@ function makeTextDecoderICU() {
|
|||
validateDecoder(this);
|
||||
|
||||
this[kUTF8FastPath] &&= !(options?.stream);
|
||||
this[kLatin1FastPath] &&= !(options?.stream);
|
||||
|
||||
if (this[kUTF8FastPath]) {
|
||||
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
|
||||
}
|
||||
|
||||
if (this[kLatin1FastPath]) {
|
||||
return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
|
||||
}
|
||||
|
||||
this.#prepareConverter();
|
||||
|
||||
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "encoding_binding.h"
|
||||
#include "ada.h"
|
||||
#include "env-inl.h"
|
||||
#include "node_buffer.h"
|
||||
#include "node_errors.h"
|
||||
#include "node_external_reference.h"
|
||||
#include "simdutf.h"
|
||||
|
@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
|
|||
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
|
||||
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
|
||||
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
|
||||
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
|
||||
}
|
||||
|
||||
void BindingData::CreatePerContextProperties(Local<Object> target,
|
||||
|
@ -243,6 +245,50 @@ void BindingData::RegisterTimerExternalReferences(
|
|||
registry->Register(DecodeUTF8);
|
||||
registry->Register(ToASCII);
|
||||
registry->Register(ToUnicode);
|
||||
registry->Register(DecodeLatin1);
|
||||
}
|
||||
|
||||
void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
|
||||
CHECK_GE(args.Length(), 1);
|
||||
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
|
||||
args[0]->IsArrayBufferView())) {
|
||||
return node::THROW_ERR_INVALID_ARG_TYPE(
|
||||
env->isolate(),
|
||||
"The \"input\" argument must be an instance of ArrayBuffer, "
|
||||
"SharedArrayBuffer, or ArrayBufferView.");
|
||||
}
|
||||
|
||||
bool ignore_bom = args[1]->IsTrue();
|
||||
bool has_fatal = args[2]->IsTrue();
|
||||
|
||||
ArrayBufferViewContents<uint8_t> buffer(args[0]);
|
||||
const uint8_t* data = buffer.data();
|
||||
size_t length = buffer.length();
|
||||
|
||||
if (ignore_bom && length > 0 && data[0] == 0xFF) {
|
||||
data++;
|
||||
length--;
|
||||
}
|
||||
|
||||
if (length == 0) {
|
||||
return args.GetReturnValue().SetEmptyString();
|
||||
}
|
||||
|
||||
std::string result(length * 2, '\0');
|
||||
|
||||
size_t written = simdutf::convert_latin1_to_utf8(
|
||||
reinterpret_cast<const char*>(data), length, result.data());
|
||||
|
||||
if (has_fatal && written == 0) {
|
||||
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
|
||||
env->isolate(), "The encoded data was not valid for encoding latin1");
|
||||
}
|
||||
|
||||
Local<Object> buffer_result =
|
||||
node::Buffer::Copy(env, result.c_str(), written).ToLocalChecked();
|
||||
args.GetReturnValue().Set(buffer_result);
|
||||
}
|
||||
|
||||
} // namespace encoding_binding
|
||||
|
|
|
@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
|
|||
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
|
||||
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
|
|
|
@ -0,0 +1,155 @@
|
|||
#include "encoding_binding.h"
|
||||
#include "env-inl.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "node_test_fixture.h"
|
||||
#include "v8.h"
|
||||
|
||||
namespace node {
|
||||
namespace encoding_binding {
|
||||
|
||||
bool RunDecodeLatin1(Environment* env,
|
||||
Local<Value> args[],
|
||||
bool ignore_bom,
|
||||
bool has_fatal,
|
||||
Local<Value>* result) {
|
||||
Isolate* isolate = env->isolate();
|
||||
TryCatch try_catch(isolate);
|
||||
|
||||
Local<Boolean> ignoreBOMValue = Boolean::New(isolate, ignore_bom);
|
||||
Local<Boolean> fatalValue = Boolean::New(isolate, has_fatal);
|
||||
|
||||
Local<Value> updatedArgs[] = {args[0], ignoreBOMValue, fatalValue};
|
||||
|
||||
BindingData::DecodeLatin1(FunctionCallbackInfo<Value>(updatedArgs));
|
||||
|
||||
if (try_catch.HasCaught()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*result = try_catch.Exception();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fixture shared by the DecodeLatin1 tests; it adds nothing on top of
// NodeTestFixture.
class EncodingBindingTest : public NodeTestFixture {
};
|
||||
|
||||
// Bytes above 0x7F decode to the matching Latin1 characters.
TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  // 0xC1 0xE9 0xF3 are the Latin1 bytes for the three expected characters.
  const uint8_t bytes[] = {0xC1, 0xE9, 0xF3};
  const size_t byte_count = sizeof(bytes);

  Local<ArrayBuffer> backing = ArrayBuffer::New(isolate, byte_count);
  memcpy(backing->GetBackingStore()->Data(), bytes, byte_count);

  Local<Value> args[] = {Uint8Array::New(backing, 0, byte_count)};

  Local<Value> result;
  EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result));

  String::Utf8Value decoded(isolate, result);
  EXPECT_STREQ(*decoded, "Áéó");
}
|
||||
|
||||
// A zero-length view decodes to the empty string.
TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<ArrayBuffer> empty_backing = ArrayBuffer::New(isolate, 0);
  Local<Value> args[] = {Uint8Array::New(empty_backing, 0, 0)};

  Local<Value> result;
  EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result));

  String::Utf8Value decoded(isolate, result);
  EXPECT_STREQ(*decoded, "");
}
|
||||
|
||||
// A string is none of the accepted buffer types, so the call must fail.
TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<Value> not_a_buffer =
      String::NewFromUtf8Literal(isolate, "Invalid input");
  Local<Value> args[] = {not_a_buffer};

  Local<Value> result;
  EXPECT_FALSE(RunDecodeLatin1(env, args, false, false, &result));
}
|
||||
|
||||
// A 0xFE 0xFF prefix is ordinary Latin1 text, not a byte order mark.
// DecodeLatin1 only ever drops a single leading 0xFF byte when ignore_bom is
// set, and this input starts with 0xFE, so every byte must be decoded.
TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOM) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3};
  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
  memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));

  Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
  Local<Value> args[] = {array};

  Local<Value> result;
  // ASSERT rather than EXPECT: `result` is only populated on success and
  // must not be read otherwise.
  ASSERT_TRUE(RunDecodeLatin1(env, args, true, false, &result));

  String::Utf8Value utf8_result(isolate, result);
  EXPECT_STREQ(*utf8_result, "þÿÁéó");
}
|
||||
|
||||
// Every byte value is a valid Latin1 character, so fatal mode has nothing to
// reject: three 0xFF bytes decode to three U+00FF characters. (ignore_bom is
// false here, so DecodeLatin1 does not strip the first byte.)
TEST_F(EncodingBindingTest, DecodeLatin1_FatalInvalidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t invalid_data[] = {0xFF, 0xFF, 0xFF};
  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(invalid_data));
  memcpy(ab->GetBackingStore()->Data(), invalid_data, sizeof(invalid_data));

  Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(invalid_data));
  Local<Value> args[] = {array};

  Local<Value> result;
  // ASSERT rather than EXPECT: `result` is only populated on success and
  // must not be read otherwise.
  ASSERT_TRUE(RunDecodeLatin1(env, args, false, true, &result));

  String::Utf8Value utf8_result(isolate, result);
  EXPECT_STREQ(*utf8_result, "ÿÿÿ");
}
|
||||
|
||||
// Same input as the IgnoreBOM case, with fatal mode also enabled. The data
// starts with 0xFE, so DecodeLatin1's leading-0xFF check does not apply and
// all five bytes must be decoded; fatal mode has nothing to reject.
TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOMAndFatal) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3};
  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
  memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));

  Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
  Local<Value> args[] = {array};

  Local<Value> result;
  // ASSERT rather than EXPECT: `result` is only populated on success and
  // must not be read otherwise.
  ASSERT_TRUE(RunDecodeLatin1(env, args, true, true, &result));

  String::Utf8Value utf8_result(isolate, result);
  EXPECT_STREQ(*utf8_result, "þÿÁéó");
}
|
||||
|
||||
// With ignore_bom set, DecodeLatin1 drops one leading 0xFF byte before
// decoding; this test pins that behavior.
TEST_F(EncodingBindingTest, DecodeLatin1_BOMPresent) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  // Leading 0xFF followed by the Latin1 bytes for the expected text.
  const uint8_t bytes[] = {0xFF, 0xC1, 0xE9, 0xF3};
  const size_t byte_count = sizeof(bytes);

  Local<ArrayBuffer> backing = ArrayBuffer::New(isolate, byte_count);
  memcpy(backing->GetBackingStore()->Data(), bytes, byte_count);

  Local<Value> args[] = {Uint8Array::New(backing, 0, byte_count)};

  Local<Value> result;
  EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result));

  String::Utf8Value decoded(isolate, result);
  EXPECT_STREQ(*decoded, "Áéó");
}
|
||||
|
||||
} // namespace encoding_binding
|
||||
} // namespace node
|
Loading…
Reference in New Issue