mirror of https://github.com/nodejs/node.git
deps: update ada to 2.7.4
PR-URL: https://github.com/nodejs/node/pull/50815 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Marco Ippolito <marcoippolito54@gmail.com>
This commit is contained in:
parent
177cfc88b9
commit
a76f3913a5
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on 2023-10-22 19:50:50 -0400. Do not edit! */
|
||||
/* auto-generated on 2023-11-19 13:35:02 -0500. Do not edit! */
|
||||
/* begin file src/ada.cpp */
|
||||
#include "ada.h"
|
||||
/* begin file src/checkers.cpp */
|
||||
|
@ -7,62 +7,79 @@
|
|||
namespace ada::checkers {
|
||||
|
||||
ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept {
|
||||
size_t last_dot = view.rfind('.');
|
||||
if (last_dot == view.size() - 1) {
|
||||
// The string is not empty and does not contain upper case ASCII characters.
|
||||
//
|
||||
// Optimization. To be considered as a possible ipv4, the string must end
|
||||
// with 'x' or a lowercase hex character.
|
||||
// Most of the time, this will be false so this simple check will save a lot
|
||||
// of effort.
|
||||
char last_char = view.back();
|
||||
// If the address ends with a dot, we need to prune it (special case).
|
||||
if (last_char == '.') {
|
||||
view.remove_suffix(1);
|
||||
last_dot = view.rfind('.');
|
||||
if (view.empty()) {
|
||||
return false;
|
||||
}
|
||||
last_char = view.back();
|
||||
}
|
||||
std::string_view number =
|
||||
(last_dot == std::string_view::npos) ? view : view.substr(last_dot + 1);
|
||||
if (number.empty()) {
|
||||
bool possible_ipv4 = (last_char >= '0' && last_char <= '9') ||
|
||||
(last_char >= 'a' && last_char <= 'f') ||
|
||||
last_char == 'x';
|
||||
if (!possible_ipv4) {
|
||||
return false;
|
||||
}
|
||||
// From the last character, find the last dot.
|
||||
size_t last_dot = view.rfind('.');
|
||||
if (last_dot != std::string_view::npos) {
|
||||
// We have at least one dot.
|
||||
view = view.substr(last_dot + 1);
|
||||
}
|
||||
/** Optimization opportunity: we have basically identified the last number of
|
||||
the ipv4 if we return true here. We might as well parse it and have at
|
||||
least one number parsed when we get to parse_ipv4. */
|
||||
if (std::all_of(number.begin(), number.end(), ada::checkers::is_digit)) {
|
||||
if (std::all_of(view.begin(), view.end(), ada::checkers::is_digit)) {
|
||||
return true;
|
||||
}
|
||||
return (checkers::has_hex_prefix(number) &&
|
||||
std::all_of(number.begin() + 2, number.end(),
|
||||
ada::unicode::is_lowercase_hex));
|
||||
// It could be hex (0x), but not if there is a single character.
|
||||
if (view.size() == 1) {
|
||||
return false;
|
||||
}
|
||||
// It must start with 0x.
|
||||
if (!std::equal(view.begin(), view.begin() + 2, "0x")) {
|
||||
return false;
|
||||
}
|
||||
// We must allow "0x".
|
||||
if (view.size() == 2) {
|
||||
return true;
|
||||
}
|
||||
// We have 0x followed by some characters, we need to check that they are
|
||||
// hexadecimals.
|
||||
return std::all_of(view.begin() + 2, view.end(),
|
||||
ada::unicode::is_lowercase_hex);
|
||||
}
|
||||
|
||||
// for use with path_signature, we include all characters that need percent
|
||||
// encoding.
|
||||
static constexpr uint8_t path_signature_table[256] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
static_assert(path_signature_table[uint8_t('?')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('`')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('{')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('}')] == 1);
|
||||
//
|
||||
static_assert(path_signature_table[uint8_t(' ')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('?')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('"')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('#')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('<')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('>')] == 1);
|
||||
static_assert(path_signature_table[uint8_t('\\')] == 2);
|
||||
static_assert(path_signature_table[uint8_t('.')] == 4);
|
||||
static_assert(path_signature_table[uint8_t('%')] == 8);
|
||||
|
||||
//
|
||||
static_assert(path_signature_table[0] == 1);
|
||||
static_assert(path_signature_table[31] == 1);
|
||||
static_assert(path_signature_table[127] == 1);
|
||||
static_assert(path_signature_table[128] == 1);
|
||||
static_assert(path_signature_table[255] == 1);
|
||||
// Byte-indexed flag table consumed by path_signature. Each entry is:
//   1 for bytes 0x00-0x20, '"', '#', '<', '>', '?', '`', '{', '}' and every
//     byte above 0x7e,
//   2 for the backslash,
//   4 for the dot,
//   8 for the percent sign,
//   0 for every other byte.
static constexpr std::array<uint8_t, 256> path_signature_table =
    []() constexpr {
      // Value-initialization zeroes the table, so only non-zero entries are
      // written below.
      std::array<uint8_t, 256> result{};
      for (size_t i = 0; i < 256; i++) {
        if (i <= 0x20 || i == 0x22 || i == 0x23 || i == 0x3c || i == 0x3e ||
            i == 0x3f || i == 0x60 || i == 0x7b || i == 0x7d || i > 0x7e) {
          result[i] = 1;
        } else if (i == 0x25) {
          result[i] = 8;  // '%'
        } else if (i == 0x2e) {
          result[i] = 4;  // '.'
        } else if (i == 0x5c) {
          result[i] = 2;  // '\\'
        }
      }
      return result;
    }();
|
||||
|
||||
ada_really_inline constexpr uint8_t path_signature(
|
||||
std::string_view input) noexcept {
|
||||
|
@ -9912,56 +9929,36 @@ ada_really_inline bool has_tabs_or_newline(
|
|||
// U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
|
||||
// U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or
|
||||
// U+007C (|).
|
||||
constexpr static bool is_forbidden_host_code_point_table[] = {
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
static_assert(sizeof(is_forbidden_host_code_point_table) == 256);
|
||||
// Byte-indexed lookup table: an entry is 1 when the byte is one of NUL, TAB,
// LF, CR, space, '#', '/', ':', '<', '>', '?', '@', '[', '\\', ']', '^' or
// '|', and 0 otherwise.
constexpr static std::array<uint8_t, 256> is_forbidden_host_code_point_table =
    []() constexpr {
      // All entries start at 0; only the listed bytes are flagged.
      std::array<uint8_t, 256> result{};
      for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<',
                        '>', '?', '@', '[', '\\', ']', '^', '|'}) {
        result[c] = 1;
      }
      return result;
    }();
|
||||
|
||||
// Returns true when the byte `c` is flagged in
// is_forbidden_host_code_point_table. The char is converted to uint8_t first
// so that bytes above 0x7f index the table correctly even when char is signed.
ada_really_inline constexpr bool is_forbidden_host_code_point(
    const char c) noexcept {
  return is_forbidden_host_code_point_table[static_cast<uint8_t>(c)];
}
|
||||
|
||||
static_assert(unicode::is_forbidden_host_code_point('\0'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('\t'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('\n'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('\r'));
|
||||
static_assert(unicode::is_forbidden_host_code_point(' '));
|
||||
static_assert(unicode::is_forbidden_host_code_point('#'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('/'));
|
||||
static_assert(unicode::is_forbidden_host_code_point(':'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('?'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('@'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('['));
|
||||
static_assert(unicode::is_forbidden_host_code_point('?'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('<'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('>'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('\\'));
|
||||
static_assert(unicode::is_forbidden_host_code_point(']'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('^'));
|
||||
static_assert(unicode::is_forbidden_host_code_point('|'));
|
||||
|
||||
constexpr static uint8_t is_forbidden_domain_code_point_table[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
// Byte-indexed lookup table: an entry is 1 when the byte is
//   - one of NUL, TAB, LF, CR, space, '#', '/', ':', '<', '>', '?', '@', '[',
//     '\\', ']', '^', '|' or '%',
//   - any byte in 0x00..0x20, or
//   - any byte in 0x7f..0xff,
// and 0 otherwise.
constexpr static std::array<uint8_t, 256> is_forbidden_domain_code_point_table =
    []() constexpr {
      std::array<uint8_t, 256> result{};
      for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<',
                        '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) {
        result[c] = 1;
      }
      for (uint8_t c = 0; c <= 32; c++) {
        result[c] = 1;
      }
      // The upper bound is 256 (not 255) so that byte 0xff is flagged as well;
      // size_t is used for the counter because a uint8_t could never reach the
      // terminating value.
      for (size_t c = 127; c < 256; c++) {
        result[c] = 1;
      }
      return result;
    }();
|
||||
|
||||
static_assert(sizeof(is_forbidden_domain_code_point_table) == 256);
|
||||
|
||||
|
@ -9986,22 +9983,24 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point(
|
|||
return accumulator;
|
||||
}
|
||||
|
||||
constexpr static uint8_t is_forbidden_domain_code_point_table_or_upper[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
|
||||
static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256);
|
||||
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2);
|
||||
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2);
|
||||
// Byte-indexed lookup table with three possible values per entry:
//   1 -> the byte is one of NUL, TAB, LF, CR, space, '#', '/', ':', '<', '>',
//        '?', '@', '[', '\\', ']', '^', '|', '%', or lies in 0x00..0x20 or
//        0x7f..0xff,
//   2 -> the byte is an upper-case ASCII letter 'A'..'Z',
//   0 -> anything else.
constexpr static std::array<uint8_t, 256>
    is_forbidden_domain_code_point_table_or_upper = []() constexpr {
      std::array<uint8_t, 256> result{};
      for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<',
                        '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) {
        result[c] = 1;
      }
      for (uint8_t c = 'A'; c <= 'Z'; c++) {
        result[c] = 2;
      }
      for (uint8_t c = 0; c <= 32; c++) {
        result[c] = 1;
      }
      // The upper bound is 256 (not 255) so that byte 0xff is flagged as well;
      // size_t is used for the counter because a uint8_t could never reach the
      // terminating value.
      for (size_t c = 127; c < 256; c++) {
        result[c] = 1;
      }
      return result;
    }();
|
||||
|
||||
ada_really_inline constexpr uint8_t
|
||||
contains_forbidden_domain_code_point_or_upper(const char* input,
|
||||
|
@ -10025,41 +10024,22 @@ contains_forbidden_domain_code_point_or_upper(const char* input,
|
|||
return accumulator;
|
||||
}
|
||||
|
||||
static_assert(unicode::is_forbidden_domain_code_point('%'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('\x7f'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('\0'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('\t'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('\n'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('\r'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point(' '));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('#'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('/'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point(':'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('?'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('@'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('['));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('?'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('<'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('>'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('\\'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point(']'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('^'));
|
||||
static_assert(unicode::is_forbidden_domain_code_point('|'));
|
||||
|
||||
constexpr static bool is_alnum_plus_table[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
static_assert(sizeof(is_alnum_plus_table) == 256);
|
||||
// std::isalnum(c) || c == '+' || c == '-' || c == '.') is true for
|
||||
// Byte-indexed lookup table: an entry is true for ASCII digits, ASCII letters
// of either case, and the characters '+', '-' and '.'; false for every other
// byte.
constexpr static std::array<bool, 256> is_alnum_plus_table = []() constexpr {
  // All entries start out false.
  std::array<bool, 256> result{};
  for (size_t c = 0; c < 256; c++) {
    const bool is_digit = (c >= '0' && c <= '9');
    const bool is_lower = (c >= 'a' && c <= 'z');
    const bool is_upper = (c >= 'A' && c <= 'Z');
    const bool is_extra = (c == '+' || c == '-' || c == '.');
    if (is_digit || is_lower || is_upper || is_extra) {
      result[c] = true;
    }
  }
  return result;
}();
|
||||
|
||||
ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept {
|
||||
return is_alnum_plus_table[uint8_t(c)];
|
||||
|
@ -10067,13 +10047,6 @@ ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept {
|
|||
// following under most compilers: return
|
||||
// return (std::isalnum(c) || c == '+' || c == '-' || c == '.');
|
||||
}
|
||||
static_assert(unicode::is_alnum_plus('+'));
|
||||
static_assert(unicode::is_alnum_plus('-'));
|
||||
static_assert(unicode::is_alnum_plus('.'));
|
||||
static_assert(unicode::is_alnum_plus('0'));
|
||||
static_assert(unicode::is_alnum_plus('1'));
|
||||
static_assert(unicode::is_alnum_plus('a'));
|
||||
static_assert(unicode::is_alnum_plus('b'));
|
||||
|
||||
ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept {
|
||||
return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
|
||||
|
@ -10617,155 +10590,302 @@ ada_really_inline void resize(std::string_view& input, size_t pos) noexcept {
|
|||
input.remove_suffix(input.size() - pos);
|
||||
}
|
||||
|
||||
// Reverse the byte order.
|
||||
ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept {
|
||||
// performance: this often compiles to a single instruction (e.g., bswap)
|
||||
return ((((val)&0xff00000000000000ull) >> 56) |
|
||||
(((val)&0x00ff000000000000ull) >> 40) |
|
||||
(((val)&0x0000ff0000000000ull) >> 24) |
|
||||
(((val)&0x000000ff00000000ull) >> 8) |
|
||||
(((val)&0x00000000ff000000ull) << 8) |
|
||||
(((val)&0x0000000000ff0000ull) << 24) |
|
||||
(((val)&0x000000000000ff00ull) << 40) |
|
||||
(((val)&0x00000000000000ffull) << 56));
|
||||
}
|
||||
|
||||
ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept {
|
||||
// performance: under little-endian systems (most systems), this function
|
||||
// is free (just returns the input).
|
||||
#if ADA_IS_BIG_ENDIAN
|
||||
return swap_bytes(val);
|
||||
#else
|
||||
return val; // unchanged (trivial)
|
||||
#endif
|
||||
// computes the number of trailing zeroes
|
||||
// this is a private inline function only defined in this source file.
|
||||
// Returns the index of the least significant set bit of input_num, i.e. the
// number of trailing zero bits.
// NOTE(review): both the MSVC intrinsic and the GCC/Clang builtin leave the
// result unspecified for input_num == 0; callers are presumably expected to
// pass a non-zero mask -- confirm at call sites.
ada_really_inline int trailing_zeroes(uint32_t input_num) noexcept {
#ifdef ADA_REGULAR_VISUAL_STUDIO
  unsigned long ret;
  // Search the mask data from least significant bit (LSB)
  // to the most significant bit (MSB) for a set bit (1).
  _BitScanForward(&ret, input_num);
  return static_cast<int>(ret);
#else   // ADA_REGULAR_VISUAL_STUDIO
  // __builtin_ctz takes an unsigned int, which matches the 32-bit argument
  // without the widening that the `long` variant would imply.
  return __builtin_ctz(input_num);
#endif  // ADA_REGULAR_VISUAL_STUDIO
}
|
||||
|
||||
// starting at index location, this finds the next location of a character
|
||||
// :, /, \\, ? or [. If none is found, view.size() is returned.
|
||||
// For use within get_host_delimiter_location.
|
||||
#if ADA_NEON
|
||||
// The ada_make_uint8x16_t macro is necessary because Visual Studio does not
|
||||
// support direct initialization of uint8x16_t. See
|
||||
// https://developercommunity.visualstudio.com/t/error-C2078:-too-many-initializers-whe/402911?q=backend+neon
|
||||
#ifndef ada_make_uint8x16_t
|
||||
#define ada_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \
|
||||
x13, x14, x15, x16) \
|
||||
([=]() { \
|
||||
static uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \
|
||||
x9, x10, x11, x12, x13, x14, x15, x16}; \
|
||||
return vld1q_u8(array); \
|
||||
}())
|
||||
#endif
|
||||
|
||||
ada_really_inline size_t find_next_host_delimiter_special(
|
||||
std::string_view view, size_t location) noexcept {
|
||||
// performance: if you plan to call find_next_host_delimiter more than once,
|
||||
// you *really* want find_next_host_delimiter to be inlined, because
|
||||
// otherwise, the constants may get reloaded each time (bad).
|
||||
auto has_zero_byte = [](uint64_t v) {
|
||||
return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
|
||||
};
|
||||
auto index_of_first_set_byte = [](uint64_t v) {
|
||||
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
|
||||
};
|
||||
auto broadcast = [](uint8_t v) -> uint64_t {
|
||||
return 0x101010101010101ull * v;
|
||||
// first check for short strings in which case we do it naively.
|
||||
if (view.size() - location < 16) { // slow path
|
||||
for (size_t i = location; i < view.size(); i++) {
|
||||
if (view[i] == ':' || view[i] == '/' || view[i] == '\\' ||
|
||||
view[i] == '?' || view[i] == '[') {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return size_t(view.size());
|
||||
}
|
||||
auto to_bitmask = [](uint8x16_t input) -> uint16_t {
|
||||
uint8x16_t bit_mask =
|
||||
ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
|
||||
0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
|
||||
uint8x16_t minput = vandq_u8(input, bit_mask);
|
||||
uint8x16_t tmp = vpaddq_u8(minput, minput);
|
||||
tmp = vpaddq_u8(tmp, tmp);
|
||||
tmp = vpaddq_u8(tmp, tmp);
|
||||
return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
|
||||
};
|
||||
|
||||
// fast path for long strings (expected to be common)
|
||||
size_t i = location;
|
||||
uint64_t mask1 = broadcast(':');
|
||||
uint64_t mask2 = broadcast('/');
|
||||
uint64_t mask3 = broadcast('\\');
|
||||
uint64_t mask4 = broadcast('?');
|
||||
uint64_t mask5 = broadcast('[');
|
||||
// This loop will get autovectorized under many optimizing compilers,
|
||||
// so you get actually SIMD!
|
||||
for (; i + 7 < view.size(); i += 8) {
|
||||
uint64_t word{};
|
||||
// performance: the next memcpy translates into a single CPU instruction.
|
||||
memcpy(&word, view.data() + i, sizeof(word));
|
||||
// performance: on little-endian systems (most systems), this next line is
|
||||
// free.
|
||||
word = swap_bytes_if_big_endian(word);
|
||||
uint64_t xor1 = word ^ mask1;
|
||||
uint64_t xor2 = word ^ mask2;
|
||||
uint64_t xor3 = word ^ mask3;
|
||||
uint64_t xor4 = word ^ mask4;
|
||||
uint64_t xor5 = word ^ mask5;
|
||||
uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) |
|
||||
has_zero_byte(xor3) | has_zero_byte(xor4) |
|
||||
has_zero_byte(xor5);
|
||||
if (is_match) {
|
||||
return size_t(i + index_of_first_set_byte(is_match));
|
||||
uint8x16_t low_mask =
|
||||
ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x01, 0x04, 0x04, 0x00, 0x00, 0x03);
|
||||
uint8x16_t high_mask =
|
||||
ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
||||
uint8x16_t fmask = vmovq_n_u8(0xf);
|
||||
uint8x16_t zero{0};
|
||||
for (; i + 15 < view.size(); i += 16) {
|
||||
uint8x16_t word = vld1q_u8((const uint8_t*)view.data() + i);
|
||||
uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask));
|
||||
uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4));
|
||||
uint8x16_t classify = vandq_u8(lowpart, highpart);
|
||||
if (vmaxvq_u8(classify) != 0) {
|
||||
uint8x16_t is_zero = vceqq_u8(classify, zero);
|
||||
uint16_t is_non_zero = ~to_bitmask(is_zero);
|
||||
return i + trailing_zeroes(is_non_zero);
|
||||
}
|
||||
}
|
||||
|
||||
if (i < view.size()) {
|
||||
uint8x16_t word =
|
||||
vld1q_u8((const uint8_t*)view.data() + view.length() - 16);
|
||||
uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask));
|
||||
uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4));
|
||||
uint8x16_t classify = vandq_u8(lowpart, highpart);
|
||||
if (vmaxvq_u8(classify) != 0) {
|
||||
uint8x16_t is_zero = vceqq_u8(classify, zero);
|
||||
uint16_t is_non_zero = ~to_bitmask(is_zero);
|
||||
return view.length() - 16 + trailing_zeroes(is_non_zero);
|
||||
}
|
||||
}
|
||||
return size_t(view.size());
|
||||
}
|
||||
#elif ADA_SSE2
|
||||
ada_really_inline size_t find_next_host_delimiter_special(
|
||||
std::string_view view, size_t location) noexcept {
|
||||
// first check for short strings in which case we do it naively.
|
||||
if (view.size() - location < 16) { // slow path
|
||||
for (size_t i = location; i < view.size(); i++) {
|
||||
if (view[i] == ':' || view[i] == '/' || view[i] == '\\' ||
|
||||
view[i] == '?' || view[i] == '[') {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return size_t(view.size());
|
||||
}
|
||||
// fast path for long strings (expected to be common)
|
||||
size_t i = location;
|
||||
const __m128i mask1 = _mm_set1_epi8(':');
|
||||
const __m128i mask2 = _mm_set1_epi8('/');
|
||||
const __m128i mask3 = _mm_set1_epi8('\\');
|
||||
const __m128i mask4 = _mm_set1_epi8('?');
|
||||
const __m128i mask5 = _mm_set1_epi8('[');
|
||||
|
||||
for (; i + 15 < view.size(); i += 16) {
|
||||
__m128i word = _mm_loadu_si128((const __m128i*)(view.data() + i));
|
||||
__m128i m1 = _mm_cmpeq_epi8(word, mask1);
|
||||
__m128i m2 = _mm_cmpeq_epi8(word, mask2);
|
||||
__m128i m3 = _mm_cmpeq_epi8(word, mask3);
|
||||
__m128i m4 = _mm_cmpeq_epi8(word, mask4);
|
||||
__m128i m5 = _mm_cmpeq_epi8(word, mask5);
|
||||
__m128i m = _mm_or_si128(
|
||||
_mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5);
|
||||
int mask = _mm_movemask_epi8(m);
|
||||
if (mask != 0) {
|
||||
return i + trailing_zeroes(mask);
|
||||
}
|
||||
}
|
||||
if (i < view.size()) {
|
||||
uint64_t word{};
|
||||
// performance: the next memcpy translates into a function call, but
|
||||
// that is difficult to avoid. Might be a bit expensive.
|
||||
memcpy(&word, view.data() + i, view.size() - i);
|
||||
word = swap_bytes_if_big_endian(word);
|
||||
uint64_t xor1 = word ^ mask1;
|
||||
uint64_t xor2 = word ^ mask2;
|
||||
uint64_t xor3 = word ^ mask3;
|
||||
uint64_t xor4 = word ^ mask4;
|
||||
uint64_t xor5 = word ^ mask5;
|
||||
uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) |
|
||||
has_zero_byte(xor3) | has_zero_byte(xor4) |
|
||||
has_zero_byte(xor5);
|
||||
if (is_match) {
|
||||
return size_t(i + index_of_first_set_byte(is_match));
|
||||
__m128i word =
|
||||
_mm_loadu_si128((const __m128i*)(view.data() + view.length() - 16));
|
||||
__m128i m1 = _mm_cmpeq_epi8(word, mask1);
|
||||
__m128i m2 = _mm_cmpeq_epi8(word, mask2);
|
||||
__m128i m3 = _mm_cmpeq_epi8(word, mask3);
|
||||
__m128i m4 = _mm_cmpeq_epi8(word, mask4);
|
||||
__m128i m5 = _mm_cmpeq_epi8(word, mask5);
|
||||
__m128i m = _mm_or_si128(
|
||||
_mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5);
|
||||
int mask = _mm_movemask_epi8(m);
|
||||
if (mask != 0) {
|
||||
return view.length() - 16 + trailing_zeroes(mask);
|
||||
}
|
||||
}
|
||||
return view.size();
|
||||
return size_t(view.length());
|
||||
}
|
||||
#else
|
||||
// : / [ \\ ?
|
||||
// Byte-indexed lookup table: an entry is 1 for ':', '/', '[', '\\' and '?',
// and 0 for every other byte.
static constexpr std::array<uint8_t, 256> special_host_delimiters =
    []() constexpr {
      // All entries start at 0; only the five delimiter bytes are flagged.
      std::array<uint8_t, 256> result{};
      for (uint8_t delimiter : {':', '/', '[', '\\', '?'}) {
        result[delimiter] = 1;
      }
      return result;
    }();
|
||||
// credit: @the-moisrex recommended a table-based approach
|
||||
// Portable (non-SIMD) variant. Starting at index `location`, returns the index
// of the first byte of `view` that is flagged in special_host_delimiters
// (':', '/', '[', '\\' or '?'). Returns view.size() when no such byte exists.
//
// The scan indexes `view` directly instead of calling view.substr(location):
// substr throws std::out_of_range when location > view.size(), which would
// terminate the program from inside this noexcept function. With the index
// loop an out-of-range location simply yields view.size().
ada_really_inline size_t find_next_host_delimiter_special(
    std::string_view view, size_t location) noexcept {
  for (size_t i = location; i < view.size(); i++) {
    if (special_host_delimiters[static_cast<uint8_t>(view[i])]) {
      return i;
    }
  }
  return size_t(view.size());
}
|
||||
#endif
|
||||
|
||||
// starting at index location, this finds the next location of a character
|
||||
// :, /, ? or [. If none is found, view.size() is returned.
|
||||
// For use within get_host_delimiter_location.
|
||||
#if ADA_NEON
|
||||
ada_really_inline size_t find_next_host_delimiter(std::string_view view,
|
||||
size_t location) noexcept {
|
||||
// performance: if you plan to call find_next_host_delimiter more than once,
|
||||
// you *really* want find_next_host_delimiter to be inlined, because
|
||||
// otherwise, the constants may get reloaded each time (bad).
|
||||
auto has_zero_byte = [](uint64_t v) {
|
||||
return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
|
||||
};
|
||||
auto index_of_first_set_byte = [](uint64_t v) {
|
||||
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
|
||||
};
|
||||
auto broadcast = [](uint8_t v) -> uint64_t {
|
||||
return 0x101010101010101ull * v;
|
||||
// first check for short strings in which case we do it naively.
|
||||
if (view.size() - location < 16) { // slow path
|
||||
for (size_t i = location; i < view.size(); i++) {
|
||||
if (view[i] == ':' || view[i] == '/' || view[i] == '?' ||
|
||||
view[i] == '[') {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return size_t(view.size());
|
||||
}
|
||||
auto to_bitmask = [](uint8x16_t input) -> uint16_t {
|
||||
uint8x16_t bit_mask =
|
||||
ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
|
||||
0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
|
||||
uint8x16_t minput = vandq_u8(input, bit_mask);
|
||||
uint8x16_t tmp = vpaddq_u8(minput, minput);
|
||||
tmp = vpaddq_u8(tmp, tmp);
|
||||
tmp = vpaddq_u8(tmp, tmp);
|
||||
return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
|
||||
};
|
||||
|
||||
// fast path for long strings (expected to be common)
|
||||
size_t i = location;
|
||||
uint64_t mask1 = broadcast(':');
|
||||
uint64_t mask2 = broadcast('/');
|
||||
uint64_t mask4 = broadcast('?');
|
||||
uint64_t mask5 = broadcast('[');
|
||||
// This loop will get autovectorized under many optimizing compilers,
|
||||
// so you get actually SIMD!
|
||||
for (; i + 7 < view.size(); i += 8) {
|
||||
uint64_t word{};
|
||||
// performance: the next memcpy translates into a single CPU instruction.
|
||||
memcpy(&word, view.data() + i, sizeof(word));
|
||||
// performance: on little-endian systems (most systems), this next line is
|
||||
// free.
|
||||
word = swap_bytes_if_big_endian(word);
|
||||
uint64_t xor1 = word ^ mask1;
|
||||
uint64_t xor2 = word ^ mask2;
|
||||
uint64_t xor4 = word ^ mask4;
|
||||
uint64_t xor5 = word ^ mask5;
|
||||
uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) |
|
||||
has_zero_byte(xor4) | has_zero_byte(xor5);
|
||||
if (is_match) {
|
||||
return size_t(i + index_of_first_set_byte(is_match));
|
||||
uint8x16_t low_mask =
|
||||
ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x01, 0x04, 0x00, 0x00, 0x00, 0x03);
|
||||
uint8x16_t high_mask =
|
||||
ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
||||
uint8x16_t fmask = vmovq_n_u8(0xf);
|
||||
uint8x16_t zero{0};
|
||||
for (; i + 15 < view.size(); i += 16) {
|
||||
uint8x16_t word = vld1q_u8((const uint8_t*)view.data() + i);
|
||||
uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask));
|
||||
uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4));
|
||||
uint8x16_t classify = vandq_u8(lowpart, highpart);
|
||||
if (vmaxvq_u8(classify) != 0) {
|
||||
uint8x16_t is_zero = vceqq_u8(classify, zero);
|
||||
uint16_t is_non_zero = ~to_bitmask(is_zero);
|
||||
return i + trailing_zeroes(is_non_zero);
|
||||
}
|
||||
}
|
||||
|
||||
if (i < view.size()) {
|
||||
uint8x16_t word =
|
||||
vld1q_u8((const uint8_t*)view.data() + view.length() - 16);
|
||||
uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask));
|
||||
uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4));
|
||||
uint8x16_t classify = vandq_u8(lowpart, highpart);
|
||||
if (vmaxvq_u8(classify) != 0) {
|
||||
uint8x16_t is_zero = vceqq_u8(classify, zero);
|
||||
uint16_t is_non_zero = ~to_bitmask(is_zero);
|
||||
return view.length() - 16 + trailing_zeroes(is_non_zero);
|
||||
}
|
||||
}
|
||||
return size_t(view.size());
|
||||
}
|
||||
#elif ADA_SSE2
|
||||
/**
 * SSE2 implementation. Returns the index of the first host delimiter
 * (':', '/', '?' or '[') found in `view` at or after `location`, or
 * view.size() when there is none.
 *
 * The caller must ensure location <= view.size(); otherwise the unsigned
 * subtraction in the first check wraps around.
 */
ada_really_inline size_t find_next_host_delimiter(std::string_view view,
                                                  size_t location) noexcept {
  // first check for short strings in which case we do it naively.
  if (view.size() - location < 16) {  // slow path
    for (size_t i = location; i < view.size(); i++) {
      if (view[i] == ':' || view[i] == '/' || view[i] == '?' ||
          view[i] == '[') {
        return i;
      }
    }
    return size_t(view.size());
  }
  // fast path for long strings (expected to be common)
  size_t i = location;
  const __m128i mask1 = _mm_set1_epi8(':');
  const __m128i mask2 = _mm_set1_epi8('/');
  const __m128i mask4 = _mm_set1_epi8('?');
  const __m128i mask5 = _mm_set1_epi8('[');

  // Scan 16 bytes per iteration; each bit of `mask` corresponds to one byte
  // of the loaded word that compared equal to a delimiter.
  for (; i + 15 < view.size(); i += 16) {
    __m128i word = _mm_loadu_si128((const __m128i*)(view.data() + i));
    __m128i m1 = _mm_cmpeq_epi8(word, mask1);
    __m128i m2 = _mm_cmpeq_epi8(word, mask2);
    __m128i m4 = _mm_cmpeq_epi8(word, mask4);
    __m128i m5 = _mm_cmpeq_epi8(word, mask5);
    __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5));
    int mask = _mm_movemask_epi8(m);
    if (mask != 0) {
      return i + trailing_zeroes(mask);
    }
  }
  if (i < view.size()) {
    // Fewer than 16 bytes remain: reload the last 16 bytes of the view.
    // The load overlaps bytes the loop already scanned, but the loop returns
    // on any match, so those bytes hold no delimiter and the lowest set bit
    // still marks the first match. The slow path above returned whenever
    // fewer than 16 bytes follow `location`, so this load never starts
    // before `location`.
    __m128i word =
        _mm_loadu_si128((const __m128i*)(view.data() + view.length() - 16));
    __m128i m1 = _mm_cmpeq_epi8(word, mask1);
    __m128i m2 = _mm_cmpeq_epi8(word, mask2);
    __m128i m4 = _mm_cmpeq_epi8(word, mask4);
    __m128i m5 = _mm_cmpeq_epi8(word, mask5);
    __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5));
    int mask = _mm_movemask_epi8(m);
    if (mask != 0) {
      return view.length() - 16 + trailing_zeroes(mask);
    }
  }
  return size_t(view.length());
}
|
||||
#else
|
||||
// : / [ ?
|
||||
// Lookup table indexed by byte value: 1 for the host delimiters
// ':', '/', '?' and '[', 0 for every other byte. Built at compile time.
static constexpr std::array<uint8_t, 256> host_delimiters = []() constexpr {
  std::array<uint8_t, 256> result{};
  for (int i : {':', '/', '?', '['}) {
    result[i] = 1;
  }
  return result;
}();
|
||||
// credit: @the-moisrex recommended a table-based approach
|
||||
/**
 * Portable fallback. Returns the index (relative to the start of `view`) of
 * the first byte at or after `location` that is flagged in host_delimiters,
 * or view.size() when no such byte exists.
 *
 * `location` must not exceed view.size(): std::string_view::substr throws
 * std::out_of_range in that case, and this function is noexcept.
 */
ada_really_inline size_t find_next_host_delimiter(std::string_view view,
                                                  size_t location) noexcept {
  auto const str = view.substr(location);
  for (auto pos = str.begin(); pos != str.end(); ++pos) {
    // Cast to uint8_t so bytes >= 0x80 index the table with a value in
    // [0, 255] rather than a negative char.
    if (host_delimiters[(uint8_t)*pos]) {
      // Translate the offset within `str` back to an offset within `view`.
      return pos - str.begin() + location;
    }
  }
  return size_t(view.size());
}
|
||||
#endif
|
||||
|
||||
ada_really_inline std::pair<size_t, bool> get_host_delimiter_location(
|
||||
const bool is_special, std::string_view& view) noexcept {
|
||||
|
@ -11040,101 +11160,47 @@ ada_really_inline void strip_trailing_spaces_from_opaque_path(
|
|||
url.update_base_pathname(path);
|
||||
}
|
||||
|
||||
// @ / \\ ?
|
||||
// Lookup table indexed by byte value: 1 for '@', '/', '\\' and '?',
// 0 for every other byte. Built at compile time.
static constexpr std::array<uint8_t, 256> authority_delimiter_special =
    []() constexpr {
      std::array<uint8_t, 256> result{};
      for (int i : {'@', '/', '\\', '?'}) {
        result[i] = 1;
      }
      return result;
    }();
|
||||
// credit: @the-moisrex recommended a table-based approach
|
||||
/**
 * Returns the index of the first byte of `view` that is flagged in the
 * authority_delimiter_special table ('@', '/', '\\' or '?'), or view.size()
 * when the view contains none of them.
 */
ada_really_inline size_t
find_authority_delimiter_special(std::string_view view) noexcept {
  // performance note: we might be able to gain further performance
  // with SIMD intrinsics.
  for (auto pos = view.begin(); pos != view.end(); ++pos) {
    // Cast to uint8_t so bytes >= 0x80 index the table with a value in
    // [0, 255] rather than a negative char.
    if (authority_delimiter_special[(uint8_t)*pos]) {
      return pos - view.begin();
    }
  }
  return size_t(view.size());
}
|
||||
|
||||
// @ / ?
|
||||
// Lookup table indexed by byte value: 1 for '@', '/' and '?',
// 0 for every other byte. Built at compile time.
static constexpr std::array<uint8_t, 256> authority_delimiter = []() constexpr {
  std::array<uint8_t, 256> result{};
  for (int i : {'@', '/', '?'}) {
    result[i] = 1;
  }
  return result;
}();
|
||||
// credit: @the-moisrex recommended a table-based approach
|
||||
/**
 * Returns the index of the first byte of `view` that is flagged in the
 * authority_delimiter table ('@', '/' or '?'), or view.size() when the view
 * contains none of them.
 */
ada_really_inline size_t
find_authority_delimiter(std::string_view view) noexcept {
  // performance note: we might be able to gain further performance
  // with SIMD intrinsics.
  for (auto pos = view.begin(); pos != view.end(); ++pos) {
    // Cast to uint8_t so bytes >= 0x80 index the table with a value in
    // [0, 255] rather than a negative char.
    if (authority_delimiter[(uint8_t)*pos]) {
      return pos - view.begin();
    }
  }
  return size_t(view.size());
}
|
||||
|
||||
} // namespace ada::helpers
|
||||
|
@ -11143,6 +11209,7 @@ namespace ada {
|
|||
// Returns the result of ada::helpers::get_state for the given parser state.
ada_warn_unused std::string to_string(ada::state state) {
  return ada::helpers::get_state(state);
}
|
||||
#undef ada_make_uint8x16_t
|
||||
} // namespace ada
|
||||
/* end file src/helpers.cpp */
|
||||
/* begin file src/url.cpp */
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on 2023-10-22 19:50:50 -0400. Do not edit! */
|
||||
/* auto-generated on 2023-11-19 13:35:02 -0500. Do not edit! */
|
||||
/* begin file include/ada.h */
|
||||
/**
|
||||
* @file ada.h
|
||||
|
@ -1670,18 +1670,6 @@ template <class url_type>
|
|||
ada_really_inline void strip_trailing_spaces_from_opaque_path(
|
||||
url_type& url) noexcept;
|
||||
|
||||
/**
|
||||
* @private
|
||||
* Reverse the order of the bytes.
|
||||
*/
|
||||
ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept;
|
||||
|
||||
/**
|
||||
* @private
|
||||
* Reverse the order of the bytes but only if the system is big endian
|
||||
*/
|
||||
ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept;
|
||||
|
||||
/**
|
||||
* @private
|
||||
* Finds the delimiter of a view in authority state.
|
||||
|
@ -5032,7 +5020,9 @@ ada_really_inline bool begins_with(std::string_view view,
|
|||
std::string_view prefix);
|
||||
|
||||
/**
|
||||
* Returns true if an input is an ipv4 address.
|
||||
* Returns true if an input is an ipv4 address. It is assumed that the string
|
||||
* does not contain uppercase ASCII characters (the input should have been
|
||||
* lowercased before calling this function) and is not empty.
|
||||
*/
|
||||
ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept;
|
||||
|
||||
|
@ -7088,14 +7078,14 @@ url_search_params_entries_iter::next() {
|
|||
#ifndef ADA_ADA_VERSION_H
|
||||
#define ADA_ADA_VERSION_H
|
||||
|
||||
// Library version as a string literal.
#define ADA_VERSION "2.7.4"

namespace ada {

// Numeric components of the library version; they mirror ADA_VERSION above.
enum {
  ADA_VERSION_MAJOR = 2,
  ADA_VERSION_MINOR = 7,
  ADA_VERSION_REVISION = 4,
};

}  // namespace ada
|
||||
|
|
|
@ -9,7 +9,7 @@ All dependencies are located within the `deps` directory.
|
|||
This is a list of all the dependencies:
|
||||
|
||||
* [acorn 8.11.2][]
|
||||
* [ada 2.7.2][]
|
||||
* [ada 2.7.4][]
|
||||
* [base64 0.5.1][]
|
||||
* [brotli 1.0.9][]
|
||||
* [c-ares 1.20.1][]
|
||||
|
@ -151,7 +151,7 @@ The [acorn](https://github.com/acornjs/acorn) dependency is a JavaScript parser.
|
|||
[acorn-walk](https://github.com/acornjs/acorn/tree/master/acorn-walk) is
|
||||
an abstract syntax tree walker for the ESTree format.
|
||||
|
||||
### ada 2.7.2
|
||||
### ada 2.7.4
|
||||
|
||||
The [ada](https://github.com/ada-url/ada) dependency is a
|
||||
fast and spec-compliant URL parser written in C++.
|
||||
|
@ -325,7 +325,7 @@ it comes from the Chromium team's zlib fork which incorporated
|
|||
performance improvements not currently available in standard zlib.
|
||||
|
||||
[acorn 8.11.2]: #acorn-8112
|
||||
[ada 2.7.2]: #ada-272
|
||||
[ada 2.7.4]: #ada-274
|
||||
[base64 0.5.1]: #base64-051
|
||||
[brotli 1.0.9]: #brotli-109
|
||||
[c-ares 1.20.1]: #c-ares-1201
|
||||
|
|
Loading…
Reference in New Issue