vendor/golang.org/x/text: Vendor collate and language

Following the vndr docs [1]:

  $ go get -u github.com/LK4D4/vndr
  $ vndr golang.org/x/text
  $ git add -A vendor/golang.org/x/text

The targeted 'git add' was because we seem to have versioned some test
files (e.g. vendor/github.com/varlink/go/varlink/varlink_test.go in
8493dba2 (Initial varlink implementation, 2018-03-26, #627)).  I don't
know why; possibly an old vndr version?  But either way, I'm punting
that particular issue to a separate branch.

[1]: 1fc68ee0c8/README.md

Signed-off-by: W. Trevor King <wking@tremily.us>

Closes: #686
Approved by: mheon
This commit is contained in:
W. Trevor King 2018-05-10 14:26:55 -07:00 committed by Atomic Bot
parent 89430ffe65
commit c8208a845e
25 changed files with 83935 additions and 0 deletions

404
vendor/golang.org/x/text/collate/collate.go generated vendored Normal file
View File

@ -0,0 +1,404 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// TODO: remove hard-coded versions when we have implemented fractional weights.
// The current implementation is incompatible with later CLDR versions.
//go:generate go run maketables.go -cldr=23 -unicode=6.2.0
// Package collate contains types for comparing and sorting Unicode strings
// according to a given collation order. Package locale provides a high-level
// interface to collation. Users should typically use that package instead.
package collate // import "golang.org/x/text/collate"
import (
"bytes"
"strings"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
)
// Collator provides functionality for comparing strings for a given
// collation order.
type Collator struct {
// options is embedded, so its fields (ignore, caseLevel, t, ...) are
// accessed directly on the Collator.
options
// sorter holds the scratch state used by Sort and SortStrings.
sorter sorter
// _iter holds the two iterators used when comparing, one per operand.
// Access them through the iter method.
_iter [2]iter
}
// iter returns a pointer to the i-th of the collator's two embedded iterators.
func (c *Collator) iter(i int) *iter {
	// TODO: evaluate performance for making the second iterator optional.
	it := &c._iter[i]
	return it
}
// Supported returns the list of languages for which collating differs from its parent.
// The result is a fresh copy of the package-level list.
func Supported() []language.Tag {
	// TODO: use language.Coverage instead.
	out := make([]language.Tag, 0, len(tags))
	return append(out, tags...)
}
// tags lists the locales parsed from availableLocales; it is filled in by init.
var tags []language.Tag

// init parses the comma-separated availableLocales list into tags.
func init() {
	ids := strings.Split(availableLocales, ",")
	tags = make([]language.Tag, 0, len(ids))
	for _, id := range ids {
		tags = append(tags, language.Raw.MustParse(id))
	}
}
// New returns a new Collator initialized for the given locale.
// Settings carried by the tag t are applied first, then the options o.
func New(t language.Tag, o ...Option) *Collator {
	// Look up the table of the supported locale that t matches.
	table := getTable(locales[colltab.MatchLang(t, tags)])
	c := newCollator(table)

	c.setFromTag(t) // options from the user-supplied tag
	c.setOptions(o) // the user-supplied options
	c.init()
	return c
}
// NewFromTable returns a new Collator for the given Weighter, configured
// with the options o.
func NewFromTable(w colltab.Weighter, o ...Option) *Collator {
	coll := newCollator(w)
	coll.setOptions(o)
	coll.init()
	return coll
}
// init finalizes the collator once its options are set: it wraps the weighter
// for numeric ordering if requested and initializes both iterators.
func (c *Collator) init() {
	if c.numeric {
		c.t = colltab.NewNumericWeighter(c.t)
	}
	for k := range c._iter {
		c._iter[k].init(c)
	}
}
// Buffer holds keys generated by Key and KeyFromString.
type Buffer struct {
	buf [4096]byte // initial backing storage for key
	key []byte     // keys generated so far
}

// init points key at the embedded array the first time the buffer is used.
func (b *Buffer) init() {
	if b.key != nil {
		return
	}
	b.key = b.buf[:0]
}

// Reset clears the buffer from previous results generated by Key and
// KeyFromString. The underlying storage is kept for reuse.
func (b *Buffer) Reset() {
	b.key = b.key[:0]
}
// Compare returns an integer comparing the two byte slices.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
func (c *Collator) Compare(a, b []byte) int {
	// TODO: skip identical prefixes once we have a fast way to detect if a rune is
	// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
	c.iter(0).SetInput(a)
	c.iter(1).SetInput(b)
	res := c.compare()
	if res == 0 && !c.ignore[colltab.Identity] {
		// Equal at every collation level: break the tie bytewise.
		res = bytes.Compare(a, b)
	}
	return res
}
// CompareString returns an integer comparing the two strings.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
func (c *Collator) CompareString(a, b string) int {
	// TODO: skip identical prefixes once we have a fast way to detect if a rune is
	// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
	c.iter(0).SetInputString(a)
	c.iter(1).SetInputString(b)
	res := c.compare()
	if res != 0 || c.ignore[colltab.Identity] {
		return res
	}
	// Equal at every collation level: break the tie on the raw strings.
	switch {
	case a < b:
		return -1
	case a > b:
		return 1
	}
	return 0
}
// compareLevel compares a and b at a single collation level. f is called
// alternately on a and b to fetch their next weights; the first differing pair
// decides the result, and a pair of zero weights ends the comparison as equal.
func compareLevel(f func(i *iter) int, a, b *iter) int {
	a.pce, b.pce = 0, 0
	for {
		wa, wb := f(a), f(b)
		switch {
		case wa < wb:
			return -1
		case wa > wb:
			return 1
		case wa == 0:
			// Both sides returned 0 without a difference.
			return 0
		}
	}
}
// compare compares the inputs previously set on iterators 0 and 1, one
// collation level at a time, and returns the result of the first level that
// reports a difference (-1 or +1), or 0 if no enabled level differs.
func (c *Collator) compare() int {
ia, ib := c.iter(0), c.iter(1)
// Process primary level
if c.alternate != altShifted {
// TODO: implement script reordering
if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
return res
}
} else {
// TODO: handle shifted
// NOTE(review): as written, no primary comparison happens for altShifted.
}
if !c.ignore[colltab.Secondary] {
f := (*iter).nextSecondary
if c.backwards {
// Walk the secondary weights from the end of the element list.
f = (*iter).prevSecondary
}
if res := compareLevel(f, ia, ib); res != 0 {
return res
}
}
// TODO: special case handling (Danish?)
if !c.ignore[colltab.Tertiary] || c.caseLevel {
if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
return res
}
// The quaternary level is only reached from within the tertiary branch.
if !c.ignore[colltab.Quaternary] {
if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
return res
}
}
}
return 0
}
// Key returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will remain
// valid until the next call to buf.Reset().
func (c *Collator) Key(buf *Buffer, str []byte) []byte {
	// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
	buf.init()
	elems := c.getColElems(str)
	return c.key(buf, elems)
}
// KeyFromString returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will remain
// valid until the next call to buf.Reset().
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
buf.init()
return c.key(buf, c.getColElemsString(str))
}
// key applies the collator's alternate handling to the elements w (in place),
// appends the resulting sort key to buf and returns only the newly appended
// bytes.
func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
	processWeights(c.alternate, c.t.Top(), w)
	start := len(buf.key)
	c.keyFromElems(buf, w)
	return buf.key[start:]
}
// getColElems feeds str to iterator 0, advances it until Next reports false
// and returns the iterator's Elems slice.
func (c *Collator) getColElems(str []byte) []colltab.Elem {
	it := c.iter(0)
	it.SetInput(str)
	for it.Next() {
		// Keep advancing; only the final Elems slice is of interest.
	}
	return it.Elems
}
// getColElemsString is the string counterpart of getColElems: it feeds str to
// iterator 0, advances it until Next reports false and returns its Elems slice.
func (c *Collator) getColElemsString(str string) []colltab.Elem {
	it := c.iter(0)
	it.SetInputString(str)
	for it.Next() {
		// Keep advancing; only the final Elems slice is of interest.
	}
	return it.Elems
}
// iter extends colltab.Iter with its own element storage and a read position
// used by the per-level next*/prev* methods.
type iter struct {
// wa is the array that backs Elems after init.
wa [512]colltab.Elem
colltab.Iter
// pce is the index of the next element to examine; compareLevel resets it
// to 0 before each level.
pce int
}
// init resets Elems to an empty slice backed by the iterator's own array and
// binds the iterator to the collator's weighter.
func (i *iter) init(c *Collator) {
	i.Elems = i.wa[:0]
	i.Weighter = c.t
}
// nextPrimary returns the next non-zero primary weight, or 0 once Next reports
// that no more elements are available. It first scans the elements in
// [pce, N) and calls Next to obtain more when none of them has a primary
// weight.
func (i *iter) nextPrimary() int {
	for {
		for ; i.pce < i.N; i.pce++ {
			if v := i.Elems[i.pce].Primary(); v != 0 {
				// Step past the element whose weight is being returned.
				i.pce++
				return v
			}
		}
		if !i.Next() {
			return 0
		}
	}
	// No statement is needed here: a "for" without a condition or break is a
	// terminating statement, so anything after it would be unreachable.
}
// nextSecondary returns the next non-zero secondary weight at or after pce,
// advancing pce past it, or 0 when the remaining elements have none.
func (i *iter) nextSecondary() int {
	for i.pce < len(i.Elems) {
		w := i.Elems[i.pce].Secondary()
		i.pce++
		if w != 0 {
			return w
		}
	}
	return 0
}
// prevSecondary is like nextSecondary but walks Elems from the last element
// towards the first; pce counts how many elements have been consumed from
// the end.
func (i *iter) prevSecondary() int {
	for n := len(i.Elems); i.pce < n; {
		w := i.Elems[n-i.pce-1].Secondary()
		i.pce++
		if w != 0 {
			return w
		}
	}
	return 0
}
// nextTertiary returns the next non-zero tertiary weight at or after pce,
// advancing pce past it, or 0 when the remaining elements have none.
func (i *iter) nextTertiary() int {
	for i.pce < len(i.Elems) {
		w := i.Elems[i.pce].Tertiary()
		i.pce++
		if w != 0 {
			return int(w)
		}
	}
	return 0
}
// nextQuaternary returns the next non-zero quaternary weight at or after pce,
// advancing pce past it, or 0 when the remaining elements have none.
func (i *iter) nextQuaternary() int {
	for i.pce < len(i.Elems) {
		w := i.Elems[i.pce].Quaternary()
		i.pce++
		if w != 0 {
			return w
		}
	}
	return 0
}
// appendPrimary appends the weight p to key using a variable-length encoding
// that supports up to 23 bits: values up to 0x7FFF take two big-endian bytes,
// larger values take three bytes with the top bit of the first byte set.
func appendPrimary(key []byte, p int) []byte {
	mid, lo := uint8(p>>8), uint8(p)
	if p > 0x7FFF {
		return append(key, uint8(p>>16)|0x80, mid, lo)
	}
	return append(key, mid, lo)
}
// keyFromElems converts the weights ws to a compact sequence of bytes.
// The result will be appended to the byte buffer in buf.
// Weights are written one level after another (primary, secondary, tertiary,
// quaternary); each level after the primary is preceded by zero bytes that
// act as a separator, and levels disabled in c.ignore are left out.
func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
// Primary level: only non-zero weights are written.
for _, v := range ws {
if w := v.Primary(); w > 0 {
buf.key = appendPrimary(buf.key, w)
}
}
if !c.ignore[colltab.Secondary] {
buf.key = append(buf.key, 0, 0)
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
if !c.backwards {
for _, v := range ws {
if w := v.Secondary(); w > 0 {
buf.key = append(buf.key, uint8(w>>8), uint8(w))
}
}
} else {
// Backwards secondary ordering: emit the weights last element first.
for i := len(ws) - 1; i >= 0; i-- {
if w := ws[i].Secondary(); w > 0 {
buf.key = append(buf.key, uint8(w>>8), uint8(w))
}
}
}
} else if c.caseLevel {
// Secondary weights are skipped, but the separator is still written.
buf.key = append(buf.key, 0, 0)
}
if !c.ignore[colltab.Tertiary] || c.caseLevel {
buf.key = append(buf.key, 0, 0)
for _, v := range ws {
if w := v.Tertiary(); w > 0 {
buf.key = append(buf.key, uint8(w))
}
}
// Derive the quaternary weights from the options and other levels.
// Note that we represent MaxQuaternary as 0xFF. The first byte of the
// representation of a primary weight is always smaller than 0xFF,
// so using this single byte value will compare correctly.
if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted {
if c.alternate == altShiftTrimmed {
// lastNonFFFF is the key length right after the last explicit
// (non-maximal) quaternary weight. It starts before the separator,
// so a level with only 0xFF entries is dropped entirely.
lastNonFFFF := len(buf.key)
buf.key = append(buf.key, 0)
for _, v := range ws {
if w := v.Quaternary(); w == colltab.MaxQuaternary {
buf.key = append(buf.key, 0xFF)
} else if w > 0 {
buf.key = appendPrimary(buf.key, w)
lastNonFFFF = len(buf.key)
}
}
// Trim the trailing run of 0xFF bytes.
buf.key = buf.key[:lastNonFFFF]
} else {
buf.key = append(buf.key, 0)
for _, v := range ws {
if w := v.Quaternary(); w == colltab.MaxQuaternary {
buf.key = append(buf.key, 0xFF)
} else if w > 0 {
buf.key = appendPrimary(buf.key, w)
}
}
}
}
}
}
// processWeights rewrites the elements of wa in place according to the
// alternate handling vw. top is the largest primary weight that is treated as
// variable.
//
// For altShifted and altShiftTrimmed, an element with a non-zero primary
// weight <= top is replaced by a quaternary element carrying that weight, and
// elements with a zero primary weight that follow it are replaced by
// colltab.Ignore. For altBlanked, both kinds of elements are replaced by
// colltab.Ignore. Any other value of vw leaves wa untouched.
func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) {
// ignore is true while the preceding element was variable (or was ignored
// because it followed one).
ignore := false
vtop := int(top)
switch vw {
case altShifted, altShiftTrimmed:
for i := range wa {
if p := wa[i].Primary(); p <= vtop && p != 0 {
wa[i] = colltab.MakeQuaternary(p)
ignore = true
} else if p == 0 {
if ignore {
wa[i] = colltab.Ignore
}
} else {
ignore = false
}
}
case altBlanked:
for i := range wa {
if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
wa[i] = colltab.Ignore
ignore = true
} else {
ignore = false
}
}
}
}

32
vendor/golang.org/x/text/collate/index.go generated vendored Normal file
View File

@ -0,0 +1,32 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import "golang.org/x/text/internal/colltab"
// blockSize is the factor by which the offsets in a tableIndex are multiplied
// to obtain positions in the main lookup and value tables.
const blockSize = 64

// getTable returns the collation table for the locale described by t. All
// locales share the main tables; only the starting offsets of the trie's
// Index0 and Values0 differ per locale.
func getTable(t tableIndex) *colltab.Table {
	trie := colltab.Trie{
		Index0:  mainLookup[:][blockSize*t.lookupOffset:],
		Values0: mainValues[:][blockSize*t.valuesOffset:],
		Index:   mainLookup[:],
		Values:  mainValues[:],
	}
	tbl := &colltab.Table{
		Index:          trie,
		ExpandElem:     mainExpandElem[:],
		ContractTries:  colltab.ContractTrieSet(mainCTEntries[:]),
		ContractElem:   mainContractElem[:],
		MaxContractLen: 18,
		VariableTop:    varTop,
	}
	return tbl
}
// tableIndex holds information for constructing a table
// for a certain locale based on the main table.
type tableIndex struct {
// lookupOffset is the locale's starting block in mainLookup (see getTable).
lookupOffset uint32
// valuesOffset is the locale's starting block in mainValues (see getTable).
valuesOffset uint32
}

239
vendor/golang.org/x/text/collate/option.go generated vendored Normal file
View File

@ -0,0 +1,239 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"sort"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
"golang.org/x/text/unicode/norm"
)
// newCollator creates a new collator with default options configured: the
// quaternary and identity levels are ignored, normalization is NFD and the
// variable top is taken from the weighter t.
func newCollator(t colltab.Weighter) *Collator {
	var ignore [colltab.NumLevels]bool
	ignore[colltab.Quaternary] = true
	ignore[colltab.Identity] = true

	c := new(Collator)
	c.options = options{ignore: ignore, f: norm.NFD, t: t}

	// TODO: store vt in tags or remove.
	c.variableTop = t.Top()
	return c
}
// An Option is used to change the behavior of a Collator. Options override the
// settings passed through the locale identifier.
type Option struct {
// priority determines the order in which options are applied: setOptions
// sorts by ascending priority before calling f.
priority int
// f applies the option to the given settings.
f func(o *options)
}
// prioritizedOptions implements sort.Interface, ordering options by
// ascending priority.
type prioritizedOptions []Option

// Len returns the number of options.
func (p prioritizedOptions) Len() int { return len(p) }

// Swap exchanges the options at positions i and j.
func (p prioritizedOptions) Swap(i, j int) {
	tmp := p[i]
	p[i] = p[j]
	p[j] = tmp
}

// Less reports whether option i has a lower priority than option j.
func (p prioritizedOptions) Less(i, j int) bool {
	return p[j].priority > p[i].priority
}
// options holds the settings of a Collator. It is embedded in Collator.
type options struct {
// ignore specifies which levels to ignore.
ignore [colltab.NumLevels]bool
// caseLevel is true if there is an additional level of case matching
// between the secondary and tertiary levels.
caseLevel bool
// backwards specifies the order of sorting at the secondary level.
// This option exists predominantly to support reverse sorting of accents in French.
backwards bool
// numeric specifies whether any sequence of decimal digits (category is Nd)
// is sorted at a primary level with its numeric value.
// For example, "A-21" < "A-123".
// This option is set by wrapping the main Weighter with NewNumericWeighter.
numeric bool
// alternate specifies an alternative handling of variables.
alternate alternateHandling
// variableTop is the largest primary value that is considered to be
// variable.
variableTop uint32
// t is the Weighter that supplies the collation elements.
t colltab.Weighter
// f is the normalization form; newCollator sets it to norm.NFD.
f norm.Form
}
// setOptions applies opts to o in order of ascending priority. Options with
// equal priority are applied in the order in which they were passed.
//
// The options are sorted on a private copy: opts is typically the caller's
// own variadic slice (New(tag, opts...)), and reordering it in place would be
// a visible side effect and a data race when one slice is shared by
// concurrent calls.
func (o *options) setOptions(opts []Option) {
	sorted := make(prioritizedOptions, len(opts))
	copy(sorted, opts)
	// A stable sort keeps the caller's order among equal priorities.
	sort.Stable(sorted)
	for _, x := range sorted {
		x.f(o)
	}
}
// OptionsFromTag extracts the BCP47 collation options from the tag and
// configures a collator accordingly. These options are set before any other
// option.
func OptionsFromTag(t language.Tag) Option {
	apply := func(o *options) { o.setFromTag(t) }
	// Priority 0 sorts ahead of the predefined options.
	return Option{priority: 0, f: apply}
}
// setFromTag updates o from the collation settings found in the tag t.
// It reads the keys "kc" (case level), "kb" (backwards), "kn" (numeric),
// "ks" (strength) and "ka" (alternate handling). Settings whose key is absent
// or has an unrecognized value are left unchanged.
func (o *options) setFromTag(t language.Tag) {
o.caseLevel = ldmlBool(t, o.caseLevel, "kc")
o.backwards = ldmlBool(t, o.backwards, "kb")
o.numeric = ldmlBool(t, o.numeric, "kn")
// Extract settings from the BCP47 u extension.
switch t.TypeForKey("ks") { // strength
case "level1":
o.ignore[colltab.Secondary] = true
o.ignore[colltab.Tertiary] = true
case "level2":
o.ignore[colltab.Tertiary] = true
case "level3", "":
// The default.
case "level4":
o.ignore[colltab.Quaternary] = false
case "identic":
o.ignore[colltab.Quaternary] = false
o.ignore[colltab.Identity] = false
}
switch t.TypeForKey("ka") {
case "shifted":
o.alternate = altShifted
// The following two types are not official BCP47, but we support them to
// give access to this otherwise hidden functionality. The name blanked is
// derived from the LDML name blanked and posix reflects the main use of
// the shift-trimmed option.
case "blanked":
o.alternate = altBlanked
case "posix":
o.alternate = altShiftTrimmed
}
// TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt").
// Not used:
// - normalization ("kk", not necessary for this implementation)
// - hiraganaQuaternary ("kh", obsolete)
}
// ldmlBool returns the boolean value of the given key in t: true for "true",
// false for "false", and old when the key is absent or has any other value.
func ldmlBool(t language.Tag, old bool, key string) bool {
	v := t.TypeForKey(key)
	if v == "true" {
		return true
	}
	if v == "false" {
		return false
	}
	return old
}
// Predefined options. Each exported Option is paired with an unexported
// value holding its priority and setter function; setOptions applies options
// in order of ascending priority.
var (
// IgnoreCase sets case-insensitive comparison.
IgnoreCase Option = ignoreCase
ignoreCase = Option{3, ignoreCaseF}
// IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "ö").
IgnoreDiacritics Option = ignoreDiacritics
ignoreDiacritics = Option{3, ignoreDiacriticsF}
// IgnoreWidth causes full-width characters to match their half-width
// equivalents.
IgnoreWidth Option = ignoreWidth
ignoreWidth = Option{2, ignoreWidthF}
// Loose sets the collator to ignore diacritics, case and width.
Loose Option = loose
loose = Option{4, looseF}
// Force ordering if strings are equivalent but not equal.
Force Option = force
force = Option{5, forceF}
// Numeric specifies that numbers should sort numerically ("2" < "12").
Numeric Option = numeric
numeric = Option{5, numericF}
)
// ignoreWidthF implements IgnoreWidth: it ignores the tertiary level and
// enables the case level.
func ignoreWidthF(o *options) {
o.ignore[colltab.Tertiary] = true
o.caseLevel = true
}
// ignoreDiacriticsF implements IgnoreDiacritics: it ignores the secondary level.
func ignoreDiacriticsF(o *options) {
o.ignore[colltab.Secondary] = true
}
// ignoreCaseF implements IgnoreCase: it ignores the tertiary level and
// disables the case level.
func ignoreCaseF(o *options) {
o.ignore[colltab.Tertiary] = true
o.caseLevel = false
}
// looseF implements Loose by applying the width, diacritics and case setters
// in turn. The order matters: ignoreCaseF runs last, so the case level that
// ignoreWidthF enabled ends up disabled.
func looseF(o *options) {
ignoreWidthF(o)
ignoreDiacriticsF(o)
ignoreCaseF(o)
}
// forceF implements Force: it stops ignoring the identity level, so strings
// that are equal at every other level are ordered by their raw contents.
func forceF(o *options) {
o.ignore[colltab.Identity] = false
}
// numericF implements Numeric: it turns on numeric ordering of digit sequences.
func numericF(o *options) { o.numeric = true }
// Reorder overrides the pre-defined ordering of scripts and character sets.
//
// Reorder is not implemented yet: calling it always panics.
func Reorder(s ...string) Option {
// TODO: need fractional weights to implement this.
panic("TODO: implement")
}
// TODO: consider making these public again. These options cannot be fully
// specified in BCP47, so an API interface seems warranted. Still a higher-level
// interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed)
// alternateHandling identifies the various ways in which variables are handled.
// A rune with a primary weight lower than the variable top is considered a
// variable.
// See http://www.unicode.org/reports/tr10/#Variable_Weighting for details.
type alternateHandling int
// The order of these constants is significant: keyFromElems tests
// "alternate >= altShifted" to select the modes that produce a quaternary level.
const (
// altNonIgnorable turns off special handling of variables.
altNonIgnorable alternateHandling = iota
// altBlanked sets variables and all subsequent primary ignorables to be
// ignorable at all levels. This is identical to removing all variables
// and subsequent primary ignorables from the input.
altBlanked
// altShifted sets variables to be ignorable for levels one through three and
// adds a fourth level based on the values of the ignored levels.
altShifted
// altShiftTrimmed is a slight variant of altShifted that is used to
// emulate POSIX.
altShiftTrimmed
)

81
vendor/golang.org/x/text/collate/sort.go generated vendored Normal file
View File

@ -0,0 +1,81 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"bytes"
"sort"
)
// Size limits for sorting.
// NOTE(review): neither constant is referenced in the part of the file shown
// here — confirm whether they are still used.
const (
maxSortBuffer = 40960
maxSortEntries = 4096
)
// swapper is the part of a sortable collection that the sorter needs: the
// ability to swap two of its elements.
type swapper interface {
Swap(i, j int)
}
// sorter sorts a collection by precomputed collation keys. It implements
// sort.Interface over keys and mirrors every swap on src.
type sorter struct {
// buf stores the bytes of the generated keys.
buf *Buffer
// keys holds one collation key per element of src.
keys [][]byte
// src is the collection being sorted.
src swapper
}
// init prepares the sorter for sorting n elements: it allocates the key
// buffer on first use, discards the keys of any previous sort and sizes keys
// to hold n entries.
func (s *sorter) init(n int) {
	if s.buf == nil {
		s.buf = &Buffer{}
		s.buf.init()
	}
	// Key and KeyFromString append to the buffer. Without a reset, every sort
	// performed with the same Collator would add its keys to those of all
	// earlier sorts and the buffer would grow without bound. The old keys are
	// no longer needed: the callers overwrite all n entries of s.keys.
	s.buf.Reset()
	if cap(s.keys) < n {
		s.keys = make([][]byte, n)
	}
	s.keys = s.keys[0:n]
}
// sort records src as the collection to permute and sorts the keys with
// sort.Sort; each swap of two keys is mirrored on src (see Swap).
func (s *sorter) sort(src swapper) {
s.src = src
sort.Sort(s)
}
// Len returns the number of keys to sort.
func (s sorter) Len() int { return len(s.keys) }

// Less reports whether key i sorts before key j in bytewise order.
func (s sorter) Less(i, j int) bool {
	return bytes.Compare(s.keys[i], s.keys[j]) < 0
}

// Swap exchanges keys i and j and applies the same swap to the source
// collection so that both stay aligned.
func (s sorter) Swap(i, j int) {
	ki, kj := s.keys[i], s.keys[j]
	s.keys[i], s.keys[j] = kj, ki
	s.src.Swap(i, j)
}
// A Lister can be sorted by Collator's Sort method.
type Lister interface {
// Len returns the number of elements.
Len() int
// Swap exchanges the elements at indexes i and j.
Swap(i, j int)
// Bytes returns the bytes of the text at index i.
Bytes(i int) []byte
}
// Sort uses sort.Sort to sort the strings represented by x using the rules of c.
// A collation key is computed once per element before sorting.
func (c *Collator) Sort(x Lister) {
	s := &c.sorter
	s.init(x.Len())
	for i := range s.keys {
		s.keys[i] = c.Key(s.buf, x.Bytes(i))
	}
	s.sort(x)
}
// SortStrings uses sort.Sort to sort the strings in x using the rules of c.
// A collation key is computed once per string before sorting.
func (c *Collator) SortStrings(x []string) {
	s := &c.sorter
	s.init(len(x))
	for i := 0; i < len(x); i++ {
		s.keys[i] = c.KeyFromString(s.buf, x[i])
	}
	s.sort(sort.StringSlice(x))
}

73789
vendor/golang.org/x/text/collate/tables.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

371
vendor/golang.org/x/text/internal/colltab/collelem.go generated vendored Normal file
View File

@ -0,0 +1,371 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"fmt"
"unicode"
)
// Level identifies the collation comparison level.
// The primary level corresponds to the basic sorting of text.
// The secondary level corresponds to accents and related linguistic elements.
// The tertiary level corresponds to casing and related concepts.
// The quaternary level is derived from the other levels by the
// various algorithms for handling variable elements.
type Level int
const (
Primary Level = iota
Secondary
Tertiary
Quaternary
Identity
// NumLevels is the number of levels defined above. It is not a level itself.
NumLevels
)
// Default and maximum weight values.
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
maxTertiary = 0x1F
// MaxQuaternary is the value Elem.Quaternary returns for elements that
// carry no explicit quaternary value and are not Ignore.
MaxQuaternary = 0x1FFFFF // 21 bits.
)
// Elem is a representation of a collation element. This API provides ways to encode
// and decode Elems. Implementations of collation tables may use values greater
// or equal to PrivateUse for their own purposes. However, these should never be
// returned by AppendNext.
type Elem uint32
// Boundaries of the value ranges that ctype uses to classify an Elem.
const (
maxCE Elem = 0xAFFFFFFF
PrivateUse = minContract
minContract = 0xC0000000
maxContract = 0xDFFFFFFF
minExpand = 0xE0000000
maxExpand = 0xEFFFFFFF
minDecomp = 0xF0000000
)
// ceType is the kind of an Elem as reported by Elem.ctype.
type ceType int
const (
ceNormal ceType = iota // ceNormal includes implicits (ce == 0)
ceContractionIndex // rune can be a start of a contraction
ceExpansionIndex // rune expands into a sequence of collation elements
ceDecompose // rune expands using NFKC decomposition
)
// ctype classifies ce by the value range it falls in: normal collation
// elements up to maxCE, contraction indexes up to maxContract, expansion
// indexes up to maxExpand, and decompositions above that.
func (ce Elem) ctype() ceType {
	switch {
	case ce <= maxCE:
		return ceNormal
	case ce <= maxContract:
		return ceContractionIndex
	case ce <= maxExpand:
		return ceExpansionIndex
	}
	// Everything above maxExpand. Every path returns, so no trailing panic or
	// sentinel return is needed (it would be unreachable).
	return ceDecompose
}
// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* offset of secondary from default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiary collation value
// - c* is the canonical combining class
// - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
// - q* quaternary value
const (
ceTypeMask = 0xC0000000
ceTypeMaskExt = 0xE0000000
ceIgnoreMask = 0xF00FFFFF
ceType1 = 0x40000000
ceType2 = 0x00000000
ceType3or4 = 0x80000000
ceType4 = 0xA0000000
ceTypeQ = 0xC0000000
Ignore = ceType4
firstNonPrimary = 0x80000000
lastSpecialPrimary = 0xA0000000
secondaryMask = 0x80000000
hasTertiaryMask = 0x40000000
primaryValueMask = 0x3FFFFE00
maxPrimaryBits = 21
compactPrimaryBits = 16
maxSecondaryBits = 12
maxTertiaryBits = 8
maxCCCBits = 8
maxSecondaryCompactBits = 8
maxSecondaryDiffBits = 4
maxTertiaryCompactBits = 5
primaryShift = 9
compactSecondaryShift = 5
minCompactSecondary = defaultSecondary - 4
)
// makeImplicitCE returns an element of the 01... form that carries the given
// primary weight together with the default secondary weight.
func makeImplicitCE(primary int) Elem {
	ce := Elem(primary << primaryShift)
	return ce | ceType1 | defaultSecondary
}
// MakeElem returns an Elem for the given values. It will return an error
// if the given combination of values is invalid.
//
// The encoding used depends on the values: a zero primary weight yields the
// 1010... form; a non-zero primary with a non-zero ccc yields the 100... form;
// a non-zero primary with the default tertiary weight yields the 01... form;
// any other non-zero primary yields the 00... form.
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
// General range checks that apply to every encoding.
if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
}
if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
}
if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
}
ce := Elem(0)
if primary != 0 {
if ccc != 0 {
// 100ttttt cccccccc pppppppp pppppppp
if primary >= 1<<compactPrimaryBits {
return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
}
if secondary != defaultSecondary {
return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
}
ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
ce |= Elem(ccc) << compactPrimaryBits
ce |= Elem(primary)
ce |= ceType3or4
} else if tertiary == defaultTertiary {
// 01pppppp pppppppp ppppppp0 ssssssss
if secondary >= 1<<maxSecondaryCompactBits {
return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
}
ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
ce |= ceType1
} else {
// 00pppppp pppppppp ppppppps sssttttt
// d is the secondary weight stored as an offset from
// defaultSecondary - maxSecondaryDiffBits.
d := secondary - defaultSecondary + maxSecondaryDiffBits
if d >= 1<<maxSecondaryDiffBits || d < 0 {
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
}
if tertiary >= 1<<maxTertiaryCompactBits {
return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
}
ce = Elem(primary<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
}
} else {
// 1010cccc ccccssss ssssssss tttttttt
ce = Elem(secondary<<maxTertiaryBits + tertiary)
ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
ce |= ceType4
}
return ce, nil
}
// MakeQuaternary returns an Elem with the given quaternary value.
func MakeQuaternary(v int) Elem {
	q := Elem(v << primaryShift)
	return q | ceTypeQ
}
// Mask sets weights for any level smaller than l to 0.
// The resulting Elem can be used to test for equality with
// other Elems to which the same mask has been applied.
//
// NOTE(review): the current body does not implement this; it ignores l and
// always returns 0.
func (ce Elem) Mask(l Level) uint32 {
return 0
}
// CCC returns the canonical combining class associated with the underlying character,
// if applicable, or 0 otherwise.
func (ce Elem) CCC() uint8 {
	if ce&ceType3or4 == 0 {
		return 0
	}
	// The class starts at bit 16 in the 100... form and at bit 20 otherwise.
	shift := uint(20)
	if ce&ceType4 == ceType3or4 {
		shift = 16
	}
	return uint8(ce >> shift)
}
// Primary returns the primary collation weight for ce.
func (ce Elem) Primary() int {
	switch {
	case ce < firstNonPrimary:
		// 00... and 01... forms: the weight sits above primaryShift.
		return int(ce&primaryValueMask) >> primaryShift
	case ce > lastSpecialPrimary:
		return 0
	}
	// Remaining forms keep the weight in the low 16 bits.
	return int(uint16(ce))
}
// Secondary returns the secondary collation weight for ce.
// How the weight is stored depends on the top two bits of ce.
func (ce Elem) Secondary() int {
switch ce & ceTypeMask {
case ceType1:
// 01...: the weight is the low byte.
return int(uint8(ce))
case ceType2:
// 00...: a 4-bit offset from minCompactSecondary.
return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
case ceType3or4:
if ce < ceType4 {
// 100...: no secondary weight is stored; the default applies.
return defaultSecondary
}
// 1010...: 12 bits above the tertiary byte.
return int(ce>>8) & 0xFFF
case ceTypeQ:
return 0
}
panic("should not reach here")
}
// Tertiary returns the tertiary collation weight for ce.
// How the weight is stored depends on the top bits of ce.
func (ce Elem) Tertiary() uint8 {
if ce&hasTertiaryMask == 0 {
if ce&ceType3or4 == 0 {
// 00...: the weight is the low 5 bits.
return uint8(ce & 0x1F)
}
if ce&ceType4 == ceType4 {
// 101...: the weight is the low byte.
return uint8(ce)
}
// 100...: the weight is in bits 24-28.
// NOTE(review): the "type 2" label below does not match the ceType2
// constant (0x00000000) — confirm the intended numbering.
return uint8(ce>>24) & 0x1F // type 2
} else if ce&ceTypeMask == ceType1 {
// 01...: no tertiary weight is stored; the default applies.
return defaultTertiary
}
// ce is a quaternary value.
return 0
}
// updateTertiary returns a copy of ce with its tertiary weight replaced by t.
// An element of the 01... form has no room for a tertiary weight and is first
// re-encoded in the 00... form.
// NOTE(review): the type numbers in the inline comments do not obviously
// match the ceTypeN constant names — confirm.
func (ce Elem) updateTertiary(t uint8) Elem {
if ce&ceTypeMask == ceType1 {
// convert to type 4
nce := ce & primaryValueMask
// Store the secondary weight as an offset from minCompactSecondary.
nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
ce = nce
} else if ce&ceTypeMaskExt == ceType3or4 {
// 100...: the tertiary weight lives in bits 24-28.
ce &= ^Elem(maxTertiary << 24)
return ce | (Elem(t) << 24)
} else {
// type 2 or 4
ce &= ^Elem(maxTertiary)
}
return ce | Elem(t)
}
// Quaternary returns the quaternary value if explicitly specified,
// 0 if ce == Ignore, or MaxQuaternary otherwise.
// Quaternary values are used only for shifted variants.
func (ce Elem) Quaternary() int {
	switch {
	case ce&ceTypeMask == ceTypeQ:
		return int(ce&primaryValueMask) >> primaryShift
	case ce&ceIgnoreMask == Ignore:
		return 0
	}
	return MaxQuaternary
}
// Weight returns the collation weight for the given level.
// Levels without a weight of their own yield 0 (ignore).
func (ce Elem) Weight(l Level) int {
	if l == Primary {
		return ce.Primary()
	}
	if l == Secondary {
		return ce.Secondary()
	}
	if l == Tertiary {
		return int(ce.Tertiary())
	}
	if l == Quaternary {
		return ce.Quaternary()
	}
	return 0 // return 0 (ignore) for undefined levels.
}
// For contractions, collation elements are of the form
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
// - n* is the size of the first node in the contraction trie.
// - i* is the index of the first node in the contraction trie.
// - b* is the offset into the contraction collation element table.
// See contract.go for details on the contraction trie.
//
// The constants below are the widths, in bits, of the n, i and b fields.
const (
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
// splitContractIndex unpacks a contraction element into the index of the
// first trie node, the size n of that node and the offset into the
// contraction element table. The fields are stored from the low bits up in
// the order n, index, offset.
func splitContractIndex(ce Elem) (index, n, offset int) {
	const (
		nMask      = 1<<maxNBits - 1
		indexMask  = 1<<maxTrieIndexBits - 1
		offsetMask = 1<<maxContractOffsetBits - 1
	)
	n = int(ce & nMask)
	index = int((ce >> maxNBits) & indexMask)
	offset = int((ce >> (maxNBits + maxTrieIndexBits)) & offsetMask)
	return index, n, offset
}
// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const maxExpandIndexBits = 16

// splitExpandIndex returns the index stored in the low maxExpandIndexBits
// bits of an expansion element.
func splitExpandIndex(ce Elem) (index int) {
	index = int(ce & (1<<maxExpandIndexBits - 1))
	return index
}
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
// - v* is the replacement tertiary weight for the first rune,
// - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
//
// splitDecompose returns the two replacement weights: t1 from the low byte
// and t2 from the byte above it.
func splitDecompose(ce Elem) (t1, t2 uint8) {
	t1 = uint8(ce)
	t2 = uint8(ce >> 8)
	return t1, t2
}
// Rune ranges used to pick the offset of an implicit primary weight.
const (
	// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
	minUnified       rune = 0x4E00
	maxUnified            = 0x9FFF
	minCompatibility      = 0xF900
	maxCompatibility      = 0xFAFF
	minRare               = 0x3400
	maxRare               = 0x4DBF
)

// Offsets added to a rune to obtain its implicit primary weight.
const (
	commonUnifiedOffset = 0x10000
	rareUnifiedOffset   = 0x20000 // largest rune in common is U+FAFF
	otherOffset         = 0x50000 // largest rune in rare is U+2FA1D
	illegalOffset       = otherOffset + int(unicode.MaxRune)
	maxPrimary          = illegalOffset + 1
)

// implicitPrimary returns the primary weight for a rune that has no entry in
// the collation table.
// We take a different approach from the one specified in
// http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
	if !unicode.Is(unicode.Ideographic, r) {
		return int(r) + otherOffset
	}
	switch {
	case minUnified <= r && r <= maxUnified:
		// The most common case for CJK.
		return int(r) + commonUnifiedOffset
	case minCompatibility <= r && r <= maxCompatibility:
		// This will typically not hit. The DUCET explicitly specifies mappings
		// for all characters that do not decompose.
		return int(r) + commonUnifiedOffset
	}
	// Any other ideograph.
	return int(r) + rareUnifiedOffset
}

105
vendor/golang.org/x/text/internal/colltab/colltab.go generated vendored Normal file
View File

@ -0,0 +1,105 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package colltab contains functionality related to collation tables.
// It is only to be used by the collate and search packages.
package colltab // import "golang.org/x/text/internal/colltab"
import (
"sort"
"golang.org/x/text/language"
)
// MatchLang finds the index of t in tags, using a matching algorithm used for
// collation and search. tags[0] must be language.Und, the remaining tags should
// be sorted alphabetically.
//
// Language matching for collation and search is different from the matching
// defined by language.Matcher: the (inferred) base language must be an exact
// match for the relevant fields. For example, "gsw" should not match "de".
// Also the parent relation is different, as a parent may have a different
// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is
// zh.
//
// MatchLang returns 0, the index of the root locale, when no match is found.
func MatchLang(t language.Tag, tags []language.Tag) int {
// Canonicalize the values, including collapsing macro languages.
t, _ = language.All.Canonicalize(t)
base, conf := t.Base()
// Estimate the base language, but only use high-confidence values.
if conf < language.High {
// The root locale supports "search" and "standard". We assume that any
// implementation will only use one of both.
return 0
}
// Maximize base and script and normalize the tag.
if _, s, r := t.Raw(); (r != language.Region{}) {
p, _ := language.Raw.Compose(base, s, r)
// Taking the parent forces the script to be maximized.
p = p.Parent()
// Add back region and extensions.
t, _ = language.Raw.Compose(p, r, t.Extensions())
} else {
// Set the maximized base language.
t, _ = language.Raw.Compose(base, s, t.Extensions())
}
// Find start index of the language tag.
// The binary search skips tags[0], hence the +1 offsets.
start := 1 + sort.Search(len(tags)-1, func(i int) bool {
b, _, _ := tags[i+1].Raw()
return base.String() <= b.String()
})
if start < len(tags) {
if b, _, _ := tags[start].Raw(); b != base {
// No tag with this base language exists.
return 0
}
}
// Besides the base language, script and region, only the collation type and
// the custom variant defined in the 'u' extension are used to distinguish a
// locale.
// Strip all variants and extensions and add back the custom variant.
tdef, _ := language.Raw.Compose(t.Raw())
tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va"))
// First search for a specialized collation type, if present.
try := []language.Tag{tdef}
if co := t.TypeForKey("co"); co != "" {
tco, _ := tdef.SetTypeForKey("co", co)
try = []language.Tag{tco, tdef}
}
for _, tx := range try {
// Walk up the structural parent chain until a listed tag is found.
for ; tx != language.Und; tx = parent(tx) {
for i, t := range tags[start:] {
if b, _, _ := t.Raw(); b != base {
// Past the run of tags that share the base language.
break
}
if tx == t {
return start + i
}
}
}
}
return 0
}
// parent returns the structural parent of t: each call removes the most
// specific piece of the tag. Because inheritance is purely structural it may
// change script, so, unlike the CLDR parent, parent(zh-Hant) == zh.
func parent(t language.Tag) language.Tag {
	// A custom variant ("va") is the most specific part; drop it first.
	if t.TypeForKey("va") != "" {
		stripped, _ := t.SetTypeForKey("va", "")
		return stripped
	}

	base, script, region := t.Raw()
	up := language.Und
	switch {
	case region != (language.Region{}):
		// Drop the region, keep base and script.
		up, _ = language.Raw.Compose(base, script, t.Extensions())
	case script != (language.Script{}):
		// Drop the script, keep the base.
		up, _ = language.Raw.Compose(base, t.Extensions())
	case base != (language.Base{}):
		// Drop the base, keep only the extensions.
		up, _ = language.Raw.Compose(t.Extensions())
	}
	return up
}

145
vendor/golang.org/x/text/internal/colltab/contract.go generated vendored Normal file
View File

@ -0,0 +1,145 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import "unicode/utf8"
// For a description of ContractTrieSet, see text/collate/build/contract.go.
//
// As used by the scanners in this file, every entry describes one byte
// transition: L is the byte to match (or the lower bound of a byte range), H
// is the inclusive upper bound of that range for final entries and otherwise
// an offset used to locate the next group of entries, N is the number of
// entries in the next group (0 for a final entry) and I is the result index
// recorded on a match (0xFF if the entry carries no result).
type ContractTrieSet []struct{ L, H, N, I uint8 }
// ctScanner is used to match a trie to an input sequence.
// A contraction may match a non-contiguous sequence of bytes in an input string.
// For example, if there is a contraction for <a, combining_ring>, it should match
// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
// not block combining_ring.
// ctScanner does not automatically skip over non-blocking non-starters, but rather
// retains the state of the last match and leaves it up to the user to continue
// the match at the appropriate points.
type ctScanner struct {
states ContractTrieSet // trie entries of the current state onwards
s []byte // input to match against
n int // number of entries in the current state
index int // result index of the last recorded match
pindex int // input offset just past the last recorded match
done bool // set once a final entry has been matched
}
// ctScannerString is the string counterpart of ctScanner; it has the same
// fields except that the input s is a string.
type ctScannerString struct {
states ContractTrieSet // trie entries of the current state onwards
s string // input to match against
n int // number of entries in the current state
index int // result index of the last recorded match
pindex int // input offset just past the last recorded match
done bool // set once a final entry has been matched
}
// scanner returns a ctScanner for input b whose initial state consists of the
// first n entries of t starting at index.
func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
	sc := ctScanner{n: n}
	sc.states = t[index:]
	sc.s = b
	return sc
}
// scannerString returns a ctScannerString for input str whose initial state
// consists of the first n entries of t starting at index.
func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
	sc := ctScannerString{n: n}
	sc.states = t[index:]
	sc.s = str
	return sc
}
// result reports the outcome of the scan so far: i is the recorded result
// offset and p the number of input bytes consumed by that match. Both are 0
// if no suffix matched.
func (s *ctScanner) result() (i, p int) {
	i = s.index
	p = s.pindex
	return i, p
}
// result reports the outcome of the scan so far: i is the recorded result
// offset and p the number of input bytes consumed by that match. Both are 0
// if no suffix matched.
func (s *ctScannerString) result() (i, p int) {
	i = s.index
	p = s.pindex
	return i, p
}
// Special values of the N and I fields of a ContractTrieSet entry.
const (
final = 0 // value of N for an entry that has no successor entries
noIndex = 0xFF // value of I for an entry that does not record a result
)
// scan matches the longest suffix at the current location in the input
// and returns the number of bytes consumed.
//
// Matching starts at offset p of s.s and resumes from the trie state left by
// an earlier call. Matches are recorded in s.index and s.pindex, and s.done is
// set once a final entry has been matched. If no final entry is reached, the
// returned offset is the last position at which the scanner state was
// committed, which is always at the start of a rune.
func (s *ctScanner) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.L {
if e.L == c {
// Exact byte match: consume the byte.
p++
if e.I != noIndex {
// The entry carries a result; remember it as the best match so far.
s.index = int(e.I)
s.pindex = p
}
if e.N != final {
// Descend into the group of successor entries.
i, states, n = 0, states[int(e.H)+n:], int(e.N)
if p >= len(str) || utf8.RuneStart(str[p]) {
// Only commit the new state when a complete rune was consumed.
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.N == final && c <= e.H {
// c falls within the byte range [e.L, e.H] of a final entry; the
// result index is offset by the position of c within the range.
p++
s.done = true
s.index = int(c-e.L) + int(e.I)
s.pindex = p
return p
}
}
i++
}
return pr
}
// scan is a verbatim copy of ctScanner.scan.
//
// It matches the longest suffix at offset p of the string input, records
// matches in s.index and s.pindex and returns the number of bytes consumed.
func (s *ctScannerString) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.L {
if e.L == c {
// Exact byte match: consume the byte.
p++
if e.I != noIndex {
// The entry carries a result; remember it as the best match so far.
s.index = int(e.I)
s.pindex = p
}
if e.N != final {
// Descend into the group of successor entries.
i, states, n = 0, states[int(e.H)+n:], int(e.N)
if p >= len(str) || utf8.RuneStart(str[p]) {
// Only commit the new state when a complete rune was consumed.
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.N == final && c <= e.H {
// c falls within the byte range [e.L, e.H] of a final entry.
p++
s.done = true
s.index = int(c-e.L) + int(e.I)
s.pindex = p
return p
}
}
i++
}
return pr
}

178
vendor/golang.org/x/text/internal/colltab/iter.go generated vendored Normal file
View File

@ -0,0 +1,178 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
// An Iter incrementally converts chunks of the input text to collation
// elements, while ensuring that the collation elements are in normalized order
// (that is, they are in the order as if the input text were normalized first).
type Iter struct {
// Weighter supplies the collation elements for the input text.
Weighter Weighter
// Elems holds the collation elements obtained so far.
Elems []Elem
// N is the number of elements in Elems that will not be reordered on
// subsequent iterations, N <= len(Elems).
N int
// The input text. SetInput and SetInputString set one of the two and clear
// the other.
bytes []byte
str string
// Because the Elems buffer may contain collation elements that are needed
// for look-ahead, we need two positions in the text (bytes or str): one for
// the end position in the text for the current iteration and one for the
// start of the next call to appendNext.
pEnd int // end position in text corresponding to N.
pNext int // pEnd <= pNext.
}
// Reset repositions the iterator at offset p of the current input text and
// drops all collation elements gathered so far.
func (i *Iter) Reset(p int) {
	i.pEnd, i.pNext = p, p
	i.N = 0
	i.Elems = i.Elems[:0]
}
// Len reports the length in bytes of the input text.
func (i *Iter) Len() int {
	if i.bytes == nil {
		return len(i.str)
	}
	return len(i.bytes)
}
// Discard drops the first N collation elements, moving the remaining ones to
// the front of Elems, and resets N to 0.
func (i *Iter) Discard() {
	// TODO: change this such that only modifiers following starters will have
	// to be copied.
	kept := copy(i.Elems, i.Elems[i.N:])
	i.Elems = i.Elems[:kept]
	i.N = 0
}
// End returns the end position of the input text for which Next has returned
// results. This is the text position that corresponds to the first N elements
// of Elems.
func (i *Iter) End() int {
return i.pEnd
}
// SetInput makes s the input text of i, clears any string input and restarts
// iteration at the beginning of s.
func (i *Iter) SetInput(s []byte) {
	i.str, i.bytes = "", s
	i.Reset(0)
}
// SetInputString makes s the input text of i, clears any byte input and
// restarts iteration at the beginning of s.
func (i *Iter) SetInputString(s string) {
	i.bytes, i.str = nil, s
	i.Reset(0)
}
// done reports whether the read position has reached the end of both the
// string and the byte input (the unused one is empty).
func (i *Iter) done() bool {
	return !(i.pNext < len(i.str) || i.pNext < len(i.bytes))
}
// appendNext appends the collation elements of the next character or
// contraction to Elems and advances pNext past the consumed text. It returns
// false, without doing anything, when the input is exhausted.
func (i *Iter) appendNext() bool {
	if i.done() {
		return false
	}
	var consumed int
	if i.bytes != nil {
		i.Elems, consumed = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:])
	} else {
		i.Elems, consumed = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:])
	}
	// Always advance by at least one byte, even if the Weighter reported
	// that nothing was consumed.
	if consumed == 0 {
		consumed = 1
	}
	i.pNext += consumed
	return true
}
// Next appends Elems to the internal array. On each iteration, it will either
// add starters or modifiers. In the majority of cases, an Elem with a primary
// value > 0 will have a CCC of 0. The CCC values of collation elements are also
// used to detect if the input string was not normalized and to adjust the
// result accordingly.
//
// Next reports whether N was advanced to cover new elements. It returns false
// once the input is exhausted and no pending elements remain.
func (i *Iter) Next() bool {
// Only fetch more input if all buffered elements were already returned.
if i.N == len(i.Elems) && !i.appendNext() {
return false
}
// Check if the current segment starts with a starter.
prevCCC := i.Elems[len(i.Elems)-1].CCC()
if prevCCC == 0 {
// The buffer ends in a starter: everything buffered can be returned.
i.N = len(i.Elems)
i.pEnd = i.pNext
return true
} else if i.Elems[i.N].CCC() == 0 {
// set i.N to only cover part of i.Elems for which prevCCC == 0 and
// use rest for the next call to next.
for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ {
}
i.pEnd = i.pNext
return true
}
// The current (partial) segment starts with modifiers. We need to collect
// all successive modifiers to ensure that they are normalized.
for {
// p is the index of the first element added by the appendNext below.
p := len(i.Elems)
i.pEnd = i.pNext
if !i.appendNext() {
break
}
if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters {
// Leave the starter for the next iteration. This ensures that we
// do not return sequences of collation elements that cross two
// segments.
//
// TODO: handle large number of combining characters by fully
// normalizing the input segment before iteration. This ensures
// results are consistent across the text repo.
i.N = p
return true
} else if ccc < prevCCC {
i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC.
} else {
prevCCC = ccc
}
}
// The input is exhausted: return whatever is left in the buffer, if anything.
done := len(i.Elems) != i.N
i.N = len(i.Elems)
return done
}
// nextNoNorm behaves like Next but never reorders ("normalizes") collation
// elements: it appends the elements for the next piece of input and marks all
// buffered elements as final.
func (i *Iter) nextNoNorm() bool {
	// TODO: remove this function. Using this instead of next does not seem
	// to improve performance in any significant way. We retain this until
	// later for evaluation purposes.
	if !i.appendNext() {
		// appendNext only fails when the input is exhausted.
		return false
	}
	i.N = len(i.Elems)
	return true
}
// maxCombiningCharacters bounds the number of pending collation elements
// Iter.Next collects for a run of modifiers before it stops looking ahead.
const maxCombiningCharacters = 30
// doNorm reorders the collation elements in i.Elems.
// It assumes that blocks of collation elements added with appendNext
// either start and end with the same CCC or start with CCC == 0.
// This allows for a single insertion point for the entire block.
// The correctness of this assumption is verified in builder.go.
//
// p is the index of the first element of the block that was added last and
// ccc is the CCC of that element. The block is moved in front of the preceding
// elements that have a higher CCC.
func (i *Iter) doNorm(p int, ccc uint8) {
n := len(i.Elems)
k := p
// Search backwards for the insertion point of the block.
for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- {
}
// Rotate Elems[p:k] behind Elems[k:n]: append a copy of Elems[p:k] as
// scratch space, shift everything from k onwards down to p and drop the
// scratch copy again.
i.Elems = append(i.Elems, i.Elems[p:k]...)
copy(i.Elems[p:], i.Elems[k:])
i.Elems = i.Elems[:n]
}

236
vendor/golang.org/x/text/internal/colltab/numeric.go generated vendored Normal file
View File

@ -0,0 +1,236 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"unicode"
"unicode/utf8"
)
// NewNumericWeighter wraps w to replace individual digits to sort based on their
// numeric value.
//
// Weighter w must have a free primary weight after the primary weight for 9.
// If this is not the case, numeric value will sort at the same primary level
// as the first primary sorting after 9.
func NewNumericWeighter(w Weighter) Weighter {
	// getElem returns the first collation element w produces for s. s must
	// not be empty.
	getElem := func(s string) Elem {
		elems, _ := w.AppendNextString(nil, s)
		return elems[0]
	}
	nine := getElem("9")

	// Numbers should order before zero, but the DUCET has no room for this.
	// TODO: move before zero once we use fractional collation elements.
	ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0)

	return &numericWeighter{
		Weighter: w,

		// We assume that w sorts digits of different kinds in order of numeric
		// value and that the tertiary weight order is preserved.
		//
		// TODO: evaluate whether it is worth basing the ranges on the Elem
		// encoding itself once the move to fractional weights is complete.
		//
		// The non-ASCII digits are written as escapes so that the literals
		// cannot be lost or altered when the file is re-encoded.
		zero:          getElem("0"),
		zeroSpecialLo: getElem("\uFF10"), // U+FF10 FULLWIDTH DIGIT ZERO
		zeroSpecialHi: getElem("\u2080"), // U+2080 SUBSCRIPT ZERO
		nine:          nine,
		nineSpecialHi: getElem("\u2089"), // U+2089 SUBSCRIPT NINE
		numberStart:   ns,
	}
}
// A numericWeighter translates a stream of digits into a stream of weights
// representing the numeric value.
type numericWeighter struct {
// Weighter is the wrapped Weighter that supplies the original weights.
Weighter
// The Elems below all demarcate boundaries of specific ranges. With the
// current element encoding digits are in two ranges: normal (default
// tertiary value) and special. For most languages, digits have collation
// elements in the normal range.
//
// Note: the range tests are very specific for the element encoding used by
// this implementation. The tests in collate_test.go are designed to fail
// if this code is not updated when an encoding has changed.
zero Elem // normal digit zero
zeroSpecialLo Elem // special digit zero, low tertiary value
zeroSpecialHi Elem // special digit zero, high tertiary value
nine Elem // normal digit nine
nineSpecialHi Elem // special digit nine
numberStart Elem // marker inserted in front of the weights of a number
}
// AppendNext calls the namesake of the underlying Weighter, but replaces single
// digits with weights representing their value.
//
// If s starts with a digit, all digits that directly follow it are consumed as
// well and the whole number is represented by a number marker, a length
// element and the elements of the digits. n is the number of bytes of s that
// were consumed.
func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
	ce, n = nw.Weighter.AppendNext(buf, s)
	nc := numberConverter{
		elems: buf,
		w:     nw,
		b:     s,
	}
	isZero, ok := nc.checkNextDigit(ce)
	if !ok {
		return ce, n
	}
	// ce might have been grown already, so take it instead of buf.
	nc.init(ce, len(buf), isZero)
	for n < len(s) {
		ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
		// Point the converter at the rune that produced ce, so that isDigit
		// inspects the current rune rather than the first digit of the number.
		nc.b = s[n:]
		n += sz
		if !nc.update(ce) {
			break
		}
	}
	return nc.result(), n
}
// AppendNextString calls the namesake of the underlying Weighter, but replaces
// single digits with weights representing their value.
//
// If s starts with a digit, all digits that directly follow it are consumed as
// well and the whole number is represented by a number marker, a length
// element and the elements of the digits. n is the number of bytes of s that
// were consumed.
func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) {
	ce, n = nw.Weighter.AppendNextString(buf, s)
	nc := numberConverter{
		elems: buf,
		w:     nw,
		s:     s,
	}
	isZero, ok := nc.checkNextDigit(ce)
	if !ok {
		return ce, n
	}
	// ce might have been grown already, so take it instead of buf.
	nc.init(ce, len(buf), isZero)
	for n < len(s) {
		ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
		// Point the converter at the rune that produced ce, so that isDigit
		// inspects the current rune rather than the first digit of the number.
		nc.s = s[n:]
		n += sz
		if !nc.update(ce) {
			break
		}
	}
	return nc.result(), n
}
// A numberConverter holds the state needed to rewrite the collation elements
// of a sequence of digits into the elements of a single number.
type numberConverter struct {
w *numericWeighter // supplies the digit ranges and the number marker
elems []Elem // elements accepted so far
nDigits int // number of digits counted so far
lenIndex int // index in elems of the placeholder for the digit count
s string // set if the input was of type string
b []byte // set if the input was of type []byte
}
// init finishes setting up a numberConverter so that further digits can be
// added. elems is assumed to hold a digit starting at oldLen. A marker for the
// start of a number and a placeholder for the number of digits are inserted at
// oldLen; a leading zero digit is dropped in the process.
func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
	if !isZero {
		// Make room for the two extra elements in front of the digit.
		elems = append(elems, 0, 0)
		copy(elems[oldLen+2:], elems[oldLen:])
		elems[oldLen], elems[oldLen+1] = nc.w.numberStart, 0
		nc.nDigits = 1
	} else {
		// A zero does not count as a digit: overwrite it with the marker
		// and the placeholder.
		elems = append(elems[:oldLen], nc.w.numberStart, 0)
	}
	nc.lenIndex = oldLen + 1
	nc.elems = elems
}
// checkNextDigit reports whether bufNew adds a single digit relative to the old
// buffer. If it does, it also reports whether this digit is zero.
//
// The old buffer is nc.elems; the first element of bufNew beyond it is the one
// that is tested against the digit ranges of the numericWeighter.
func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) {
// Nothing was added.
if len(nc.elems) >= len(bufNew) {
return false, false
}
e := bufNew[len(nc.elems)]
if e < nc.w.zeroSpecialLo || nc.w.nine < e {
// Not a number.
return false, false
}
if e < nc.w.zero {
// e lies below the normal digits, so it can only be a special digit.
if e > nc.w.nineSpecialHi {
// Not a number.
return false, false
}
if !nc.isDigit() {
return false, false
}
isZero = e <= nc.w.zeroSpecialHi
} else {
// This is the common case if we encounter a digit.
isZero = e == nc.w.zero
}
// Test the remaining added collation elements have a zero primary value.
if n := len(bufNew) - len(nc.elems); n > 1 {
for i := len(nc.elems) + 1; i < len(bufNew); i++ {
if bufNew[i].Primary() != 0 {
return false, false
}
}
// In some rare cases, collation elements will encode runes in
// unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371)
// are not in Nd. Also some digits that clearly belong in unicode.No,
// like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have
// collation elements indistinguishable from normal digits.
// Unfortunately, this means we need to make this check for nearly all
// non-Latin digits.
//
// TODO: check the performance impact and find something better if it is
// an issue.
if !nc.isDigit() {
return false, false
}
}
return isZero, true
}
// isDigit reports whether the first rune of the converter's input (nc.b if
// set, nc.s otherwise) is in the Unicode category Nd.
func (nc *numberConverter) isDigit() bool {
	var r rune
	if nc.b == nil {
		r, _ = utf8.DecodeRuneInString(nc.s)
	} else {
		r, _ = utf8.DecodeRune(nc.b)
	}
	return unicode.In(r, unicode.Nd)
}
// maxDigits is the number of digits at which a numberConverter stops adding
// digits to a number.
//
// We currently support a maximum of about 2M digits (the number of primary
// values). Such numbers will compare correctly against small numbers, but their
// comparison against other large numbers is undefined.
//
// TODO: define a proper fallback, such as comparing large numbers textually or
// actually allowing numbers of unlimited length.
//
// TODO: cap this to a lower number (like 100) and maybe allow a larger number
// in an option?
const maxDigits = 1<<maxPrimaryBits - 1
// update processes elems, the buffer as extended by the next piece of input.
// It reports whether the caller should continue feeding input: false is
// returned when the addition is not a digit or when maxDigits is reached.
func (nc *numberConverter) update(elems []Elem) bool {
	isZero, digit := nc.checkNextDigit(elems)
	// Leading zeros are skipped: the buffer is left untouched.
	if leadingZero := isZero && nc.nDigits == 0; leadingZero {
		return true
	}
	nc.elems = elems
	if digit {
		nc.nDigits++
		return nc.nDigits < maxDigits
	}
	return false
}
// result stores the number of digits in the placeholder element of the digit
// sequence and returns the finished collation elements.
func (nc *numberConverter) result() []Elem {
	length, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
	out := nc.elems
	out[nc.lenIndex] = length
	return out
}

275
vendor/golang.org/x/text/internal/colltab/table.go generated vendored Normal file
View File

@ -0,0 +1,275 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
// Table holds all collation data for a given collation ordering.
type Table struct {
Index Trie // main trie
// expansion info
// An expansion stored at index i consists of a count at ExpandElem[i]
// followed by that many collation elements.
ExpandElem []uint32
// contraction info
ContractTries ContractTrieSet
ContractElem []uint32
MaxContractLen int
// VariableTop is the value returned by Top.
VariableTop uint32
}
// AppendNext appends to w the weights of the next rune or contraction at the
// start of b. It returns the extended slice and the number of bytes consumed.
func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
	res, n = t.appendNext(w, source{bytes: b})
	return res, n
}
// AppendNextString appends to w the weights of the next rune or contraction at
// the start of s. It returns the extended slice and the number of bytes
// consumed.
func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
	res, n = t.appendNext(w, source{str: s})
	return res, n
}
// Start is not implemented yet; it always panics.
func (t *Table) Start(p int, b []byte) int {
// TODO: implement
panic("not implemented")
}
// StartString is not implemented yet; it always panics.
func (t *Table) StartString(p int, s string) int {
// TODO: implement
panic("not implemented")
}
// Domain is not implemented yet; it always panics.
func (t *Table) Domain() []string {
// TODO: implement
panic("not implemented")
}
// Top returns the VariableTop value of the table.
func (t *Table) Top() uint32 {
return t.VariableTop
}
// source abstracts over the two kinds of input text. Its methods operate on
// bytes when it is non-nil and on str otherwise.
type source struct {
str string
bytes []byte
}
// lookup returns the trie value of t for the first UTF-8 sequence of src and
// the size of that sequence in bytes.
func (src *source) lookup(t *Table) (ce Elem, sz int) {
	if src.bytes != nil {
		return t.Index.lookup(src.bytes)
	}
	return t.Index.lookupString(src.str)
}
// tail drops the first sz bytes of the input held by src.
func (src *source) tail(sz int) {
	if src.bytes != nil {
		src.bytes = src.bytes[sz:]
		return
	}
	src.str = src.str[sz:]
}
// nfd writes the NFD form of the first end bytes of src into buf, reusing the
// storage of buf from its start, and returns the result.
func (src *source) nfd(buf []byte, end int) []byte {
	dst := buf[:0]
	if src.bytes != nil {
		return norm.NFD.Append(dst, src.bytes[:end]...)
	}
	return norm.NFD.AppendString(dst, src.str[:end])
}
// rune decodes the first rune of src and returns it with its size in bytes.
func (src *source) rune() (r rune, sz int) {
	if src.bytes != nil {
		return utf8.DecodeRune(src.bytes)
	}
	return utf8.DecodeRuneInString(src.str)
}
// properties returns the properties under normalization form f of the first
// rune of src.
func (src *source) properties(f norm.Form) norm.Properties {
	if src.bytes != nil {
		return f.Properties(src.bytes)
	}
	return f.PropertiesString(src.str)
}
// appendNext appends the weights corresponding to the next rune or
// contraction in s. If a contraction is matched to a discontinuous
// sequence of runes, the weights for the interstitial runes are
// appended as well. It returns a new slice that includes the appended
// weights and the number of bytes consumed from s.
func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
ce, sz := src.lookup(t)
// The type of the looked-up value determines how the weights are obtained.
tp := ce.ctype()
if tp == ceNormal {
if ce == 0 {
// The trie holds no value for this input.
r, _ := src.rune()
const (
hangulSize = 3
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r >= firstHangul && r <= lastHangul {
// Hangul syllables are decomposed and the weights of the parts are
// looked up individually.
// TODO: performance can be considerably improved here.
n = sz
var buf [16]byte // Used for decomposing Hangul.
for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
ce, sz = t.Index.lookup(b)
w = append(w, ce)
}
return w, n
}
// Fall back to a weight derived from the rune itself.
ce = makeImplicitCE(implicitPrimary(r))
}
w = append(w, ce)
} else if tp == ceExpansionIndex {
w = t.appendExpansion(w, ce)
} else if tp == ceContractionIndex {
// Match the remainder of the input, following the first rune, against
// the contraction trie. Note that this n shadows the named result.
n := 0
src.tail(sz)
if src.bytes == nil {
w, n = t.matchContractionString(w, ce, src.str)
} else {
w, n = t.matchContraction(w, ce, src.bytes)
}
sz += n
} else if tp == ceDecompose {
// Decompose using NFKD and replace tertiary weights.
t1, t2 := splitDecompose(ce)
i := len(w)
nfkd := src.properties(norm.NFKD).Decomposition()
for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
w, p = t.appendNext(w, source{bytes: nfkd})
}
// The first element gets t1, the second t2 and all following elements
// the maximum tertiary weight.
w[i] = w[i].updateTertiary(t1)
if i++; i < len(w) {
w[i] = w[i].updateTertiary(t2)
for i++; i < len(w); i++ {
w[i] = w[i].updateTertiary(maxTertiary)
}
}
}
return w, sz
}
// appendExpansion appends to w the collation elements of the expansion that
// ce refers to. The expansion is stored in t.ExpandElem as a count followed by
// that many elements.
func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
	pos := splitExpandIndex(ce)
	count := int(t.ExpandElem[pos])
	first := pos + 1
	for _, raw := range t.ExpandElem[first : first+count] {
		w = append(w, Elem(raw))
	}
	return w
}
// matchContraction matches suffix, the input following the first rune of a
// potential contraction, against the contraction trie that ce refers to. It
// appends the weights of the longest match to w, followed by the weights of any
// runes that were skipped over within the match, and returns the extended slice
// and the number of bytes of suffix that are part of the match.
func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.ContractTries.scanner(index, n, suffix)
// buf collects the skipped (interstitial) runes; bufp is the length of the
// part of buf that belongs to the match recorded last.
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
// Try a discontiguous match only if the contiguous scan is not final and a
// non-ASCII byte follows.
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.Properties(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.Properties(suffix[p:])
// Stop at a starter or at a rune that is blocked by the previous one.
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.ContractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
// matchContractionString is the string counterpart of matchContraction: it
// matches suffix against the contraction trie that ce refers to, appends the
// weights of the longest match and of any skipped runes to w and returns the
// extended slice and the number of bytes of suffix that are part of the match.
//
// TODO: unify the two implementations. This is best done after first simplifying
// the algorithm taking into account the inclusion of both NFC and NFD forms
// in the table.
func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.ContractTries.scannerString(index, n, suffix)
// buf collects the skipped (interstitial) runes; bufp is the length of the
// part of buf that belongs to the match recorded last.
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.PropertiesString(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.PropertiesString(suffix[p:])
// Stop at a starter or at a rune that is blocked by the previous one.
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.ContractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}

159
vendor/golang.org/x/text/internal/colltab/trie.go generated vendored Normal file
View File

@ -0,0 +1,159 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The trie in this file is used to associate the first full character in an
// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
// sequence are used to lookup offsets in the index table to be used for the
// next byte. The last byte is used to index into a table of collation elements.
// For a full description, see go.text/collate/build/trie.go.
package colltab
// blockSize is the size of a trie block. The lookups in this file address
// blocks by shifting a block index left by 6, which corresponds to this value.
const blockSize = 64
// A Trie maps the first UTF-8 encoded character of a string to a collation
// element. Single-byte encodings are looked up directly in Values0; for
// multi-byte encodings the leading bytes select a block through Index0 and
// Index and the last byte selects the value within a block of Values.
type Trie struct {
Index0 []uint16 // index for first byte (0xC0-0xFF)
Values0 []uint32 // index for first byte (0x00-0x7F)
Index []uint16
Values []uint32
}
// Byte values that delimit the classes of bytes in a UTF-8 encoding. The
// lookup functions compare the first byte against tx, t2, t3, t4 and t5 to
// determine the length of the encoding, and continuation bytes against the
// range [tx, t2).
const (
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
t2 = 0xC0 // 1100 0000
t3 = 0xE0 // 1110 0000
t4 = 0xF0 // 1111 0000
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
)
// lookupValue returns the value stored for byte b in block n of t.Values.
// Blocks are 64 entries apart; b is added to the block start as is.
func (t *Trie) lookupValue(n uint16, b byte) Elem {
	offset := int(n)*64 + int(b)
	return Elem(t.Values[offset])
}
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
//
// For an invalid encoding the value is 0 and the size is the number of bytes
// that were found to be well-formed, with a minimum of 1.
func (t *Trie) lookup(s []byte) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
// Single-byte encoding.
return Elem(t.Values0[c0]), 1
case c0 < t2:
// A continuation byte cannot start an encoding.
return 0, 1
case c0 < t3:
// Two-byte encoding.
if len(s) < 2 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
// Three-byte encoding.
if len(s) < 3 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
// Four-byte encoding.
if len(s) < 4 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.Index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
// lookupString returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not hold
// enough bytes to complete the encoding. len(s) must be greater than 0.
//
// The body of lookupString is a verbatim copy of that of lookup.
func (t *Trie) lookupString(s string) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
// Single-byte encoding.
return Elem(t.Values0[c0]), 1
case c0 < t2:
// A continuation byte cannot start an encoding.
return 0, 1
case c0 < t3:
// Two-byte encoding.
if len(s) < 2 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
// Three-byte encoding.
if len(s) < 3 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
// Four-byte encoding.
if len(s) < 4 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.Index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}

31
vendor/golang.org/x/text/internal/colltab/weighter.go generated vendored Normal file
View File

@ -0,0 +1,31 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab // import "golang.org/x/text/internal/colltab"
// A Weighter can be used as a source for Collator and Searcher.
//
// Every operation comes in two variants, one for []byte input and one for
// string input.
type Weighter interface {
// Start finds the start of the segment that includes position p.
Start(p int, b []byte) int
// StartString finds the start of the segment that includes position p.
StartString(p int, s string) int
// AppendNext appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNext(buf []Elem, s []byte) (ce []Elem, n int)
// AppendNextString appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNextString(buf []Elem, s string) (ce []Elem, n int)
// Domain returns a slice of all single characters and contractions for which
// collation elements are defined in this table.
Domain() []string
// Top returns the highest variable primary value.
Top() uint32
}

100
vendor/golang.org/x/text/internal/tag/tag.go generated vendored Normal file
View File

@ -0,0 +1,100 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package tag contains functionality handling tags and related data.
package tag // import "golang.org/x/text/internal/tag"
import "sort"
// An Index converts tags to a compact numeric value.
//
// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
// be used to store additional information about the tag.
type Index string

// Elem returns the element data at the given index.
func (s Index) Elem(x int) string {
	return string(s[x*4 : x*4+4])
}

// Index reports the index of the given key or -1 if it could not be found.
// Only the first len(key) bytes from the start of the 4-byte entries will be
// considered for the search and the first match in Index will be returned.
// The entries of s must be sorted and key must not be longer than an entry.
func (s Index) Index(key []byte) int {
	n := len(key)
	// search the index of the first entry with an equal or higher value than
	// key in s.
	index := sort.Search(len(s)/4, func(i int) bool {
		return cmp(s[i*4:i*4+n], key) != -1
	})
	i := index * 4
	// If key sorts after every entry (or s is empty) the search ends past the
	// last entry; there is nothing to compare against in that case.
	if i+n > len(s) || cmp(s[i:i+n], key) != 0 {
		return -1
	}
	return index
}

// Next finds the next occurrence of key after index x, which must have been
// obtained from a call to Index using the same key. It returns x+1 or -1.
func (s Index) Next(key []byte, x int) int {
	x++
	lo, hi := x*4, x*4+len(key)
	// Besides requiring that entry x exists, make sure the compared prefix
	// does not extend past the end of s.
	if lo < len(s) && hi <= len(s) && cmp(s[lo:hi], key) == 0 {
		return x
	}
	return -1
}

// cmp returns an integer comparing a and b lexicographically: -1 if a < b,
// 0 if a == b and 1 if a > b. A proper prefix sorts before the longer value.
func cmp(a Index, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i, c := range b[:n] {
		switch {
		case a[i] > c:
			return 1
		case a[i] < c:
			return -1
		}
	}
	switch {
	case len(a) < len(b):
		return -1
	case len(a) > len(b):
		return 1
	}
	return 0
}

// Compare returns an integer comparing a and b lexicographically.
func Compare(a string, b []byte) int {
	return cmp(Index(a), b)
}
// FixCase rewrites b in place so that its letters follow the case pattern of
// form: a byte of form that is not greater than 'Z' requests an upper-case
// letter at that position, any other byte a lower-case letter.
// It returns false if b is malformed, that is, if it differs in length from
// form or contains a byte that is not an ASCII letter. Bytes preceding the
// offending one may already have been rewritten in that case.
func FixCase(form string, b []byte) bool {
	if len(b) != len(form) {
		return false
	}
	// Distance between the lower-case and upper-case ASCII letters.
	const caseDelta = 'z' - 'Z'
	for i := range b {
		c := b[i]
		if wantUpper := form[i] <= 'Z'; wantUpper {
			if c >= 'a' {
				c -= caseDelta
			}
			if c < 'A' || c > 'Z' {
				return false
			}
		} else {
			if c <= 'Z' {
				c += caseDelta
			}
			if c < 'a' || c > 'z' {
				return false
			}
		}
		b[i] = c
	}
	return true
}

16
vendor/golang.org/x/text/language/common.go generated vendored Normal file
View File

@ -0,0 +1,16 @@
// This file was generated by go generate; DO NOT EDIT
package language
// This file contains code common to the maketables.go and the package code.
// langAliasType is the type of an alias in langAliasMap.
type langAliasType int8
// The known alias types are numbered consecutively starting at 0;
// langAliasTypeUnknown is explicitly set to -1.
const (
langDeprecated langAliasType = iota
langMacro
langLegacy
langAliasTypeUnknown langAliasType = -1
)

197
vendor/golang.org/x/text/language/coverage.go generated vendored Normal file
View File

@ -0,0 +1,197 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"fmt"
"sort"
)
// The Coverage interface is used to define the level of coverage of an
// internationalization service. Note that not all types are supported by all
// services. As lists may be generated on the fly, it is recommended that users
// of a Coverage cache the results.
//
// A method may return nil if the corresponding list is not available.
type Coverage interface {
// Tags returns the list of supported tags.
Tags() []Tag
// BaseLanguages returns the list of supported base languages.
BaseLanguages() []Base
// Scripts returns the list of supported scripts.
Scripts() []Script
// Regions returns the list of supported regions.
Regions() []Region
}
var (
// Supported defines a Coverage that lists all supported subtags (base
// languages, scripts and regions). Its Tags method always returns nil.
// It is backed by the stateless allSubtags type.
Supported Coverage = allSubtags{}
)
// TODO:
// - Support Variants, numbering systems.
// - CLDR coverage levels.
// - Set of common tags defined in this package.

// allSubtags is the Coverage implementation behind Supported. It carries no
// state; its methods build their results from the package's tables.
type allSubtags struct{}
// Regions lists every supported region. Region IDs form one consecutive
// range, so the result is simply the IDs 1 through numRegions in increasing
// order. ID 0, the "undefined" region, is not part of the result.
func (s allSubtags) Regions() []Region {
	regions := make([]Region, numRegions)
	for id := 1; id <= len(regions); id++ {
		regions[id-1] = Region{regionID(id)}
	}
	return regions
}
// Scripts lists every supported script. Script IDs form one consecutive
// range, so the result is simply the IDs 1 through numScripts in increasing
// order. ID 0, the "undefined" script, is not part of the result.
func (s allSubtags) Scripts() []Script {
	scripts := make([]Script, numScripts)
	for id := 1; id <= len(scripts); id++ {
		scripts[id-1] = Script{scriptID(id)}
	}
	return scripts
}
// BaseLanguages returns the list of all supported base languages. The list is
// assembled from the package's internal tables in two passes: first every ID
// below langNoIndexOffset, then every ID whose bit is set in langNoIndex.
func (s allSubtags) BaseLanguages() []Base {
	langs := make([]Base, 0, numLanguages)
	for id := 0; id < langNoIndexOffset; id++ {
		if id == nonCanonicalUnd {
			// "und" is already included through ID 0.
			continue
		}
		langs = append(langs, Base{langID(id)})
	}
	// langNoIndex is read as a bit set: each entry describes eight
	// consecutive IDs, least significant bit first, starting at
	// langNoIndexOffset.
	next := langNoIndexOffset
	for _, bits := range langNoIndex {
		for bit := uint(0); bit < 8; bit++ {
			if bits>>bit&1 == 1 {
				langs = append(langs, Base{langID(next)})
			}
			next++
		}
	}
	return langs
}
// Tags always returns nil: allSubtags enumerates individual subtags (base
// languages, scripts, regions) only and provides no list of complete tags.
func (s allSubtags) Tags() []Tag {
return nil
}
// coverage is used by NewCoverage, which is a convenient way of creating
// Coverage implementations for partially defined data. Very often a package
// will only need to define a subset of slices. coverage provides a
// convenient way to do this. Moreover, packages using NewCoverage, instead of
// their own implementation, will not break if later new slice types are added.
//
// Each field is an optional generator for one list; a nil field means that
// list was not supplied.
type coverage struct {
tags func() []Tag
bases func() []Base
scripts func() []Script
regions func() []Region
}
// Tags returns the result of calling s.tags, or nil if no tag list was
// supplied.
func (s *coverage) Tags() []Tag {
	if gen := s.tags; gen != nil {
		return gen()
	}
	return nil
}
// bases implements sort.Interface and is used to sort base languages in
// increasing langID order.
type bases []Base
// Len returns the number of base languages in b.
func (b bases) Len() int {
return len(b)
}
// Swap exchanges the elements at positions i and j.
func (b bases) Swap(i, j int) {
b[i], b[j] = b[j], b[i]
}
// Less reports whether the langID of element i is smaller than that of
// element j.
func (b bases) Less(i, j int) bool {
return b[i].langID < b[j].langID
}
// BaseLanguages returns the result of calling s.bases when a base list was
// supplied. Otherwise the set of base languages is derived from s.Tags(): the
// language of every tag is collected, sorted by langID and de-duplicated. It
// returns nil when there are no tags to derive the set from.
func (s *coverage) BaseLanguages() []Base {
	if s.bases != nil {
		return s.bases()
	}
	tags := s.Tags()
	if len(tags) == 0 {
		return nil
	}
	langs := make([]Base, 0, len(tags))
	for _, t := range tags {
		langs = append(langs, Base{langID(t.lang)})
	}
	sort.Sort(bases(langs))
	// Compact in place: equal entries are adjacent after sorting, so keep
	// only the first entry of every run.
	last := 0
	for _, b := range langs[1:] {
		if b != langs[last] {
			last++
			langs[last] = b
		}
	}
	return langs[:last+1]
}
// Scripts returns the result of calling s.scripts, or nil if no script list
// was supplied.
func (s *coverage) Scripts() []Script {
	if gen := s.scripts; gen != nil {
		return gen()
	}
	return nil
}
// Regions returns the result of calling s.regions, or nil if no region list
// was supplied.
func (s *coverage) Regions() []Region {
	if gen := s.regions; gen != nil {
		return gen()
	}
	return nil
}
// NewCoverage returns a Coverage for the given lists. It is typically used by
// packages providing internationalization services to define their level of
// coverage. A list may be of type []T or func() []T, where T is either Tag,
// Base, Script or Region. The returned Coverage derives the value for Bases
// from Tags if no func or slice for []Base is specified. For other unspecified
// types the returned Coverage will return nil for the respective methods.
//
// When several lists of the same kind are passed, the last one wins.
// NewCoverage panics if an argument has any other type.
func NewCoverage(list ...interface{}) Coverage {
	cov := new(coverage)
	for _, item := range list {
		switch v := item.(type) {
		// Plain slices are wrapped so that every field holds a generator.
		case []Tag:
			cov.tags = func() []Tag { return v }
		case []Base:
			cov.bases = func() []Base { return v }
		case []Script:
			cov.scripts = func() []Script { return v }
		case []Region:
			cov.regions = func() []Region { return v }
		// Generator functions are stored as they are.
		case func() []Tag:
			cov.tags = v
		case func() []Base:
			cov.bases = v
		case func() []Script:
			cov.scripts = v
		case func() []Region:
			cov.regions = v
		default:
			panic(fmt.Sprintf("language: unsupported set type %T", v))
		}
	}
	return cov
}

38
vendor/golang.org/x/text/language/go1_1.go generated vendored Normal file
View File

@ -0,0 +1,38 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.2
package language
import "sort"
// sortStable sorts s while keeping elements that compare equal in their
// original relative order. It wraps s in a stableSort that remembers where
// each element started and hands that wrapper to sort.Sort.
func sortStable(s sort.Interface) {
	n := s.Len()
	order := make([]int, n)
	for i := 0; i < n; i++ {
		order[i] = i
	}
	sort.Sort(&stableSort{s: s, pos: order})
}
// stableSort adapts a sort.Interface so that ties are broken by original
// position. pos[i] holds the initial index of the element currently at
// position i of s.
type stableSort struct {
	s   sort.Interface
	pos []int
}

// Len returns the number of tracked positions.
func (ss *stableSort) Len() int {
	return len(ss.pos)
}

// Less orders by the wrapped Less first. When neither element is less than
// the other, the one that started at the lower index comes first.
func (ss *stableSort) Less(i, j int) bool {
	if ss.s.Less(i, j) {
		return true
	}
	if ss.s.Less(j, i) {
		return false
	}
	return ss.pos[i] < ss.pos[j]
}

// Swap exchanges the elements in the wrapped collection and their recorded
// original positions.
func (ss *stableSort) Swap(i, j int) {
	ss.s.Swap(i, j)
	pi, pj := ss.pos[i], ss.pos[j]
	ss.pos[i], ss.pos[j] = pj, pi
}

11
vendor/golang.org/x/text/language/go1_2.go generated vendored Normal file
View File

@ -0,0 +1,11 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.2
package language
import "sort"
// sortStable is the standard library's sort.Stable. This file is only built
// with Go 1.2 or later (see the build constraint above).
var sortStable = sort.Stable

767
vendor/golang.org/x/text/language/index.go generated vendored Normal file
View File

@ -0,0 +1,767 @@
// This file was generated by go generate; DO NOT EDIT
package language
// NumCompactTags is the number of common tags. The maximum tag is
// NumCompactTags-1 (751, the largest index stored in coreTags).
const NumCompactTags = 752
// specialTags lists the common tags that carry a variant or an extension and
// therefore need their full string form. In each entry pVariant and pExt are
// byte offsets into str.
// NOTE(review): presumably these two tags take the compact indices 1 and 2,
// which coreTags skips — confirm against the index generator.
var specialTags = []Tag{ // 2 elements
0: {lang: 0xd5, region: 0x6d, script: 0x0, pVariant: 0x5, pExt: 0xe, str: "ca-ES-valencia"},
1: {lang: 0x134, region: 0x134, script: 0x0, pVariant: 0x5, pExt: 0x5, str: "en-US-u-va-posix"},
} // Size: 72 bytes
var coreTags = map[uint32]uint16{
0x0: 0, // und
0x01500000: 3, // af
0x015000d1: 4, // af-NA
0x01500160: 5, // af-ZA
0x01b00000: 6, // agq
0x01b00051: 7, // agq-CM
0x02000000: 8, // ak
0x0200007f: 9, // ak-GH
0x02600000: 10, // am
0x0260006e: 11, // am-ET
0x03900000: 12, // ar
0x03900001: 13, // ar-001
0x03900022: 14, // ar-AE
0x03900038: 15, // ar-BH
0x03900061: 16, // ar-DJ
0x03900066: 17, // ar-DZ
0x0390006a: 18, // ar-EG
0x0390006b: 19, // ar-EH
0x0390006c: 20, // ar-ER
0x03900096: 21, // ar-IL
0x0390009a: 22, // ar-IQ
0x039000a0: 23, // ar-JO
0x039000a7: 24, // ar-KM
0x039000ab: 25, // ar-KW
0x039000af: 26, // ar-LB
0x039000b8: 27, // ar-LY
0x039000b9: 28, // ar-MA
0x039000c8: 29, // ar-MR
0x039000e0: 30, // ar-OM
0x039000ec: 31, // ar-PS
0x039000f2: 32, // ar-QA
0x03900107: 33, // ar-SA
0x0390010a: 34, // ar-SD
0x03900114: 35, // ar-SO
0x03900116: 36, // ar-SS
0x0390011b: 37, // ar-SY
0x0390011f: 38, // ar-TD
0x03900127: 39, // ar-TN
0x0390015d: 40, // ar-YE
0x03f00000: 41, // ars
0x04200000: 42, // as
0x04200098: 43, // as-IN
0x04300000: 44, // asa
0x0430012e: 45, // asa-TZ
0x04700000: 46, // ast
0x0470006d: 47, // ast-ES
0x05700000: 48, // az
0x0571e000: 49, // az-Cyrl
0x0571e031: 50, // az-Cyrl-AZ
0x05752000: 51, // az-Latn
0x05752031: 52, // az-Latn-AZ
0x05d00000: 53, // bas
0x05d00051: 54, // bas-CM
0x07000000: 55, // be
0x07000046: 56, // be-BY
0x07400000: 57, // bem
0x07400161: 58, // bem-ZM
0x07800000: 59, // bez
0x0780012e: 60, // bez-TZ
0x07d00000: 61, // bg
0x07d00037: 62, // bg-BG
0x08100000: 63, // bh
0x09e00000: 64, // bm
0x09e000c2: 65, // bm-ML
0x0a300000: 66, // bn
0x0a300034: 67, // bn-BD
0x0a300098: 68, // bn-IN
0x0a700000: 69, // bo
0x0a700052: 70, // bo-CN
0x0a700098: 71, // bo-IN
0x0b000000: 72, // br
0x0b000077: 73, // br-FR
0x0b300000: 74, // brx
0x0b300098: 75, // brx-IN
0x0b500000: 76, // bs
0x0b51e000: 77, // bs-Cyrl
0x0b51e032: 78, // bs-Cyrl-BA
0x0b552000: 79, // bs-Latn
0x0b552032: 80, // bs-Latn-BA
0x0d500000: 81, // ca
0x0d500021: 82, // ca-AD
0x0d50006d: 83, // ca-ES
0x0d500077: 84, // ca-FR
0x0d50009d: 85, // ca-IT
0x0da00000: 86, // ce
0x0da00105: 87, // ce-RU
0x0dd00000: 88, // cgg
0x0dd00130: 89, // cgg-UG
0x0e300000: 90, // chr
0x0e300134: 91, // chr-US
0x0e700000: 92, // ckb
0x0e70009a: 93, // ckb-IQ
0x0e70009b: 94, // ckb-IR
0x0f600000: 95, // cs
0x0f60005d: 96, // cs-CZ
0x0fa00000: 97, // cu
0x0fa00105: 98, // cu-RU
0x0fc00000: 99, // cy
0x0fc0007a: 100, // cy-GB
0x0fd00000: 101, // da
0x0fd00062: 102, // da-DK
0x0fd00081: 103, // da-GL
0x10400000: 104, // dav
0x104000a3: 105, // dav-KE
0x10900000: 106, // de
0x1090002d: 107, // de-AT
0x10900035: 108, // de-BE
0x1090004d: 109, // de-CH
0x1090005f: 110, // de-DE
0x1090009d: 111, // de-IT
0x109000b1: 112, // de-LI
0x109000b6: 113, // de-LU
0x11300000: 114, // dje
0x113000d3: 115, // dje-NE
0x11b00000: 116, // dsb
0x11b0005f: 117, // dsb-DE
0x12000000: 118, // dua
0x12000051: 119, // dua-CM
0x12400000: 120, // dv
0x12700000: 121, // dyo
0x12700113: 122, // dyo-SN
0x12900000: 123, // dz
0x12900042: 124, // dz-BT
0x12b00000: 125, // ebu
0x12b000a3: 126, // ebu-KE
0x12c00000: 127, // ee
0x12c0007f: 128, // ee-GH
0x12c00121: 129, // ee-TG
0x13100000: 130, // el
0x1310005c: 131, // el-CY
0x13100086: 132, // el-GR
0x13400000: 133, // en
0x13400001: 134, // en-001
0x1340001a: 135, // en-150
0x13400024: 136, // en-AG
0x13400025: 137, // en-AI
0x1340002c: 138, // en-AS
0x1340002d: 139, // en-AT
0x1340002e: 140, // en-AU
0x13400033: 141, // en-BB
0x13400035: 142, // en-BE
0x13400039: 143, // en-BI
0x1340003c: 144, // en-BM
0x13400041: 145, // en-BS
0x13400045: 146, // en-BW
0x13400047: 147, // en-BZ
0x13400048: 148, // en-CA
0x13400049: 149, // en-CC
0x1340004d: 150, // en-CH
0x1340004f: 151, // en-CK
0x13400051: 152, // en-CM
0x1340005b: 153, // en-CX
0x1340005c: 154, // en-CY
0x1340005f: 155, // en-DE
0x13400060: 156, // en-DG
0x13400062: 157, // en-DK
0x13400063: 158, // en-DM
0x1340006c: 159, // en-ER
0x13400071: 160, // en-FI
0x13400072: 161, // en-FJ
0x13400073: 162, // en-FK
0x13400074: 163, // en-FM
0x1340007a: 164, // en-GB
0x1340007b: 165, // en-GD
0x1340007e: 166, // en-GG
0x1340007f: 167, // en-GH
0x13400080: 168, // en-GI
0x13400082: 169, // en-GM
0x13400089: 170, // en-GU
0x1340008b: 171, // en-GY
0x1340008c: 172, // en-HK
0x13400095: 173, // en-IE
0x13400096: 174, // en-IL
0x13400097: 175, // en-IM
0x13400098: 176, // en-IN
0x13400099: 177, // en-IO
0x1340009e: 178, // en-JE
0x1340009f: 179, // en-JM
0x134000a3: 180, // en-KE
0x134000a6: 181, // en-KI
0x134000a8: 182, // en-KN
0x134000ac: 183, // en-KY
0x134000b0: 184, // en-LC
0x134000b3: 185, // en-LR
0x134000b4: 186, // en-LS
0x134000be: 187, // en-MG
0x134000bf: 188, // en-MH
0x134000c5: 189, // en-MO
0x134000c6: 190, // en-MP
0x134000c9: 191, // en-MS
0x134000ca: 192, // en-MT
0x134000cb: 193, // en-MU
0x134000cd: 194, // en-MW
0x134000cf: 195, // en-MY
0x134000d1: 196, // en-NA
0x134000d4: 197, // en-NF
0x134000d5: 198, // en-NG
0x134000d8: 199, // en-NL
0x134000dc: 200, // en-NR
0x134000de: 201, // en-NU
0x134000df: 202, // en-NZ
0x134000e5: 203, // en-PG
0x134000e6: 204, // en-PH
0x134000e7: 205, // en-PK
0x134000ea: 206, // en-PN
0x134000eb: 207, // en-PR
0x134000ef: 208, // en-PW
0x13400106: 209, // en-RW
0x13400108: 210, // en-SB
0x13400109: 211, // en-SC
0x1340010a: 212, // en-SD
0x1340010b: 213, // en-SE
0x1340010c: 214, // en-SG
0x1340010d: 215, // en-SH
0x1340010e: 216, // en-SI
0x13400111: 217, // en-SL
0x13400116: 218, // en-SS
0x1340011a: 219, // en-SX
0x1340011c: 220, // en-SZ
0x1340011e: 221, // en-TC
0x13400124: 222, // en-TK
0x13400128: 223, // en-TO
0x1340012b: 224, // en-TT
0x1340012c: 225, // en-TV
0x1340012e: 226, // en-TZ
0x13400130: 227, // en-UG
0x13400132: 228, // en-UM
0x13400134: 229, // en-US
0x13400138: 230, // en-VC
0x1340013b: 231, // en-VG
0x1340013c: 232, // en-VI
0x1340013e: 233, // en-VU
0x13400141: 234, // en-WS
0x13400160: 235, // en-ZA
0x13400161: 236, // en-ZM
0x13400163: 237, // en-ZW
0x13700000: 238, // eo
0x13700001: 239, // eo-001
0x13900000: 240, // es
0x1390001e: 241, // es-419
0x1390002b: 242, // es-AR
0x1390003e: 243, // es-BO
0x13900040: 244, // es-BR
0x13900050: 245, // es-CL
0x13900053: 246, // es-CO
0x13900055: 247, // es-CR
0x13900058: 248, // es-CU
0x13900064: 249, // es-DO
0x13900067: 250, // es-EA
0x13900068: 251, // es-EC
0x1390006d: 252, // es-ES
0x13900085: 253, // es-GQ
0x13900088: 254, // es-GT
0x1390008e: 255, // es-HN
0x13900093: 256, // es-IC
0x139000ce: 257, // es-MX
0x139000d7: 258, // es-NI
0x139000e1: 259, // es-PA
0x139000e3: 260, // es-PE
0x139000e6: 261, // es-PH
0x139000eb: 262, // es-PR
0x139000f0: 263, // es-PY
0x13900119: 264, // es-SV
0x13900134: 265, // es-US
0x13900135: 266, // es-UY
0x1390013a: 267, // es-VE
0x13b00000: 268, // et
0x13b00069: 269, // et-EE
0x14000000: 270, // eu
0x1400006d: 271, // eu-ES
0x14100000: 272, // ewo
0x14100051: 273, // ewo-CM
0x14300000: 274, // fa
0x14300023: 275, // fa-AF
0x1430009b: 276, // fa-IR
0x14900000: 277, // ff
0x14900051: 278, // ff-CM
0x14900083: 279, // ff-GN
0x149000c8: 280, // ff-MR
0x14900113: 281, // ff-SN
0x14c00000: 282, // fi
0x14c00071: 283, // fi-FI
0x14e00000: 284, // fil
0x14e000e6: 285, // fil-PH
0x15300000: 286, // fo
0x15300062: 287, // fo-DK
0x15300075: 288, // fo-FO
0x15900000: 289, // fr
0x15900035: 290, // fr-BE
0x15900036: 291, // fr-BF
0x15900039: 292, // fr-BI
0x1590003a: 293, // fr-BJ
0x1590003b: 294, // fr-BL
0x15900048: 295, // fr-CA
0x1590004a: 296, // fr-CD
0x1590004b: 297, // fr-CF
0x1590004c: 298, // fr-CG
0x1590004d: 299, // fr-CH
0x1590004e: 300, // fr-CI
0x15900051: 301, // fr-CM
0x15900061: 302, // fr-DJ
0x15900066: 303, // fr-DZ
0x15900077: 304, // fr-FR
0x15900079: 305, // fr-GA
0x1590007d: 306, // fr-GF
0x15900083: 307, // fr-GN
0x15900084: 308, // fr-GP
0x15900085: 309, // fr-GQ
0x15900090: 310, // fr-HT
0x159000a7: 311, // fr-KM
0x159000b6: 312, // fr-LU
0x159000b9: 313, // fr-MA
0x159000ba: 314, // fr-MC
0x159000bd: 315, // fr-MF
0x159000be: 316, // fr-MG
0x159000c2: 317, // fr-ML
0x159000c7: 318, // fr-MQ
0x159000c8: 319, // fr-MR
0x159000cb: 320, // fr-MU
0x159000d2: 321, // fr-NC
0x159000d3: 322, // fr-NE
0x159000e4: 323, // fr-PF
0x159000e9: 324, // fr-PM
0x15900101: 325, // fr-RE
0x15900106: 326, // fr-RW
0x15900109: 327, // fr-SC
0x15900113: 328, // fr-SN
0x1590011b: 329, // fr-SY
0x1590011f: 330, // fr-TD
0x15900121: 331, // fr-TG
0x15900127: 332, // fr-TN
0x1590013e: 333, // fr-VU
0x1590013f: 334, // fr-WF
0x1590015e: 335, // fr-YT
0x16400000: 336, // fur
0x1640009d: 337, // fur-IT
0x16800000: 338, // fy
0x168000d8: 339, // fy-NL
0x16900000: 340, // ga
0x16900095: 341, // ga-IE
0x17800000: 342, // gd
0x1780007a: 343, // gd-GB
0x18a00000: 344, // gl
0x18a0006d: 345, // gl-ES
0x19c00000: 346, // gsw
0x19c0004d: 347, // gsw-CH
0x19c00077: 348, // gsw-FR
0x19c000b1: 349, // gsw-LI
0x19d00000: 350, // gu
0x19d00098: 351, // gu-IN
0x1a200000: 352, // guw
0x1a400000: 353, // guz
0x1a4000a3: 354, // guz-KE
0x1a500000: 355, // gv
0x1a500097: 356, // gv-IM
0x1ad00000: 357, // ha
0x1ad0007f: 358, // ha-GH
0x1ad000d3: 359, // ha-NE
0x1ad000d5: 360, // ha-NG
0x1b100000: 361, // haw
0x1b100134: 362, // haw-US
0x1b500000: 363, // he
0x1b500096: 364, // he-IL
0x1b700000: 365, // hi
0x1b700098: 366, // hi-IN
0x1ca00000: 367, // hr
0x1ca00032: 368, // hr-BA
0x1ca0008f: 369, // hr-HR
0x1cb00000: 370, // hsb
0x1cb0005f: 371, // hsb-DE
0x1ce00000: 372, // hu
0x1ce00091: 373, // hu-HU
0x1d000000: 374, // hy
0x1d000027: 375, // hy-AM
0x1da00000: 376, // id
0x1da00094: 377, // id-ID
0x1df00000: 378, // ig
0x1df000d5: 379, // ig-NG
0x1e200000: 380, // ii
0x1e200052: 381, // ii-CN
0x1f000000: 382, // is
0x1f00009c: 383, // is-IS
0x1f100000: 384, // it
0x1f10004d: 385, // it-CH
0x1f10009d: 386, // it-IT
0x1f100112: 387, // it-SM
0x1f200000: 388, // iu
0x1f800000: 389, // ja
0x1f8000a1: 390, // ja-JP
0x1fb00000: 391, // jbo
0x1ff00000: 392, // jgo
0x1ff00051: 393, // jgo-CM
0x20200000: 394, // jmc
0x2020012e: 395, // jmc-TZ
0x20600000: 396, // jv
0x20800000: 397, // ka
0x2080007c: 398, // ka-GE
0x20a00000: 399, // kab
0x20a00066: 400, // kab-DZ
0x20e00000: 401, // kaj
0x20f00000: 402, // kam
0x20f000a3: 403, // kam-KE
0x21700000: 404, // kcg
0x21b00000: 405, // kde
0x21b0012e: 406, // kde-TZ
0x21f00000: 407, // kea
0x21f00059: 408, // kea-CV
0x22c00000: 409, // khq
0x22c000c2: 410, // khq-ML
0x23100000: 411, // ki
0x231000a3: 412, // ki-KE
0x23a00000: 413, // kk
0x23a000ad: 414, // kk-KZ
0x23c00000: 415, // kkj
0x23c00051: 416, // kkj-CM
0x23d00000: 417, // kl
0x23d00081: 418, // kl-GL
0x23e00000: 419, // kln
0x23e000a3: 420, // kln-KE
0x24200000: 421, // km
0x242000a5: 422, // km-KH
0x24900000: 423, // kn
0x24900098: 424, // kn-IN
0x24b00000: 425, // ko
0x24b000a9: 426, // ko-KP
0x24b000aa: 427, // ko-KR
0x24d00000: 428, // kok
0x24d00098: 429, // kok-IN
0x26100000: 430, // ks
0x26100098: 431, // ks-IN
0x26200000: 432, // ksb
0x2620012e: 433, // ksb-TZ
0x26400000: 434, // ksf
0x26400051: 435, // ksf-CM
0x26500000: 436, // ksh
0x2650005f: 437, // ksh-DE
0x26b00000: 438, // ku
0x27800000: 439, // kw
0x2780007a: 440, // kw-GB
0x28100000: 441, // ky
0x281000a4: 442, // ky-KG
0x28800000: 443, // lag
0x2880012e: 444, // lag-TZ
0x28c00000: 445, // lb
0x28c000b6: 446, // lb-LU
0x29a00000: 447, // lg
0x29a00130: 448, // lg-UG
0x2a600000: 449, // lkt
0x2a600134: 450, // lkt-US
0x2ac00000: 451, // ln
0x2ac00029: 452, // ln-AO
0x2ac0004a: 453, // ln-CD
0x2ac0004b: 454, // ln-CF
0x2ac0004c: 455, // ln-CG
0x2af00000: 456, // lo
0x2af000ae: 457, // lo-LA
0x2b600000: 458, // lrc
0x2b60009a: 459, // lrc-IQ
0x2b60009b: 460, // lrc-IR
0x2b700000: 461, // lt
0x2b7000b5: 462, // lt-LT
0x2b900000: 463, // lu
0x2b90004a: 464, // lu-CD
0x2bb00000: 465, // luo
0x2bb000a3: 466, // luo-KE
0x2bc00000: 467, // luy
0x2bc000a3: 468, // luy-KE
0x2be00000: 469, // lv
0x2be000b7: 470, // lv-LV
0x2c800000: 471, // mas
0x2c8000a3: 472, // mas-KE
0x2c80012e: 473, // mas-TZ
0x2e000000: 474, // mer
0x2e0000a3: 475, // mer-KE
0x2e400000: 476, // mfe
0x2e4000cb: 477, // mfe-MU
0x2e800000: 478, // mg
0x2e8000be: 479, // mg-MG
0x2e900000: 480, // mgh
0x2e9000d0: 481, // mgh-MZ
0x2eb00000: 482, // mgo
0x2eb00051: 483, // mgo-CM
0x2f600000: 484, // mk
0x2f6000c1: 485, // mk-MK
0x2fb00000: 486, // ml
0x2fb00098: 487, // ml-IN
0x30200000: 488, // mn
0x302000c4: 489, // mn-MN
0x31200000: 490, // mr
0x31200098: 491, // mr-IN
0x31600000: 492, // ms
0x3160003d: 493, // ms-BN
0x316000cf: 494, // ms-MY
0x3160010c: 495, // ms-SG
0x31700000: 496, // mt
0x317000ca: 497, // mt-MT
0x31c00000: 498, // mua
0x31c00051: 499, // mua-CM
0x32800000: 500, // my
0x328000c3: 501, // my-MM
0x33100000: 502, // mzn
0x3310009b: 503, // mzn-IR
0x33800000: 504, // nah
0x33c00000: 505, // naq
0x33c000d1: 506, // naq-NA
0x33e00000: 507, // nb
0x33e000d9: 508, // nb-NO
0x33e0010f: 509, // nb-SJ
0x34500000: 510, // nd
0x34500163: 511, // nd-ZW
0x34700000: 512, // nds
0x3470005f: 513, // nds-DE
0x347000d8: 514, // nds-NL
0x34800000: 515, // ne
0x34800098: 516, // ne-IN
0x348000da: 517, // ne-NP
0x35e00000: 518, // nl
0x35e0002f: 519, // nl-AW
0x35e00035: 520, // nl-BE
0x35e0003f: 521, // nl-BQ
0x35e0005a: 522, // nl-CW
0x35e000d8: 523, // nl-NL
0x35e00115: 524, // nl-SR
0x35e0011a: 525, // nl-SX
0x35f00000: 526, // nmg
0x35f00051: 527, // nmg-CM
0x36100000: 528, // nn
0x361000d9: 529, // nn-NO
0x36300000: 530, // nnh
0x36300051: 531, // nnh-CM
0x36600000: 532, // no
0x36c00000: 533, // nqo
0x36d00000: 534, // nr
0x37100000: 535, // nso
0x37700000: 536, // nus
0x37700116: 537, // nus-SS
0x37e00000: 538, // ny
0x38000000: 539, // nyn
0x38000130: 540, // nyn-UG
0x38700000: 541, // om
0x3870006e: 542, // om-ET
0x387000a3: 543, // om-KE
0x38c00000: 544, // or
0x38c00098: 545, // or-IN
0x38f00000: 546, // os
0x38f0007c: 547, // os-GE
0x38f00105: 548, // os-RU
0x39400000: 549, // pa
0x39405000: 550, // pa-Arab
0x394050e7: 551, // pa-Arab-PK
0x3942f000: 552, // pa-Guru
0x3942f098: 553, // pa-Guru-IN
0x39800000: 554, // pap
0x3aa00000: 555, // pl
0x3aa000e8: 556, // pl-PL
0x3b400000: 557, // prg
0x3b400001: 558, // prg-001
0x3b500000: 559, // ps
0x3b500023: 560, // ps-AF
0x3b700000: 561, // pt
0x3b700029: 562, // pt-AO
0x3b700040: 563, // pt-BR
0x3b70004d: 564, // pt-CH
0x3b700059: 565, // pt-CV
0x3b700085: 566, // pt-GQ
0x3b70008a: 567, // pt-GW
0x3b7000b6: 568, // pt-LU
0x3b7000c5: 569, // pt-MO
0x3b7000d0: 570, // pt-MZ
0x3b7000ed: 571, // pt-PT
0x3b700117: 572, // pt-ST
0x3b700125: 573, // pt-TL
0x3bb00000: 574, // qu
0x3bb0003e: 575, // qu-BO
0x3bb00068: 576, // qu-EC
0x3bb000e3: 577, // qu-PE
0x3cb00000: 578, // rm
0x3cb0004d: 579, // rm-CH
0x3d000000: 580, // rn
0x3d000039: 581, // rn-BI
0x3d300000: 582, // ro
0x3d3000bb: 583, // ro-MD
0x3d300103: 584, // ro-RO
0x3d500000: 585, // rof
0x3d50012e: 586, // rof-TZ
0x3d900000: 587, // ru
0x3d900046: 588, // ru-BY
0x3d9000a4: 589, // ru-KG
0x3d9000ad: 590, // ru-KZ
0x3d9000bb: 591, // ru-MD
0x3d900105: 592, // ru-RU
0x3d90012f: 593, // ru-UA
0x3dc00000: 594, // rw
0x3dc00106: 595, // rw-RW
0x3dd00000: 596, // rwk
0x3dd0012e: 597, // rwk-TZ
0x3e200000: 598, // sah
0x3e200105: 599, // sah-RU
0x3e300000: 600, // saq
0x3e3000a3: 601, // saq-KE
0x3e900000: 602, // sbp
0x3e90012e: 603, // sbp-TZ
0x3f200000: 604, // sdh
0x3f300000: 605, // se
0x3f300071: 606, // se-FI
0x3f3000d9: 607, // se-NO
0x3f30010b: 608, // se-SE
0x3f500000: 609, // seh
0x3f5000d0: 610, // seh-MZ
0x3f700000: 611, // ses
0x3f7000c2: 612, // ses-ML
0x3f800000: 613, // sg
0x3f80004b: 614, // sg-CF
0x3fe00000: 615, // shi
0x3fe52000: 616, // shi-Latn
0x3fe520b9: 617, // shi-Latn-MA
0x3fed2000: 618, // shi-Tfng
0x3fed20b9: 619, // shi-Tfng-MA
0x40200000: 620, // si
0x402000b2: 621, // si-LK
0x40800000: 622, // sk
0x40800110: 623, // sk-SK
0x40c00000: 624, // sl
0x40c0010e: 625, // sl-SI
0x41200000: 626, // sma
0x41300000: 627, // smi
0x41400000: 628, // smj
0x41500000: 629, // smn
0x41500071: 630, // smn-FI
0x41800000: 631, // sms
0x41900000: 632, // sn
0x41900163: 633, // sn-ZW
0x41f00000: 634, // so
0x41f00061: 635, // so-DJ
0x41f0006e: 636, // so-ET
0x41f000a3: 637, // so-KE
0x41f00114: 638, // so-SO
0x42700000: 639, // sq
0x42700026: 640, // sq-AL
0x427000c1: 641, // sq-MK
0x4270014c: 642, // sq-XK
0x42800000: 643, // sr
0x4281e000: 644, // sr-Cyrl
0x4281e032: 645, // sr-Cyrl-BA
0x4281e0bc: 646, // sr-Cyrl-ME
0x4281e104: 647, // sr-Cyrl-RS
0x4281e14c: 648, // sr-Cyrl-XK
0x42852000: 649, // sr-Latn
0x42852032: 650, // sr-Latn-BA
0x428520bc: 651, // sr-Latn-ME
0x42852104: 652, // sr-Latn-RS
0x4285214c: 653, // sr-Latn-XK
0x42d00000: 654, // ss
0x43000000: 655, // ssy
0x43100000: 656, // st
0x43a00000: 657, // sv
0x43a00030: 658, // sv-AX
0x43a00071: 659, // sv-FI
0x43a0010b: 660, // sv-SE
0x43b00000: 661, // sw
0x43b0004a: 662, // sw-CD
0x43b000a3: 663, // sw-KE
0x43b0012e: 664, // sw-TZ
0x43b00130: 665, // sw-UG
0x44400000: 666, // syr
0x44600000: 667, // ta
0x44600098: 668, // ta-IN
0x446000b2: 669, // ta-LK
0x446000cf: 670, // ta-MY
0x4460010c: 671, // ta-SG
0x45700000: 672, // te
0x45700098: 673, // te-IN
0x45a00000: 674, // teo
0x45a000a3: 675, // teo-KE
0x45a00130: 676, // teo-UG
0x46100000: 677, // th
0x46100122: 678, // th-TH
0x46500000: 679, // ti
0x4650006c: 680, // ti-ER
0x4650006e: 681, // ti-ET
0x46700000: 682, // tig
0x46c00000: 683, // tk
0x46c00126: 684, // tk-TM
0x47600000: 685, // tn
0x47800000: 686, // to
0x47800128: 687, // to-TO
0x48000000: 688, // tr
0x4800005c: 689, // tr-CY
0x4800012a: 690, // tr-TR
0x48400000: 691, // ts
0x49a00000: 692, // twq
0x49a000d3: 693, // twq-NE
0x49f00000: 694, // tzm
0x49f000b9: 695, // tzm-MA
0x4a200000: 696, // ug
0x4a200052: 697, // ug-CN
0x4a400000: 698, // uk
0x4a40012f: 699, // uk-UA
0x4aa00000: 700, // ur
0x4aa00098: 701, // ur-IN
0x4aa000e7: 702, // ur-PK
0x4b200000: 703, // uz
0x4b205000: 704, // uz-Arab
0x4b205023: 705, // uz-Arab-AF
0x4b21e000: 706, // uz-Cyrl
0x4b21e136: 707, // uz-Cyrl-UZ
0x4b252000: 708, // uz-Latn
0x4b252136: 709, // uz-Latn-UZ
0x4b400000: 710, // vai
0x4b452000: 711, // vai-Latn
0x4b4520b3: 712, // vai-Latn-LR
0x4b4d9000: 713, // vai-Vaii
0x4b4d90b3: 714, // vai-Vaii-LR
0x4b600000: 715, // ve
0x4b900000: 716, // vi
0x4b90013d: 717, // vi-VN
0x4bf00000: 718, // vo
0x4bf00001: 719, // vo-001
0x4c200000: 720, // vun
0x4c20012e: 721, // vun-TZ
0x4c400000: 722, // wa
0x4c500000: 723, // wae
0x4c50004d: 724, // wae-CH
0x4db00000: 725, // wo
0x4e800000: 726, // xh
0x4f100000: 727, // xog
0x4f100130: 728, // xog-UG
0x4ff00000: 729, // yav
0x4ff00051: 730, // yav-CM
0x50800000: 731, // yi
0x50800001: 732, // yi-001
0x50e00000: 733, // yo
0x50e0003a: 734, // yo-BJ
0x50e000d5: 735, // yo-NG
0x51500000: 736, // yue
0x5150008c: 737, // yue-HK
0x51e00000: 738, // zgh
0x51e000b9: 739, // zgh-MA
0x51f00000: 740, // zh
0x51f34000: 741, // zh-Hans
0x51f34052: 742, // zh-Hans-CN
0x51f3408c: 743, // zh-Hans-HK
0x51f340c5: 744, // zh-Hans-MO
0x51f3410c: 745, // zh-Hans-SG
0x51f35000: 746, // zh-Hant
0x51f3508c: 747, // zh-Hant-HK
0x51f350c5: 748, // zh-Hant-MO
0x51f3512d: 749, // zh-Hant-TW
0x52400000: 750, // zu
0x52400160: 751, // zu-ZA
}
// Total table size 4580 bytes (4KiB); checksum: A7F72A2A

975
vendor/golang.org/x/text/language/language.go generated vendored Normal file
View File

@ -0,0 +1,975 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run maketables.go gen_common.go -output tables.go
//go:generate go run gen_index.go
// Package language implements BCP 47 language tags and related functionality.
//
// The Tag type, which is used to represent languages, is agnostic to the
// meaning of its subtags. Tags are not fully canonicalized to preserve
// information that may be valuable in certain contexts. As a consequence, two
// different tags may represent identical languages.
//
// Initializing language- or locale-specific components usually consists of
// two steps. The first step is to select a display language based on the
// preferred languages of the user and the languages supported by an application.
// The second step is to create the language-specific services based on
// this selection. Each is discussed in more detail below.
//
// Matching preferred against supported languages
//
// An application may support various languages. This list is typically limited
// by the languages for which there exist translations of the user interface.
// Similarly, a user may provide a list of preferred languages which is limited
// by the languages understood by this user.
// An application should use a Matcher to find the best supported language based
// on the user's preferred list.
// Matchers are aware of the intricacies of equivalence between languages.
// The default Matcher implementation takes into account things such as
// deprecated subtags, legacy tags, and mutual intelligibility between scripts
// and languages.
//
// A Matcher for English, Australian English, Danish, and standard Mandarin can
// be defined as follows:
//
// var matcher = language.NewMatcher([]language.Tag{
// language.English, // The first language is used as fallback.
// language.MustParse("en-AU"),
// language.Danish,
// language.Chinese,
// })
//
// The following code selects the best match for someone speaking Spanish and
// Norwegian:
//
// preferred := []language.Tag{ language.Spanish, language.Norwegian }
// tag, _, _ := matcher.Match(preferred...)
//
// In this case, the best match is Danish, as Danish is sufficiently a match to
// Norwegian to not have to fall back to the default.
// See ParseAcceptLanguage on how to handle the Accept-Language HTTP header.
//
// Selecting language-specific services
//
// One should always use the Tag returned by the Matcher to create an instance
// of any of the language-specific services provided by the text repository.
// This prevents the mixing of languages, such as having a different language for
// messages and display names, as well as improper casing or sorting order for
// the selected language.
// Using the returned Tag also allows user-defined settings, such as collation
// order or numbering system to be transparently passed as options.
//
// If you have language-specific data in your application, however, it will in
// most cases suffice to use the index returned by the matcher to identify
// the user language.
// The following loop provides an alternative in case this is not sufficient:
//
// supported := map[language.Tag]data{
// language.English: enData,
// language.MustParse("en-AU"): enAUData,
// language.Danish: daData,
// language.Chinese: zhData,
// }
// tag, _, _ := matcher.Match(preferred...)
// for ; tag != language.Und; tag = tag.Parent() {
// if v, ok := supported[tag]; ok {
// return v
// }
// }
// return enData // should not reach here
//
// Repeatedly taking the Parent of the tag returned by Match will eventually
// match one of the tags used to initialize the Matcher.
//
// Canonicalization
//
// By default, only legacy and deprecated tags are converted into their
// canonical equivalent. All other information is preserved. This approach makes
// the confidence scores more accurate and allows matchers to distinguish
// between variants that are otherwise lost.
//
// As a consequence, two tags that should be treated as identical according to
// BCP 47 or CLDR, like "en-Latn" and "en", will be represented differently. The
// Matchers will handle such distinctions, though, and are aware of the
// equivalence relations. The CanonType type can be used to alter the
// canonicalization form.
//
// References
//
// BCP 47 - Tags for Identifying Languages
// http://tools.ietf.org/html/bcp47
package language // import "golang.org/x/text/language"
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"errors"
"fmt"
"strings"
)
// Size limits, in bytes, for the string form of a tag.
const (
// maxCoreSize is the maximum size of a BCP 47 tag without variants and
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
maxCoreSize = 12
// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
// is large enough to hold at least 99% of the BCP 47 tags.
max99thPercentileSize = 32
// maxSimpleUExtensionSize is the maximum size of a -u extension with one
// key-type pair. Equals len("-u-") (3) + key (2) + dash (1) + max value (8).
maxSimpleUExtensionSize = 14
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
// lang, region and script are the IDs of the base language, region and
// script subtags. Raw returns them without any inference.
lang langID
region regionID
script scriptID
pVariant byte // offset in str, includes preceding '-'
pExt uint16 // offset of first extension, includes preceding '-'
// str is the string representation of the Tag. It will only be used if the
// tag has variants or extensions.
str string
}
// Make is a convenience wrapper for Parse that omits the error.
// In case of an error, a sensible default is returned.
// It delegates to Default.Make, so the Default canonicalization is applied.
func Make(s string) Tag {
return Default.Make(s)
}
// Make is a convenience wrapper for c.Parse that omits the error.
// In case of an error, a sensible default is returned.
func (c CanonType) Make(s string) Tag {
// The error is dropped on purpose: whatever Tag c.Parse returns alongside
// it is handed to the caller as the fallback value.
t, _ := c.Parse(s)
return t
}
// Raw returns the base language, script and region exactly as they are stored
// in the tag. No attempt is made to infer a value for a subtag that is not
// set.
func (t Tag) Raw() (b Base, s Script, r Region) {
	b = Base{t.lang}
	s = Script{t.script}
	r = Region{t.region}
	return b, s, r
}
// equalTags reports whether t and a have the same language, script and region
// subtags. Variants and extensions are not taken into account.
func (t Tag) equalTags(a Tag) bool {
	if t.lang != a.lang {
		return false
	}
	if t.script != a.script {
		return false
	}
	return t.region == a.region
}
// IsRoot returns true if t is equal to language "und". A tag whose string
// form continues past the variant offset is never the root, whatever its
// language, script and region are.
func (t Tag) IsRoot() bool {
	hasSuffix := int(t.pVariant) < len(t.str)
	return !hasSuffix && t.equalTags(und)
}
// private reports whether the Tag consists solely of a private use tag, which
// is the case when the tag has a string form and its variant offset is zero.
func (t Tag) private() bool {
	if t.str == "" {
		return false
	}
	return t.pVariant == 0
}
// CanonType can be used to enable or disable various types of canonicalization.
// It is a bit set: the individual flags below can be combined with |.
type CanonType int
const (
// DeprecatedBase replaces deprecated base languages with their preferred
// replacements. (Bit value 1.)
DeprecatedBase CanonType = 1 << iota
// DeprecatedScript replaces deprecated scripts with their preferred
// replacements. (Bit value 2.)
DeprecatedScript
// DeprecatedRegion replaces deprecated regions with their preferred
// replacements. (Bit value 4.)
DeprecatedRegion
// SuppressScript removes redundant scripts. (Bit value 8.)
SuppressScript
// Legacy normalizes legacy encodings. This includes legacy languages
// defined in CLDR as well as bibliographic codes defined in ISO-639.
// (Bit value 16.)
Legacy
// Macro maps the dominant language of a macro language group to the macro
// language subtag. For example cmn -> zh. (Bit value 32.)
Macro
// The CLDR flag should be used if full compatibility with CLDR is required.
// There are a few cases where language.Tag may differ from CLDR. To follow all
// of CLDR's suggestions, use All|CLDR. (Bit value 64; not part of All.)
CLDR
// Raw can be used to Compose or Parse without Canonicalization.
Raw CanonType = 0
// Deprecated replaces all deprecated tags with their preferred
// replacements.
Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion
// BCP47 enables all canonicalizations recommended by BCP 47.
BCP47 = Deprecated | SuppressScript
// All enables all canonicalizations listed above except CLDR.
All = BCP47 | Legacy | Macro
// Default is the canonicalization used by Parse, Make and Compose. To
// preserve as much information as possible, canonicalizations that remove
// potentially valuable information are not included. The Matcher is
// designed to recognize similar tags that would be the same if
// they were canonicalized using All.
Default = Deprecated | Legacy
// canonLang groups the flags that cause canonicalize to normalize the
// base language.
canonLang = DeprecatedBase | Legacy | Macro
// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
)
// canonicalize returns the canonicalized equivalent of the tag and
// whether there was any change. Only the lang, script and region fields are
// updated here; t.str is left untouched, so callers that need a consistent
// string form must call remakeString when a change is reported.
func (t Tag) canonicalize(c CanonType) (Tag, bool) {
if c == Raw {
return t, false
}
changed := false
if c&SuppressScript != 0 {
// Drop the script when it equals the suppressScript entry for the language.
if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] {
t.script = 0
changed = true
}
}
if c&canonLang != 0 {
// The loop body runs again only after a deprecated base language was
// replaced (see the continue below); every other path breaks out.
for {
if l, aliasType := normLang(t.lang); l != t.lang {
switch aliasType {
case langLegacy:
if c&Legacy != 0 {
// Legacy "sh" without a script gets Latn before being replaced.
if t.lang == _sh && t.script == 0 {
t.script = _Latn
}
t.lang = l
changed = true
}
case langMacro:
if c&Macro != 0 {
// We deviate here from CLDR. The mapping "nb" -> "no"
// qualifies as a typical Macro language mapping. However,
// for legacy reasons, CLDR maps "no", the macro language
// code for Norwegian, to the dominant variant "nb". This
// change is currently under consideration for CLDR as well.
// See http://unicode.org/cldr/trac/ticket/2698 and also
// http://unicode.org/cldr/trac/ticket/1790 for some of the
// practical implications. TODO: this check could be removed
// if CLDR adopts this change.
if c&CLDR == 0 || t.lang != _nb {
changed = true
t.lang = l
}
}
case langDeprecated:
if c&DeprecatedBase != 0 {
// Deprecated "mo" without a region gets region MD before being replaced.
if t.lang == _mo && t.region == 0 {
t.region = _MD
}
t.lang = l
changed = true
// Other canonicalization types may still apply.
continue
}
}
} else if c&Legacy != 0 && t.lang == _no && c&CLDR != 0 {
// normLang had no alias for the language: with both Legacy and CLDR
// set, "no" is mapped to "nb".
t.lang = _nb
changed = true
}
break
}
}
if c&DeprecatedScript != 0 {
if t.script == _Qaai {
changed = true
t.script = _Zinh
}
}
if c&DeprecatedRegion != 0 {
// normRegion returns 0 when it has no replacement for the region.
if r := normRegion(t.region); r != 0 {
changed = true
t.region = r
}
}
return t, changed
}
// Canonicalize returns the canonicalized equivalent of t under c. When the
// canonicalization changed any subtag, the string form of the result is
// rebuilt. The returned error is always nil.
func (c CanonType) Canonicalize(t Tag) (Tag, error) {
	canon, changed := t.canonicalize(c)
	if !changed {
		return canon, nil
	}
	canon.remakeString()
	return canon, nil
}
// Confidence indicates the level of certainty for a given return value.
// For example, Serbian may be written in Cyrillic or Latin script.
// The confidence level indicates whether a value was explicitly specified,
// whether it is typically the only possible value, or whether there is
// an ambiguity.
type Confidence int

const (
	No    Confidence = iota // full confidence that there was no match
	Low                     // most likely value picked out of a set of alternatives
	High                    // value is generally assumed to be the correct match
	Exact                   // exact match or explicitly specified value
)

// confName holds the names of the Confidence values, indexed by value.
var confName = []string{"No", "Low", "High", "Exact"}

// String returns the name of c ("No", "Low", "High" or "Exact"). For a value
// outside the declared range it returns "Confidence(n)" instead of panicking
// with an index-out-of-range error.
func (c Confidence) String() string {
	if c < 0 || int(c) >= len(confName) {
		return fmt.Sprintf("Confidence(%d)", int(c))
	}
	return confName[c]
}
// remakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
// It is a no-op for tags without a string form (t.str == "").
func (t *Tag) remakeString() {
if t.str == "" {
return
}
// extra is everything stored after the core subtags (variants and
// extensions), without its leading '-'.
extra := t.str[t.pVariant:]
if t.pVariant > 0 {
extra = extra[1:]
}
// An "und" tag whose remainder starts with "x-" is stored as just that
// remainder, with both offsets reset to 0.
if t.equalTags(und) && strings.HasPrefix(extra, "x-") {
t.str = extra
t.pVariant = 0
t.pExt = 0
return
}
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
b := buf[:t.genCoreBytes(buf[:])]
if extra != "" {
// Shift both offsets by the change in length of the core part.
diff := len(b) - int(t.pVariant)
b = append(b, '-')
b = append(b, extra...)
t.pVariant = uint8(int(t.pVariant) + diff)
t.pExt = uint16(int(t.pExt) + diff)
} else {
// Nothing follows the core: both offsets point at the end of the string.
t.pVariant = uint8(len(b))
t.pExt = uint16(len(b))
}
t.str = string(b)
}
// genCoreBytes renders the base language and, when set, the script and
// region subtags into buf, separated by '-'. It returns the number of bytes
// written, which will never exceed maxCoreSize.
func (t *Tag) genCoreBytes(buf []byte) int {
	n := t.lang.stringToBuf(buf[:])
	// put appends s at the current write position.
	put := func(s string) {
		n += copy(buf[n:], s)
	}
	if t.script != 0 {
		put("-")
		put(t.script.String())
	}
	if t.region != 0 {
		put("-")
		put(t.region.String())
	}
	return n
}
// String returns the canonical string representation of the language tag.
// A stored string form is returned as is; otherwise the string is generated
// from the language, script and region subtags.
func (t Tag) String() string {
	switch {
	case t.str != "":
		return t.str
	case t.script == 0 && t.region == 0:
		// Language only: no buffer needed.
		return t.lang.String()
	}
	var core [maxCoreSize]byte
	n := t.genCoreBytes(core[:])
	return string(core[:n])
}
// Base returns the base language of the language tag. An explicitly set
// language is returned with confidence Exact. Otherwise the language is
// inferred with addTags: the confidence is Low when the tag has no script and
// its region is not a country, and High otherwise. If nothing can be
// inferred, the zero Base is returned with confidence No.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Base() (Base, Confidence) {
	if t.lang != 0 {
		return Base{t.lang}, Exact
	}
	conf := High
	if t.script == 0 && !(Region{t.region}).IsCountry() {
		conf = Low
	}
	likely, err := addTags(t)
	if err != nil || likely.lang == 0 {
		return Base{0}, No
	}
	return Base{likely.lang}, conf
}
// Script returns the script of the language tag. An explicitly given script
// is returned with confidence Exact; otherwise a most likely candidate is
// inferred.
// If more than one script is commonly used for a language, the most likely one
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
// for Serbian.
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
// Note that an inferred script is never guaranteed to be the correct one. Latin is
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
// in the past. Also, the script that is commonly used may change over time.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Script() (Script, Confidence) {
	if t.script != 0 {
		return Script{t.script}, Exact
	}
	best, conf := scriptID(_Zzzz), No
	if t.lang < langNoIndexOffset {
		// Note: it is not always the case that a language with a suppress
		// script value is only written in one script (e.g. kk, ms, pa).
		suppressed := scriptID(suppressScript[t.lang])
		if suppressed != 0 && t.region == 0 {
			return Script{suppressed}, High
		}
		if suppressed != 0 {
			best, conf = suppressed, High
		}
	}
	likely, err := addTags(t)
	if err != nil {
		// Retry on the canonicalized tag when the tag as given has no
		// likely-subtags result.
		t, _ = (Deprecated | Macro).Canonicalize(t)
		likely, err = addTags(t)
	}
	if err == nil && likely.script != best {
		best, conf = likely.script, Low
	}
	return Script{best}, conf
}
// Region returns the region for the language tag. An explicitly given region
// is returned with confidence Exact. Otherwise a most likely candidate is
// inferred, first from the tag as given and then from its canonicalized
// form, and returned with confidence Low. If neither yields a result,
// (ZZ, No) is returned.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Region() (Region, Confidence) {
	if t.region != 0 {
		return Region{t.region}, Exact
	}
	likely, err := addTags(t)
	if err != nil {
		t, _ = (Deprecated | Macro).Canonicalize(t)
		likely, err = addTags(t)
	}
	if err != nil {
		return Region{_ZZ}, No // TODO: return world instead of undetermined?
	}
	return Region{likely.region}, Low // TODO: differentiate between high and low.
}
// Variants returns the variants specified explicitly for this language tag.
// It returns an empty, non-nil slice if no variant was specified.
func (t Tag) Variants() []Variant {
	variants := []Variant{}
	if int(t.pVariant) >= int(t.pExt) {
		return variants
	}
	// The variant section sits between the two offsets; each token is
	// preceded by a '-'.
	rest := t.str[t.pVariant:t.pExt]
	for rest != "" {
		var tok string
		tok, rest = nextToken(rest)
		variants = append(variants, Variant{tok})
	}
	return variants
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
if t.str != "" {
// Strip the variants and extensions.
t, _ = Raw.Compose(t.Raw())
// For a language-script pair, also drop the script when it is the one
// addTags yields for the bare language.
if t.region == 0 && t.script != 0 && t.lang != 0 {
base, _ := addTags(Tag{lang: t.lang})
if base.script == t.script {
return Tag{lang: t.lang}
}
}
return t
}
if t.lang != 0 {
if t.region != 0 {
// maxScript is the explicit script or, failing that, the one
// addTags yields for t.
maxScript := t.script
if maxScript == 0 {
max, _ := addTags(t)
maxScript = max.script
}
// Look for an entry in the parents table matching language, script
// and region; it dictates the parent's script and region.
for i := range parents {
if langID(parents[i].lang) == t.lang && scriptID(parents[i].maxScript) == maxScript {
for _, r := range parents[i].fromRegion {
if regionID(r) == t.region {
return Tag{
lang: t.lang,
script: scriptID(parents[i].script),
region: regionID(parents[i].toRegion),
}
}
}
}
}
// Strip the script if it is the default one.
base, _ := addTags(Tag{lang: t.lang})
if base.script != maxScript {
return Tag{lang: t.lang, script: maxScript}
}
return Tag{lang: t.lang}
} else if t.script != 0 {
// The parent for an base-script pair with a non-default script is
// "und" instead of the base language.
base, _ := addTags(Tag{lang: t.lang})
if base.script != t.script {
return und
}
return Tag{lang: t.lang}
}
}
// A bare language, or a tag without a language, has "und" as its parent.
return und
}
// nextToken splits off the first token of s. The first byte of s (the
// separator preceding the token) is skipped; t is the text up to the next
// '-' and tail is the remainder starting at that '-'. tail is empty when no
// further '-' exists. s must not be empty.
func nextToken(s string) (t, tail string) {
	rest := s[1:]
	if i := strings.IndexByte(rest, '-'); i >= 0 {
		return rest[:i], rest[i:]
	}
	return rest, ""
}
// Extension is a single BCP 47 extension.
type Extension struct {
	// s is the extension text, starting with its type tag.
	s string
}

// String returns the extension exactly as stored, i.e. its string
// representation including the type tag.
func (e Extension) String() string {
	text := e.s
	return text
}
// ParseExtension parses s as an extension and returns it on success.
// It returns errSyntax when the first token of s is not a single character or
// when the parsed extension does not span all of s. The input is lowercased
// before parsing.
func ParseExtension(s string) (e Extension, err error) {
	scan := makeScannerString(s)
	if len(scan.token) != 1 {
		return Extension{}, errSyntax
	}
	scan.toLower(0, len(scan.b))
	if end := parseExtension(&scan); end != len(s) {
		return Extension{}, errSyntax
	}
	return Extension{string(scan.b)}, nil
}
// Type returns the one-byte extension type of e, which is the first byte of
// its string form. It returns 0 for the zero extension.
func (e Extension) Type() byte {
	if len(e.s) > 0 {
		return e.s[0]
	}
	return 0
}
// Tokens returns the list of tokens of e, obtained by splitting its string
// form on '-'.
func (e Extension) Tokens() []string {
	const sep = "-"
	tokens := strings.Split(e.s, sep)
	return tokens
}
// Extension returns the extension of type x for tag t. ok is false if t does
// not have the requested extension; the returned extension is then the zero
// Extension and must not be used.
func (t Tag) Extension(x byte) (ext Extension, ok bool) {
	pos, last := int(t.pExt), len(t.str)-1
	for pos < last {
		next, cur := getExtension(t.str, pos)
		if cur[0] == x {
			return Extension{cur}, true
		}
		pos = next
	}
	return Extension{}, false
}
// Extensions returns all extensions of t, in the order in which they appear
// in the tag. The result is empty but non-nil when t has no extensions.
func (t Tag) Extensions() []Extension {
	exts := []Extension{}
	pos, last := int(t.pExt), len(t.str)-1
	for pos < last {
		var cur string
		pos, cur = getExtension(t.str, pos)
		exts = append(exts, Extension{cur})
	}
	return exts
}
// TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// It returns the empty string when findTypeForKey locates no type for key.
func (t Tag) TypeForKey(key string) string {
	start, end, _ := t.findTypeForKey(key)
	if start == end {
		return ""
	}
	return t.str[start:end]
}
// Errors returned by Tag.SetTypeForKey.
var (
// errPrivateUse is returned when the tag consists solely of a private use tag.
errPrivateUse = errors.New("cannot set a key on a private use tag")
// errInvalidArguments is returned when the key is not 2 bytes long or a
// non-empty value is not between 3 and 8 bytes long.
errInvalidArguments = errors.New("invalid key or type")
)
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
//
// It returns errPrivateUse for a private-use-only tag, errInvalidArguments
// when key is not 2 bytes long or a non-empty value is not 3 to 8 bytes long,
// and the scanner's error when the new key-type pair does not parse. On error
// the tag is returned unmodified.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
if t.private() {
return t, errPrivateUse
}
if len(key) != 2 {
return t, errInvalidArguments
}
// Remove the setting if value is "".
if value == "" {
start, end, _ := t.findTypeForKey(key)
if start != end {
// Remove key tag and leading '-'.
start -= 4
// Remove a possible empty extension.
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
start -= 2
}
// If nothing but the removed span followed the core, the tag no
// longer needs a string form.
if start == int(t.pVariant) && end == len(t.str) {
t.str = ""
t.pVariant, t.pExt = 0, 0
} else {
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
}
}
return t, nil
}
if len(value) < 3 || len(value) > 8 {
return t, errInvalidArguments
}
var (
buf [maxCoreSize + maxSimpleUExtensionSize]byte
uStart int // start of the -u extension.
)
// Generate the tag string if needed.
if t.str == "" {
uStart = t.genCoreBytes(buf[:])
buf[uStart] = '-'
uStart++
}
// Create new key-type pair and parse it to verify.
b := buf[uStart:]
copy(b, "u-")
copy(b[2:], key)
b[4] = '-'
b = b[:5+copy(b[5:], value)]
scan := makeScanner(b)
if parseExtensions(&scan); scan.err != nil {
return t, scan.err
}
// Assemble the replacement string.
if t.str == "" {
// The tag had no string form: buf now holds core + "-u-key-value", and
// both offsets point at the '-' that precedes the extension.
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
t.str = string(buf[:uStart+len(b)])
} else {
s := t.str
start, end, hasExt := t.findTypeForKey(key)
if start == end {
// Key not present: insert the pair, dropping the "u-" prefix when a
// -u extension already exists.
if hasExt {
b = b[2:]
}
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
} else {
// Key present: replace only its type.
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
}
}
return t, nil
}
// findTypeForKey returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found. The hasExt return value reports whether an -u extension was present.
// When the type was not found, start == end.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
p := int(t.pExt)
// Nothing to search for an invalid key or a tag without extensions.
if len(key) != 2 || p == len(t.str) || p == 0 {
return p, p, false
}
s := t.str
// Find the correct extension.
for p++; s[p] != 'u'; p++ {
// An extension type beyond 'u' means there is no -u extension; the
// insertion point is the '-' preceding this extension.
if s[p] > 'u' {
p--
return p, p, false
}
if p = nextExtension(s, p); p == len(s) {
return len(s), len(s), false
}
}
// Proceed to the hyphen following the extension name.
p++
// curKey is the key currently being processed.
curKey := ""
// Iterate over keys until we get the end of a section.
for {
// p points to the hyphen preceding the current token.
if p3 := p + 3; s[p3] == '-' {
// Found a key.
// Check whether we just processed the key that was requested.
if curKey == key {
return start, p, true
}
// Set to the next key and continue scanning type tokens.
curKey = s[p+1 : p3]
// A greater key means the requested key is absent; p is where it
// would be inserted.
if curKey > key {
return p, p, true
}
// Start of the type token sequence.
start = p + 4
// A type is at least 3 characters long.
p += 7 // 4 + 3
} else {
// Attribute or type, which is at least 3 characters long.
p += 4
}
// p points past the third character of a type or attribute.
max := p + 5 // maximum length of token plus hyphen.
if len(s) < max {
max = len(s)
}
for ; p < max && s[p] != '-'; p++ {
}
// Bail if we have exhausted all tokens or if the next token starts
// a new extension.
if p == len(s) || s[p+2] == '-' {
if curKey == key {
return start, p, true
}
return p, p, true
}
}
}
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository. The index will change over time
// and should not be stored in persistent storage. Extensions, except for the
// 'va' type of the 'u' extension, are ignored. It will return 0, false if no
// compact tag exists, where 0 is the index for the root language (Und).
func CompactIndex(t Tag) (index int, ok bool) {
// TODO: perhaps give more frequent tags a lower index.
// TODO: we could make the indexes stable. This will excluded some
// possibilities for optimization, so don't do this quite yet.
b, s, r := t.Raw()
if len(t.str) > 0 {
if strings.HasPrefix(t.str, "x-") {
// We have no entries for user-defined tags.
return 0, false
}
// pVariant != pExt means the tag has at least one variant.
if uint16(t.pVariant) != t.pExt {
// There are no tags with variants and an u-va type.
if t.TypeForKey("va") != "" {
return 0, false
}
// Rebuild the tag from its core and variants, dropping extensions.
t, _ = Raw.Compose(b, s, r, t.Variants())
} else if _, ok := t.Extension('u'); ok {
// Strip all but the 'va' entry.
variant := t.TypeForKey("va")
t, _ = Raw.Compose(b, s, r)
t, _ = t.SetTypeForKey("va", variant)
}
if len(t.str) > 0 {
// We have some variants.
// Indexes of special tags are offset by one from their position in
// specialTags.
for i, s := range specialTags {
if s == t {
return i + 1, true
}
}
return 0, false
}
}
// No variants specified: just compare core components.
// The key has the form lllssrrr, where l, s, and r are nibbles for
// respectively the langID, scriptID, and regionID.
key := uint32(b.langID) << (8 + 12)
key |= uint32(s.scriptID) << 12
key |= uint32(r.regionID)
x, ok := coreTags[key]
return int(x), ok
}
// Base is an ISO 639 language code, used for encoding the base language
// of a language tag.
type Base struct {
// langID is embedded, so its methods (String, ISO3, IsPrivateUse) are
// promoted to Base.
langID
}
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns errSyntax if s is not 2 or 3 bytes long; otherwise it returns
// the result and error of getLangID for s.
func ParseBase(s string) (Base, error) {
	switch len(s) {
	case 2, 3:
		// Acceptable length; fall out of the switch.
	default:
		return Base{}, errSyntax
	}
	var buf [3]byte
	n := copy(buf[:], s)
	id, err := getLangID(buf[:n])
	return Base{id}, err
}
// Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case.
type Script struct {
// scriptID is embedded, so its methods (String, IsPrivateUse) are
// promoted to Script.
scriptID
}
// ParseScript parses a 4-letter ISO 15924 code.
// It returns errSyntax if s is not exactly 4 bytes long; otherwise it returns
// the result and error of getScriptID for s.
func ParseScript(s string) (Script, error) {
	const scriptLen = 4
	if len(s) != scriptLen {
		return Script{}, errSyntax
	}
	var buf [scriptLen]byte
	n := copy(buf[:], s)
	id, err := getScriptID(script, buf[:n])
	return Script{id}, err
}
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
type Region struct {
// regionID is embedded, so its methods (String, ISO3, M49, IsPrivateUse)
// are promoted to Region.
regionID
}
// EncodeM49 returns the Region for the given UN M.49 code r, together with
// the error reported by getRegionM49 when r is not a valid code.
func EncodeM49(r int) (Region, error) {
	id, err := getRegionM49(r)
	return Region{id}, err
}
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns errSyntax if s is not 2 or 3 bytes long; otherwise it returns
// the result and error of getRegionID for s.
func ParseRegion(s string) (Region, error) {
	switch len(s) {
	case 2, 3:
		// Acceptable length; fall out of the switch.
	default:
		return Region{}, errSyntax
	}
	var buf [3]byte
	n := copy(buf[:], s)
	id, err := getRegionID(buf[:n])
	return Region{id}, err
}
// IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR. The unspecified region, groups
// and private-use regions other than XK are not countries.
func (r Region) IsCountry() bool {
	switch {
	case r.regionID == 0:
		return false
	case r.IsGroup():
		return false
	case r.IsPrivateUse() && r.regionID != _XK:
		return false
	}
	return true
}
// IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR. The unspecified region is
// never a group.
func (r Region) IsGroup() bool {
	if r.regionID != 0 {
		return int(regionInclusion[r.regionID]) < len(regionContainment)
	}
	return false
}
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r.
func (r Region) Contains(c Region) bool {
	outer, inner := r.regionID, c.regionID
	return outer.contains(inner)
}
// contains reports whether region c is contained by region r. A region
// contains itself; apart from that, only regions whose inclusion index is
// below nRegionGroups can contain anything.
func (r regionID) contains(c regionID) bool {
	if r == c {
		return true
	}
	group := regionInclusion[r]
	if group >= nRegionGroups {
		return false
	}
	mask := regionContainment[group]
	inner := regionInclusion[c]
	bits := regionInclusionBits[inner]
	// A contained country may belong to multiple disjoint groups. Matching any
	// of these indicates containment. If the contained region is a group, it
	// must strictly be a subset.
	if inner < nRegionGroups {
		return bits&^mask == 0
	}
	return bits&mask != 0
}
// errNoTLD is returned by Region.TLD for regions without a ccTLD.
var errNoTLD = errors.New("language: region is not a valid ccTLD")

// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
// In all other cases it returns either the region itself or an error.
//
// This method may return an error for a region for which there exists a
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
// region will already be canonicalized if it was obtained from a Tag that was
// obtained using any of the default methods.
func (r Region) TLD() (Region, error) {
	// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
	// difference between ISO 3166-1 and IANA ccTLD.
	if r.regionID == _GB {
		r = Region{_UK}
	}
	if r.typ()&ccTLD != 0 {
		return r, nil
	}
	return Region{}, errNoTLD
}
// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
func (r Region) Canonicalize() Region {
	replacement := normRegion(r.regionID)
	if replacement == 0 {
		// No replacement known: the region is returned unchanged.
		return r
	}
	return Region{replacement}
}
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
// variant is the variant subtag; ParseVariant stores it in lower case.
variant string
}
// ParseVariant parses and returns a Variant. The input is lowercased before
// it is looked up in variantIndex; an error is returned if it is not found
// there.
func ParseVariant(s string) (Variant, error) {
	lower := strings.ToLower(s)
	if _, known := variantIndex[lower]; !known {
		return Variant{}, mkErrInvalid([]byte(lower))
	}
	return Variant{lower}, nil
}
// String returns the string representation of the variant, i.e. the stored
// variant subtag.
func (v Variant) String() string {
	name := v.variant
	return name
}

396
vendor/golang.org/x/text/language/lookup.go generated vendored Normal file
View File

@ -0,0 +1,396 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"fmt"
"sort"
"strconv"
"golang.org/x/text/internal/tag"
)
// findIndex looks up key in idx after fixing its case according to form.
// It returns errSyntax when tag.FixCase rejects the key and the error
// produced by mkErrInvalid when the key is not present in idx.
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
	if ok := tag.FixCase(form, key); !ok {
		return 0, errSyntax
	}
	pos := idx.Index(key)
	if pos != -1 {
		return pos, nil
	}
	return 0, mkErrInvalid(key)
}
// searchUint binary-searches imap, which is expected to be sorted in
// ascending order, and returns the smallest index whose element is >= key,
// or len(imap) if there is no such element.
func searchUint(imap []uint16, key uint16) int {
	atLeastKey := func(i int) bool {
		return !(imap[i] < key)
	}
	return sort.Search(len(imap), atLeastKey)
}
type langID uint16

// getLangID returns the langID for the language subtag s. Two-byte input is
// resolved by getLangISO2, everything else by getLangISO3; the error is the
// one reported by the chosen helper.
func getLangID(s []byte) (langID, error) {
	switch len(s) {
	case 2:
		return getLangISO2(s)
	default:
		return getLangISO3(s)
	}
}
// normLang looks up id in langAliasMap using a binary search. If an alias
// exists it returns the replacement langID and the alias type; otherwise it
// returns id unchanged together with langAliasTypeUnknown.
func normLang(id langID) (langID, langAliasType) {
	want := uint16(id)
	k := sort.Search(len(langAliasMap), func(i int) bool {
		return langAliasMap[i].from >= want
	})
	if k >= len(langAliasMap) || langAliasMap[k].from != want {
		return id, langAliasTypeUnknown
	}
	return langID(langAliasMap[k].to), langAliasTypes[k]
}
// getLangISO2 returns the langID for the given 2-letter ISO language code.
// It returns errSyntax when tag.FixCase rejects s, and the error from
// mkErrInvalid when s is not found in lang or the matching entry has a zero
// fourth byte.
func getLangISO2(s []byte) (langID, error) {
	if !tag.FixCase("zz", s) {
		return 0, errSyntax
	}
	i := lang.Index(s)
	if i == -1 || lang.Elem(i)[3] == 0 {
		return 0, mkErrInvalid(s)
	}
	return langID(i), nil
}
// base is the radix used by strToInt and intToStr: the number of letters in
// the range 'a'..'z'.
const base = 'z' - 'a' + 1

// strToInt interprets s as a base-26 number whose digits are the letters
// 'a' (0) through 'z' (25), most significant digit first.
func strToInt(s []byte) uint {
	var v uint
	for _, c := range s {
		v = v*base + uint(c-'a')
	}
	return v
}

// intToStr converts the given integer to the original ASCII string passed to
// strToInt, filling s from its last byte to its first.
// len(s) must match the number of characters obtained.
func intToStr(v uint, s []byte) {
	for i := len(s); i > 0; i-- {
		s[i-1] = 'a' + byte(v%base)
		v /= base
	}
}
// getLangISO3 returns the langID for the given 3-letter ISO language code.
// It returns errSyntax when tag.FixCase rejects s and the error from
// mkErrInvalid when none of the lookups below matches.
func getLangISO3(s []byte) (langID, error) {
if tag.FixCase("und", s) {
// first try to match canonical 3-letter entries
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
// We treat "und" as special and always translate it to "unspecified".
// Note that ZZ and Zzzz are private use and are not treated as
// unspecified by default.
id := langID(i)
if id == nonCanonicalUnd {
return 0, nil
}
return id, nil
}
}
// Next, try the table of alternative 3-letter codes.
if i := altLangISO3.Index(s); i != -1 {
return langID(altLangIndex[altLangISO3.Elem(i)[3]]), nil
}
// Then test the bit for s in langNoIndex; such languages get an ID
// derived from their base-26 value, offset by langNoIndexOffset.
n := strToInt(s)
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
return langID(n) + langNoIndexOffset, nil
}
// Check for non-canonical uses of ISO3.
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
return langID(i), nil
}
}
return 0, mkErrInvalid(s)
}
return 0, errSyntax
}
// stringToBuf writes the string form of id to b and returns the number of
// bytes written. cap(b) must be >= 3.
func (id langID) stringToBuf(b []byte) int {
	switch {
	case id >= langNoIndexOffset:
		// Languages without a table entry are decoded from their numeric value.
		intToStr(uint(id)-langNoIndexOffset, b[:3])
		return 3
	case id == 0:
		return copy(b, "und")
	}
	entry := lang[id<<2:]
	// A zero fourth byte marks a 3-letter code; otherwise the code has 2 letters.
	width := 2
	if entry[3] == 0 {
		width = 3
	}
	return copy(b, entry[:width])
}
// String returns the BCP 47 representation of the langID.
// Use b as variable name, instead of id, to ensure the variable
// used is consistent with that of Base in which this type is embedded.
func (b langID) String() string {
	switch {
	case b == 0:
		return "und"
	case b >= langNoIndexOffset:
		// Languages without a table entry are decoded from their numeric value.
		var buf [3]byte
		intToStr(uint(b-langNoIndexOffset), buf[:])
		return string(buf[:])
	}
	entry := lang.Elem(int(b))
	if entry[3] != 0 {
		return entry[:2]
	}
	return entry[:3]
}
// ISO3 returns the ISO 639-3 language code. For the unspecified language and
// for languages without a table entry the result is the same as String.
func (b langID) ISO3() string {
	if b == 0 || b >= langNoIndexOffset {
		return b.String()
	}
	entry := lang.Elem(int(b))
	switch {
	case entry[3] == 0:
		return entry[:3]
	case entry[2] == 0:
		// The fourth byte indexes the table of alternative 3-letter codes.
		return altLangISO3.Elem(int(entry[3]))[:3]
	}
	// This allocation will only happen for 3-letter ISO codes
	// that are non-canonical BCP 47 language identifiers.
	return entry[0:1] + entry[2:4]
}
// IsPrivateUse reports whether this language code is reserved for private
// use, i.e. lies in the inclusive range langPrivateStart..langPrivateEnd.
func (b langID) IsPrivateUse() bool {
	if b < langPrivateStart {
		return false
	}
	return b <= langPrivateEnd
}
type regionID uint16
// getRegionID returns the region id for s if s is a valid 2-letter region code
// or unknownRegion.
func getRegionID(s []byte) (regionID, error) {
if len(s) == 3 {
if isAlpha(s[0]) {
return getRegionISO3(s)
}
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
return getRegionM49(int(i))
}
}
return getRegionISO2(s)
}
// getRegionISO2 returns the regionID for the given 2-letter ISO country code:
// its index in regionISO offset by isoRegionOffset. The error from findIndex
// is returned when the lookup fails.
func getRegionISO2(s []byte) (regionID, error) {
	i, err := findIndex(regionISO, s, "ZZ")
	if err == nil {
		return regionID(i) + isoRegionOffset, nil
	}
	return 0, err
}
// getRegionISO3 returns the regionID for the given 3-letter ISO country code.
// It returns errSyntax when tag.FixCase rejects s and the error from
// mkErrInvalid when the code is found neither in regionISO nor in
// altRegionISO3.
func getRegionISO3(s []byte) (regionID, error) {
	if !tag.FixCase("ZZZ", s) {
		return 0, errSyntax
	}
	// Scan the regionISO entries that share the first letter and compare the
	// remaining two letters against bytes 2 and 3 of the entry.
	first := s[:1]
	for i := regionISO.Index(first); i != -1; i = regionISO.Next(first, i) {
		e := regionISO.Elem(i)
		if e[2] == s[1] && e[3] == s[2] {
			return regionID(i) + isoRegionOffset, nil
		}
	}
	// Fall back to the table of alternative codes, stored 3 bytes per entry.
	for off := 0; off < len(altRegionISO3); off += 3 {
		if tag.Compare(altRegionISO3[off:off+3], s) == 0 {
			return regionID(altRegionIDs[off/3]), nil
		}
	}
	return 0, mkErrInvalid(s)
}
// getRegionM49 returns the regionID for the UN M.49 code n. If n is outside
// 1..999 or has no entry in fromM49, it returns 0 and a ValueError that
// carries the decimal form of n.
func getRegionM49(n int) (regionID, error) {
	if 0 < n && n <= 999 {
		const (
			searchBits = 7
			regionBits = 9
			regionMask = 1<<regionBits - 1
		)
		// m49Index partitions fromM49 into buckets keyed by the high bits of n.
		idx := n >> searchBits
		buf := fromM49[m49Index[idx]:m49Index[idx+1]]
		val := uint16(n) << regionBits // we rely on bits shifting out
		i := sort.Search(len(buf), func(i int) bool {
			return buf[i] >= val
		})
		if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
			return regionID(r & regionMask), nil
		}
	}
	var e ValueError
	// Format n into a scratch buffer and copy it into e.v. The previous code
	// handed bytes.NewBuffer the full-length slice e.v[:] as initial contents,
	// so the formatted number was appended to a reallocated buffer and e.v
	// stayed empty.
	// NOTE(review): assumes ValueError.v is a fixed-size byte array (as the
	// e.v[:] slicing in the original suggests); copy truncates longer output.
	var digits bytes.Buffer
	fmt.Fprint(&digits, n)
	copy(e.v[:], digits.Bytes())
	return 0, e
}
// normRegion returns the replacement region if r is listed in regionOldMap
// (i.e. is deprecated), or 0 otherwise.
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
// TODO: consider mapping split up regions to new most populous one (like CLDR).
func normRegion(r regionID) regionID {
	want := uint16(r)
	k := sort.Search(len(regionOldMap), func(i int) bool {
		return regionOldMap[i].from >= want
	})
	if k >= len(regionOldMap) || regionOldMap[k].from != want {
		return 0
	}
	return regionID(regionOldMap[k].to)
}
// Bit flags that are tested against the value returned by regionID.typ.
const (
// iso3166UserAssigned is the flag checked by regionID.IsPrivateUse.
iso3166UserAssigned = 1 << iota
// ccTLD is the flag checked by Region.TLD.
ccTLD
bcp47Region
)
// typ returns the regionTypes entry for r, a set of bit flags such as
// iso3166UserAssigned and ccTLD.
func (r regionID) typ() byte {
	flags := regionTypes[r]
	return flags
}
// String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region. Regions below isoRegionOffset
// are rendered as their zero-padded 3-digit UN M.49 code.
func (r regionID) String() string {
	if r >= isoRegionOffset {
		return regionISO.Elem(int(r - isoRegionOffset))[:2]
	}
	if r == 0 {
		return "ZZ"
	}
	return fmt.Sprintf("%03d", r.M49())
}
// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
func (r regionID) ISO3() string {
	if r < isoRegionOffset {
		return "ZZZ"
	}
	reg := regionISO.Elem(int(r - isoRegionOffset))
	if reg[2] == 0 {
		// The fourth byte is an offset into the table of alternative codes.
		return altRegionISO3[reg[3]:][:3]
	}
	if reg[2] == ' ' {
		return "ZZZ"
	}
	return reg[0:1] + reg[2:4]
}
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r. The value is read from the m49 table.
func (r regionID) M49() int {
	code := m49[r]
	return int(code)
}
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
func (r regionID) IsPrivateUse() bool {
	userAssigned := r.typ() & iso3166UserAssigned
	return userAssigned != 0
}
type scriptID uint8

// getScriptID returns the script id for s, i.e. its index in idx. The case of
// s is fixed to the form "Zzzz" by findIndex, whose error is returned as is.
func getScriptID(idx tag.Index, s []byte) (scriptID, error) {
	pos, err := findIndex(idx, s, "Zzzz")
	return scriptID(pos), err
}
// String returns the script code in title case.
// It returns "Zzzz" for an unspecified script.
func (s scriptID) String() string {
	if s != 0 {
		return script.Elem(int(s))
	}
	return "Zzzz"
}
// IsPrivateUse reports whether this script code is reserved for private use,
// i.e. lies in the inclusive range _Qaaa.._Qabx.
func (s scriptID) IsPrivateUse() bool {
	if s < _Qaaa {
		return false
	}
	return s <= _Qabx
}
const (
// maxAltTaglen is the size of the byte array that grandfathered takes as
// its argument.
maxAltTaglen = len("en-US-POSIX")
// maxLen is the array length of the keys of grandfatheredMap.
maxLen = maxAltTaglen
)
var (
// grandfatheredMap holds a mapping from legacy and grandfathered tags to
// their base language or index to more elaborate tag.
// Keys are zero-padded to maxLen bytes. Non-negative values are converted
// to a langID by grandfathered; a negative value -v selects the entry
// altTags[altTagIndex[v-1]:altTagIndex[v]].
grandfatheredMap = map[[maxLen]byte]int16{
[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami
[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn
[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak
[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon
[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux
[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo
[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn
[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao
[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay
[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu
[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok
[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL
[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE
[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu
[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang
// Grandfathered tags with no modern replacement will be converted as
// follows:
[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed
[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default
[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian
[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min
// CLDR-specific tag.
[maxLen]byte{'r', 'o', 'o', 't'}: 0, // root
[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX
}
// altTagIndex holds the boundaries of the entries in altTags: entry i spans
// altTags[altTagIndex[i]:altTagIndex[i+1]].
altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}
// altTags is the concatenation of the replacement tags selected by the
// negative values in grandfatheredMap.
altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
)
// grandfathered looks up s, a lower-cased and zero-padded tag, in
// grandfatheredMap. It reports ok == false when s is not listed. For a
// non-negative map value the result is a Tag with only the language set to
// that value; for a negative value the result is built with Make from the
// corresponding substring of altTags.
func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
	v, found := grandfatheredMap[s]
	if !found {
		return t, false
	}
	if v >= 0 {
		t.lang = langID(v)
		return t, true
	}
	// Negative values are one-based indexes into altTagIndex.
	lo, hi := altTagIndex[-v-1], altTagIndex[-v]
	return Make(altTags[lo:hi]), true
}

841
vendor/golang.org/x/text/language/match.go generated vendored Normal file
View File

@ -0,0 +1,841 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import "errors"
// Matcher is the interface that wraps the Match method.
//
// Match returns the best match for any of the given tags, along with
// a unique index associated with the returned tag and a confidence
// score.
type Matcher interface {
	// Match takes the tags to be matched and returns the selected tag, the
	// index associated with it, and the confidence of the match.
	Match(t ...Tag) (tag Tag, index int, c Confidence)
}
// Comprehends reports the confidence score for a speaker of a given language
// being able to comprehend the written form of an alternative language.
//
// It builds a Matcher that supports only alternative and returns the
// confidence of matching speaker against it.
func Comprehends(speaker, alternative Tag) Confidence {
	supported := []Tag{alternative}
	m := NewMatcher(supported)
	_, _, conf := m.Match(speaker)
	return conf
}
// NewMatcher returns a Matcher that matches an ordered list of preferred tags
// against a list of supported tags based on written intelligibility, closeness
// of dialect, equivalence of subtags and various other rules. It is initialized
// with the list of supported tags. The first element is used as the default
// value in case no match is found.
//
// Its Match method matches the first of the given Tags to reach a certain
// confidence threshold. The tags passed to Match should therefore be specified
// in order of preference. Extensions are ignored for matching.
//
// The index returned by the Match method corresponds to the index of the
// matched tag in t, but is augmented with the Unicode extension ('u') of the
// corresponding preferred tag. This allows user locale options to be passed
// transparently.
func NewMatcher(t []Tag) Matcher {
	m := newMatcher(t)
	return m
}
// Match implements Matcher. It returns the supported tag that best matches
// any of the tags in want, the index stored with that tag, and the confidence
// of the match. When getBest finds no match, the matcher's default tag is
// returned with index 0.
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
	best, orig, conf := m.getBest(want...)
	if best != nil {
		t, index = best.tag, best.index
	} else {
		t = m.default_.tag
	}
	// Copy options from the user-provided tag into the result tag. This is hard
	// to do after the fact, so we do it here.
	// TODO: consider also adding in variants that are compatible with the
	// matched language.
	// TODO: Add back region if it is non-ambiguous? Or create another tag to
	// preserve the region?
	u, ok := orig.Extension('u')
	if ok {
		// The error from Compose is deliberately ignored.
		t, _ = Raw.Compose(t, u)
	}
	return t, index, conf
}
// scriptRegionFlags is an 8-bit flag set.
// NOTE(review): presumably the type of the flags field of the likely-subtag
// table entries tested in addTags; those tables are declared elsewhere —
// confirm.
type scriptRegionFlags uint8

const (
	// isList marks a table entry that does not hold a single result but
	// instead refers to a range of entries in the corresponding list table
	// (see the isList checks in addTags).
	isList = 1 << iota
	// scriptInFrom is required to be set by addTags when matching on
	// lang-script and required to be clear when matching on lang-region.
	// NOTE(review): presumably it means the script was part of the lookup
	// key of the entry — confirm against the table generator.
	scriptInFrom
	// regionInFrom is the region counterpart of scriptInFrom.
	// NOTE(review): its use is not visible in this file section — confirm.
	regionInFrom
)
// setUndefinedLang sets the language of t to id, but only if t has no
// language yet.
func (t *Tag) setUndefinedLang(id langID) {
	if t.lang != 0 {
		return
	}
	t.lang = id
}
// setUndefinedScript sets the script of t to id, but only if t has no script
// yet.
func (t *Tag) setUndefinedScript(id scriptID) {
	if t.script != 0 {
		return
	}
	t.script = id
}
// setUndefinedRegion sets the region of t to id if t has no region yet, or if
// the current region of t contains id.
func (t *Tag) setUndefinedRegion(id regionID) {
	if t.region != 0 && !t.region.contains(id) {
		return
	}
	t.region = id
}
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags. It is the error returned by
// addTags when none of its lookups applies.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
// addLikelySubtags sets subtags to their most likely value, given the locale.
// In most cases this means setting fields for unknown values, but in some
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
// if the given locale cannot be expanded.
//
// When expansion fails, or leaves language, script and region as they were,
// the receiver is returned unchanged.
func (t Tag) addLikelySubtags() (Tag, error) {
	expanded, err := addTags(t)
	switch {
	case err != nil:
		return t, err
	case expanded.equalTags(t):
		return t, nil
	}
	expanded.remakeString()
	return expanded, nil
}
// specializeRegion attempts to specialize a group region.
//
// It reports whether the region of t is a group (its regionInclusion value
// is below nRegionGroups). For a group, the region of t is replaced by the
// likely region of that group, but only if the group's likely language and
// script equal those of t.
func specializeRegion(t *Tag) bool {
	group := regionInclusion[t.region]
	if group >= nRegionGroups {
		return false
	}
	likely := likelyRegionGroup[group]
	if langID(likely.lang) == t.lang && scriptID(likely.script) == t.script {
		t.region = regionID(likely.region)
	}
	return true
}
// addTags fills in the language, script and region of t that are not yet
// defined, using the likely-subtag tables. It works on a copy of t and
// returns that copy. Private use tags are returned unchanged. If none of the
// lookups applies, t is returned together with ErrMissingLikelyTagsData.
//
// The lookups are tried in this order: und-script-region, lang-script and
// lang-region, und-script, und-region, and finally lang alone.
func addTags(t Tag) (Tag, error) {
	// We leave private use identifiers alone.
	if t.private() {
		return t, nil
	}
	if t.script != 0 && t.region != 0 {
		if t.lang != 0 {
			// already fully specified
			specializeRegion(&t)
			return t, nil
		}
		// Search matches for und-script-region. Note that for these cases
		// region will never be a group so there is no need to check for this.
		list := likelyRegion[t.region : t.region+1]
		if x := list[0]; x.flags&isList != 0 {
			// For list entries, lang holds the offset into likelyRegionList
			// and script holds the number of entries.
			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
		}
		for _, x := range list {
			// Deviating from the spec. See match_test.go for details.
			if scriptID(x.script) == t.script {
				t.setUndefinedLang(langID(x.lang))
				return t, nil
			}
		}
	}
	if t.lang != 0 {
		// Search matches for lang-script and lang-region, where lang != und.
		if t.lang < langNoIndexOffset {
			x := likelyLang[t.lang]
			if x.flags&isList != 0 {
				// For list entries, region holds the offset into
				// likelyLangList and script holds the number of entries.
				list := likelyLangList[x.region : x.region+uint16(x.script)]
				if t.script != 0 {
					for _, x := range list {
						if scriptID(x.script) == t.script && x.flags&scriptInFrom != 0 {
							t.setUndefinedRegion(regionID(x.region))
							return t, nil
						}
					}
				} else if t.region != 0 {
					// count is the number of list entries that apply to
					// t.region. goodScript stays true as long as all of them
					// agree on the script. tt accumulates the candidate result.
					count := 0
					goodScript := true
					tt := t
					for _, x := range list {
						// We visit all entries for which the script was not
						// defined, including the ones where the region was not
						// defined. This allows for proper disambiguation within
						// regions.
						if x.flags&scriptInFrom == 0 && t.region.contains(regionID(x.region)) {
							tt.region = regionID(x.region)
							tt.setUndefinedScript(scriptID(x.script))
							goodScript = goodScript && tt.script == scriptID(x.script)
							count++
						}
					}
					if count == 1 {
						return tt, nil
					}
					// Even if we fail to find a unique Region, we might have
					// an unambiguous script.
					if goodScript {
						t.script = tt.script
					}
				}
			}
		}
	} else {
		// Search matches for und-script.
		if t.script != 0 {
			x := likelyScript[t.script]
			if x.region != 0 {
				t.setUndefinedRegion(regionID(x.region))
				t.setUndefinedLang(langID(x.lang))
				return t, nil
			}
		}
		// Search matches for und-region. If und-script-region exists, it would
		// have been found earlier.
		if t.region != 0 {
			if i := regionInclusion[t.region]; i < nRegionGroups {
				// The region is a group: take the likely values of the group.
				// Control falls through to the language lookup below.
				x := likelyRegionGroup[i]
				if x.region != 0 {
					t.setUndefinedLang(langID(x.lang))
					t.setUndefinedScript(scriptID(x.script))
					t.region = regionID(x.region)
				}
			} else {
				x := likelyRegion[t.region]
				if x.flags&isList != 0 {
					// Use the first entry of the list.
					x = likelyRegionList[x.lang]
				}
				if x.script != 0 && x.flags != scriptInFrom {
					t.setUndefinedLang(langID(x.lang))
					t.setUndefinedScript(scriptID(x.script))
					return t, nil
				}
			}
		}
	}
	// Search matches for lang.
	if t.lang < langNoIndexOffset {
		x := likelyLang[t.lang]
		if x.flags&isList != 0 {
			// Use the first entry of the list.
			x = likelyLangList[x.region]
		}
		if x.region != 0 {
			t.setUndefinedScript(scriptID(x.script))
			t.setUndefinedRegion(regionID(x.region))
		}
		specializeRegion(&t)
		if t.lang == 0 {
			t.lang = _en // default language
		}
		return t, nil
	}
	return t, ErrMissingLikelyTagsData
}
// setTagsFrom copies the language, script and region of id into t. No other
// field of t is changed.
func (t *Tag) setTagsFrom(id Tag) {
	t.lang, t.script, t.region = id.lang, id.script, id.region
}
// minimize removes the region or script subtags from t such that
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
//
// On success the string form of the result is rebuilt. On failure the tag and
// error from minimizeTags are returned as they are.
func (t Tag) minimize() (Tag, error) {
	reduced, err := minimizeTags(t)
	if err == nil {
		reduced.remakeString()
	}
	return reduced, err
}
// minimizeTags mimics the behavior of the ICU 51 C implementation.
//
// It maximizes t with addTags and then tries, in order, the candidates lang,
// lang-region and lang-script. The first candidate that maximizes to the same
// language, script and region as t replaces those subtags of t. If t equals
// und it is returned as is; if t cannot be maximized, t and the error from
// addTags are returned.
func minimizeTags(t Tag) (Tag, error) {
	if t.equalTags(und) {
		return t, nil
	}
	maximized, err := addTags(t)
	if err != nil {
		return t, err
	}
	candidates := [...]Tag{
		{lang: t.lang},
		{lang: t.lang, region: t.region},
		{lang: t.lang, script: t.script},
	}
	for i := range candidates {
		cand := candidates[i]
		expanded, err := addTags(cand)
		if err != nil || !maximized.equalTags(expanded) {
			continue
		}
		t.setTagsFrom(cand)
		break
	}
	return t, nil
}
// Tag Matching
// CLDR defines an algorithm for finding the best match between two sets of language
// tags. The basic algorithm defines how to score a possible match and then find
// the match with the best score
// (see http://www.unicode.org/reports/tr35/#LanguageMatching).
// Using scoring has several disadvantages. The scoring obfuscates the importance of
// the various factors considered, making the algorithm harder to understand. Using
// scoring also requires the full score to be computed for each pair of tags.
//
// We will use a different algorithm which aims to have the following properties:
// - clarity on the precedence of the various selection factors, and
// - improved performance by allowing early termination of a comparison.
//
// Matching algorithm (overview)
// Input:
// - supported: a set of supported tags
// - default: the default tag to return in case there is no match
// - desired: list of desired tags, ordered by preference, starting with
// the most-preferred.
//
// Algorithm:
// 1) Set the best match to the lowest confidence level
// 2) For each tag in "desired":
// a) For each tag in "supported":
// 1) compute the match between the two tags.
// 2) if the match is better than the previous best match, replace it
// with the new match. (see next section)
// b) if the current best match is above a certain threshold, return this
// match without proceeding to the next tag in "desired". [See Note 1]
// 3) If the best match so far is below a certain threshold, return "default".
//
// Ranking:
// We use two phases to determine whether one pair of tags are a better match
// than another pair of tags. First, we determine a rough confidence level. If the
// levels are different, the one with the highest confidence wins.
// Second, if the rough confidence levels are identical, we use a set of tie-breaker
// rules.
//
// The confidence level of matching a pair of tags is determined by finding the
// lowest confidence level of any matches of the corresponding subtags (the
// result is deemed as good as its weakest link).
// We define the following levels:
// Exact - An exact match of a subtag, before adding likely subtags.
// MaxExact - An exact match of a subtag, after adding likely subtags.
// [See Note 2].
// High - High level of mutual intelligibility between different subtag
// variants.
// Low - Low level of mutual intelligibility between different subtag
// variants.
// No - No mutual intelligibility.
//
// The following levels can occur for each type of subtag:
// Base: Exact, MaxExact, High, Low, No
// Script: Exact, MaxExact [see Note 3], Low, No
// Region: Exact, MaxExact, High
// Variant: Exact, High
// Private: Exact, No
//
// Any result with a confidence level of Low or higher is deemed a possible match.
// Once a desired tag matches any of the supported tags with a level of MaxExact
// or higher, the next desired tag is not considered (see Step 2.b).
// Note that CLDR provides languageMatching data that defines close equivalence
// classes for base languages, scripts and regions.
//
// Tie-breaking
// If we get the same confidence level for two matches, we apply a sequence of
// tie-breaking rules. The first that succeeds defines the result. The rules are
// applied in the following order.
// 1) Original language was defined and was identical.
// 2) Original region was defined and was identical.
// 3) Distance between two maximized regions was the smallest.
// 4) Original script was defined and was identical.
// 5) Distance from want tag to have tag using the parent relation [see Note 5].
// If there is still no winner after these rules are applied, the first match
// found wins.
//
// Notes:
// [1] Note that even if we may not have a perfect match, if a match is above a
// certain threshold, it is considered a better match than any other match
// to a tag later in the list of preferred language tags.
// [2] In practice, as matching of Exact is done in a separate phase from
// matching the other levels, we reuse the Exact level to mean MaxExact in
// the second phase. As a consequence, we only need the levels defined by
// the Confidence type. The MaxExact confidence level is mapped to High in
// the public API.
// [3] We do not differentiate between maximized script values that were derived
// from suppressScript versus most likely tag data. We determined that in
// ranking the two, one ranks just after the other. Moreover, the two cannot
// occur concurrently. As a consequence, they are identical for practical
// purposes.
// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
// the MaxExact level to allow iw vs he to still be a closer match than
// en-AU vs en-US, for example.
// [5] In CLDR a locale inherits fields that are unspecified for this locale
// from its parent. Therefore, if a locale is a parent of another locale,
// it is a strong measure for closeness, especially when no other tie
// breaker rule applies. One could also argue it is inconsistent, for
// example, when pt-AO matches pt (which CLDR equates with pt-BR), even
// though its parent is pt-PT according to the inheritance rules.
//
// Implementation Details:
// There are several performance considerations worth pointing out. Most notably,
// we preprocess as much as possible (within reason) at the time of creation of a
// matcher. This includes:
// - creating a per-language map, which includes data for the raw base language
// and its canonicalized variant (if applicable),
// - expanding entries for the equivalence classes defined in CLDR's
// languageMatch data.
// The per-language map ensures that typically only a very small number of tags
// need to be considered. The pre-expansion of canonicalized subtags and
// equivalence classes reduces the amount of map lookups that need to be done at
// runtime.
// matcher keeps a set of supported language tags, indexed by language.
type matcher struct {
	// default_ is the entry whose tag Match returns when no match is found.
	// newMatcher sets it to the first supported tag, or to an empty haveTag
	// when no tags are supported.
	default_ *haveTag
	// index maps a language to the supported tags that may match it.
	index map[langID]*matchHeader
	// NOTE(review): passSettings is not referenced in this part of the file;
	// confirm its purpose against the rest of the package.
	passSettings bool
}
// matchHeader has the lists of tags for exact matches and matches based on
// maximized and canonicalized tags for a given language.
type matchHeader struct {
	// exact holds the tags checked with equalsRest for an exact match.
	exact []*haveTag
	// max holds the tags compared on their maximized script and region.
	// Entries with equal maximized values are chained through nextMax.
	max []*haveTag
}
// haveTag holds a supported Tag and its maximized script and region. The maximized
// or canonicalized language is not stored as it is not needed during matching.
//
// makeHaveTag builds values of this type with a positional composite literal,
// so the order of the fields must not be changed without updating it.
type haveTag struct {
	// tag is the supported tag as passed to the matcher.
	tag Tag
	// index of this tag in the original list of supported tags.
	index int
	// conf is the maximum confidence that can result from matching this haveTag.
	// When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
	conf Confidence
	// Maximized region and script.
	maxRegion regionID
	maxScript scriptID
	// altScript may be checked as an alternative match to maxScript. If altScript
	// matches, the confidence level for this match is Low. Theoretically there
	// could be multiple alternative scripts. This does not occur in practice.
	altScript scriptID
	// nextMax is the index of the next haveTag with the same maximized tags.
	// Zero marks the end of the chain.
	nextMax uint16
}
// makeHaveTag creates the haveTag for the supported tag at position index and
// returns it together with the maximized language. A tag with a defined
// language is canonicalized and maximized first; a tag without a language is
// used as is.
func makeHaveTag(tag Tag, index int) (haveTag, langID) {
	maximized := tag
	if tag.lang != 0 {
		// The "changed" result of canonicalize and the error of addTags are
		// deliberately ignored: whatever tag they return is used.
		maximized, _ = maximized.canonicalize(All)
		maximized, _ = addTags(maximized)
		maximized.remakeString()
	}
	have := haveTag{
		tag:       tag,
		index:     index,
		conf:      Exact,
		maxRegion: maximized.region,
		maxScript: maximized.script,
		altScript: altScript(maximized.lang, maximized.script),
		nextMax:   0,
	}
	return have, maximized.lang
}
// altScript returns an alternative script that may match the given script with
// a low confidence. At the moment, the langMatch data allows for at most one
// script to map to another and we rely on this to keep the code simple.
//
// The first rule in matchScript whose have script equals s, and whose
// language is either unset or equal to l, decides the result. Zero is
// returned if no rule applies.
func altScript(l langID, s scriptID) scriptID {
	for _, rule := range matchScript {
		if scriptID(rule.have) != s {
			continue
		}
		if rule.lang != 0 && langID(rule.lang) != l {
			continue
		}
		return scriptID(rule.want)
	}
	return 0
}
// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
// Tags that have the same maximized values are linked by index.
//
// Nothing is added if an entry of h.exact already equals n in everything
// except the language. Otherwise n is appended to h.max and, if exact is set,
// to h.exact as well.
func (h *matchHeader) addIfNew(n haveTag, exact bool) {
	// Don't add new exact matches.
	for _, existing := range h.exact {
		if existing.tag.equalsRest(n.tag) {
			return
		}
	}
	entry := &n
	if exact {
		h.exact = append(h.exact, entry)
	}
	// Allow duplicate maximized tags, but create a linked list to allow quickly
	// comparing the equivalents and bail out.
	for i, v := range h.max {
		if v.maxScript != n.maxScript || v.maxRegion != n.maxRegion {
			continue
		}
		if v.tag.variantOrPrivateTagStr() != n.tag.variantOrPrivateTagStr() {
			continue
		}
		// Walk to the end of the chain and link the new entry, which is
		// about to be stored at index len(h.max).
		tail := i
		for h.max[tail].nextMax != 0 {
			tail = int(h.max[tail].nextMax)
		}
		h.max[tail].nextMax = uint16(len(h.max))
		break
	}
	h.max = append(h.max, entry)
}
// header returns the matchHeader for the given language. If the index has no
// header for l yet, a new empty one is stored and returned.
func (m *matcher) header(l langID) *matchHeader {
	h := m.index[l]
	if h == nil {
		h = &matchHeader{}
		m.index[l] = h
	}
	return h
}
// newMatcher builds an index for the given supported tags and returns it as
// a matcher. It also expands the index by considering various equivalence classes
// for a given tag.
//
// The first supported tag becomes the default. If supported is empty, the
// default is an empty haveTag and the index stays empty.
func newMatcher(supported []Tag) *matcher {
	m := &matcher{
		index: make(map[langID]*matchHeader),
	}
	if len(supported) == 0 {
		m.default_ = &haveTag{}
		return m
	}
	// Add supported languages to the index. Add exact matches first to give
	// them precedence.
	for i, tag := range supported {
		pair, _ := makeHaveTag(tag, i)
		m.header(tag.lang).addIfNew(pair, true)
	}
	m.default_ = m.header(supported[0].lang).exact[0]
	// Second pass: also file each tag under its maximized language if that
	// differs from the language it was specified with.
	for i, tag := range supported {
		pair, max := makeHaveTag(tag, i)
		if max != tag.lang {
			m.header(max).addIfNew(pair, false)
		}
	}
	// update is used to add indexes in the map for equivalent languages.
	// If force is true, the update will also apply to derived entries. To
	// avoid applying a "transitive closure", use false.
	update := func(want, have uint16, conf Confidence, force bool) {
		if hh := m.index[langID(have)]; hh != nil {
			if !force && len(hh.exact) == 0 {
				return
			}
			hw := m.header(langID(want))
			for _, ht := range hh.max {
				// Work on a copy so the entry filed under have is not modified.
				v := *ht
				if conf < v.conf {
					v.conf = conf
				}
				v.nextMax = 0 // this value needs to be recomputed
				if v.altScript != 0 {
					v.altScript = altScript(langID(want), v.maxScript)
				}
				hw.addIfNew(v, conf == Exact && len(hh.exact) > 0)
			}
		}
	}
	// Add entries for languages with mutual intelligibility as defined by CLDR's
	// languageMatch data.
	for _, ml := range matchLang {
		update(ml.want, ml.have, Confidence(ml.conf), false)
		if !ml.oneway {
			update(ml.have, ml.want, Confidence(ml.conf), false)
		}
	}
	// Add entries for possible canonicalizations. This is an optimization to
	// ensure that only one map lookup needs to be done at runtime per desired tag.
	// First we match deprecated equivalents. If they are perfect equivalents
	// (their canonicalization simply substitutes a different language code, but
	// nothing else), the match confidence is Exact, otherwise it is High.
	for i, lm := range langAliasMap {
		if lm.from == _sh {
			continue
		}
		// If deprecated codes match and there is no fiddling with the script
		// or region, we consider it an exact match.
		conf := Exact
		if langAliasTypes[i] != langMacro {
			if !isExactEquivalent(langID(lm.from)) {
				conf = High
			}
			update(lm.to, lm.from, conf, true)
		}
		update(lm.from, lm.to, conf, true)
	}
	return m
}
// getBest gets the best matching tag in m for any of the given tags, taking into
// account the order of preference of the given tags.
//
// It returns the matched haveTag, the desired tag that led to the match, and
// the confidence of the match. An exact match on a desired tag is returned
// immediately with confidence Exact. A match that reaches Exact on the
// maximized tags ends the search and is reported as High. If no match with a
// confidence above No is found, got is nil and orig is the first desired tag
// (or the zero Tag if want is empty).
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
	best := bestMatch{}
	for _, w := range want {
		// max is the maximized form of w; only its script and region are
		// used for the comparison below.
		var max Tag
		// Check for exact match first.
		h := m.index[w.lang]
		if w.lang != 0 {
			// Base language is defined.
			if h == nil {
				continue
			}
			for i := range h.exact {
				have := h.exact[i]
				if have.tag.equalsRest(w) {
					return have, w, Exact
				}
			}
			max, _ = w.canonicalize(Legacy | Deprecated)
			max, _ = addTags(max)
		} else {
			// Base language is not defined.
			if h != nil {
				for i := range h.exact {
					have := h.exact[i]
					if have.tag.equalsRest(w) {
						return have, w, Exact
					}
				}
			}
			if w.script == 0 && w.region == 0 {
				// We skip all tags matching und for approximate matching, including
				// private tags.
				continue
			}
			max, _ = addTags(w)
			// Continue with the header of the language that was derived
			// from the script and/or region.
			if h = m.index[max.lang]; h == nil {
				continue
			}
		}
		// Check for match based on maximized tag.
		for i := range h.max {
			have := h.max[i]
			best.update(have, w, max.script, max.region)
			if best.conf == Exact {
				// Also consider the entries chained to this one, as they
				// have the same maximized values, and then stop searching.
				for have.nextMax != 0 {
					have = h.max[have.nextMax]
					best.update(have, w, max.script, max.region)
				}
				return best.have, best.want, High
			}
		}
	}
	if best.conf <= No {
		if len(want) != 0 {
			return nil, want[0], No
		}
		return nil, Tag{}, No
	}
	return best.have, best.want, best.conf
}
// bestMatch accumulates the best match so far.
type bestMatch struct {
	// have is the supported tag of the best match so far; nil if there is none.
	have *haveTag
	// want is the desired tag that produced the best match.
	want Tag
	// conf is the confidence of the best match.
	conf Confidence
	// Cached results from applying tie-breaking rules.
	origLang bool
	origReg bool
	regDist uint8
	origScript bool
	parentDist uint8 // 255 if have is not an ancestor of want tag.
}
// update updates the existing best match if the new pair is considered to be a
// better match.
// To determine if the given pair is a better match, it first computes the rough
// confidence level. If this surpasses the current match, it will replace it and
// update the tie-breaker rule cache. If there is a tie, it proceeds with applying
// a series of tie-breaker rules. If there is no conclusive winner after applying
// the tie-breaker rules, it leaves the current match as the preferred match.
//
// have is the supported tag under consideration, tag is the desired tag, and
// maxScript and maxRegion are the script and region of the maximized desired
// tag.
func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion regionID) {
	// Bail if the maximum attainable confidence is below that of the current best match.
	c := have.conf
	if c < m.conf {
		return
	}
	if have.maxScript != maxScript {
		// There is usually very little comprehension between different scripts.
		// In a few cases there may still be Low comprehension. This possibility is
		// pre-computed and stored in have.altScript.
		if Low < m.conf || have.altScript != maxScript {
			return
		}
		c = Low
	} else if have.maxRegion != maxRegion {
		// There is usually a small difference between languages across regions.
		// We use the region distance (below) to disambiguate between equal matches.
		if High < c {
			c = High
		}
	}
	// We store the results of the computations of the tie-breaker rules along
	// with the best match. There is no need to do the checks once we determine
	// we have a winner, but we do still need to do the tie-breaker computations.
	// We use "beaten" to keep track if we still need to do the checks.
	beaten := false // true if the new pair defeats the current one.
	if c != m.conf {
		if c < m.conf {
			return
		}
		beaten = true
	}
	// Tie-breaker rules:
	// We prefer if the pre-maximized language was specified and identical.
	origLang := have.tag.lang == tag.lang && tag.lang != 0
	if !beaten && m.origLang != origLang {
		if m.origLang {
			return
		}
		beaten = true
	}
	// We prefer if the pre-maximized region was specified and identical.
	origReg := have.tag.region == tag.region && tag.region != 0
	if !beaten && m.origReg != origReg {
		if m.origReg {
			return
		}
		beaten = true
	}
	// Next we prefer smaller distances between regions, as defined by regionDist.
	regDist := regionDist(have.maxRegion, maxRegion, tag.lang)
	if !beaten && m.regDist != regDist {
		if regDist > m.regDist {
			return
		}
		beaten = true
	}
	// Next we prefer if the pre-maximized script was specified and identical.
	origScript := have.tag.script == tag.script && tag.script != 0
	if !beaten && m.origScript != origScript {
		if m.origScript {
			return
		}
		beaten = true
	}
	// Finally we prefer tags which have a closer parent relationship.
	parentDist := parentDistance(have.tag.region, tag)
	if !beaten && m.parentDist != parentDist {
		if parentDist > m.parentDist {
			return
		}
		beaten = true
	}
	// Update m to the newly found best match. If all rules tied, beaten is
	// still false and the current match is kept.
	if beaten {
		m.have = have
		m.want = tag
		m.conf = c
		m.origLang = origLang
		m.origReg = origReg
		m.origScript = origScript
		m.regDist = regDist
		m.parentDist = parentDist
	}
}
// parentDistance returns the number of times Parent must be called before the
// regions match. It is assumed that it has already been checked that lang and
// script are identical. If haveRegion does not occur in the ancestor chain of
// tag, it returns 255.
func parentDistance(haveRegion regionID, tag Tag) uint8 {
	dist := uint8(1)
	for anc := tag.Parent(); ; anc, dist = anc.Parent(), dist+1 {
		if anc.region == haveRegion {
			return dist
		}
		// An ancestor without a region ends the search.
		if anc.region == 0 {
			return 255
		}
	}
}
// regionDist wraps regionDistance with some exceptions to the algorithmic distance.
//
// For English, two regions that are both not the US are given the fixed
// distance 2, regardless of their algorithmic distance.
func regionDist(a, b regionID, lang langID) uint8 {
	if lang == _en && a != _US && b != _US {
		return 2
	}
	dist := regionDistance(a, b)
	return uint8(dist)
}
// regionDistance computes the distance between two regions based on the
// distance in the graph of region containments as defined in CLDR. It iterates
// over increasingly inclusive sets of groups, represented as bit vectors, until
// the source bit vector has bits in common with the destination vector.
//
// Equal regions have distance 0. Distance 1 is returned when, after the
// possible swap below, q is a group whose bit is set in the bit vector of p.
func regionDistance(a, b regionID) int {
	if a == b {
		return 0
	}
	p := regionInclusion[a]
	q := regionInclusion[b]
	if p < nRegionGroups {
		p, q = q, p
	}
	bits := regionInclusionBits
	if q < nRegionGroups && bits[p]&(1<<q) != 0 {
		return 1
	}
	dist := 2
	goal := bits[q]
	for bits[p]&goal == 0 {
		p = regionInclusionNext[p]
		dist++
	}
	return dist
}
// variants returns the part of the tag's string between pVariant and pExt,
// or the empty string if pVariant is zero.
func (t Tag) variants() string {
	if t.pVariant != 0 {
		return t.str[t.pVariant:t.pExt]
	}
	return ""
}
// variantOrPrivateTagStr returns variants or private use tags: the part of
// the tag's string that starts at pVariant and runs up to pExt, or up to the
// end of the string if pExt is zero.
func (t Tag) variantOrPrivateTagStr() string {
	end := len(t.str)
	if t.pExt > 0 {
		end = int(t.pExt)
	}
	return t.str[t.pVariant:end]
}
// equalsRest compares everything except the language: script, region, and
// the variant or private use part of the tag string.
func (a Tag) equalsRest(b Tag) bool {
	// TODO: don't include extensions in this comparison. To do this efficiently,
	// though, we should handle private tags separately.
	if a.script != b.script {
		return false
	}
	if a.region != b.region {
		return false
	}
	return a.variantOrPrivateTagStr() == b.variantOrPrivateTagStr()
}
// isExactEquivalent returns true if canonicalizing the language will not alter
// the script or region of a tag, that is, if l is not listed in notEquivalent.
func isExactEquivalent(l langID) bool {
	for i := 0; i < len(notEquivalent); i++ {
		if notEquivalent[i] == l {
			return false
		}
	}
	return true
}
// notEquivalent lists the languages for which canonicalization may alter the
// script or region. It is filled by init and consulted by isExactEquivalent.
var notEquivalent []langID
// init fills notEquivalent from langAliasMap.
func init() {
	// Create a list of all languages for which canonicalization may alter the
	// script or region.
	for _, alias := range langAliasMap {
		from := langID(alias.from)
		src := Tag{lang: from}
		// Only the resulting tag matters here, not whether it changed.
		canon, _ := src.canonicalize(All)
		if canon.script == 0 && canon.region == 0 {
			continue
		}
		notEquivalent = append(notEquivalent, from)
	}
}

859
vendor/golang.org/x/text/language/parse.go generated vendored Normal file
View File

@ -0,0 +1,859 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"errors"
"fmt"
"sort"
"strconv"
"strings"
"golang.org/x/text/internal/tag"
)
// isAlpha returns true if the byte is not a digit.
// b must be an ASCII letter or digit; under that precondition every byte
// above '9' is a letter.
func isAlpha(b byte) bool {
	return '9' < b
}
// isAlphaNum returns true if the string contains only ASCII letters or digits.
// An empty input yields true.
func isAlphaNum(s []byte) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
// errSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// The scanner gives it precedence over other errors (see setError).
// TODO: return the position at which the syntax error occurred?
var errSyntax = errors.New("language: tag is not well-formed")
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	// v holds the offending subtag, padded with zero bytes if it is shorter
	// than 8 bytes.
	v [8]byte
}
// mkErrInvalid returns a ValueError for the subtag s. At most the first 8
// bytes of s are retained.
func mkErrInvalid(s []byte) error {
	e := ValueError{}
	copy(e.v[:], s)
	return e
}
// tag returns the stored subtag: the bytes of e.v up to, but not including,
// the first zero byte, or all 8 bytes if there is none.
func (e ValueError) tag() []byte {
	if i := bytes.IndexByte(e.v[:], 0); i >= 0 {
		return e.v[:i]
	}
	return e.v[:]
}
// Error implements the error interface. The message quotes the subtag that
// was not recognized.
func (e ValueError) Error() string {
	subtag := e.tag()
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", subtag)
}
// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	subtag := e.tag()
	return string(subtag)
}
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
type scanner struct {
	// b is the input buffer. The scanner routines modify it in place.
	b []byte
	// bytes is inline storage that makeScannerString uses as the backing
	// array of b when the input fits.
	bytes [max99thPercentileSize]byte
	// token is the current token; nil once no valid token is left.
	token []byte
	start int // start position of the current token
	end int // end position of the current token
	next int // next point for scan
	// err is the error recorded by setError, if any.
	err error
	// done is set by scan once the input is exhausted.
	done bool
}
// makeScannerString returns a scanner for the string s, positioned on the
// first token. The input is copied into the scanner's inline storage when it
// fits, and into a newly allocated slice otherwise.
func makeScannerString(s string) scanner {
	var sc scanner
	if len(s) > len(sc.bytes) {
		sc.b = []byte(s)
	} else {
		n := copy(sc.bytes[:], s)
		sc.b = sc.bytes[:n]
	}
	sc.init()
	return sc
}
// makeScanner returns a scanner using b as the input buffer, positioned on
// the first token.
// b is not copied and may be modified by the scanner routines.
func makeScanner(b []byte) scanner {
	var sc scanner
	sc.b = b
	sc.init()
	return sc
}
// init normalizes the separators in the input by replacing every '_' with
// '-', and then scans the first token.
func (s *scanner) init() {
	for i := range s.b {
		if s.b[i] == '_' {
			s.b[i] = '-'
		}
	}
	s.scan()
}
// toLower converts the ASCII upper case letters of the input between start
// (inclusive) and end (exclusive) to lower case, in place.
func (s *scanner) toLower(start, end int) {
	for i := start; i < end; i++ {
		if c := s.b[i]; 'A' <= c && c <= 'Z' {
			s.b[i] = c + ('a' - 'A')
		}
	}
}
// setError records e as the scanner's error. An earlier error is kept, except
// that errSyntax replaces a recorded error that is not errSyntax.
func (s *scanner) setError(e error) {
	switch {
	case s.err == nil:
		s.err = e
	case e == errSyntax && s.err != errSyntax:
		s.err = e
	}
}
// resizeRange shrinks or grows the array at position oldStart such that
// a new string of size newSize can fit between oldStart and oldEnd.
// Sets the scan point to after the resized range.
//
// On return s.start is oldStart. If the size changed, s.b is a newly
// allocated buffer holding the bytes before oldStart and the bytes from
// oldEnd onwards, with a gap of newSize bytes in between for the caller to
// fill; s.end is moved to the end of the gap and s.next is shifted by the
// same amount. If the size did not change, only s.start is updated.
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
	s.start = oldStart
	end := oldStart + newSize
	if end == oldEnd {
		return
	}
	diff := end - oldEnd
	// Always rebuild the buffer from both the prefix and the suffix. The
	// previous implementation did so only while end < cap(s.b); otherwise it
	// appended the suffix to s.b[end:], which discarded the prefix (and
	// panicked for end > cap(s.b)).
	b := make([]byte, len(s.b)+diff)
	copy(b, s.b[:oldStart])
	copy(b[end:], s.b[oldEnd:])
	s.b = b
	s.next = end + (s.next - s.end)
	s.end = end
}
// replace replaces the current token with repl. The buffer is first resized
// so that the token's range has the length of repl, and repl is then copied
// into place.
func (s *scanner) replace(repl string) {
	size := len(repl)
	s.resizeRange(s.start, s.end, size)
	dst := s.b[s.start:]
	copy(dst, repl)
}
// gobble removes the current token from the input and records e as error.
// Unless the token is at the start of the input, the separator that precedes
// it is removed as well.
// Caller must call scan after calling gobble.
func (s *scanner) gobble(e error) {
	s.setError(e)
	if s.start == 0 {
		// Move everything after the token to the front of the buffer.
		n := copy(s.b, s.b[s.next:])
		s.b = s.b[:n]
		s.end = 0
	} else {
		// Move the rest of the input over the preceding separator and the
		// token.
		cut := s.start - 1
		n := copy(s.b[cut:], s.b[s.end:])
		s.b = s.b[:cut+n]
		s.end = cut
	}
	s.next = s.start
}
// deleteRange removes the given range from s.b before the current token.
// It records errSyntax and shifts the token positions and the scan point
// back by the number of bytes removed.
func (s *scanner) deleteRange(start, end int) {
	s.setError(errSyntax)
	kept := copy(s.b[start:], s.b[end:])
	s.b = s.b[:start+kept]
	removed := end - start
	s.start -= removed
	s.end -= removed
	s.next -= removed
}
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
//
// On return s.token is the token found, or nil if the input is exhausted, in
// which case s.done is set.
func (s *scanner) scan() (end int) {
	// Remember the end of the token that was current on entry.
	end = s.end
	s.token = nil
	for s.start = s.next; s.next < len(s.b); {
		i := bytes.IndexByte(s.b[s.next:], '-')
		if i == -1 {
			// Last token: it runs to the end of the input.
			s.end = len(s.b)
			s.next = len(s.b)
			i = s.end - s.start
		} else {
			s.end = s.next + i
			s.next = s.end + 1
		}
		token := s.b[s.start:s.end]
		if i < 1 || i > 8 || !isAlphaNum(token) {
			// Invalid token: remove it from the input and try again from the
			// same position.
			s.gobble(errSyntax)
			continue
		}
		s.token = token
		return end
	}
	// Strip a trailing separator from the input.
	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
		s.setError(errSyntax)
		s.b = s.b[:len(s.b)-1]
	}
	s.done = true
	return end
}
// acceptMinSize parses multiple tokens of the given size or greater.
// It returns the end position of the last token consumed. Scanning stops at
// the first token shorter than min, which remains the current token.
func (s *scanner) acceptMinSize(min int) (end int) {
	end = s.end
	for s.scan(); len(s.token) >= min; s.scan() {
		end = s.end
	}
	return end
}
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the default canonicalization type.
func Parse(s string) (t Tag, err error) {
	t, err = Default.Parse(s)
	return t, err
}
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the canonicalization type c.
//
// An empty string is a syntax error. Inputs short enough to be a
// grandfathered tag are looked up in that table first (after lower-casing
// ASCII letters and mapping '_' to '-'); a hit is returned as is.
func (c CanonType) Parse(s string) (t Tag, err error) {
	// TODO: consider supporting old-style locale key-value pairs.
	if s == "" {
		return und, errSyntax
	}
	if len(s) <= maxAltTaglen {
		var key [maxAltTaglen]byte
		for pos, r := range s {
			// Generating invalid UTF-8 is okay as it won't match.
			switch {
			case 'A' <= r && r <= 'Z':
				r += 'a' - 'A'
			case r == '_':
				r = '-'
			}
			key[pos] = byte(r)
		}
		if gf, ok := grandfathered(key); ok {
			return gf, nil
		}
	}
	scan := makeScannerString(s)
	t, err = parse(&scan, s)
	t, changed := t.canonicalize(c)
	if changed {
		// Canonicalization altered the tag, so its string form is rebuilt.
		t.remakeString()
	}
	return t, err
}
// parse builds a Tag from the tokens produced by scan and returns it together
// with any error recorded on the scanner. s is the original input string; it is
// reused as the Tag's string form when it compares equal to the normalized
// buffer, otherwise a copy of the buffer is stored.
// NOTE(review): the first token is read from scan.token without calling
// scan.scan here, so the caller presumably passes a scanner that is already
// positioned on it; confirm against makeScannerString.
func parse(scan *scanner, s string) (t Tag, err error) {
t = und
var end int
if n := len(scan.token); n <= 1 {
// A first token of at most one byte is only accepted when it is 'x',
// in which case the whole input is parsed as extensions.
scan.toLower(0, len(scan.b))
if n == 0 || scan.token[0] != 'x' {
return t, errSyntax
}
end = parseExtensions(scan)
} else if n >= 4 {
// First tokens of four or more bytes are rejected outright.
return und, errSyntax
} else { // the usual case
t, end = parseTag(scan)
if n := len(scan.token); n == 1 {
// A one-byte token after the core tag starts the extensions.
t.pExt = uint16(end)
end = parseExtensions(scan)
} else if end < len(scan.b) {
// Input remains after the core tag but does not start an extension:
// record a syntax error and cut it from the buffer.
scan.setError(errSyntax)
scan.b = scan.b[:end]
}
}
if int(t.pVariant) < len(scan.b) {
// The buffer extends past the offset recorded in pVariant, so a string
// form is stored on the tag.
if end < len(s) {
s = s[:end]
}
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
t.str = s
} else {
t.str = string(scan.b)
}
} else {
// Nothing beyond pVariant: no string is stored and both offsets are cleared.
t.pVariant, t.pExt = 0, 0
}
return t, scan.err
}
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
// The language token, and a recognized region token, are rewritten in the
// buffer with the String form of their IDs; script and region tokens whose
// lookup yields a zero ID are gobbled.
func parseTag(scan *scanner) (t Tag, end int) {
var e error
// TODO: set an error if an unknown lang, script or region is encountered.
t.lang, e = getLangID(scan.token)
scan.setError(e)
scan.replace(t.lang.String())
langStart := scan.start
end = scan.scan()
// Three-byte tokens starting with a letter are handled as extlangs.
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>.
lang, e := getLangID(scan.token)
if lang != 0 {
// Known extlang: it replaces the language, both in t and in the buffer
// at langStart. scan.start is repositioned just past the rewritten
// language before the token is gobbled below.
t.lang = lang
copy(scan.b[langStart:], lang.String())
scan.b[langStart+3] = '-'
scan.start = langStart + 4
}
scan.gobble(e)
end = scan.scan()
}
// A four-byte token starting with a letter is looked up as the script.
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
t.script, e = getScriptID(script, scan.token)
if t.script == 0 {
scan.gobble(e)
}
end = scan.scan()
}
// A token of two or three bytes is looked up as the region.
if n := len(scan.token); n >= 2 && n <= 3 {
t.region, e = getRegionID(scan.token)
if t.region == 0 {
scan.gobble(e)
} else {
scan.replace(t.region.String())
}
end = scan.scan()
}
// toLower is applied to everything from the current token to the end of the
// buffer before variants are parsed.
scan.toLower(scan.start, len(scan.b))
// NOTE(review): the conversion to byte truncates if end exceeds 255; confirm
// the core tag can never be that long.
t.pVariant = byte(end)
end = parseVariants(scan, end, t)
t.pExt = uint16(end)
return t, end
}
// separator is the single-byte '-' delimiter handed to bytes.Join when
// variants, extensions, attributes and keys are reassembled.
var separator = []byte{'-'}
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
// end is the end position parsed so far; the return value is the end position
// after the accepted variants. Tokens not found in variantIndex are gobbled
// with an error. The t parameter is not used in the body.
func parseVariants(scan *scanner, end int, t Tag) int {
start := scan.start
// Fixed-size backing arrays for the two slices below; append allocates only
// if more than four variants are collected.
varIDBuf := [4]uint8{}
variantBuf := [4][]byte{}
varID := varIDBuf[:0]
variant := variantBuf[:0]
// last is the highest variant index seen while the input is still in order.
last := -1
needSort := false
for ; len(scan.token) >= 4; scan.scan() {
// TODO: measure the impact of needing this conversion and redesign
// the data structure if there is an issue.
v, ok := variantIndex[string(scan.token)]
if !ok {
// unknown variant
// TODO: allow user-defined variants?
scan.gobble(mkErrInvalid(scan.token))
continue
}
varID = append(varID, v)
variant = append(variant, scan.token)
if !needSort {
if last < int(v) {
last = int(v)
} else {
// The index did not increase: the variants are out of order or repeated.
needSort = true
// There is no legal combinations of more than 7 variants
// (and this is by no means a useful sequence).
const maxVariants = 8
if len(varID) > maxVariants {
break
}
}
}
end = scan.end
}
if needSort {
// Sort both slices by variant index, then compact them in place, skipping
// any entry whose index equals the previously kept one.
sort.Sort(variantsSort{varID, variant})
k, l := 0, -1
for i, v := range varID {
w := int(v)
if l == w {
// Remove duplicates.
continue
}
varID[k] = varID[i]
variant[k] = variant[i]
k++
l = w
}
// Write the reordered variants back over the original variant span.
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
end = start - 1
} else {
scan.resizeRange(start, end, len(str))
copy(scan.b[scan.start:], str)
end = scan.end
}
}
return end
}
// variantsSort orders two parallel slices by the values in i: i holds the
// sort keys and v the byte slices that travel with them. Its methods match
// the Len/Swap/Less set expected by sort.Sort.
type variantsSort struct {
	i []uint8
	v [][]byte
}

// Len reports the number of keys.
func (vs variantsSort) Len() int {
	n := len(vs.i)
	return n
}

// Swap exchanges entries a and b in both slices so they stay aligned.
func (vs variantsSort) Swap(a, b int) {
	vs.v[a], vs.v[b] = vs.v[b], vs.v[a]
	vs.i[a], vs.i[b] = vs.i[b], vs.i[a]
}

// Less reports whether the key at a is smaller than the key at b.
func (vs variantsSort) Less(a, b int) bool {
	return vs.i[b] > vs.i[a]
}
// bytesSort orders a list of byte slices lexicographically, as defined by
// bytes.Compare. Its methods match the Len/Swap/Less set expected by sort.Sort.
type bytesSort [][]byte

// Len reports the number of slices.
func (bs bytesSort) Len() int {
	n := len(bs)
	return n
}

// Swap exchanges the slices at positions x and y.
func (bs bytesSort) Swap(x, y int) {
	tmp := bs[x]
	bs[x] = bs[y]
	bs[y] = tmp
}

// Less reports whether the slice at x sorts strictly before the slice at y.
func (bs bytesSort) Less(x, y int) bool {
	return bytes.Compare(bs[x], bs[y]) < 0
}
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
// Non-private extensions are sorted bytewise; a private use ('x') extension,
// if present, is placed after them.
func parseExtensions(scan *scanner) int {
start := scan.start
exts := [][]byte{}
private := []byte{}
end := scan.end
// Every extension begins with a one-byte token.
for len(scan.token) == 1 {
extStart := scan.start
ext := scan.token[0]
end = parseExtension(scan)
extension := scan.b[extStart:end]
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
// Too short to hold the one-byte token, a separator and a value of the
// required size: record a syntax error and do not keep it.
scan.setError(errSyntax)
end = extStart
continue
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
// The first extension is private use, or the scanner has reached the end
// of the buffer after it: nothing to reorder, so truncate the buffer at
// its end and return.
scan.b = scan.b[:end]
return end
} else if ext == 'x' {
// Private use is set aside so it can be appended last; the loop stops here.
private = extension
break
}
exts = append(exts, extension)
}
sort.Sort(bytesSort(exts))
if len(private) > 0 {
exts = append(exts, private)
}
// Rebuild the tail of the buffer from the collected extensions.
scan.b = scan.b[:start]
if len(exts) > 0 {
scan.b = append(scan.b, bytes.Join(exts, separator)...)
} else if start > 0 {
// Strip trailing '-'.
scan.b = scan.b[:start-1]
}
return end
}
// parseExtension parses a single extension and returns the position of
// the extension end.
// The scanner must be positioned on the extension's one-byte leading token,
// which selects the grammar: 'u' (attributes followed by key/value groups),
// 't' (an optional tag followed by fields), 'x' (any tokens), or anything
// else (tokens of two or more bytes).
func parseExtension(scan *scanner) int {
start, end := scan.start, scan.end
switch scan.token[0] {
case 'u':
attrStart := end
scan.scan()
// Attributes: tokens longer than two bytes directly after the leading token.
// NOTE(review): last starts out empty, so for the first attribute
// bytes.Compare returns 1 and the branch below is taken; the re-sort path
// therefore appears to run whenever any attribute is present. Confirm
// this is intended.
for last := []byte{}; len(scan.token) > 2; scan.scan() {
if bytes.Compare(scan.token, last) != -1 {
// Attributes are unsorted. Start over from scratch.
p := attrStart + 1
scan.next = p
attrs := [][]byte{}
for scan.scan(); len(scan.token) > 2; scan.scan() {
attrs = append(attrs, scan.token)
end = scan.end
}
// Write the sorted attributes back over the span they were read from.
sort.Sort(bytesSort(attrs))
copy(scan.b[p:], bytes.Join(attrs, separator))
break
}
last = scan.token
end = scan.end
}
// Keys: two-byte tokens, each followed by values of three or more bytes.
var last, key []byte
for attrEnd := end; len(scan.token) == 2; last = key {
key = scan.token
keyEnd := scan.end
end = scan.acceptMinSize(3)
// TODO: check key value validity
if keyEnd == end || bytes.Compare(key, last) != 1 {
// We have an invalid key or the keys are not sorted.
// Start scanning keys from scratch and reorder.
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart, keyEnd := scan.start, scan.end
end = scan.acceptMinSize(3)
if keyEnd != end {
// The key has at least one value: keep key and values as one unit.
keys = append(keys, scan.b[keyStart:end])
} else {
// A key without any value is a syntax error and is left out.
scan.setError(errSyntax)
end = keyStart
}
}
sort.Sort(bytesSort(keys))
reordered := bytes.Join(keys, separator)
// If keys were left out, the reordered text is shorter than the span it
// replaces; delete the surplus bytes.
if e := p + len(reordered); e < end {
scan.deleteRange(e, end)
end = e
}
copy(scan.b[p:], bytes.Join(keys, separator))
break
}
}
case 't':
scan.scan()
// A two- or three-byte token whose second byte is a letter starts an
// embedded tag, which parseTag handles.
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)
scan.toLower(start, end)
}
// Fields: two-byte keys whose second byte is not a letter, each followed by
// tokens of three or more bytes.
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
end = scan.acceptMinSize(3)
}
case 'x':
// Private use accepts tokens of any non-zero size.
end = scan.acceptMinSize(1)
default:
// Any other extension accepts tokens of two or more bytes.
end = scan.acceptMinSize(2)
}
return end
}
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
// Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. A Tag overwrites all former values and typically
// only makes sense as the first argument. The resulting tag is returned after
// canonicalizing using the Default CanonType. If one or more errors are
// encountered, one of the errors is returned.
//
// It is shorthand for Default.Compose(part...).
func Compose(part ...interface{}) (t Tag, err error) {
	t, err = Default.Compose(part...)
	return t, err
}
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
// Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. A Tag overwrites all former values and typically
// only makes sense as the first argument. The resulting tag is returned after
// canonicalizing using CanonType c. If one or more errors are encountered,
// one of the errors is returned.
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
	var b builder
	if err = b.update(part...); err != nil {
		return und, err
	}
	t, _ = b.tag.canonicalize(c)

	hasTokens := len(b.ext) > 0 || len(b.variant) > 0
	switch {
	case hasTokens:
		// Variants are ordered by their table index and extensions
		// alphabetically; the private use extension always goes last.
		sort.Sort(sortVariant(b.variant))
		sort.Strings(b.ext)
		if b.private != "" {
			b.ext = append(b.ext, b.private)
		}
		// Assemble the string form: core tag, then variants, then
		// extensions, recording where each section starts.
		size := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
		buf := make([]byte, size)
		pos := t.genCoreBytes(buf)
		t.pVariant = byte(pos)
		pos += appendTokens(buf[pos:], b.variant...)
		t.pExt = uint16(pos)
		pos += appendTokens(buf[pos:], b.ext...)
		t.str = string(buf[:pos])
	case b.private != "":
		// Only a private use extension was given.
		t.str = b.private
		t.remakeString()
	}
	return t, err
}
// builder accumulates the parts passed to Compose before they are assembled
// into a Tag.
type builder struct {
tag Tag // receives the language, script and region of the parts seen so far
private string // the x extension
ext []string // extensions other than the x extension
variant []string // variant strings
err error // set to errInvalidArgument when an empty Variant or Extension is seen
}
// addExt stores the extension string e on the builder. An empty string is
// ignored, a string starting with 'x' becomes the private use extension
// (replacing any previous one), and anything else is appended to the list of
// regular extensions.
func (b *builder) addExt(e string) {
	switch {
	case e == "":
		// Nothing to add.
	case e[0] == 'x':
		b.private = e
	default:
		b.ext = append(b.ext, e)
	}
}
// errInvalidArgument is recorded on the builder by update when it is handed an
// empty Variant or Extension.
var errInvalidArgument = errors.New("invalid Extension or Variant")
// update applies each part to the builder, in order. Supported part types are
// Tag, Base, Script, Region, Variant, Extension, []Variant, []Extension and
// error; values of any other type are ignored.
//
// The returned error is the last error value found among the parts. If there
// is none and an empty Variant or Extension was encountered, the recorded
// errInvalidArgument is returned instead of being silently dropped.
func (b *builder) update(part ...interface{}) (err error) {
	// replace overwrites the first element of *l that eq considers equal to s
	// and reports whether the caller is done with s. An empty s is flagged as
	// an invalid argument and also reported as done, so that it is not added.
	replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
		if s == "" {
			b.err = errInvalidArgument
			return true
		}
		for i, v := range *l {
			if eq(v, s) {
				(*l)[i] = s
				return true
			}
		}
		return false
	}
	for _, x := range part {
		switch v := x.(type) {
		case Tag:
			b.tag.lang = v.lang
			b.tag.region = v.region
			b.tag.script = v.script
			if v.str != "" {
				// A Tag carrying a string form replaces all variants and
				// extensions collected so far with its own.
				b.variant = nil
				for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
					x, s = nextToken(s)
					b.variant = append(b.variant, x)
				}
				b.ext, b.private = nil, ""
				for i, e := int(v.pExt), ""; i < len(v.str); {
					i, e = getExtension(v.str, i)
					b.addExt(e)
				}
			}
		case Base:
			b.tag.lang = v.langID
		case Script:
			b.tag.script = v.scriptID
		case Region:
			b.tag.region = v.regionID
		case Variant:
			// Identical variants are stored only once.
			if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) {
				b.variant = append(b.variant, v.variant)
			}
		case Extension:
			// An extension replaces an earlier one that starts with the same byte.
			if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) {
				b.addExt(v.s)
			}
		case []Variant:
			b.variant = nil
			for _, x := range v {
				// An invalid element is recorded in b.err and reported below.
				b.update(x)
			}
		case []Extension:
			b.ext, b.private = nil, ""
			for _, e := range v {
				// An invalid element is recorded in b.err and reported below.
				b.update(e)
			}
		// TODO: support parsing of raw strings based on morphology or just extensions?
		case error:
			err = v
		}
	}
	// Surface an invalid-argument condition recorded by replace; previously
	// b.err was written but never returned.
	if err == nil {
		err = b.err
	}
	return err
}
// tokenLen returns the number of bytes needed to write every token preceded
// by a one-byte separator.
func tokenLen(token ...string) (n int) {
	// One separator per token, plus the tokens themselves.
	n = len(token)
	for i := range token {
		n += len(token[i])
	}
	return n
}
// appendTokens writes each token into b, preceded by a '-', starting at the
// beginning of b, and returns the number of bytes the tokens and separators
// occupy. b must be large enough; tokenLen gives the required size.
func appendTokens(b []byte, token ...string) int {
	written := 0
	for i := range token {
		tok := token[i]
		b[written] = '-'
		written++
		copy(b[written:], tok)
		written += len(tok)
	}
	return written
}
// sortVariant orders variant strings by their value in variantIndex. Its
// methods match the Len/Swap/Less set expected by sort.Sort.
type sortVariant []string

// Len reports the number of variants.
func (sv sortVariant) Len() int {
	n := len(sv)
	return n
}

// Swap exchanges the variants at positions a and b.
func (sv sortVariant) Swap(a, b int) {
	sv[a], sv[b] = sv[b], sv[a]
}

// Less reports whether the variant at a has a smaller variantIndex value than
// the variant at b.
func (sv sortVariant) Less(a, b int) bool {
	left, right := variantIndex[sv[a]], variantIndex[sv[b]]
	return left < right
}
// findExt returns the index of the first string in list whose first byte is
// x, or -1 if there is none. Empty strings never match; they are skipped
// rather than indexed, which would panic.
func findExt(list []string, x byte) int {
	for i, e := range list {
		if e != "" && e[0] == x {
			return i
		}
	}
	return -1
}
// getExtension returns the extension that starts at position p of s, together
// with the position at which it ends. A '-' at p is skipped first. An
// extension starting with 'x' always extends to the end of s; any other
// extension ends where nextExtension finds the start of the next one.
func getExtension(s string, p int) (end int, ext string) {
	begin := p
	if s[begin] == '-' {
		begin++
	}
	if s[begin] != 'x' {
		end = nextExtension(s, begin)
		return end, s[begin:end]
	}
	return len(s), s[begin:]
}
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p. It returns the position of the
// first '-' of the pattern, or len(s) if the pattern does not occur.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		switch {
		case s[p] != '-':
			p++
		case s[p+2] == '-':
			return p
		default:
			// A separator that does not start the pattern: skip it together
			// with the two bytes after it.
			p += 3
		}
	}
	return len(s)
}
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")

// errTagListTooLarge is returned by ParseAcceptLanguage for inputs containing
// more separators than maxAcceptLanguageHyphens.
var errTagListTooLarge = errors.New("tag list exceeds max length")

// maxAcceptLanguageHyphens bounds the number of '-' bytes ParseAcceptLanguage
// accepts. Every rejected token makes the scanner copy the rest of its buffer,
// so an unbounded number of tokens lets a hostile header cause quadratic work.
const maxAcceptLanguageHyphens = 1000

// ParseAcceptLanguage parses the contents of an Accept-Language header as
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
// a list of corresponding quality weights. It is more permissive than RFC 2616
// and may return non-nil slices even if the input is not valid.
// The Tags will be sorted by highest weight first and then by first occurrence.
// Tags with a weight of zero will be dropped. An error will be returned if the
// input could not be parsed.
//
// Because the header is attacker-controlled, inputs with more than
// maxAcceptLanguageHyphens separators are rejected with an error, and a panic
// raised while parsing is converted into a syntax error instead of
// propagating to the caller.
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
	defer func() {
		if recover() != nil {
			tag, q, err = nil, nil, errSyntax
		}
	}()

	if strings.Count(s, "-") > maxAcceptLanguageHyphens {
		return nil, nil, errTagListTooLarge
	}

	var entry string
	for s != "" {
		if entry, s = split(s, ','); entry == "" {
			continue
		}
		lang, weight := split(entry, ';')

		// Scan the language.
		t, perr := Parse(lang)
		if perr != nil {
			id, ok := acceptFallback[lang]
			if !ok {
				return nil, nil, perr
			}
			t = Tag{lang: id}
		}

		// Scan the optional weight.
		w := 1.0
		if weight != "" {
			weight = consume(weight, 'q')
			weight = consume(weight, '=')
			// consume returns the empty string when a token could not be
			// consumed, resulting in an error for ParseFloat.
			if w, perr = strconv.ParseFloat(weight, 32); perr != nil {
				return nil, nil, errInvalidWeight
			}
			// Drop tags with a quality weight of 0.
			if w <= 0 {
				continue
			}
		}

		tag = append(tag, t)
		q = append(q, float32(w))
	}
	sortStable(&tagSort{tag, q})
	return tag, q, nil
}
// consume strips the leading byte c from s and returns what follows, with
// surrounding white space trimmed. It returns the empty string if s is empty
// or does not start with c.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		rest := s[1:]
		return strings.TrimSpace(rest)
	}
	return ""
}
// split cuts s at the first occurrence of c and returns the text before and
// after it, each with surrounding white space trimmed. If c does not occur in
// s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:i])
	tail = strings.TrimSpace(s[i+1:])
	return head, tail
}
// acceptFallback is a hack mapping that deals with a small number of cases
// that occur in Accept-Language (with reasonable frequency). It maps those
// names to language IDs and is consulted by ParseAcceptLanguage only for
// entries that Parse rejects.
var acceptFallback = map[string]langID{
"english": _en,
"deutsch": _de,
"italian": _it,
"french": _fr,
"*": _mul, // defined in the spec to match all languages.
}
// tagSort orders two parallel slices, tags and their quality weights, by
// descending weight. Its methods provide Len, Less and Swap for the sort
// routine used by ParseAcceptLanguage.
type tagSort struct {
	tag []Tag
	q   []float32
}

// Len reports the number of weights.
func (ts *tagSort) Len() int {
	n := len(ts.q)
	return n
}

// Less reports whether entry a has a higher weight than entry b, so that
// higher weights sort first.
func (ts *tagSort) Less(a, b int) bool {
	return ts.q[b] < ts.q[a]
}

// Swap exchanges entries a and b in both slices so they stay aligned.
func (ts *tagSort) Swap(a, b int) {
	ts.q[a], ts.q[b] = ts.q[b], ts.q[a]
	ts.tag[a], ts.tag[b] = ts.tag[b], ts.tag[a]
}

3547
vendor/golang.org/x/text/language/tables.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

143
vendor/golang.org/x/text/language/tags.go generated vendored Normal file
View File

@ -0,0 +1,143 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// TODO: Various sets of commonly used tags and regions.
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
// It simplifies safe initialization of Tag values.
func MustParse(s string) Tag {
	tag, err := Parse(s)
	if err == nil {
		return tag
	}
	panic(err)
}
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
// It simplifies safe initialization of Tag values. The tag is parsed with
// canonicalization type c.
func (c CanonType) MustParse(s string) Tag {
	tag, err := c.Parse(s)
	if err == nil {
		return tag
	}
	panic(err)
}
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
// It simplifies safe initialization of Base values.
func MustParseBase(s string) Base {
	base, err := ParseBase(s)
	if err == nil {
		return base
	}
	panic(err)
}
// MustParseScript is like ParseScript, but panics if the given script cannot be
// parsed. It simplifies safe initialization of Script values.
func MustParseScript(s string) Script {
	script, err := ParseScript(s)
	if err == nil {
		return script
	}
	panic(err)
}
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
// parsed. It simplifies safe initialization of Region values.
func MustParseRegion(s string) Region {
	region, err := ParseRegion(s)
	if err == nil {
		return region
	}
	panic(err)
}
// Predeclared tags. und is the unexported zero Tag used inside the package and
// Und is its exported counterpart. Every other value is built from a language
// ID plus, for some, a region or script ID; the trailing comment on each line
// gives the corresponding BCP 47 string.
var (
und = Tag{}
Und Tag = Tag{}
Afrikaans Tag = Tag{lang: _af} // af
Amharic Tag = Tag{lang: _am} // am
Arabic Tag = Tag{lang: _ar} // ar
ModernStandardArabic Tag = Tag{lang: _ar, region: _001} // ar-001
Azerbaijani Tag = Tag{lang: _az} // az
Bulgarian Tag = Tag{lang: _bg} // bg
Bengali Tag = Tag{lang: _bn} // bn
Catalan Tag = Tag{lang: _ca} // ca
Czech Tag = Tag{lang: _cs} // cs
Danish Tag = Tag{lang: _da} // da
German Tag = Tag{lang: _de} // de
Greek Tag = Tag{lang: _el} // el
English Tag = Tag{lang: _en} // en
AmericanEnglish Tag = Tag{lang: _en, region: _US} // en-US
BritishEnglish Tag = Tag{lang: _en, region: _GB} // en-GB
Spanish Tag = Tag{lang: _es} // es
EuropeanSpanish Tag = Tag{lang: _es, region: _ES} // es-ES
LatinAmericanSpanish Tag = Tag{lang: _es, region: _419} // es-419
Estonian Tag = Tag{lang: _et} // et
Persian Tag = Tag{lang: _fa} // fa
Finnish Tag = Tag{lang: _fi} // fi
Filipino Tag = Tag{lang: _fil} // fil
French Tag = Tag{lang: _fr} // fr
CanadianFrench Tag = Tag{lang: _fr, region: _CA} // fr-CA
Gujarati Tag = Tag{lang: _gu} // gu
Hebrew Tag = Tag{lang: _he} // he
Hindi Tag = Tag{lang: _hi} // hi
Croatian Tag = Tag{lang: _hr} // hr
Hungarian Tag = Tag{lang: _hu} // hu
Armenian Tag = Tag{lang: _hy} // hy
Indonesian Tag = Tag{lang: _id} // id
Icelandic Tag = Tag{lang: _is} // is
Italian Tag = Tag{lang: _it} // it
Japanese Tag = Tag{lang: _ja} // ja
Georgian Tag = Tag{lang: _ka} // ka
Kazakh Tag = Tag{lang: _kk} // kk
Khmer Tag = Tag{lang: _km} // km
Kannada Tag = Tag{lang: _kn} // kn
Korean Tag = Tag{lang: _ko} // ko
Kirghiz Tag = Tag{lang: _ky} // ky
Lao Tag = Tag{lang: _lo} // lo
Lithuanian Tag = Tag{lang: _lt} // lt
Latvian Tag = Tag{lang: _lv} // lv
Macedonian Tag = Tag{lang: _mk} // mk
Malayalam Tag = Tag{lang: _ml} // ml
Mongolian Tag = Tag{lang: _mn} // mn
Marathi Tag = Tag{lang: _mr} // mr
Malay Tag = Tag{lang: _ms} // ms
Burmese Tag = Tag{lang: _my} // my
Nepali Tag = Tag{lang: _ne} // ne
Dutch Tag = Tag{lang: _nl} // nl
Norwegian Tag = Tag{lang: _no} // no
Punjabi Tag = Tag{lang: _pa} // pa
Polish Tag = Tag{lang: _pl} // pl
Portuguese Tag = Tag{lang: _pt} // pt
BrazilianPortuguese Tag = Tag{lang: _pt, region: _BR} // pt-BR
EuropeanPortuguese Tag = Tag{lang: _pt, region: _PT} // pt-PT
Romanian Tag = Tag{lang: _ro} // ro
Russian Tag = Tag{lang: _ru} // ru
Sinhala Tag = Tag{lang: _si} // si
Slovak Tag = Tag{lang: _sk} // sk
Slovenian Tag = Tag{lang: _sl} // sl
Albanian Tag = Tag{lang: _sq} // sq
Serbian Tag = Tag{lang: _sr} // sr
SerbianLatin Tag = Tag{lang: _sr, script: _Latn} // sr-Latn
Swedish Tag = Tag{lang: _sv} // sv
Swahili Tag = Tag{lang: _sw} // sw
Tamil Tag = Tag{lang: _ta} // ta
Telugu Tag = Tag{lang: _te} // te
Thai Tag = Tag{lang: _th} // th
Turkish Tag = Tag{lang: _tr} // tr
Ukrainian Tag = Tag{lang: _uk} // uk
Urdu Tag = Tag{lang: _ur} // ur
Uzbek Tag = Tag{lang: _uz} // uz
Vietnamese Tag = Tag{lang: _vi} // vi
Chinese Tag = Tag{lang: _zh} // zh
SimplifiedChinese Tag = Tag{lang: _zh, script: _Hans} // zh-Hans
TraditionalChinese Tag = Tag{lang: _zh, script: _Hant} // zh-Hant
Zulu Tag = Tag{lang: _zu} // zu
)