Update publicsuffix-go to 6f3c5059 (#3826)

Incorporates performance improvements.
This commit is contained in:
Simone Carletti 2018-08-24 15:05:24 +02:00 committed by Daniel McCarney
parent f84d54313d
commit 36a1ded4a6
3 changed files with 78 additions and 88 deletions

4
Godeps/Godeps.json generated
View File

@ -259,8 +259,8 @@
}, },
{ {
"ImportPath": "github.com/weppos/publicsuffix-go/publicsuffix", "ImportPath": "github.com/weppos/publicsuffix-go/publicsuffix",
"Comment": "v0.4.0-20-gcbbcd04", "Comment": "v0.4.0-24-gf8afde6",
"Rev": "cbbcd048f995c801105a0083cb157c4fec9ea89c" "Rev": "f8afde64429e7c479c7e4f681f66fbf792dac9c6"
}, },
{ {
"ImportPath": "github.com/zmap/zcrypto/json", "ImportPath": "github.com/zmap/zcrypto/json",

View File

@ -11,7 +11,6 @@ import (
"io" "io"
"net/http/cookiejar" "net/http/cookiejar"
"os" "os"
"regexp"
"strings" "strings"
"golang.org/x/net/idna" "golang.org/x/net/idna"
@ -80,13 +79,14 @@ type FindOptions struct {
// List represents a Public Suffix List. // List represents a Public Suffix List.
type List struct { type List struct {
// rules is kept private because you should not access rules directly // rules is kept private because you should not access rules directly
// for lookup optimization the list will not be guaranteed to be a simple slice forever rules map[string]*Rule
rules []Rule
} }
// NewList creates a new empty list. // NewList creates a new empty list.
func NewList() *List { func NewList() *List {
return &List{} return &List{
rules: map[string]*Rule{},
}
} }
// NewListFromString parses a string that represents a Public Suffix source // NewListFromString parses a string that represents a Public Suffix source
@ -132,7 +132,7 @@ func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) {
// The list may be optimized internally for lookups, therefore the algorithm // The list may be optimized internally for lookups, therefore the algorithm
// will decide the best position for the new rule. // will decide the best position for the new rule.
func (l *List) AddRule(r *Rule) error { func (l *List) AddRule(r *Rule) error {
l.rules = append(l.rules, *r) l.rules[r.Value] = r
return nil return nil
} }
@ -195,43 +195,27 @@ Scanning:
// Find returns the most appropriate rule for the domain name. // Find returns the most appropriate rule for the domain name.
func (l *List) Find(name string, options *FindOptions) *Rule { func (l *List) Find(name string, options *FindOptions) *Rule {
var bestRule *Rule
if options == nil { if options == nil {
options = DefaultFindOptions options = DefaultFindOptions
} }
for _, r := range l.selectRules(name, options) { part := name
if r.Type == ExceptionType { for {
return &r rule, ok := l.rules[part]
if ok && rule.Match(name) && !(options.IgnorePrivate && rule.Private) {
return rule
} }
if bestRule == nil || bestRule.Length < r.Length {
bestRule = &r i := strings.IndexRune(part, '.')
if i < 0 {
return options.DefaultRule
} }
part = part[i+1:]
} }
if bestRule != nil { return nil
return bestRule
}
return options.DefaultRule
}
func (l *List) selectRules(name string, options *FindOptions) []Rule {
var found []Rule
// In this phase the search is a simple sequential scan
for _, rule := range l.rules {
if !rule.Match(name) {
continue
}
if options.IgnorePrivate && rule.Private {
continue
}
found = append(found, rule)
}
return found
} }
// NewRule parses the rule content, creates and returns a Rule. // NewRule parses the rule content, creates and returns a Rule.
@ -309,36 +293,46 @@ func (r *Rule) Match(name string) bool {
// Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>, // Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>,
// according to the rule definition and type. // according to the rule definition and type.
func (r *Rule) Decompose(name string) [2]string { func (r *Rule) Decompose(name string) (result [2]string) {
var parts []string if r == DefaultRule {
i := strings.LastIndex(name, ".")
if i < 0 {
return
}
result[0], result[1] = name[:i], name[i+1:]
return
}
switch r.Type { switch r.Type {
case NormalType:
name = strings.TrimSuffix(name, r.Value)
if len(name) == 0 {
return
}
result[0], result[1] = name[:len(name)-1], r.Value
case WildcardType: case WildcardType:
parts = append([]string{`.*?`}, r.parts()...) name := strings.TrimSuffix(name, r.Value)
default: if len(name) == 0 {
parts = r.parts() return
}
name = name[:len(name)-1]
i := strings.LastIndex(name, ".")
if i < 0 {
return
}
result[0], result[1] = name[:i], name[i+1:]+"."+r.Value
case ExceptionType:
i := strings.IndexRune(r.Value, '.')
if i < 0 {
return
}
suffix := r.Value[i+1:]
name = strings.TrimSuffix(name, suffix)
if len(name) == 0 {
return
}
result[0], result[1] = name[:len(name)-1], suffix
} }
return
suffix := strings.Join(parts, `\.`)
re := regexp.MustCompile(fmt.Sprintf(`^(.+)\.(%s)$`, suffix))
matches := re.FindStringSubmatch(name)
if len(matches) < 3 {
return [2]string{"", ""}
}
return [2]string{matches[1], matches[2]}
}
func (r *Rule) parts() []string {
labels := Labels(r.Value)
if r.Type == ExceptionType {
return labels[1:]
}
if r.Type == WildcardType && r.Value == "" {
return []string{}
}
return labels
} }
// Labels decomposes given domain name into labels, // Labels decomposes given domain name into labels,
@ -432,7 +426,6 @@ func DomainFromListWithOptions(l *List, name string, options *FindOptions) (stri
if err != nil { if err != nil {
return "", err return "", err
} }
return dn.SLD + "." + dn.TLD, nil return dn.SLD + "." + dn.TLD, nil
} }
@ -458,12 +451,22 @@ func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*Doma
} }
r := l.Find(n, options) r := l.Find(n, options)
if tld := r.Decompose(n)[1]; tld == "" { parts := r.Decompose(n)
left, tld := parts[0], parts[1]
if tld == "" {
return nil, fmt.Errorf("%s is a suffix", n) return nil, fmt.Errorf("%s is a suffix", n)
} }
dn := &DomainName{Rule: r} dn := &DomainName{
dn.TLD, dn.SLD, dn.TRD = decompose(r, n) Rule: r,
TLD: tld,
}
if i := strings.LastIndex(left, "."); i < 0 {
dn.SLD = left
} else {
dn.TRD = left[:i]
dn.SLD = left[i+1:]
}
return dn, nil return dn, nil
} }
@ -471,31 +474,15 @@ func normalize(name string) (string, error) {
ret := strings.ToLower(name) ret := strings.ToLower(name)
if ret == "" { if ret == "" {
return "", fmt.Errorf("Name is blank") return "", fmt.Errorf("name is blank")
} }
if ret[0] == '.' { if ret[0] == '.' {
return "", fmt.Errorf("Name %s starts with a dot", ret) return "", fmt.Errorf("name %s starts with a dot", ret)
} }
return ret, nil return ret, nil
} }
func decompose(r *Rule, name string) (tld, sld, trd string) {
parts := r.Decompose(name)
left, tld := parts[0], parts[1]
dot := strings.LastIndex(left, ".")
if dot == -1 {
sld = left
trd = ""
} else {
sld = left[dot+1:]
trd = left[0:dot]
}
return
}
// ToASCII is a wrapper for idna.ToASCII. // ToASCII is a wrapper for idna.ToASCII.
// //
// This wrapper exists because idna.ToASCII backward-compatibility was broken twice in a few months

View File

@ -3,10 +3,10 @@
package publicsuffix package publicsuffix
const defaultListVersion = "PSL version f8ccab (Wed Aug 8 09:06:53 2018)" const defaultListVersion = "PSL version 6cca94 (Mon Aug 13 08:12:02 2018)"
func init() { func init() {
r := [8618]Rule{ r := [8619]Rule{
{1, "ac", 1, false}, {1, "ac", 1, false},
{1, "com.ac", 2, false}, {1, "com.ac", 2, false},
{1, "edu.ac", 2, false}, {1, "edu.ac", 2, false},
@ -972,6 +972,7 @@ func init() {
{1, "my.id", 2, false}, {1, "my.id", 2, false},
{1, "net.id", 2, false}, {1, "net.id", 2, false},
{1, "or.id", 2, false}, {1, "or.id", 2, false},
{1, "ponpes.id", 2, false},
{1, "sch.id", 2, false}, {1, "sch.id", 2, false},
{1, "web.id", 2, false}, {1, "web.id", 2, false},
{1, "ie", 1, false}, {1, "ie", 1, false},
@ -8626,5 +8627,7 @@ func init() {
{1, "now.sh", 2, true}, {1, "now.sh", 2, true},
{1, "zone.id", 2, true}, {1, "zone.id", 2, true},
} }
DefaultList.rules = r[:] for i := range r {
DefaultList.AddRule(&r[i])
}
} }