Incorporates performance improvements.
This commit is contained in:
parent
f84d54313d
commit
36a1ded4a6
|
|
@ -259,8 +259,8 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"ImportPath": "github.com/weppos/publicsuffix-go/publicsuffix",
|
"ImportPath": "github.com/weppos/publicsuffix-go/publicsuffix",
|
||||||
"Comment": "v0.4.0-20-gcbbcd04",
|
"Comment": "v0.4.0-24-gf8afde6",
|
||||||
"Rev": "cbbcd048f995c801105a0083cb157c4fec9ea89c"
|
"Rev": "f8afde64429e7c479c7e4f681f66fbf792dac9c6"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"ImportPath": "github.com/zmap/zcrypto/json",
|
"ImportPath": "github.com/zmap/zcrypto/json",
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"net/http/cookiejar"
|
"net/http/cookiejar"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/net/idna"
|
"golang.org/x/net/idna"
|
||||||
|
|
@ -80,13 +79,14 @@ type FindOptions struct {
|
||||||
// List represents a Public Suffix List.
|
// List represents a Public Suffix List.
|
||||||
type List struct {
|
type List struct {
|
||||||
// rules is kept private because you should not access rules directly
|
// rules is kept private because you should not access rules directly
|
||||||
// for lookup optimization the list will not be guaranteed to be a simple slice forever
|
rules map[string]*Rule
|
||||||
rules []Rule
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewList creates a new empty list.
|
// NewList creates a new empty list.
|
||||||
func NewList() *List {
|
func NewList() *List {
|
||||||
return &List{}
|
return &List{
|
||||||
|
rules: map[string]*Rule{},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewListFromString parses a string that represents a Public Suffix source
|
// NewListFromString parses a string that represents a Public Suffix source
|
||||||
|
|
@ -132,7 +132,7 @@ func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) {
|
||||||
// The list may be optimized internally for lookups, therefore the algorithm
|
// The list may be optimized internally for lookups, therefore the algorithm
|
||||||
// will decide the best position for the new rule.
|
// will decide the best position for the new rule.
|
||||||
func (l *List) AddRule(r *Rule) error {
|
func (l *List) AddRule(r *Rule) error {
|
||||||
l.rules = append(l.rules, *r)
|
l.rules[r.Value] = r
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -195,43 +195,27 @@ Scanning:
|
||||||
|
|
||||||
// Find and returns the most appropriate rule for the domain name.
|
// Find and returns the most appropriate rule for the domain name.
|
||||||
func (l *List) Find(name string, options *FindOptions) *Rule {
|
func (l *List) Find(name string, options *FindOptions) *Rule {
|
||||||
var bestRule *Rule
|
|
||||||
|
|
||||||
if options == nil {
|
if options == nil {
|
||||||
options = DefaultFindOptions
|
options = DefaultFindOptions
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, r := range l.selectRules(name, options) {
|
part := name
|
||||||
if r.Type == ExceptionType {
|
for {
|
||||||
return &r
|
rule, ok := l.rules[part]
|
||||||
}
|
|
||||||
if bestRule == nil || bestRule.Length < r.Length {
|
if ok && rule.Match(name) && !(options.IgnorePrivate && rule.Private) {
|
||||||
bestRule = &r
|
return rule
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if bestRule != nil {
|
|
||||||
return bestRule
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
i := strings.IndexRune(part, '.')
|
||||||
|
if i < 0 {
|
||||||
return options.DefaultRule
|
return options.DefaultRule
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *List) selectRules(name string, options *FindOptions) []Rule {
|
part = part[i+1:]
|
||||||
var found []Rule
|
|
||||||
|
|
||||||
// In this phase the search is a simple sequential scan
|
|
||||||
for _, rule := range l.rules {
|
|
||||||
if !rule.Match(name) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if options.IgnorePrivate && rule.Private {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
found = append(found, rule)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return found
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewRule parses the rule content, creates and returns a Rule.
|
// NewRule parses the rule content, creates and returns a Rule.
|
||||||
|
|
@ -309,36 +293,46 @@ func (r *Rule) Match(name string) bool {
|
||||||
|
|
||||||
// Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>,
|
// Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>,
|
||||||
// according to the rule definition and type.
|
// according to the rule definition and type.
|
||||||
func (r *Rule) Decompose(name string) [2]string {
|
func (r *Rule) Decompose(name string) (result [2]string) {
|
||||||
var parts []string
|
if r == DefaultRule {
|
||||||
|
i := strings.LastIndex(name, ".")
|
||||||
|
if i < 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
result[0], result[1] = name[:i], name[i+1:]
|
||||||
|
return
|
||||||
|
}
|
||||||
switch r.Type {
|
switch r.Type {
|
||||||
|
case NormalType:
|
||||||
|
name = strings.TrimSuffix(name, r.Value)
|
||||||
|
if len(name) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
result[0], result[1] = name[:len(name)-1], r.Value
|
||||||
case WildcardType:
|
case WildcardType:
|
||||||
parts = append([]string{`.*?`}, r.parts()...)
|
name := strings.TrimSuffix(name, r.Value)
|
||||||
default:
|
if len(name) == 0 {
|
||||||
parts = r.parts()
|
return
|
||||||
}
|
}
|
||||||
|
name = name[:len(name)-1]
|
||||||
suffix := strings.Join(parts, `\.`)
|
i := strings.LastIndex(name, ".")
|
||||||
re := regexp.MustCompile(fmt.Sprintf(`^(.+)\.(%s)$`, suffix))
|
if i < 0 {
|
||||||
|
return
|
||||||
matches := re.FindStringSubmatch(name)
|
|
||||||
if len(matches) < 3 {
|
|
||||||
return [2]string{"", ""}
|
|
||||||
}
|
}
|
||||||
|
result[0], result[1] = name[:i], name[i+1:]+"."+r.Value
|
||||||
return [2]string{matches[1], matches[2]}
|
case ExceptionType:
|
||||||
|
i := strings.IndexRune(r.Value, '.')
|
||||||
|
if i < 0 {
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
suffix := r.Value[i+1:]
|
||||||
func (r *Rule) parts() []string {
|
name = strings.TrimSuffix(name, suffix)
|
||||||
labels := Labels(r.Value)
|
if len(name) == 0 {
|
||||||
if r.Type == ExceptionType {
|
return
|
||||||
return labels[1:]
|
|
||||||
}
|
}
|
||||||
if r.Type == WildcardType && r.Value == "" {
|
result[0], result[1] = name[:len(name)-1], suffix
|
||||||
return []string{}
|
|
||||||
}
|
}
|
||||||
return labels
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Labels decomposes given domain name into labels,
|
// Labels decomposes given domain name into labels,
|
||||||
|
|
@ -432,7 +426,6 @@ func DomainFromListWithOptions(l *List, name string, options *FindOptions) (stri
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
return dn.SLD + "." + dn.TLD, nil
|
return dn.SLD + "." + dn.TLD, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -458,12 +451,22 @@ func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*Doma
|
||||||
}
|
}
|
||||||
|
|
||||||
r := l.Find(n, options)
|
r := l.Find(n, options)
|
||||||
if tld := r.Decompose(n)[1]; tld == "" {
|
parts := r.Decompose(n)
|
||||||
|
left, tld := parts[0], parts[1]
|
||||||
|
if tld == "" {
|
||||||
return nil, fmt.Errorf("%s is a suffix", n)
|
return nil, fmt.Errorf("%s is a suffix", n)
|
||||||
}
|
}
|
||||||
|
|
||||||
dn := &DomainName{Rule: r}
|
dn := &DomainName{
|
||||||
dn.TLD, dn.SLD, dn.TRD = decompose(r, n)
|
Rule: r,
|
||||||
|
TLD: tld,
|
||||||
|
}
|
||||||
|
if i := strings.LastIndex(left, "."); i < 0 {
|
||||||
|
dn.SLD = left
|
||||||
|
} else {
|
||||||
|
dn.TRD = left[:i]
|
||||||
|
dn.SLD = left[i+1:]
|
||||||
|
}
|
||||||
return dn, nil
|
return dn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -471,31 +474,15 @@ func normalize(name string) (string, error) {
|
||||||
ret := strings.ToLower(name)
|
ret := strings.ToLower(name)
|
||||||
|
|
||||||
if ret == "" {
|
if ret == "" {
|
||||||
return "", fmt.Errorf("Name is blank")
|
return "", fmt.Errorf("name is blank")
|
||||||
}
|
}
|
||||||
if ret[0] == '.' {
|
if ret[0] == '.' {
|
||||||
return "", fmt.Errorf("Name %s starts with a dot", ret)
|
return "", fmt.Errorf("name %s starts with a dot", ret)
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret, nil
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func decompose(r *Rule, name string) (tld, sld, trd string) {
|
|
||||||
parts := r.Decompose(name)
|
|
||||||
left, tld := parts[0], parts[1]
|
|
||||||
|
|
||||||
dot := strings.LastIndex(left, ".")
|
|
||||||
if dot == -1 {
|
|
||||||
sld = left
|
|
||||||
trd = ""
|
|
||||||
} else {
|
|
||||||
sld = left[dot+1:]
|
|
||||||
trd = left[0:dot]
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// ToASCII is a wrapper for idna.ToASCII.
|
// ToASCII is a wrapper for idna.ToASCII.
|
||||||
//
|
//
|
||||||
// This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months
|
// This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,10 @@
|
||||||
|
|
||||||
package publicsuffix
|
package publicsuffix
|
||||||
|
|
||||||
const defaultListVersion = "PSL version f8ccab (Wed Aug 8 09:06:53 2018)"
|
const defaultListVersion = "PSL version 6cca94 (Mon Aug 13 08:12:02 2018)"
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
r := [8618]Rule{
|
r := [8619]Rule{
|
||||||
{1, "ac", 1, false},
|
{1, "ac", 1, false},
|
||||||
{1, "com.ac", 2, false},
|
{1, "com.ac", 2, false},
|
||||||
{1, "edu.ac", 2, false},
|
{1, "edu.ac", 2, false},
|
||||||
|
|
@ -972,6 +972,7 @@ func init() {
|
||||||
{1, "my.id", 2, false},
|
{1, "my.id", 2, false},
|
||||||
{1, "net.id", 2, false},
|
{1, "net.id", 2, false},
|
||||||
{1, "or.id", 2, false},
|
{1, "or.id", 2, false},
|
||||||
|
{1, "ponpes.id", 2, false},
|
||||||
{1, "sch.id", 2, false},
|
{1, "sch.id", 2, false},
|
||||||
{1, "web.id", 2, false},
|
{1, "web.id", 2, false},
|
||||||
{1, "ie", 1, false},
|
{1, "ie", 1, false},
|
||||||
|
|
@ -8626,5 +8627,7 @@ func init() {
|
||||||
{1, "now.sh", 2, true},
|
{1, "now.sh", 2, true},
|
||||||
{1, "zone.id", 2, true},
|
{1, "zone.id", 2, true},
|
||||||
}
|
}
|
||||||
DefaultList.rules = r[:]
|
for i := range r {
|
||||||
|
DefaultList.AddRule(&r[i])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue