540 lines
14 KiB
Go
540 lines
14 KiB
Go
//go:generate go run ../cmd/gen/gen.go
|
|
|
|
// Package publicsuffix provides a domain name parser
|
|
// based on data from the public suffix list http://publicsuffix.org/.
|
|
// A public suffix is one under which Internet users can directly register names.
|
|
package publicsuffix
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"net/http/cookiejar"
|
|
"os"
|
|
"strings"
|
|
|
|
"golang.org/x/net/idna"
|
|
)
|
|
|
|
const (
|
|
// Version identifies the current library version.
|
|
// This is a pro forma convention given that Go dependencies
|
|
// tend to be fetched directly from the repo.
|
|
Version = "0.40.2"
|
|
|
|
// NormalType represents a normal rule such as "com".
|
|
NormalType = 1
|
|
// WildcardType represents a wildcard rule such as "*.com".
|
|
WildcardType = 2
|
|
// ExceptionType represents an exception to a wildcard rule.
|
|
ExceptionType = 3
|
|
|
|
// listTokenPrivateDomains is the marker the list parser searches for in each
// line to detect the beginning of the private-domains section.
listTokenPrivateDomains = "===BEGIN PRIVATE DOMAINS==="
|
|
// listTokenComment is the prefix of the lines the list parser skips as comments.
listTokenComment = "//"
|
|
)
|
|
|
|
// DefaultList is the default List and it is used by Parse and Domain.
// It is created empty with NewList.
|
|
var DefaultList = NewList()
|
|
|
|
// DefaultRule is the default Rule that represents "*".
// Decompose recognizes it by pointer identity and splits the name at its last dot.
|
|
var DefaultRule = MustNewRule("*")
|
|
|
|
// DefaultParserOptions are the default options used to parse a Public Suffix list.
// They are applied whenever a nil *ParserOption is passed to the list parser:
// private domains are included and the input is treated as Unicode (U-labels).
|
|
var DefaultParserOptions = &ParserOption{PrivateDomains: true, ASCIIEncoded: false}
|
|
|
|
// DefaultFindOptions are the default options used to perform the lookup of rules in the list.
// They are applied whenever a nil *FindOptions is passed to Find:
// private rules are considered and DefaultRule is returned when nothing matches.
|
|
var DefaultFindOptions = &FindOptions{IgnorePrivate: false, DefaultRule: DefaultRule}
|
|
|
|
// Rule represents a single rule in a Public Suffix List.
|
|
type Rule struct {
|
|
// Type is the kind of rule: NewRule sets it to NormalType, WildcardType or ExceptionType.
Type int
|
|
// Value is the rule content as stored by NewRule, i.e. without the leading
// "*." of a wildcard rule or the leading "!" of an exception rule.
Value string
|
|
// Length is the number of labels computed by NewRule from Value;
// a wildcard rule counts one label more than its Value.
Length int
|
|
// Private is set by the list parser on rules read after the private-domains marker.
Private bool
|
|
}
|
|
|
|
// ParserOption holds the options you can use to customize the way a List
|
|
// is parsed from a file or a string.
|
|
type ParserOption struct {
|
|
// Set to false to skip the private domains when parsing:
// the parser then stops at the private-domains marker.
|
|
// Defaults to true in DefaultParserOptions, which means the private domains are included.
|
|
PrivateDomains bool
|
|
|
|
// Set to false if the input is encoded in U-labels (Unicode)
|
|
// as opposed to A-labels (ASCII). When false, each rule is converted with ToASCII before it is stored.
|
|
// Defaults to false, which means the list is expected to contain Unicode domains.
|
|
// This is the default because the original PSL currently contains Unicode.
|
|
ASCIIEncoded bool
|
|
}
|
|
|
|
// FindOptions holds the options you can use to customize the way a Rule
|
|
// is searched within the list.
|
|
type FindOptions struct {
|
|
// Set to true to ignore the rules within the "Private" section of the Public Suffix List.
|
|
IgnorePrivate bool
|
|
|
|
// The default rule to use when no rule matches the input.
|
|
// The formal Public Suffix algorithm states that the rule "*" should be used when no other rule matches,
|
|
// but some consumers may have different needs. Find returns this value as is, so it may be nil.
|
|
DefaultRule *Rule
|
|
}
|
|
|
|
// List represents a Public Suffix List.
|
|
type List struct {
|
|
// rules is kept private because you should not access rules directly.
// It is keyed by Rule.Value (see AddRule), so a later rule with the same
// Value replaces an earlier one.
|
|
rules map[string]*Rule
|
|
}
|
|
|
|
// NewList creates a new empty list.
|
|
func NewList() *List {
|
|
return &List{
|
|
rules: map[string]*Rule{},
|
|
}
|
|
}
|
|
|
|
// NewListFromString parses a string that represents a Public Suffix source
|
|
// and returns a List initialized with the rules in the source.
|
|
func NewListFromString(src string, options *ParserOption) (*List, error) {
|
|
l := NewList()
|
|
_, err := l.LoadString(src, options)
|
|
return l, err
|
|
}
|
|
|
|
// NewListFromFile parses a string that represents a Public Suffix source
|
|
// and returns a List initialized with the rules in the source.
|
|
func NewListFromFile(path string, options *ParserOption) (*List, error) {
|
|
l := NewList()
|
|
_, err := l.LoadFile(path, options)
|
|
return l, err
|
|
}
|
|
|
|
// Load parses and loads a set of rules from an io.Reader into the current list.
|
|
func (l *List) Load(r io.Reader, options *ParserOption) ([]Rule, error) {
|
|
return l.parse(r, options)
|
|
}
|
|
|
|
// LoadString parses and loads a set of rules from a String into the current list.
|
|
func (l *List) LoadString(src string, options *ParserOption) ([]Rule, error) {
|
|
r := strings.NewReader(src)
|
|
return l.parse(r, options)
|
|
}
|
|
|
|
// LoadFile parses and loads a set of rules from a File into the current list.
|
|
func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer f.Close()
|
|
return l.parse(f, options)
|
|
}
|
|
|
|
// AddRule adds a new rule to the list.
|
|
//
|
|
// The exact position of the rule into the list is unpredictable.
|
|
// The list may be optimized internally for lookups, therefore the algorithm
|
|
// will decide the best position for the new rule.
|
|
func (l *List) AddRule(r *Rule) error {
|
|
l.rules[r.Value] = r
|
|
return nil
|
|
}
|
|
|
|
// Size returns the size of the list, which is the number of rules.
|
|
func (l *List) Size() int {
|
|
return len(l.rules)
|
|
}
|
|
|
|
func (l *List) parse(r io.Reader, options *ParserOption) ([]Rule, error) {
|
|
if options == nil {
|
|
options = DefaultParserOptions
|
|
}
|
|
var rules []Rule
|
|
|
|
scanner := bufio.NewScanner(r)
|
|
var section int // 1 == ICANN, 2 == PRIVATE
|
|
|
|
Scanning:
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
switch {
|
|
|
|
// skip blank lines
|
|
case line == "":
|
|
break
|
|
|
|
// include private domains or stop scanner
|
|
case strings.Contains(line, listTokenPrivateDomains):
|
|
if !options.PrivateDomains {
|
|
break Scanning
|
|
}
|
|
section = 2
|
|
|
|
// skip comments
|
|
case strings.HasPrefix(line, listTokenComment):
|
|
break
|
|
|
|
default:
|
|
var rule *Rule
|
|
var err error
|
|
|
|
if options.ASCIIEncoded {
|
|
rule, err = NewRule(line)
|
|
} else {
|
|
rule, err = NewRuleUnicode(line)
|
|
}
|
|
if err != nil {
|
|
return []Rule{}, err
|
|
}
|
|
|
|
rule.Private = (section == 2)
|
|
l.AddRule(rule)
|
|
rules = append(rules, *rule)
|
|
}
|
|
|
|
}
|
|
|
|
return rules, scanner.Err()
|
|
}
|
|
|
|
// Find and returns the most appropriate rule for the domain name.
|
|
func (l *List) Find(name string, options *FindOptions) *Rule {
|
|
if options == nil {
|
|
options = DefaultFindOptions
|
|
}
|
|
|
|
part := name
|
|
for {
|
|
rule, ok := l.rules[part]
|
|
|
|
if ok && rule.Match(name) && !(options.IgnorePrivate && rule.Private) {
|
|
return rule
|
|
}
|
|
|
|
i := strings.IndexRune(part, '.')
|
|
if i < 0 {
|
|
return options.DefaultRule
|
|
}
|
|
|
|
part = part[i+1:]
|
|
}
|
|
|
|
}
|
|
|
|
// NewRule parses the rule content, creates and returns a Rule.
|
|
//
|
|
// The content of the rule MUST be encoded in ASCII (A-labels).
|
|
func NewRule(content string) (*Rule, error) {
|
|
var rule *Rule
|
|
var value string
|
|
|
|
switch content[0] {
|
|
case '*': // wildcard
|
|
if content == "*" {
|
|
value = ""
|
|
} else {
|
|
value = content[2:]
|
|
}
|
|
rule = &Rule{Type: WildcardType, Value: value, Length: len(Labels(value)) + 1}
|
|
case '!': // exception
|
|
value = content[1:]
|
|
rule = &Rule{Type: ExceptionType, Value: value, Length: len(Labels(value))}
|
|
default: // normal
|
|
value = content
|
|
rule = &Rule{Type: NormalType, Value: value, Length: len(Labels(value))}
|
|
}
|
|
|
|
return rule, nil
|
|
}
|
|
|
|
// NewRuleUnicode is like NewRule, but expects the content to be encoded in Unicode (U-labels).
|
|
func NewRuleUnicode(content string) (*Rule, error) {
|
|
var err error
|
|
|
|
content, err = ToASCII(content)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return NewRule(content)
|
|
}
|
|
|
|
// MustNewRule is like NewRule, but panics if the content cannot be parsed.
|
|
func MustNewRule(content string) *Rule {
|
|
rule, err := NewRule(content)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return rule
|
|
}
|
|
|
|
// Match checks if the rule matches the name.
|
|
//
|
|
// A domain name is said to match a rule if and only if all of the following conditions are met:
|
|
// - When the domain and rule are split into corresponding labels,
|
|
// that the domain contains as many or more labels than the rule.
|
|
// - Beginning with the right-most labels of both the domain and the rule,
|
|
// and continuing for all labels in the rule, one finds that for every pair,
|
|
// either they are identical, or that the label from the rule is "*".
|
|
//
|
|
// See https://publicsuffix.org/list/
|
|
func (r *Rule) Match(name string) bool {
|
|
left := strings.TrimSuffix(name, r.Value)
|
|
|
|
// the name contains as many labels than the rule
|
|
// this is a match, unless it's a wildcard
|
|
// because the wildcard requires one more label
|
|
if left == "" {
|
|
return r.Type != WildcardType
|
|
}
|
|
|
|
// if there is one more label, the rule match
|
|
// because either the rule is shorter than the domain
|
|
// or the rule is a wildcard and there is one more label
|
|
return left[len(left)-1:] == "."
|
|
}
|
|
|
|
// Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>,
|
|
// according to the rule definition and type.
|
|
func (r *Rule) Decompose(name string) (result [2]string) {
|
|
if r == DefaultRule {
|
|
i := strings.LastIndexByte(name, '.')
|
|
if i < 0 {
|
|
return
|
|
}
|
|
result[0], result[1] = name[:i], name[i+1:]
|
|
return
|
|
}
|
|
switch r.Type {
|
|
case NormalType:
|
|
name = strings.TrimSuffix(name, r.Value)
|
|
if len(name) == 0 {
|
|
return
|
|
}
|
|
result[0], result[1] = name[:len(name)-1], r.Value
|
|
case WildcardType:
|
|
name := strings.TrimSuffix(name, r.Value)
|
|
if len(name) == 0 {
|
|
return
|
|
}
|
|
name = name[:len(name)-1]
|
|
i := strings.LastIndexByte(name, '.')
|
|
if i < 0 {
|
|
return
|
|
}
|
|
result[0], result[1] = name[:i], name[i+1:]+"."+r.Value
|
|
case ExceptionType:
|
|
i := strings.IndexRune(r.Value, '.')
|
|
if i < 0 {
|
|
return
|
|
}
|
|
suffix := r.Value[i+1:]
|
|
name = strings.TrimSuffix(name, suffix)
|
|
if len(name) == 0 {
|
|
return
|
|
}
|
|
result[0], result[1] = name[:len(name)-1], suffix
|
|
}
|
|
return
|
|
}
|
|
|
|
// Labels splits the given domain name on dots and returns the resulting
// tokens. An empty name yields a single empty label.
func Labels(name string) []string {
	const separator = "."
	labels := strings.Split(name, separator)
	return labels
}
|
|
|
|
// DomainName represents a domain name.
// ParseFromListWithOptions fills it from the result of Rule.Decompose.
|
|
type DomainName struct {
|
|
// TLD is the suffix part of the name, as returned by Rule.Decompose.
TLD string
|
|
// SLD is the label immediately to the left of the TLD.
SLD string
|
|
// TRD is whatever remains to the left of the SLD; it is empty when nothing remains.
TRD string
|
|
// Rule is the rule that was used to decompose the name.
Rule *Rule
|
|
}
|
|
|
|
// String joins the components of the domain name into a single string.
|
|
// Empty labels are skipped.
|
|
//
|
|
// Examples:
|
|
//
|
|
// DomainName{"com", "example"}.String()
|
|
// // example.com
|
|
// DomainName{"com", "example", "www"}.String()
|
|
// // www.example.com
|
|
func (d *DomainName) String() string {
|
|
switch {
|
|
case d.TLD == "":
|
|
return ""
|
|
case d.SLD == "":
|
|
return d.TLD
|
|
case d.TRD == "":
|
|
return d.SLD + "." + d.TLD
|
|
default:
|
|
return d.TRD + "." + d.SLD + "." + d.TLD
|
|
}
|
|
}
|
|
|
|
// Domain extract and return the domain name from the input
|
|
// using the default (Public Suffix) List.
|
|
//
|
|
// Examples:
|
|
//
|
|
// publicsuffix.Domain("example.com")
|
|
// // example.com
|
|
// publicsuffix.Domain("www.example.com")
|
|
// // example.com
|
|
// publicsuffix.Domain("www.example.co.uk")
|
|
// // example.co.uk
|
|
func Domain(name string) (string, error) {
|
|
return DomainFromListWithOptions(DefaultList, name, DefaultFindOptions)
|
|
}
|
|
|
|
// Parse decomposes the name into TLD, SLD, TRD
|
|
// using the default (Public Suffix) List,
|
|
// and returns the result as a DomainName
|
|
//
|
|
// Examples:
|
|
//
|
|
// list := NewList()
|
|
//
|
|
// publicsuffix.Parse("example.com")
|
|
// // &DomainName{"com", "example"}
|
|
// publicsuffix.Parse("www.example.com")
|
|
// // &DomainName{"com", "example", "www"}
|
|
// publicsuffix.Parse("www.example.co.uk")
|
|
// // &DomainName{"co.uk", "example"}
|
|
func Parse(name string) (*DomainName, error) {
|
|
return ParseFromListWithOptions(DefaultList, name, DefaultFindOptions)
|
|
}
|
|
|
|
// DomainFromListWithOptions extract and return the domain name from the input
|
|
// using the (Public Suffix) list passed as argument.
|
|
//
|
|
// Examples:
|
|
//
|
|
// list := NewList()
|
|
//
|
|
// publicsuffix.DomainFromListWithOptions(list, "example.com")
|
|
// // example.com
|
|
// publicsuffix.DomainFromListWithOptions(list, "www.example.com")
|
|
// // example.com
|
|
// publicsuffix.DomainFromListWithOptions(list, "www.example.co.uk")
|
|
// // example.co.uk
|
|
func DomainFromListWithOptions(l *List, name string, options *FindOptions) (string, error) {
|
|
dn, err := ParseFromListWithOptions(l, name, options)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return dn.SLD + "." + dn.TLD, nil
|
|
}
|
|
|
|
// ParseFromListWithOptions decomposes the name into TLD, SLD, TRD
|
|
// using the (Public Suffix) list passed as argument,
|
|
// and returns the result as a DomainName
|
|
//
|
|
// Examples:
|
|
//
|
|
// list := NewList()
|
|
//
|
|
// publicsuffix.ParseFromListWithOptions(list, "example.com")
|
|
// // &DomainName{"com", "example"}
|
|
// publicsuffix.ParseFromListWithOptions(list, "www.example.com")
|
|
// // &DomainName{"com", "example", "www"}
|
|
// publicsuffix.ParseFromListWithOptions(list, "www.example.co.uk")
|
|
// // &DomainName{"co.uk", "example"}
|
|
func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*DomainName, error) {
|
|
n, err := normalize(name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
r := l.Find(n, options)
|
|
if r == nil {
|
|
return nil, fmt.Errorf("no rule matching name %s", name)
|
|
}
|
|
|
|
parts := r.Decompose(n)
|
|
left, tld := parts[0], parts[1]
|
|
if tld == "" {
|
|
return nil, fmt.Errorf("%s is a suffix", n)
|
|
}
|
|
|
|
dn := &DomainName{
|
|
Rule: r,
|
|
TLD: tld,
|
|
}
|
|
if i := strings.LastIndexByte(left, '.'); i < 0 {
|
|
dn.SLD = left
|
|
} else {
|
|
dn.TRD = left[:i]
|
|
dn.SLD = left[i+1:]
|
|
}
|
|
return dn, nil
|
|
}
|
|
|
|
// normalize lower-cases name and rejects inputs that cannot be parsed:
// an empty name, or a name that starts with a dot.
func normalize(name string) (string, error) {
	lowered := strings.ToLower(name)

	switch {
	case lowered == "":
		return "", fmt.Errorf("name is blank")
	case strings.HasPrefix(lowered, "."):
		return "", fmt.Errorf("name %s starts with a dot", lowered)
	}

	return lowered, nil
}
|
|
|
|
// ToASCII is a wrapper for idna.ToASCII.
|
|
//
|
|
// This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months
|
|
// and I can't call this package directly anymore. The wrapper performs some terrible-but-necessary
|
|
// before-after replacements to make sure an already ASCII input always results in the same output
|
|
// even if passed through ToASCII.
|
|
//
|
|
// See golang/net@67957fd0b1, golang/net@f2499483f9, golang/net@78ebe5c8b6,
|
|
// and weppos/publicsuffix-go#66.
|
|
func ToASCII(s string) (string, error) {
|
|
// .example.com should be .example.com
|
|
// ..example.com should be ..example.com
|
|
if strings.HasPrefix(s, ".") {
|
|
dotIndex := 0
|
|
for i := 0; i < len(s); i++ {
|
|
if s[i] == '.' {
|
|
dotIndex = i
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
out, err := idna.ToASCII(s[dotIndex+1:])
|
|
out = s[:dotIndex+1] + out
|
|
return out, err
|
|
}
|
|
|
|
return idna.ToASCII(s)
|
|
}
|
|
|
|
// ToUnicode is a wrapper for idna.ToUnicode.
|
|
//
|
|
// See ToASCII for more details about why this wrapper exists.
|
|
func ToUnicode(s string) (string, error) {
|
|
return idna.ToUnicode(s)
|
|
}
|
|
|
|
// CookieJarList implements the cookiejar.PublicSuffixList interface.
// It is backed by DefaultList.
|
|
var CookieJarList cookiejar.PublicSuffixList = cookiejarList{DefaultList}
|
|
|
|
// cookiejarList adapts a List to the cookiejar.PublicSuffixList interface.
type cookiejarList struct {
|
|
// List is the list queried by PublicSuffix.
List *List
|
|
}
|
|
|
|
// PublicSuffix implements cookiejar.PublicSuffixList.
|
|
func (l cookiejarList) PublicSuffix(domain string) string {
|
|
rule := l.List.Find(domain, nil)
|
|
return rule.Decompose(domain)[1]
|
|
}
|
|
|
|
// String implements the String method of cookiejar.PublicSuffixList.
// It returns ListVersion, which is declared outside this file
// (presumably by the generator referenced in the go:generate directive — TODO confirm).
|
|
func (cookiejarList) String() string {
|
|
return ListVersion
|
|
}
|