mirror of https://github.com/kubernetes/kops.git
510 lines
13 KiB
Go
510 lines
13 KiB
Go
package util
|
|
|
|
import (
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// level is the indentation depth used by the debug tracing in this file.
// That tracing (debugf and the level++/level-- bookkeeping inside
// shortRegexpString) is currently commented out, so nothing reads or writes
// this variable at the moment.
var level = 0
|
|
|
|
// func init() { log.SetFlags(0) }
|
|
//
|
|
// func debugf(f string, vs ...interface{}) {
|
|
// log.Printf(strings.Repeat(" ", level)+f, vs...)
|
|
// }
|
|
|
|
// ShortRegexpString tries to construct a short regexp that matches exactly the
|
|
// provided strings and nothing else.
|
|
//
|
|
// Warning: the current implementation may use a lot of time of memory.
|
|
func ShortRegexpString(vs ...string) (res string) {
|
|
cache := make(map[string][]string)
|
|
return render(shortRegexpString(vs, cache), false)
|
|
}
|
|
|
|
func shortRegexpString(vs []string, cache map[string][]string) (res []string) {
|
|
// Canonicalize (might turn the input into one of the trivial cases below)
|
|
if len(vs) > 1 {
|
|
sort.Strings(vs)
|
|
vs = removeDups(vs)
|
|
}
|
|
|
|
// Trivial cases.
|
|
switch len(vs) {
|
|
case 0:
|
|
return nil
|
|
case 1:
|
|
return []string{regexp.QuoteMeta(vs[0])} // Nothing else to do.
|
|
}
|
|
|
|
// level++
|
|
// defer func(s string) {
|
|
// level--
|
|
// debugf("ShortRegexpString(%s) = %#q", s, res)
|
|
// }(fmt.Sprintf("%#q", vs))
|
|
|
|
// The one to beat: just put ORs between them (after escaping meta-characters)
|
|
best := make([]string, len(vs))
|
|
for i := range vs {
|
|
best[i] = regexp.QuoteMeta(vs[i])
|
|
}
|
|
|
|
cacheKey := render(best, false)
|
|
bestCost := len(cacheKey)
|
|
|
|
if cached, ok := cache[cacheKey]; ok {
|
|
return cached
|
|
}
|
|
defer func(key string) {
|
|
// Put clauses in a canonical order and cache them.
|
|
sort.Strings(res)
|
|
cache[key] = res
|
|
}(cacheKey)
|
|
|
|
recurse := func(prefix, suffix string, data commonSs) (result []string) {
|
|
// debugf("> recurse(%#q, %#q, %v) on %#q", prefix, suffix, data, vs)
|
|
// defer func() {
|
|
// debugf(" recurse(%#q, %#q, %v) on %#q = %#q", prefix, suffix, data, vs, result)
|
|
// }()
|
|
|
|
//debugf("%v/%#q/%#q: %v\n", vs, prefix, suffix, data)
|
|
varying := make([]string, data.end-data.start)
|
|
allExist := true
|
|
var preExistingIndices []int
|
|
for i := data.start; i < data.end; i++ {
|
|
substr := vs[i][len(prefix) : len(vs[i])-len(suffix)]
|
|
varying[i-data.start] = substr
|
|
if allExist {
|
|
found := false
|
|
for i := 0; i < len(vs); i++ {
|
|
if i == data.start {
|
|
i = data.end - 1
|
|
continue
|
|
}
|
|
if substr == vs[i] {
|
|
found = true
|
|
preExistingIndices = append(preExistingIndices, i)
|
|
break
|
|
}
|
|
}
|
|
allExist = found
|
|
}
|
|
}
|
|
|
|
var others []string
|
|
// combined := make([]string, 0, len(preExistingIndices))
|
|
if allExist && (prefix == "" || suffix == "") {
|
|
others = make([]string, 0, len(vs)-2*len(preExistingIndices))
|
|
sort.Ints(preExistingIndices)
|
|
for i, k := 0, 0; i < len(vs) && k < len(preExistingIndices); i++ {
|
|
if i == data.start {
|
|
i = data.end - 1
|
|
continue
|
|
} else if i == preExistingIndices[k] {
|
|
// combined = append(combined, vs[i])
|
|
// debugf("Eliminating %#q", vs[i])
|
|
k++
|
|
} else {
|
|
others = append(others, vs[i])
|
|
}
|
|
}
|
|
} else {
|
|
others = make([]string, len(vs)-(data.end-data.start))
|
|
copy(others, vs[:data.start])
|
|
copy(others[data.start:], vs[data.end:])
|
|
}
|
|
|
|
middle := render(shortRegexpString(varying, cache), true)
|
|
// debugf(">> ShortRegexpString(%#q) = %#q", varying, middle)
|
|
|
|
prefix, suffix = regexp.QuoteMeta(prefix), regexp.QuoteMeta(suffix)
|
|
var cur string
|
|
switch {
|
|
case allExist && prefix == "": // M . S | M ==> M . S?
|
|
cur = middle + optional(suffix)
|
|
case allExist && suffix == "": // P . M | M ==> P? . M
|
|
cur = optional(prefix) + middle
|
|
default:
|
|
cur = prefix + middle + suffix
|
|
}
|
|
return append([]string{cur}, shortRegexpString(others, cache)...)
|
|
}
|
|
|
|
// Note that vs is still sorted here.
|
|
// debugf("Sorted: %#q", vs)
|
|
for prefix, preLoc := range commonPrefixes(vs, 1) {
|
|
suffix := sharedSuffix(len(prefix), vs[preLoc.start:preLoc.end])
|
|
strs := recurse(prefix, suffix, preLoc)
|
|
if c := cost(strs); c < bestCost { // || (c == len(best) && str < best) {
|
|
best = strs
|
|
bestCost = c
|
|
} else {
|
|
//debugf("! rejected %#q", str)
|
|
//debugf(" because: %#q", best)
|
|
}
|
|
}
|
|
|
|
sort.Sort(reverseStrings(vs))
|
|
// debugf("Reverse-sorted: %#q", vs)
|
|
for suffix, sufLoc := range commonSuffixes(vs, 1) {
|
|
// sufLoc := suffixes[suffix]
|
|
prefix := sharedPrefix(len(suffix), vs[sufLoc.start:sufLoc.end])
|
|
strs := recurse(prefix, suffix, sufLoc)
|
|
if c := cost(strs); c < bestCost { //|| (len(str) == len(best) && str < best) {
|
|
best = strs
|
|
bestCost = c
|
|
} else {
|
|
//debugf("! rejected %#q", str)
|
|
//debugf(" because: %#q", best)
|
|
}
|
|
}
|
|
|
|
singleChar := true
|
|
optional := ""
|
|
for i := range vs {
|
|
if len(vs[i]) == 0 {
|
|
optional = "?"
|
|
} else if len(vs[i]) != 1 {
|
|
// FIXME: should allow single non-ASCII characters
|
|
singleChar = false
|
|
break
|
|
}
|
|
}
|
|
if singleChar {
|
|
// Construct an array of characters in the right order:
|
|
// ']' first, '-' last, rest alphabetically
|
|
class := make([]byte, 0, len(vs))
|
|
last := ""
|
|
for i, s := range vs {
|
|
if s == "]" {
|
|
// Must be first
|
|
class = append(class, ']')
|
|
vs[i] = "" // delete
|
|
} else if s == "-" {
|
|
// Must be last
|
|
last = s
|
|
vs[i] = "" // delete
|
|
}
|
|
}
|
|
sortFirst := len(class)
|
|
for _, s := range vs {
|
|
class = append(class, s...)
|
|
}
|
|
sort.Sort(sortBytes(class[sortFirst:]))
|
|
class = append(class, last...)
|
|
|
|
// Collapse character ranges
|
|
w := 0
|
|
first := -1
|
|
for i := 0; i < len(class); i++ {
|
|
if first >= 0 {
|
|
// Do we need to finish the range?
|
|
if class[i] != class[i-1]+1 {
|
|
// Does it pay to use a range?
|
|
if i-first > 3 {
|
|
// Build a range
|
|
class[w-(i-first-1)] = '-'
|
|
class[w-(i-first-1)+1] = class[i-1]
|
|
// Rewind the write position
|
|
w = w - (i - first - 1) + 2
|
|
first = i
|
|
}
|
|
}
|
|
} else {
|
|
first = i
|
|
}
|
|
// Write the current character
|
|
class[w] = class[i]
|
|
w++
|
|
}
|
|
class = class[:w]
|
|
|
|
if len(class) == 1 {
|
|
str := regexp.QuoteMeta(string(class)) + optional
|
|
if len(str) <= bestCost {
|
|
best = []string{str}
|
|
bestCost = len(str)
|
|
}
|
|
}
|
|
if cost := len(class) + 2 + len(optional); cost <= bestCost {
|
|
best = []string{"[" + string(class) + "]" + optional}
|
|
bestCost = cost
|
|
}
|
|
}
|
|
|
|
return best
|
|
}
|
|
|
|
func render(clauses []string, asSingle bool) string {
|
|
switch len(clauses) {
|
|
case 0:
|
|
return "$.^" // Unmatchable?
|
|
case 1:
|
|
return clauses[0]
|
|
default:
|
|
if len(clauses[0]) == 0 {
|
|
clauses = clauses[1:]
|
|
if len(clauses) == 1 {
|
|
return optional(clauses[0])
|
|
}
|
|
return render(clauses, true) + "?"
|
|
}
|
|
|
|
result := strings.Join(clauses, "|")
|
|
if asSingle {
|
|
result = "(" + result + ")"
|
|
}
|
|
return result
|
|
}
|
|
}
|
|
|
|
func cost(clauses []string) int {
|
|
// TODO: real implementation
|
|
return len(render(clauses, false))
|
|
}
|
|
|
|
// optional returns s followed by "?". Anything longer than one byte is
// parenthesized first; the empty string is returned unchanged.
func optional(s string) string {
	switch len(s) {
	case 0:
		return s
	case 1:
		return s + "?"
	default:
		return "(" + s + ")?"
	}
}
|
|
|
|
// removeDups removes duplicate strings from vs and returns it.
// It assumes that vs has been sorted such that duplicates are next to each
// other. The compaction happens in place: the result shares vs's backing
// array. An empty or nil slice is returned unchanged.
func removeDups(vs []string) []string {
	if len(vs) == 0 {
		// Nothing to compact; slicing vs[:1] below would panic.
		return vs
	}
	insertPos := 1 // vs[0] is always kept
	for i := 1; i < len(vs); i++ {
		if vs[i-1] != vs[i] {
			vs[insertPos] = vs[i]
			insertPos++
		}
	}
	return vs[:insertPos]
}
|
|
|
|
// dup returns a freshly allocated slice with the same elements as vs, so the
// copy can be modified without touching vs.
func dup(vs []string) []string {
	clone := make([]string, 0, len(vs))
	clone = append(clone, vs...)
	return clone
}
|
|
|
|
// reverseStrings is a sort.Interface that orders strings by comparing them
// from their last character towards their first.
type reverseStrings []string

// Less reports whether rs[i] sorts before rs[j]. The strings are walked
// backwards byte by byte; at the first difference the runes containing the
// differing bytes decide. If one string is a suffix of the other, the
// shorter one sorts first.
func (rs reverseStrings) Less(i, j int) bool {
	a, b := rs[i], rs[j]
	m, n := len(a)-1, len(b)-1
	for m >= 0 && n >= 0 {
		if a[m] == b[n] {
			m--
			n--
			continue
		}
		// We want to compare runes, not bytes. So find the start of the
		// current runes and decode them.
		for m > 0 && !utf8.RuneStart(a[m]) {
			m--
		}
		for n > 0 && !utf8.RuneStart(b[n]) {
			n--
		}
		ra, _ := utf8.DecodeRuneInString(a[m:])
		rb, _ := utf8.DecodeRuneInString(b[n:])
		return ra < rb
	}
	return len(a) < len(b)
}

// Swap exchanges the elements at positions i and j.
func (rs reverseStrings) Swap(i, j int) {
	rs[j], rs[i] = rs[i], rs[j]
}

// Len returns the number of strings.
func (rs reverseStrings) Len() int {
	return len(rs)
}
|
|
|
|
// sortBytes is a sort.Interface that orders bytes by ascending value.
type sortBytes []byte

// Less reports whether the byte at i is smaller than the byte at j.
func (sb sortBytes) Less(i, j int) bool {
	return sb[j] > sb[i]
}

// Swap exchanges the bytes at positions i and j.
func (sb sortBytes) Swap(i, j int) {
	sb[j], sb[i] = sb[i], sb[j]
}

// Len returns the number of bytes.
func (sb sortBytes) Len() int {
	return len(sb)
}
|
|
|
|
// commonSs holds information on where to find a common substring: the
// strings that share it are the slice elements vs[start:end].
type commonSs struct {
	start int // index of the first string that has the substring
	end   int // index one past the last string that has it
}
|
|
|
|
// commonPrefixes returns a map from prefixes to number of occurrences. Not all
|
|
// strings in vs need to have a prefix for it to be returned.
|
|
// Assumes vs to have been sorted with sort.Strings()
|
|
func commonPrefixes(vs []string, minLength int) (result map[string]commonSs) {
|
|
result = make(map[string]commonSs)
|
|
for i := 0; i < len(vs)-1; i++ {
|
|
j := i + 1
|
|
k := 0
|
|
for ; k < len(vs[i]) && k < len(vs[j]); k++ {
|
|
if vs[i][k] != vs[j][k] {
|
|
break
|
|
}
|
|
}
|
|
if k < minLength {
|
|
continue
|
|
}
|
|
prefix := vs[i][:k]
|
|
if _, exists := result[prefix]; !exists {
|
|
first := prefixStart(vs[:i], prefix)
|
|
//debugf("prefixStart(%#q, %#q) == %v", vs[:i], prefix, first)
|
|
// prefixEnd(vs, prefix) - first + 1
|
|
// == prefixEnd(vs[first:], prefix) + 1
|
|
// == prefixEnd(vs[first+1:], prefix) + 2
|
|
end := first + 1 + prefixEnd(vs[first+1:], prefix)
|
|
result[prefix] = commonSs{
|
|
first, end,
|
|
}
|
|
//debugf("prefixEnd(%#q, %#q) == %v", vs, prefix, result[prefix].end)
|
|
}
|
|
}
|
|
// debugf("# %v..", result)
|
|
return result
|
|
}
|
|
|
|
func prefixStart(vs []string, prefix string) int {
|
|
if prefix == "" {
|
|
return 0
|
|
}
|
|
return findFirst(vs, func(s string) bool {
|
|
return strings.HasPrefix(s, prefix)
|
|
})
|
|
}
|
|
|
|
func prefixEnd(vs []string, prefix string) int {
|
|
if prefix == "" {
|
|
return len(vs)
|
|
}
|
|
//debugf("prefixEnd(%v, %#q)", vs, prefix)
|
|
return findFirst(vs, func(s string) bool {
|
|
return !strings.HasPrefix(s, prefix)
|
|
})
|
|
}
|
|
|
|
// commonSuffixes returns a map from suffixes to number of occurrences. Not all
|
|
// strings in vs need to have a suffix for it to be returned.
|
|
// Assumes vs to have been sorted using sort.Sort(reverseStrings(vs))
|
|
func commonSuffixes(vs []string, minLength int) (result map[string]commonSs) {
|
|
result = make(map[string]commonSs)
|
|
for i := 0; i < len(vs)-1; i++ {
|
|
j := i + 1
|
|
k := 0
|
|
for ; k < len(vs[i]) && k < len(vs[j]); k++ {
|
|
if vs[i][len(vs[i])-k-1] != vs[j][len(vs[j])-k-1] {
|
|
break
|
|
}
|
|
}
|
|
if k < minLength {
|
|
continue
|
|
}
|
|
suffix := vs[i][len(vs[i])-k:]
|
|
if _, exists := result[suffix]; !exists {
|
|
first := suffixStart(vs[:i], suffix)
|
|
//debugf("suffixStart<%#q>(%#q) == %v", suffix, vs[:i], first)
|
|
// suffixEnd(vs, suffix) - first + 1
|
|
// == suffixEnd(vs[first:], suffix) + 1
|
|
// == suffixEnd(vs[first+1:], suffix) + 2
|
|
end := first + 1 + suffixEnd(vs[first+1:], suffix)
|
|
result[suffix] = commonSs{
|
|
first, end,
|
|
}
|
|
//debugf("suffixEnd <%#q>(%#q) == %v", suffix, vs, result[suffix].end)
|
|
//debugf("selected(%#q): %q\n\n", suffix, vs[first:result[suffix].end])
|
|
}
|
|
}
|
|
// debugf("# ..%v", result)
|
|
return result
|
|
}
|
|
|
|
func suffixStart(vs []string, suffix string) int {
|
|
// //debugf("suffixStart(%#q, %#q)", vs, suffix)
|
|
if suffix == "" {
|
|
return 0
|
|
}
|
|
return findFirst(vs, func(s string) bool {
|
|
return strings.HasSuffix(s, suffix)
|
|
})
|
|
}
|
|
|
|
func suffixEnd(vs []string, suffix string) int {
|
|
// //debugf("suffixEnd (%#q, %#q)", vs, suffix)
|
|
if suffix == "" {
|
|
return len(vs)
|
|
}
|
|
return findFirst(vs, func(s string) bool {
|
|
return !strings.HasSuffix(s, suffix)
|
|
})
|
|
}
|
|
|
|
// findFirst returns the index of the first element of vs for which predicate
// holds, using binary search. It relies on vs being partitioned: some number
// of leading strings fail the predicate and all strings after them satisfy
// it. If every string satisfies the predicate the result is 0; if none does
// the result is len(vs).
func findFirst(vs []string, predicate func(string) bool) int {
	// lo is an index known (or assumed) to fail, hi one known (or assumed) to
	// match. The initial -1 and len(vs) are sentinels that are never tested.
	lo, hi := -1, len(vs)
	for hi-lo > 1 {
		mid := (lo + hi) / 2 // strictly between lo and hi, so a valid index
		if !predicate(vs[mid]) {
			lo = mid
			continue
		}
		hi = mid
	}
	return hi
}
|
|
|
|
// sharedPrefix returns the longest prefix which all the parameters share but
// ignores a number of characters at the end of each string.
//
// ignore is a byte count; the last ignore bytes of every string never become
// part of the result, also when vs holds a single string. The result never
// ends inside a multi-byte UTF-8 sequence. An empty vs, or a first string no
// longer than ignore, yields "".
func sharedPrefix(ignore int, vs []string) (result string) {
	if len(vs) == 0 {
		return ""
	}
	first := vs[0]

	// limit is the length of the longest prefix that is still possible.
	limit := len(first) - ignore
	if limit <= 0 {
		return ""
	}
	for _, s := range vs[1:] {
		usable := len(s) - ignore // bytes of s that may take part
		n := 0
		for n < limit && n < usable && first[n] == s[n] {
			n++
		}
		limit = n
	}

	// Two different runes can share their leading bytes; back up to the start
	// of the rune so the prefix does not split it.
	for limit > 0 && limit < len(first) && !utf8.RuneStart(first[limit]) {
		limit--
	}
	return first[:limit]
}
|
|
|
|
// sharedSuffix returns the longest suffix which all the parameters share but
// ignores a number of characters at the start of each string.
//
// ignore is a byte count; the first ignore bytes of every string never become
// part of the result, also when vs holds a single string. The result never
// starts inside a multi-byte UTF-8 sequence. An empty vs, or a first string
// no longer than ignore, yields "".
func sharedSuffix(ignore int, vs []string) (result string) {
	if len(vs) == 0 {
		return ""
	}
	first := vs[0]

	// limit is the length of the longest suffix that is still possible.
	limit := len(first) - ignore
	if limit <= 0 {
		return ""
	}
	for _, s := range vs[1:] {
		usable := len(s) - ignore // bytes of s that may take part
		n := 0
		for n < limit && n < usable && first[len(first)-1-n] == s[len(s)-1-n] {
			n++
		}
		limit = n
	}

	// Two different runes can share their trailing bytes; move forward to the
	// start of the next rune so the suffix does not split one.
	start := len(first) - limit
	for start < len(first) && !utf8.RuneStart(first[start]) {
		start++
	}
	return first[start:]
}
|