mirror of https://github.com/knative/caching.git
167 lines
4.6 KiB
Go
167 lines
4.6 KiB
Go
/*
|
|
Copyright 2020 The Knative Authors
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package hash
|
|
|
|
// This file contains the implementation of the subsetting algorithm for
|
|
// choosing a subset of input values in a consistent manner.
|
|
|
|
import (
|
|
"bytes"
|
|
"hash"
|
|
"hash/fnv"
|
|
"sort"
|
|
"strconv"
|
|
|
|
"k8s.io/apimachinery/pkg/util/sets"
|
|
)
|
|
|
|
// Salts appended to the target before hashing, so that the start angle and
// the step angle of a target are derived from different hash inputs.
const (
	startSalt = "start-angle-salt"
	stepSalt  = "step-angle-salt"

	// universe is the exclusive upper bound of the angle space: every angle
	// lies in [0, universe). It is a power of two, so it divides the 64-bit
	// hash range evenly and the modulo reduction adds no bias.
	universe = 1 << 11
)
|
|
|
|
// computeHash returns the 64-bit hash of n computed with h, for use by the
// consistent selection algorithm. h is reset first, so whatever was written
// to it earlier does not influence the result and one hasher can be reused
// across calls.
// The result stays a uint64 so that callers can apply the modulo before
// converting to int: int may be only 32 bits wide on 32-bit platforms, and
// converting first would truncate the hash.
func computeHash(n []byte, h hash.Hash64) uint64 {
	h.Reset()
	// hash.Hash documents that Write never returns an error.
	_, _ = h.Write(n)
	return h.Sum64()
}
|
|
|
|
// hashData is the prepared state the selection algorithm works on for one
// (input set, target) pair.
type hashData struct {
	// nameLookup maps each assigned hash back to the name it was computed
	// for; it also serves as the membership test for hashes already taken.
	nameLookup map[int]string
	// hashPool holds the same hashes as nameLookup, in ascending order, so
	// the selection algorithm can binary-search it.
	hashPool []int
	// start is the angle at which selection begins and step is the amount
	// the angle advances after every pick.
	start, step int
}
|
|
|
|
func (hd *hashData) fromIndexSet(s sets.Int) sets.String {
|
|
ret := make(sets.String, len(s))
|
|
for v := range s {
|
|
ret.Insert(hd.nameForHIndex(v))
|
|
}
|
|
return ret
|
|
}
|
|
|
|
func (hd *hashData) nameForHIndex(hi int) string {
|
|
return hd.nameLookup[hd.hashPool[hi]]
|
|
}
|
|
|
|
func buildHashes(in sets.String, target string) *hashData {
|
|
// Any one changing this function must execute
|
|
// `go test -run=TestOverlay -count=200`.
|
|
// This is to ensure there is no regression in the selection
|
|
// algorithm.
|
|
|
|
// Sorted list to ensure consistent results every time.
|
|
from := in.List()
|
|
// Write in two pieces, so we don't allocate temp string which is sum of both.
|
|
buf := bytes.NewBufferString(target)
|
|
buf.WriteString(startSalt)
|
|
hasher := fnv.New64a()
|
|
hd := &hashData{
|
|
nameLookup: make(map[int]string, len(from)),
|
|
hashPool: make([]int, len(from)),
|
|
start: int(computeHash(buf.Bytes(), hasher) % universe),
|
|
}
|
|
buf.Truncate(len(target)) // Discard the angle salt.
|
|
buf.WriteString(stepSalt)
|
|
hd.step = int(computeHash(buf.Bytes(), hasher) % universe)
|
|
|
|
for i, f := range from {
|
|
buf.Reset() // This retains the storage.
|
|
// Make unique sets for every target.
|
|
buf.WriteString(f)
|
|
buf.WriteString(target)
|
|
h := computeHash(buf.Bytes(), hasher)
|
|
hs := int(h % universe)
|
|
// Two values slotted to the same bucket.
|
|
// On average should happen with 1/universe probability.
|
|
_, ok := hd.nameLookup[hs]
|
|
for ok {
|
|
// Feed the hash as salt.
|
|
buf.WriteString(strconv.FormatUint(h, 16 /*append hex strings for shortness*/))
|
|
h = computeHash(buf.Bytes(), hasher)
|
|
hs = int(h % universe)
|
|
_, ok = hd.nameLookup[hs]
|
|
}
|
|
|
|
hd.hashPool[i] = hs
|
|
hd.nameLookup[hs] = f
|
|
}
|
|
// Sort for consistent mapping later.
|
|
sort.Slice(hd.hashPool, func(i, j int) bool {
|
|
return hd.hashPool[i] < hd.hashPool[j]
|
|
})
|
|
return hd
|
|
}
|
|
|
|
// ChooseSubset consistently chooses n items from `from`, using
|
|
// `target` as a seed value.
|
|
// ChooseSubset is an internal function and presumes sanitized inputs.
|
|
// TODO(vagababov): once initial impl is ready, think about how to cache
|
|
// the prepared data.
|
|
func ChooseSubset(from sets.String, n int, target string) sets.String {
|
|
if n >= len(from) {
|
|
return from
|
|
}
|
|
|
|
hashData := buildHashes(from, target)
|
|
|
|
// The algorithm for selection does the following:
|
|
// 0. Select angle to be the start angle
|
|
// 1. While n candidates are not selected
|
|
// 2. Find the index for that angle.
|
|
// 2.1. While that index is already selected pick next index
|
|
// 3. Advance angle by `step`
|
|
// 4. Goto 1.
|
|
selection := sets.NewInt()
|
|
angle := hashData.start
|
|
hpl := len(hashData.hashPool)
|
|
for len(selection) < n {
|
|
root := sort.Search(hpl, func(i int) bool {
|
|
return hashData.hashPool[i] >= angle
|
|
})
|
|
// Wrap around.
|
|
if root == hpl {
|
|
root = 0
|
|
}
|
|
// Already matched this one. Continue to the next index.
|
|
for selection.Has(root) {
|
|
root++
|
|
if root == hpl {
|
|
root = 0
|
|
}
|
|
}
|
|
selection.Insert(root)
|
|
angle = (angle + hashData.step) % universe
|
|
}
|
|
|
|
return hashData.fromIndexSet(selection)
|
|
}
|