/* Copyright 2020 The Knative Authors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package hash import ( "fmt" "hash/fnv" "math" "sort" "testing" "github.com/davecgh/go-spew/spew" "github.com/google/go-cmp/cmp" "github.com/google/uuid" "k8s.io/apimachinery/pkg/util/sets" ) func ExampleChooseSubset_selectOne() { // This example shows how to do consistent bucket // assignment using ChooseSubset. tasks := sets.New[string]("task1", "task2", "task3") ret := ChooseSubset(tasks, 1, "my-key1") fmt.Println(ret.UnsortedList()[0]) ret = ChooseSubset(tasks, 1, "something/another-key") fmt.Println(ret.UnsortedList()[0]) // Output: task3 // task2 } func ExampleChooseSubset_selectMany() { // This example shows how to do consistent bucket // assignment using ChooseSubset. tasks := sets.New[string]("task1", "task2", "task3", "task4", "task5") ret := ChooseSubset(tasks, 2, "my-key1") fmt.Println(sets.List(ret)) // Output: [task3 task4] } func TestBuildHashes(t *testing.T) { const target = "a target to remember" set := sets.New[string]("a", "b", "c", "e", "f") hd1 := buildHashes(set, target) hd2 := buildHashes(set, target) t.Log("HashData = ", spew.Sprintf("%+v", hd1)) if !cmp.Equal(hd1, hd2, cmp.AllowUnexported(hashData{})) { t.Errorf("buildHashe is not consistent: diff(-want,+got):\n%s", cmp.Diff(hd1, hd2, cmp.AllowUnexported(hashData{}))) } if !sort.SliceIsSorted(hd1.hashPool, func(i, j int) bool { return hd1.hashPool[i] < hd1.hashPool[j] }) { t.Error("From list is not sorted:", hd1.hashPool) } } func TestChooseSubset(t *testing.T) { tests := []struct { name string from sets.Set[string] target string wantNum int want sets.Set[string] }{{ name: "return all", from: sets.New[string]("sun", "moon", "mars", "mercury"), target: "a target!", wantNum: 4, want: sets.New[string]("sun", "moon", "mars", "mercury"), }, { name: "subset 1", from: sets.New[string]("sun", "moon", "mars", "mercury"), target: "a target!", wantNum: 2, want: sets.New[string]("mercury", "moon"), }, { name: "subset 2", from: sets.New[string]("sun", "moon", "mars", "mercury"), target: "something else entirely", wantNum: 2, want: sets.New[string]("mercury", "mars"), }, { name: "select 3", from: sets.New[string]("sun", "moon", "mars", "mercury"), target: "something else entirely", wantNum: 3, want: sets.New[string]("mars", "mercury", "sun"), }} for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { got := ChooseSubset(tc.from, tc.wantNum, tc.target) if !got.Equal(tc.want) { t.Errorf("Chose = %v, want = %v, diff(-want,+got):\n%s", got, tc.want, cmp.Diff(tc.want, got)) } }) } } func TestCollisionHandling(t *testing.T) { const ( key1 = "b08006d4-81f9-42ee-808b-ea18a39cbd83" key2 = "c9dc8df4-8c8d-4077-8750-6d2c2113a23b" target = "e68a64e1-19d8-4855-9ffa-04f49223a059" ) // Verify baseline, that they collide. hasher := fnv.New64a() h1 := computeHash([]byte(key1+target), hasher) % universe hasher.Reset() h2 := computeHash([]byte(key2+target), hasher) % universe if h1 != h2 { t.Fatalf("Baseline incorrect keys don't collide %d != %d", h1, h2) } hd := buildHashes(sets.New[string](key1, key2), target) if got, want := len(hd.nameLookup), 2; got != want { t.Error("Did not resolve collision, only 1 key in the map") } } func TestOverlay(t *testing.T) { // Execute // `go test -run=TestOverlay -count=200` // To ensure assignments are still not skewed. const ( sources = 50 samples = 100000 selection = 10 want = samples * selection / sources threshold = want / 5 // 20% ) from := sets.New[string]() for i := 0; i < sources; i++ { from.Insert(uuid.NewString()) } freqs := make(map[string]int, sources) for i := 0; i < samples; i++ { target := uuid.NewString() got := ChooseSubset(from, selection, target) for k := range got { freqs[k]++ } } totalDiff := 0. for _, v := range freqs { diff := float64(v - want) adiff := math.Abs(diff) totalDiff += adiff if adiff > threshold { t.Errorf("Diff for %d is %v, larger than threshold: %d", v, diff, threshold) } } t.Log(totalDiff / float64(len(freqs))) } func BenchmarkSelection(b *testing.B) { const maxSet = 200 from := make([]string, maxSet) for i := 0; i < maxSet; i++ { from[i] = uuid.NewString() } for _, v := range []int{5, 10, 25, 50, 100, 150, maxSet} { for _, ss := range []int{1, 5, 10, 15, 20, 25} { b.Run(fmt.Sprintf("pool-%d-subset-%d", v, ss), func(b *testing.B) { target := uuid.NewString() in := sets.New[string](from[:v]...) for i := 0; i < b.N; i++ { ChooseSubset(in, 10, target) } }) } } }