* Add Histogram.SubtractSample().

* Add HistogramOptions.Epsilon() (bucket weight precision).
* Return the middle of the bucket, not the bucket start, in Percentile().
* Rename CircularBuffer to FloatSlidingWindow.
* Fix field names and comments.
2017-04-19 17:33:14 +02:00 · 2017-04-19 17:33:14 +02:00 · 1e90342183
parent b1a7703ab1
commit 1e90342183
6 changed files with 125 additions and 82 deletions
--- a/vertical-pod-autoscaler/recommender/util/histogram.go
+++ b/vertical-pod-autoscaler/recommender/util/histogram.go
@ -25,11 +25,13 @@ type Histogram interface {
 	// If the histogram is empty, Percentile() returns 0.0.
 	Percentile(percentile float64) float64

-	// Add a sample with a given value and weight. A sample can have
-	// negative weight, as long as the total weight of samples with the
-	// given value is not negative.
+	// Add a sample with a given value and weight.
 	AddSample(value float64, weight float64)

+	// Remove a sample with a given value and weight. Note that the total
+	// weight of samples with a given value cannot be negative.
+	SubtractSample(value float64, weight float64)
+
 	// Returns true if the histogram is empty.
 	Empty() bool
 }
@ -41,18 +43,19 @@ func NewHistogram(options HistogramOptions) Histogram {
 		options.NumBuckets() - 1, 0}
 }

-// Simple bucket-based implementation of the Histogram interface. Samples added
-// to the histogram are rounded down to the bucket boundary. Each bucket holds
-// the total weight of samples that belong to it.
+// Simple bucket-based implementation of the Histogram interface. Each bucket
+// holds the total weight of samples that belong to it.
+// Percentile() returns the middle of the corresponding bucket.
 // Resolution (bucket boundaries) of the histogram depends on the options.
 // There's no interpolation within buckets (i.e. one sample falls to exactly one
 // bucket).
+// A bucket is considered empty if its weight is smaller than options.Epsilon().
 type histogram struct {
 	// Bucketing scheme.
 	options *HistogramOptions
-	// Weight of samples in each bucket.
-	buckets []float64
-	// Weight of samples in all buckets.
+	// Cumulative weight of samples in each bucket.
+	bucketWeight []float64
+	// Total cumulative weight of samples in all buckets.
 	totalWeight float64
 	// Index of the first non-empty bucket if there's any. Otherwise index
 	// of the last bucket.
@ -62,18 +65,36 @@ type histogram struct {
 }

 func (h *histogram) AddSample(value float64, weight float64) {
+	if weight < 0.0 {
+		panic("sample weight must be non-negative")
+	}
 	bucket := (*h.options).FindBucket(value)
-	if h.buckets[bucket]+weight <= 0.0 {
-		h.clearBucket(bucket)
-	} else {
-		h.buckets[bucket] += weight
-		h.totalWeight += weight
-		if bucket < h.minBucket {
-			h.minBucket = bucket
-		}
-		if bucket > h.maxBucket {
-			h.maxBucket = bucket
-		}
+	h.bucketWeight[bucket] += weight
+	h.totalWeight += weight
+	if bucket < h.minBucket {
+		h.minBucket = bucket
+	}
+	if bucket > h.maxBucket {
+		h.maxBucket = bucket
+	}
+}
+func (h *histogram) SubtractSample(value float64, weight float64) {
+	if weight < 0.0 {
+		panic("sample weight must be non-negative")
+	}
+	bucket := (*h.options).FindBucket(value)
+	epsilon := (*h.options).Epsilon()
+	if weight > h.bucketWeight[bucket]-epsilon {
+		weight = h.bucketWeight[bucket]
+	}
+	h.totalWeight -= weight
+	h.bucketWeight[bucket] -= weight
+	lastBucket := (*h.options).NumBuckets() - 1
+	for h.bucketWeight[h.minBucket] < epsilon && h.minBucket < lastBucket {
+		h.minBucket++
+	}
+	for h.bucketWeight[h.maxBucket] < epsilon && h.maxBucket > 0 {
+		h.maxBucket--
 	}
 }

@ -85,26 +106,21 @@ func (h *histogram) Percentile(percentile float64) float64 {
 	threshold := percentile * h.totalWeight
 	bucket := h.minBucket
 	for ; bucket < h.maxBucket; bucket++ {
-		partialSum += h.buckets[bucket]
+		partialSum += h.bucketWeight[bucket]
 		if partialSum >= threshold {
 			break
 		}
 	}
-	return (*h.options).GetBucketStart(bucket)
+	bucketStart := (*h.options).GetBucketStart(bucket)
+	if bucket < (*h.options).NumBuckets()-1 {
+		// Return the middle of the bucket.
+		nextBucketStart := (*h.options).GetBucketStart(bucket + 1)
+		return (bucketStart + nextBucketStart) / 2.0
+	}
+	// For the last bucket return the bucket start.
+	return bucketStart
 }

 func (h *histogram) Empty() bool {
-	return h.totalWeight == 0.0
-}
-
-func (h *histogram) clearBucket(bucket int) {
-	h.totalWeight -= h.buckets[bucket]
-	h.buckets[bucket] = 0.0
-	lastBucket := (*h.options).NumBuckets() - 1
-	for h.buckets[h.minBucket] == 0.0 && h.minBucket < lastBucket {
-		h.minBucket++
-	}
-	for h.buckets[h.maxBucket] == 0.0 && h.maxBucket > 0 {
-		h.maxBucket--
-	}
+	return h.bucketWeight[h.minBucket] < (*h.options).Epsilon()
 }
--- a/vertical-pod-autoscaler/recommender/util/histogram_options.go
+++ b/vertical-pod-autoscaler/recommender/util/histogram_options.go
@ -23,6 +23,7 @@ import (

 // HistogramOptions define the number and size of buckets of a histogram.
 type HistogramOptions interface {
+	// Returns the number of buckets in the histogram.
 	NumBuckets() int
 	// Returns the index of the bucket to which the given value falls.
 	// If the value is outside of the range covered by the histogram, it
@ -31,44 +32,48 @@ type HistogramOptions interface {
 	// Returns the start of the bucket with a given index. If the index is
 	// outside the [0..NumBuckets() - 1] range, the result is undefined.
 	GetBucketStart(bucket int) float64
+	// Returns the minimum weight for a bucket to be considered non-empty.
+	Epsilon() float64
 }

 // NewLinearHistogramOptions returns HistogramOptions describing a histogram
 // with a given number of fixed-size buckets, with the first bucket starting
-// at 0.0. Requires maxValue > 0, bucketSize > 0.
+// at 0.0. Requires maxValue > 0, bucketSize > 0, epsilon > 0.
 func NewLinearHistogramOptions(
-	maxValue float64, bucketSize float64) (HistogramOptions, error) {
-	if maxValue <= 0.0 || bucketSize <= 0.0 {
+	maxValue float64, bucketSize float64, epsilon float64) (HistogramOptions, error) {
+	if maxValue <= 0.0 || bucketSize <= 0.0 || epsilon <= 0.0 {
 		return nil, errors.New("maxValue and bucketSize must both be positive")
 	}
 	numBuckets := int(math.Ceil(maxValue / bucketSize))
-	return &linearHistogramOptions{numBuckets, bucketSize}, nil
+	return &linearHistogramOptions{numBuckets, bucketSize, epsilon}, nil
 }

 // NewExponentialHistogramOptions returns HistogramOptions describing a
 // histogram with exponentially growing bucket boundaries. The first bucket
 // covers the range [0..firstBucketSize). Consecutive buckets are of the form
 // [x(n)..x(n) * ratio) for n = 1 .. numBuckets - 1.
-// Requires maxValue > 0, firstBucketSize > 0, ratio > 1.
+// Requires maxValue > 0, firstBucketSize > 0, ratio > 1, epsilon > 0.
 func NewExponentialHistogramOptions(
-	maxValue float64, firstBucketSize float64, ratio float64) (HistogramOptions, error) {
-	if maxValue <= 0.0 || firstBucketSize <= 0.0 || ratio <= 1.0 {
+	maxValue float64, firstBucketSize float64, ratio float64, epsilon float64) (HistogramOptions, error) {
+	if maxValue <= 0.0 || firstBucketSize <= 0.0 || ratio <= 1.0 || epsilon <= 0.0 {
 		return nil, errors.New(
-			"maxValue and firstBucketSize must be > 0.0, ratio must be > 1.0")
+			"maxValue, firstBucketSize and epsilon must be > 0.0, ratio must be > 1.0")
 	}
 	numBuckets := int(math.Ceil(math.Log(maxValue/firstBucketSize)/math.Log(ratio))) + 1
-	return &exponentialHistogramOptions{numBuckets, firstBucketSize, ratio}, nil
+	return &exponentialHistogramOptions{numBuckets, firstBucketSize, ratio, epsilon}, nil
 }

 type linearHistogramOptions struct {
 	numBuckets int
 	bucketSize float64
+	epsilon    float64
 }

 type exponentialHistogramOptions struct {
 	numBuckets      int
 	firstBucketSize float64
 	ratio           float64
+	epsilon         float64
 }

 func (o *linearHistogramOptions) NumBuckets() int {
@ -90,6 +95,10 @@ func (o *linearHistogramOptions) GetBucketStart(bucket int) float64 {
 	return float64(bucket) * o.bucketSize
 }

+func (o *linearHistogramOptions) Epsilon() float64 {
+	return o.epsilon
+}
+
 func (o *exponentialHistogramOptions) NumBuckets() int {
 	return o.numBuckets
 }
@ -111,3 +120,7 @@ func (o *exponentialHistogramOptions) GetBucketStart(bucket int) float64 {
 	}
 	return o.firstBucketSize * math.Pow(o.ratio, float64(bucket-1))
 }
+
+func (o *exponentialHistogramOptions) Epsilon() float64 {
+	return o.epsilon
+}
--- a/vertical-pod-autoscaler/recommender/util/histogram_options_test.go
+++ b/vertical-pod-autoscaler/recommender/util/histogram_options_test.go
@ -22,10 +22,15 @@ import (
 	"github.com/stretchr/testify/assert"
 )

+var (
+	epsilon = 0.001
+)
+
 // Test all methods of LinearHistogramOptions using a sample bucketing scheme.
 func TestLinearHistogramOptions(t *testing.T) {
-	o, err := NewLinearHistogramOptions(5.0, 0.3)
+	o, err := NewLinearHistogramOptions(5.0, 0.3, epsilon)
 	assert.Nil(t, err)
+	assert.Equal(t, epsilon, o.Epsilon())
 	assert.Equal(t, 17, o.NumBuckets())

 	assert.Equal(t, 0.0, o.GetBucketStart(0))
@ -39,8 +44,9 @@ func TestLinearHistogramOptions(t *testing.T) {

 // Test all methods of ExponentialHistogramOptions using a sample bucketing scheme.
 func TestExponentialHistogramOptions(t *testing.T) {
-	o, err := NewExponentialHistogramOptions(100.0, 10.0, 2.0)
+	o, err := NewExponentialHistogramOptions(100.0, 10.0, 2.0, epsilon)
 	assert.Nil(t, err)
+	assert.Equal(t, epsilon, o.Epsilon())
 	assert.Equal(t, 5, o.NumBuckets())

 	assert.Equal(t, 0.0, o.GetBucketStart(0))
--- a/vertical-pod-autoscaler/recommender/util/histogram_test.go
+++ b/vertical-pod-autoscaler/recommender/util/histogram_test.go
@ -22,10 +22,17 @@ import (
 	"github.com/stretchr/testify/assert"
 )

+var (
+	// Minimum precision of histogram values (relative).
+	valueEpsilon = 1e-15
+	// Minimum precision of histogram weights (absolute).
+	weightEpsilon = 1e-15
+)
+
 // Verifies that Percentile() returns 0.0 when called on an empty histogram for
 // any percentile.
 func TestPercentilesEmptyHistogram(t *testing.T) {
-	options, err := NewLinearHistogramOptions(1.0, 0.1)
+	options, err := NewLinearHistogramOptions(1.0, 0.1, weightEpsilon)
 	assert.Nil(t, err)
 	h := NewHistogram(options)
 	for p := -0.5; p <= 1.5; p += 0.5 {
@ -36,51 +43,51 @@ func TestPercentilesEmptyHistogram(t *testing.T) {
 // Verifies that Percentile() returns the correct values of selected
 // percentiles on the following histogram: { 1: 1, 2: 2, 3: 3, 4: 4 }.
 func TestPercentiles(t *testing.T) {
-	options, err := NewLinearHistogramOptions(10.0, 1.0)
+	options, err := NewLinearHistogramOptions(10.0, 1.0, weightEpsilon)
 	assert.Nil(t, err)
 	h := NewHistogram(options)
 	for i := 1; i <= 4; i++ {
 		h.AddSample(float64(i), float64(i))
 	}
-	assert.Equal(t, 1.0, h.Percentile(0.0))
-	assert.Equal(t, 1.0, h.Percentile(0.1))
-	assert.Equal(t, 2.0, h.Percentile(0.2))
-	assert.Equal(t, 2.0, h.Percentile(0.3))
-	assert.Equal(t, 3.0, h.Percentile(0.4))
-	assert.Equal(t, 3.0, h.Percentile(0.5))
-	assert.Equal(t, 3.0, h.Percentile(0.6))
-	assert.Equal(t, 4.0, h.Percentile(0.7))
-	assert.Equal(t, 4.0, h.Percentile(0.8))
-	assert.Equal(t, 4.0, h.Percentile(0.9))
-	assert.Equal(t, 4.0, h.Percentile(1.0))
+	assert.InEpsilon(t, 1.5, h.Percentile(0.0), valueEpsilon)
+	assert.InEpsilon(t, 1.5, h.Percentile(0.1), valueEpsilon)
+	assert.InEpsilon(t, 2.5, h.Percentile(0.2), valueEpsilon)
+	assert.InEpsilon(t, 2.5, h.Percentile(0.3), valueEpsilon)
+	assert.InEpsilon(t, 3.5, h.Percentile(0.4), valueEpsilon)
+	assert.InEpsilon(t, 3.5, h.Percentile(0.5), valueEpsilon)
+	assert.InEpsilon(t, 3.5, h.Percentile(0.6), valueEpsilon)
+	assert.InEpsilon(t, 4.5, h.Percentile(0.7), valueEpsilon)
+	assert.InEpsilon(t, 4.5, h.Percentile(0.8), valueEpsilon)
+	assert.InEpsilon(t, 4.5, h.Percentile(0.9), valueEpsilon)
+	assert.InEpsilon(t, 4.5, h.Percentile(1.0), valueEpsilon)
 }

 // Verifies that querying percentile < 0.0 returns the minimum value in the
 // histogram, while querying percentile > 1.0 returns the maximum of the
 // histogram.
 func TestPercentileOutOfBounds(t *testing.T) {
-	options, err := NewLinearHistogramOptions(1.0, 0.1)
+	options, err := NewLinearHistogramOptions(1.0, 0.1, weightEpsilon)
 	assert.Nil(t, err)
 	h := NewHistogram(options)
 	assert.Nil(t, err)
 	h.AddSample(0.1, 0.1)
 	h.AddSample(0.2, 0.2)

-	assert.Equal(t, 0.1, h.Percentile(-0.1))
-	assert.Equal(t, 0.2, h.Percentile(1.1))
+	assert.InEpsilon(t, 0.15, h.Percentile(-0.1), valueEpsilon)
+	assert.InEpsilon(t, 0.25, h.Percentile(1.1), valueEpsilon)
 }

 // Verifies that Empty() returns true on an empty histogram and false otherwise.
 func TestEmptyHistogram(t *testing.T) {
-	options, err := NewLinearHistogramOptions(1.0, 0.1)
+	options, err := NewLinearHistogramOptions(1.0, 0.1, weightEpsilon)
 	assert.Nil(t, err)
 	h := NewHistogram(options)
 	assert.Nil(t, err)
 	assert.True(t, h.Empty())
-	h.AddSample(0.1, 1.0) // Add a sample.
+	h.AddSample(0.1, weightEpsilon*2.5) // Sample weight = epsilon * 2.5.
 	assert.False(t, h.Empty())
-	h.AddSample(0.1, -0.5) // Remove part of a sample.
+	h.SubtractSample(0.1, weightEpsilon) // Sample weight = epsilon * 1.5.
 	assert.False(t, h.Empty())
-	h.AddSample(0.1, -0.5) // Remove the remaining part of the sample.
+	h.SubtractSample(0.1, weightEpsilon) // Sample weight = epsilon * 0.5.
 	assert.True(t, h.Empty())
 }
--- a/vertical-pod-autoscaler/recommender/util/circularbuffer.go
+++ b/vertical-pod-autoscaler/recommender/util/circularbuffer.go
@ -16,10 +16,10 @@ limitations under the License.

 package util

-// CircularBuffer is a queue with a fixed maximum capacity. Elements are
+// FloatSlidingWindow is a buffer with a fixed capacity. Elements are
 // inserted/removed in the FIFO order. Elements are removed from the buffer
-// when it runs out of capacity and a new element is inserted.
-type CircularBuffer interface {
+// only when it runs out of capacity and a new element is inserted.
+type FloatSlidingWindow interface {
 	// Add a value to the end of the queue. On overflow returns true and the
 	// oldest value, which is also removed from the buffer. Otherwise
 	// returns (false, _).
@ -35,29 +35,30 @@ type CircularBuffer interface {
 	Head() *float64
 }

-// NewCircularBuffer returns a new instance of CircularBufferImpl with a given
-// size.
-func NewCircularBuffer(size int) CircularBuffer {
+// NewFloatSlidingWindow returns a new FloatSlidingWindow with a given size.
+// It panics if size is less than 1.
+func NewFloatSlidingWindow(size int) FloatSlidingWindow {
 	if size < 1 {
 		panic("Buffer size must be at least 1")
 	}
-	return &circularBuffer{make([]float64, 0), -1, size, false}
+	return &floatSlidingWindow{make([]float64, 0), -1, size, false}
 }

-type circularBuffer struct {
+type floatSlidingWindow struct {
 	buffer []float64
 	// Index of the most recently added element.
 	head int
 	// Max number of elements.
 	capacity int
-	// The number of elements in the buffer equals capacity.
+	// Whether the buffer is full, i.e. the number of elements in the buffer
+	// equals capacity.
 	isFull bool
 }

 // Head returns a pointer to the most recently added element. The pointer can be
 // used to modify the last element. It is only valid until the next call to
 // Push(). Returns nil if called on an empty buffer.
-func (b *circularBuffer) Head() *float64 {
+func (b *floatSlidingWindow) Head() *float64 {
 	if b.head == -1 {
 		return nil
 	}
@ -66,14 +67,14 @@ func (b *circularBuffer) Head() *float64 {

 // Contents returns the elements in the buffer, ordered by time of insertion
 // (oldest first).
-func (b *circularBuffer) Contents() []float64 {
+func (b *floatSlidingWindow) Contents() []float64 {
 	return append(b.buffer[b.head+1:], b.buffer[:b.head+1]...)
 }

-// Push adds a value to the end of the queue. On overflow returns true and the
-// oldest value, which is also removed from the buffer. Otherwise returns
+// Push adds a value to the end of the window. On overflow returns true and the
+// oldest value, which is also removed from the window. Otherwise returns
 // (false, _).
-func (b *circularBuffer) Push(value float64) (bool, float64) {
+func (b *floatSlidingWindow) Push(value float64) (bool, float64) {
 	b.head++
 	if b.head == b.capacity {
 		b.head = 0
--- a/vertical-pod-autoscaler/recommender/util/circularbuffer_test.go
+++ b/vertical-pod-autoscaler/recommender/util/circularbuffer_test.go
@ -22,8 +22,8 @@ import (
 	"github.com/stretchr/testify/assert"
 )

-func TestCircularBuffer(t *testing.T) {
-	b := NewCircularBuffer(3)
+func TestFloatSlidingWindow(t *testing.T) {
+	b := NewFloatSlidingWindow(3)
 	overflow, discarded := b.Push(1.0)
 	assert.False(t, overflow)
 	assert.Equal(t, 1.0, *b.Head())