* Add Histogram.RemoveSample().

* Add HistogramOptions.Epsilon() (bucket weight precision).
* Return the middle of the bucket, not the bucket start in Percentile().
* Rename CircularBuffer to SlidingWindow.
* Fix field names and comments.
This commit is contained in:
kgrygiel 2017-04-19 17:33:14 +02:00
parent b1a7703ab1
commit 1e90342183
6 changed files with 125 additions and 82 deletions

View File

@ -25,11 +25,13 @@ type Histogram interface {
// If the histogram is empty, Percentile() returns 0.0.
Percentile(percentile float64) float64
// Add a sample with a given value and weight. A sample can have
// negative weight, as long as the total weight of samples with the
// given value is not negative.
// Add a sample with a given value and weight.
AddSample(value float64, weight float64)
// Remove a sample with a given value and weight. Note that the total
// weight of samples with a given value cannot be negative.
SubtractSample(value float64, weight float64)
// Returns true if the histogram is empty.
Empty() bool
}
@ -41,18 +43,19 @@ func NewHistogram(options HistogramOptions) Histogram {
options.NumBuckets() - 1, 0}
}
// Simple bucket-based implementation of the Histogram interface. Samples added
// to the histogram are rounded down to the bucket boundary. Each bucket holds
// the total weight of samples that belong to it.
// Simple bucket-based implementation of the Histogram interface. Each bucket
// holds the total weight of samples that belong to it.
// Percentile() returns the middle of the corresponding bucket.
// Resolution (bucket boundaries) of the histogram depends on the options.
// There's no interpolation within buckets (i.e. one sample falls to exactly one
// bucket).
// A bucket is considered empty if its weight is smaller than options.Epsilon().
type histogram struct {
// Bucketing scheme.
options *HistogramOptions
// Weight of samples in each bucket.
buckets []float64
// Weight of samples in all buckets.
// Cumulative weight of samples in each bucket.
bucketWeight []float64
// Total cumulative weight of samples in all buckets.
totalWeight float64
// Index of the first non-empty bucket if there's any. Otherwise index
// of the last bucket.
@ -62,18 +65,36 @@ type histogram struct {
}
func (h *histogram) AddSample(value float64, weight float64) {
if weight < 0.0 {
panic("sample weight must be non-negative")
}
bucket := (*h.options).FindBucket(value)
if h.buckets[bucket]+weight <= 0.0 {
h.clearBucket(bucket)
} else {
h.buckets[bucket] += weight
h.totalWeight += weight
if bucket < h.minBucket {
h.minBucket = bucket
}
if bucket > h.maxBucket {
h.maxBucket = bucket
}
h.bucketWeight[bucket] += weight
h.totalWeight += weight
if bucket < h.minBucket {
h.minBucket = bucket
}
if bucket > h.maxBucket {
h.maxBucket = bucket
}
}
// SubtractSample removes weight from the bucket that value falls into.
//
// It panics if weight is negative. If removing the requested weight would
// leave the bucket with less than Epsilon() (which includes any request larger
// than what the bucket holds), the bucket's entire weight is removed instead.
// The same amount is deducted from the total weight. Finally minBucket is
// advanced and maxBucket is moved back past buckets whose weight is below
// Epsilon(), stopping at the last and the first bucket respectively.
func (h *histogram) SubtractSample(value float64, weight float64) {
	if weight < 0.0 {
		panic("sample weight must be non-negative")
	}
	opts := *h.options
	idx := opts.FindBucket(value)
	eps := opts.Epsilon()
	// Clamp the removal to the whole bucket when the remainder would be
	// indistinguishable from empty.
	if held := h.bucketWeight[idx]; held-eps < weight {
		weight = held
	}
	h.totalWeight -= weight
	h.bucketWeight[idx] -= weight

	// Re-tighten the [minBucket, maxBucket] range around the buckets that
	// still hold at least eps of weight.
	last := opts.NumBuckets() - 1
	for h.bucketWeight[h.minBucket] < eps && h.minBucket < last {
		h.minBucket++
	}
	for h.bucketWeight[h.maxBucket] < eps && h.maxBucket > 0 {
		h.maxBucket--
	}
}
@ -85,26 +106,21 @@ func (h *histogram) Percentile(percentile float64) float64 {
threshold := percentile * h.totalWeight
bucket := h.minBucket
for ; bucket < h.maxBucket; bucket++ {
partialSum += h.buckets[bucket]
partialSum += h.bucketWeight[bucket]
if partialSum >= threshold {
break
}
}
return (*h.options).GetBucketStart(bucket)
bucketStart := (*h.options).GetBucketStart(bucket)
if bucket < (*h.options).NumBuckets()-1 {
// Return the middle of the bucket.
nextBucketStart := (*h.options).GetBucketStart(bucket + 1)
return (bucketStart + nextBucketStart) / 2.0
}
// For the last bucket return the bucket start.
return bucketStart
}
func (h *histogram) Empty() bool {
return h.totalWeight == 0.0
}
func (h *histogram) clearBucket(bucket int) {
h.totalWeight -= h.buckets[bucket]
h.buckets[bucket] = 0.0
lastBucket := (*h.options).NumBuckets() - 1
for h.buckets[h.minBucket] == 0.0 && h.minBucket < lastBucket {
h.minBucket++
}
for h.buckets[h.maxBucket] == 0.0 && h.maxBucket > 0 {
h.maxBucket--
}
return h.bucketWeight[h.minBucket] < (*h.options).Epsilon()
}

View File

@ -23,6 +23,7 @@ import (
// HistogramOptions define the number and size of buckets of a histogram.
type HistogramOptions interface {
// Returns the number of buckets in the histogram.
NumBuckets() int
// Returns the index of the bucket to which the given value falls.
// If the value is outside of the range covered by the histogram, it
@ -31,44 +32,48 @@ type HistogramOptions interface {
// Returns the start of the bucket with a given index. If the index is
// outside the [0..NumBuckets() - 1] range, the result is undefined.
GetBucketStart(bucket int) float64
// Returns the minimum weight for a bucket to be considered non-empty.
Epsilon() float64
}
// NewLinearHistogramOptions returns HistogramOptions describing a histogram
// with a given number of fixed-size buckets, with the first bucket starting
// at 0.0. Requires maxValue > 0, bucketSize > 0.
// at 0.0. Requires maxValue > 0, bucketSize > 0, epsilon > 0.
func NewLinearHistogramOptions(
maxValue float64, bucketSize float64) (HistogramOptions, error) {
if maxValue <= 0.0 || bucketSize <= 0.0 {
maxValue float64, bucketSize float64, epsilon float64) (HistogramOptions, error) {
if maxValue <= 0.0 || bucketSize <= 0.0 || epsilon <= 0.0 {
return nil, errors.New("maxValue and bucketSize must both be positive")
}
numBuckets := int(math.Ceil(maxValue / bucketSize))
return &linearHistogramOptions{numBuckets, bucketSize}, nil
return &linearHistogramOptions{numBuckets, bucketSize, epsilon}, nil
}
// NewExponentialHistogramOptions returns HistogramOptions describing a
// histogram with exponentially growing bucket boundaries. The first bucket
// covers the range [0..firstBucketSize). Consecutive buckets are of the form
// [x(n)..x(n) * ratio) for n = 1 .. numBuckets - 1.
// Requires maxValue > 0, firstBucketSize > 0, ratio > 1.
// Requires maxValue > 0, firstBucketSize > 0, ratio > 1, epsilon > 0.
func NewExponentialHistogramOptions(
maxValue float64, firstBucketSize float64, ratio float64) (HistogramOptions, error) {
if maxValue <= 0.0 || firstBucketSize <= 0.0 || ratio <= 1.0 {
maxValue float64, firstBucketSize float64, ratio float64, epsilon float64) (HistogramOptions, error) {
if maxValue <= 0.0 || firstBucketSize <= 0.0 || ratio <= 1.0 || epsilon <= 0.0 {
return nil, errors.New(
"maxValue and firstBucketSize must be > 0.0, ratio must be > 1.0")
"maxValue, firstBucketSize and epsilon must be > 0.0, ratio must be > 1.0")
}
numBuckets := int(math.Ceil(math.Log(maxValue/firstBucketSize)/math.Log(ratio))) + 1
return &exponentialHistogramOptions{numBuckets, firstBucketSize, ratio}, nil
return &exponentialHistogramOptions{numBuckets, firstBucketSize, ratio, epsilon}, nil
}
// linearHistogramOptions is the HistogramOptions implementation returned by
// NewLinearHistogramOptions. It describes fixed-size buckets.
type linearHistogramOptions struct {
// Number of buckets; NewLinearHistogramOptions computes it as
// ceil(maxValue / bucketSize).
numBuckets int
// Size of each bucket; GetBucketStart(i) is i * bucketSize.
bucketSize float64
// Value reported by Epsilon().
epsilon float64
}
// exponentialHistogramOptions is the HistogramOptions implementation returned
// by NewExponentialHistogramOptions. It describes buckets whose boundaries
// grow exponentially.
type exponentialHistogramOptions struct {
// Number of buckets; NewExponentialHistogramOptions computes it as
// ceil(log(maxValue / firstBucketSize) / log(ratio)) + 1.
numBuckets int
// Size of the first bucket, which covers [0..firstBucketSize).
firstBucketSize float64
// Growth factor between the starts of consecutive buckets after the first.
ratio float64
// Value reported by Epsilon().
epsilon float64
}
func (o *linearHistogramOptions) NumBuckets() int {
@ -90,6 +95,10 @@ func (o *linearHistogramOptions) GetBucketStart(bucket int) float64 {
return float64(bucket) * o.bucketSize
}
// Epsilon returns the minimum weight for a bucket to be considered non-empty,
// as supplied to NewLinearHistogramOptions.
func (o *linearHistogramOptions) Epsilon() float64 {
return o.epsilon
}
// NumBuckets returns the number of buckets in the histogram.
func (o *exponentialHistogramOptions) NumBuckets() int {
return o.numBuckets
}
@ -111,3 +120,7 @@ func (o *exponentialHistogramOptions) GetBucketStart(bucket int) float64 {
}
return o.firstBucketSize * math.Pow(o.ratio, float64(bucket-1))
}
// Epsilon returns the minimum weight for a bucket to be considered non-empty,
// as supplied to NewExponentialHistogramOptions.
func (o *exponentialHistogramOptions) Epsilon() float64 {
return o.epsilon
}

View File

@ -22,10 +22,15 @@ import (
"github.com/stretchr/testify/assert"
)
var (
// Bucket weight precision passed to the histogram options constructors
// in the tests below.
epsilon = 0.001
)
// Test all methods of LinearHistogramOptions using a sample bucketing scheme.
func TestLinearHistogramOptions(t *testing.T) {
o, err := NewLinearHistogramOptions(5.0, 0.3)
o, err := NewLinearHistogramOptions(5.0, 0.3, epsilon)
assert.Nil(t, err)
assert.Equal(t, epsilon, o.Epsilon())
assert.Equal(t, 17, o.NumBuckets())
assert.Equal(t, 0.0, o.GetBucketStart(0))
@ -39,8 +44,9 @@ func TestLinearHistogramOptions(t *testing.T) {
// Test all methods of ExponentialHistogramOptions using a sample bucketing scheme.
func TestExponentialHistogramOptions(t *testing.T) {
o, err := NewExponentialHistogramOptions(100.0, 10.0, 2.0)
o, err := NewExponentialHistogramOptions(100.0, 10.0, 2.0, epsilon)
assert.Nil(t, err)
assert.Equal(t, epsilon, o.Epsilon())
assert.Equal(t, 5, o.NumBuckets())
assert.Equal(t, 0.0, o.GetBucketStart(0))

View File

@ -22,10 +22,17 @@ import (
"github.com/stretchr/testify/assert"
)
var (
// Minimum precision of histogram values (relative).
valueEpsilon = 1e-15
// Minimum precision of histogram weights (absolute).
weightEpsilon = 1e-15
)
// Verifies that Percentile() returns 0.0 when called on an empty histogram for
// any percentile.
func TestPercentilesEmptyHistogram(t *testing.T) {
options, err := NewLinearHistogramOptions(1.0, 0.1)
options, err := NewLinearHistogramOptions(1.0, 0.1, weightEpsilon)
assert.Nil(t, err)
h := NewHistogram(options)
for p := -0.5; p <= 1.5; p += 0.5 {
@ -36,51 +43,51 @@ func TestPercentilesEmptyHistogram(t *testing.T) {
// Verifies that Percentile() returns the correct values of selected
// percentiles on the following histogram: { 1: 1, 2: 2, 3: 3, 4: 4 }.
func TestPercentiles(t *testing.T) {
options, err := NewLinearHistogramOptions(10.0, 1.0)
options, err := NewLinearHistogramOptions(10.0, 1.0, weightEpsilon)
assert.Nil(t, err)
h := NewHistogram(options)
for i := 1; i <= 4; i++ {
h.AddSample(float64(i), float64(i))
}
assert.Equal(t, 1.0, h.Percentile(0.0))
assert.Equal(t, 1.0, h.Percentile(0.1))
assert.Equal(t, 2.0, h.Percentile(0.2))
assert.Equal(t, 2.0, h.Percentile(0.3))
assert.Equal(t, 3.0, h.Percentile(0.4))
assert.Equal(t, 3.0, h.Percentile(0.5))
assert.Equal(t, 3.0, h.Percentile(0.6))
assert.Equal(t, 4.0, h.Percentile(0.7))
assert.Equal(t, 4.0, h.Percentile(0.8))
assert.Equal(t, 4.0, h.Percentile(0.9))
assert.Equal(t, 4.0, h.Percentile(1.0))
assert.InEpsilon(t, 1.5, h.Percentile(0.0), valueEpsilon)
assert.InEpsilon(t, 1.5, h.Percentile(0.1), valueEpsilon)
assert.InEpsilon(t, 2.5, h.Percentile(0.2), valueEpsilon)
assert.InEpsilon(t, 2.5, h.Percentile(0.3), valueEpsilon)
assert.InEpsilon(t, 3.5, h.Percentile(0.4), valueEpsilon)
assert.InEpsilon(t, 3.5, h.Percentile(0.5), valueEpsilon)
assert.InEpsilon(t, 3.5, h.Percentile(0.6), valueEpsilon)
assert.InEpsilon(t, 4.5, h.Percentile(0.7), valueEpsilon)
assert.InEpsilon(t, 4.5, h.Percentile(0.8), valueEpsilon)
assert.InEpsilon(t, 4.5, h.Percentile(0.9), valueEpsilon)
assert.InEpsilon(t, 4.5, h.Percentile(1.0), valueEpsilon)
}
// Verifies that querying percentile < 0.0 returns the minimum value in the
// histogram, while querying percentile > 1.0 returns the maximum of the
// histogram.
func TestPercentileOutOfBounds(t *testing.T) {
options, err := NewLinearHistogramOptions(1.0, 0.1)
options, err := NewLinearHistogramOptions(1.0, 0.1, weightEpsilon)
assert.Nil(t, err)
h := NewHistogram(options)
assert.Nil(t, err)
h.AddSample(0.1, 0.1)
h.AddSample(0.2, 0.2)
assert.Equal(t, 0.1, h.Percentile(-0.1))
assert.Equal(t, 0.2, h.Percentile(1.1))
assert.InEpsilon(t, 0.15, h.Percentile(-0.1), valueEpsilon)
assert.InEpsilon(t, 0.25, h.Percentile(1.1), valueEpsilon)
}
// Verifies that Empty() returns true on an empty histogram and false otherwise.
func TestEmptyHistogram(t *testing.T) {
options, err := NewLinearHistogramOptions(1.0, 0.1)
options, err := NewLinearHistogramOptions(1.0, 0.1, weightEpsilon)
assert.Nil(t, err)
h := NewHistogram(options)
assert.Nil(t, err)
assert.True(t, h.Empty())
h.AddSample(0.1, 1.0) // Add a sample.
h.AddSample(0.1, weightEpsilon*2.5) // Sample weight = epsilon * 2.5.
assert.False(t, h.Empty())
h.AddSample(0.1, -0.5) // Remove part of a sample.
h.SubtractSample(0.1, weightEpsilon) // Sample weight = epsilon * 1.5.
assert.False(t, h.Empty())
h.AddSample(0.1, -0.5) // Remove the remaining part of the sample.
h.SubtractSample(0.1, weightEpsilon) // Sample weight = epsilon * 0.5.
assert.True(t, h.Empty())
}

View File

@ -16,10 +16,10 @@ limitations under the License.
package util
// CircularBuffer is a queue with a fixed maximum capacity. Elements are
// FloatSlidingWindow is a buffer with a fixed capacity. Elements are
// inserted/removed in the FIFO order. Elements are removed from the buffer
// when it runs out of capacity and a new element is inserted.
type CircularBuffer interface {
// only when it runs out of capacity and a new element is inserted.
type FloatSlidingWindow interface {
// Add a value to the end of the queue. On overflow returns true and the
// oldest value, which is also removed from the buffer. Otherwise
// returns (false, _).
@ -35,29 +35,30 @@ type CircularBuffer interface {
Head() *float64
}
// NewCircularBuffer returns a new instance of CircularBufferImpl with a given
// size.
func NewCircularBuffer(size int) CircularBuffer {
// NewFloatSlidingWindow returns a new FloatSlidingWindow with the given
// capacity. It panics if size is less than 1.
func NewFloatSlidingWindow(size int) FloatSlidingWindow {
if size < 1 {
panic("Buffer size must be at least 1")
}
return &circularBuffer{make([]float64, 0), -1, size, false}
return &floatSlidingWindow{make([]float64, 0), -1, size, false}
}
type circularBuffer struct {
type floatSlidingWindow struct {
buffer []float64
// Index of the most recently added element.
head int
// Max number of elements.
capacity int
// The number of elements in the buffer equals capacity.
// Whether the buffer is full, i.e. the number of elements in the buffer
// equals capacity.
isFull bool
}
// Head returns a pointer to the most recently added element. The pointer can be
// used to modify the last element. It is only valid until the next call to
// Push(). Returns nil if called on an empty buffer.
func (b *circularBuffer) Head() *float64 {
func (b *floatSlidingWindow) Head() *float64 {
if b.head == -1 {
return nil
}
@ -66,14 +67,14 @@ func (b *circularBuffer) Head() *float64 {
// Contents returns the elements in the buffer, ordered by time of insertion
// (oldest first).
func (b *circularBuffer) Contents() []float64 {
func (b *floatSlidingWindow) Contents() []float64 {
return append(b.buffer[b.head+1:], b.buffer[:b.head+1]...)
}
// Push adds a value to the end of the queue. On overflow returns true and the
// oldest value, which is also removed from the buffer. Otherwise returns
// Push adds a value to the end of the window. On overflow returns true and the
// oldest value, which is also removed from the window. Otherwise returns
// (false, _).
func (b *circularBuffer) Push(value float64) (bool, float64) {
func (b *floatSlidingWindow) Push(value float64) (bool, float64) {
b.head++
if b.head == b.capacity {
b.head = 0

View File

@ -22,8 +22,8 @@ import (
"github.com/stretchr/testify/assert"
)
func TestCircularBuffer(t *testing.T) {
b := NewCircularBuffer(3)
func TestFloatSlidingWindow(t *testing.T) {
b := NewFloatSlidingWindow(3)
overflow, discarded := b.Push(1.0)
assert.False(t, overflow)
assert.Equal(t, 1.0, *b.Head())