Change the histogram options such that the largest bucket start is always >= maxValue.

This commit is contained in:
kgrygiel 2017-04-20 15:03:00 +02:00
parent 1e90342183
commit 61d2423dca
4 changed files with 31 additions and 21 deletions

View File

@ -33,7 +33,7 @@ type Histogram interface {
SubtractSample(value float64, weight float64)
// Returns true if the histogram is empty.
Empty() bool
IsEmpty() bool
}
// NewHistogram returns a new Histogram instance using given options.
@ -99,7 +99,7 @@ func (h *histogram) SubtractSample(value float64, weight float64) {
}
func (h *histogram) Percentile(percentile float64) float64 {
if h.Empty() {
if h.IsEmpty() {
return 0.0
}
partialSum := 0.0
@ -113,14 +113,15 @@ func (h *histogram) Percentile(percentile float64) float64 {
}
bucketStart := (*h.options).GetBucketStart(bucket)
if bucket < (*h.options).NumBuckets()-1 {
// Return the middle of the bucket.
nextBucketStart := (*h.options).GetBucketStart(bucket + 1)
return (bucketStart + nextBucketStart) / 2.0
// Return the middle point between the bucket boundaries.
bucketEnd := (*h.options).GetBucketStart(bucket + 1)
return (bucketStart + bucketEnd) / 2.0
}
// For the last bucket return the bucket start.
// Return the start of the last bucket (note that the last bucket
// doesn't have an upper bound).
return bucketStart
}
func (h *histogram) Empty() bool {
func (h *histogram) IsEmpty() bool {
return h.bucketWeight[h.minBucket] < (*h.options).Epsilon()
}

View File

@ -37,14 +37,15 @@ type HistogramOptions interface {
}
// NewLinearHistogramOptions returns HistogramOptions describing a histogram
// with a given number of fixed-size buckets, with the first bucket starting
// at 0.0. Requires maxValue > 0, bucketSize > 0, epsilon > 0.
// with a given number of fixed-size buckets, with the first bucket start at 0.0
// and the last bucket start larger or equal to maxValue.
// Requires maxValue > 0, bucketSize > 0, epsilon > 0.
func NewLinearHistogramOptions(
maxValue float64, bucketSize float64, epsilon float64) (HistogramOptions, error) {
if maxValue <= 0.0 || bucketSize <= 0.0 || epsilon <= 0.0 {
return nil, errors.New("maxValue and bucketSize must both be positive")
}
numBuckets := int(math.Ceil(maxValue / bucketSize))
numBuckets := int(math.Ceil(maxValue/bucketSize)) + 1
return &linearHistogramOptions{numBuckets, bucketSize, epsilon}, nil
}
@ -52,6 +53,7 @@ func NewLinearHistogramOptions(
// histogram with exponentially growing bucket boundaries. The first bucket
// covers the range [0..firstBucketSize). Consecutive buckets are of the form
// [x(n)..x(n) * ratio) for n = 1 .. numBuckets - 1.
// The last bucket start is larger or equal to maxValue.
// Requires maxValue > 0, firstBucketSize > 0, ratio > 1, epsilon > 0.
func NewExponentialHistogramOptions(
maxValue float64, firstBucketSize float64, ratio float64, epsilon float64) (HistogramOptions, error) {
@ -59,7 +61,7 @@ func NewExponentialHistogramOptions(
return nil, errors.New(
"maxValue, firstBucketSize and epsilon must be > 0.0, ratio must be > 1.0")
}
numBuckets := int(math.Ceil(math.Log(maxValue/firstBucketSize)/math.Log(ratio))) + 1
numBuckets := int(math.Ceil(math.Log(maxValue/firstBucketSize)/math.Log(ratio))) + 2
return &exponentialHistogramOptions{numBuckets, firstBucketSize, ratio, epsilon}, nil
}

View File

@ -31,15 +31,15 @@ func TestLinearHistogramOptions(t *testing.T) {
o, err := NewLinearHistogramOptions(5.0, 0.3, epsilon)
assert.Nil(t, err)
assert.Equal(t, epsilon, o.Epsilon())
assert.Equal(t, 17, o.NumBuckets())
assert.Equal(t, 18, o.NumBuckets())
assert.Equal(t, 0.0, o.GetBucketStart(0))
assert.Equal(t, 4.8, o.GetBucketStart(16))
assert.Equal(t, 5.1, o.GetBucketStart(17))
assert.Equal(t, 0, o.FindBucket(-1.0))
assert.Equal(t, 0, o.FindBucket(0.0))
assert.Equal(t, 4, o.FindBucket(1.3))
assert.Equal(t, 16, o.FindBucket(100.0))
assert.Equal(t, 17, o.FindBucket(100.0))
}
// Test all methods of ExponentialHistogramOptions using a sample bucketing scheme.
@ -47,17 +47,18 @@ func TestExponentialHistogramOptions(t *testing.T) {
o, err := NewExponentialHistogramOptions(100.0, 10.0, 2.0, epsilon)
assert.Nil(t, err)
assert.Equal(t, epsilon, o.Epsilon())
assert.Equal(t, 5, o.NumBuckets())
assert.Equal(t, 6, o.NumBuckets())
assert.Equal(t, 0.0, o.GetBucketStart(0))
assert.Equal(t, 10.0, o.GetBucketStart(1))
assert.Equal(t, 20.0, o.GetBucketStart(2))
assert.Equal(t, 40.0, o.GetBucketStart(3))
assert.Equal(t, 80.0, o.GetBucketStart(4))
assert.Equal(t, 160.0, o.GetBucketStart(5))
assert.Equal(t, 0, o.FindBucket(-1.0))
assert.Equal(t, 0, o.FindBucket(9.99))
assert.Equal(t, 1, o.FindBucket(10.0))
assert.Equal(t, 2, o.FindBucket(20.0))
assert.Equal(t, 4, o.FindBucket(100.0))
assert.Equal(t, 5, o.FindBucket(200.0))
}

View File

@ -75,19 +75,25 @@ func TestPercentileOutOfBounds(t *testing.T) {
assert.InEpsilon(t, 0.15, h.Percentile(-0.1), valueEpsilon)
assert.InEpsilon(t, 0.25, h.Percentile(1.1), valueEpsilon)
// Fill the boundary buckets.
h.AddSample(0.0, 0.1)
h.AddSample(1.0, 0.2)
assert.InEpsilon(t, 0.05, h.Percentile(-0.1), valueEpsilon)
assert.InEpsilon(t, 1.0, h.Percentile(1.1), valueEpsilon)
}
// Verifies that Empty() returns true on an empty histogram and false otherwise.
// Verifies that IsEmpty() returns true on an empty histogram and false otherwise.
func TestEmptyHistogram(t *testing.T) {
options, err := NewLinearHistogramOptions(1.0, 0.1, weightEpsilon)
assert.Nil(t, err)
h := NewHistogram(options)
assert.Nil(t, err)
assert.True(t, h.Empty())
assert.True(t, h.IsEmpty())
h.AddSample(0.1, weightEpsilon*2.5) // Sample weight = epsilon * 2.5.
assert.False(t, h.Empty())
assert.False(t, h.IsEmpty())
h.SubtractSample(0.1, weightEpsilon) // Sample weight = epsilon * 1.5.
assert.False(t, h.Empty())
assert.False(t, h.IsEmpty())
h.SubtractSample(0.1, weightEpsilon) // Sample weight = epsilon * 0.5.
assert.True(t, h.Empty())
assert.True(t, h.IsEmpty())
}