diff options
Diffstat (limited to 'vendor/github.com/codahale/hdrhistogram/hdr.go')
-rw-r--r-- | vendor/github.com/codahale/hdrhistogram/hdr.go | 564 |
1 files changed, 564 insertions, 0 deletions
diff --git a/vendor/github.com/codahale/hdrhistogram/hdr.go b/vendor/github.com/codahale/hdrhistogram/hdr.go new file mode 100644 index 000000000..c97842926 --- /dev/null +++ b/vendor/github.com/codahale/hdrhistogram/hdr.go @@ -0,0 +1,564 @@ +// Package hdrhistogram provides an implementation of Gil Tene's HDR Histogram +// data structure. The HDR Histogram allows for fast and accurate analysis of +// the extreme ranges of data with non-normal distributions, like latency. +package hdrhistogram + +import ( + "fmt" + "math" +) + +// A Bracket is a part of a cumulative distribution. +type Bracket struct { + Quantile float64 + Count, ValueAt int64 +} + +// A Snapshot is an exported view of a Histogram, useful for serializing them. +// A Histogram can be constructed from it by passing it to Import. +type Snapshot struct { + LowestTrackableValue int64 + HighestTrackableValue int64 + SignificantFigures int64 + Counts []int64 +} + +// A Histogram is a lossy data structure used to record the distribution of +// non-normally distributed data (like latency) with a high degree of accuracy +// and a bounded degree of precision. +type Histogram struct { + lowestTrackableValue int64 + highestTrackableValue int64 + unitMagnitude int64 + significantFigures int64 + subBucketHalfCountMagnitude int32 + subBucketHalfCount int32 + subBucketMask int64 + subBucketCount int32 + bucketCount int32 + countsLen int32 + totalCount int64 + counts []int64 +} + +// New returns a new Histogram instance capable of tracking values in the given +// range and with the given amount of precision. +func New(minValue, maxValue int64, sigfigs int) *Histogram { + if sigfigs < 1 || 5 < sigfigs { + panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs)) + } + + largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs) + subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution)))) + + subBucketHalfCountMagnitude := subBucketCountMagnitude + if subBucketHalfCountMagnitude < 1 { + subBucketHalfCountMagnitude = 1 + } + subBucketHalfCountMagnitude-- + + unitMagnitude := int32(math.Floor(math.Log2(float64(minValue)))) + if unitMagnitude < 0 { + unitMagnitude = 0 + } + + subBucketCount := int32(math.Pow(2, float64(subBucketHalfCountMagnitude)+1)) + + subBucketHalfCount := subBucketCount / 2 + subBucketMask := int64(subBucketCount-1) << uint(unitMagnitude) + + // determine exponent range needed to support the trackable value with no + // overflow: + smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude) + bucketsNeeded := int32(1) + for smallestUntrackableValue < maxValue { + smallestUntrackableValue <<= 1 + bucketsNeeded++ + } + + bucketCount := bucketsNeeded + countsLen := (bucketCount + 1) * (subBucketCount / 2) + + return &Histogram{ + lowestTrackableValue: minValue, + highestTrackableValue: maxValue, + unitMagnitude: int64(unitMagnitude), + significantFigures: int64(sigfigs), + subBucketHalfCountMagnitude: subBucketHalfCountMagnitude, + subBucketHalfCount: subBucketHalfCount, + subBucketMask: subBucketMask, + subBucketCount: subBucketCount, + bucketCount: bucketCount, + countsLen: countsLen, + totalCount: 0, + counts: make([]int64, countsLen), + } +} + +// ByteSize returns an estimate of the amount of memory allocated to the +// histogram in bytes. +// +// N.B.: This does not take into account the overhead for slices, which are +// small, constant, and specific to the compiler version. +func (h *Histogram) ByteSize() int { + return 6*8 + 5*4 + len(h.counts)*8 +} + +// Merge merges the data stored in the given histogram with the receiver, +// returning the number of recorded values which had to be dropped. +func (h *Histogram) Merge(from *Histogram) (dropped int64) { + i := from.rIterator() + for i.next() { + v := i.valueFromIdx + c := i.countAtIdx + + if h.RecordValues(v, c) != nil { + dropped += c + } + } + + return +} + +// TotalCount returns total number of values recorded. +func (h *Histogram) TotalCount() int64 { + return h.totalCount +} + +// Max returns the approximate maximum recorded value. +func (h *Histogram) Max() int64 { + var max int64 + i := h.iterator() + for i.next() { + if i.countAtIdx != 0 { + max = i.highestEquivalentValue + } + } + return h.highestEquivalentValue(max) +} + +// Min returns the approximate minimum recorded value. +func (h *Histogram) Min() int64 { + var min int64 + i := h.iterator() + for i.next() { + if i.countAtIdx != 0 && min == 0 { + min = i.highestEquivalentValue + break + } + } + return h.lowestEquivalentValue(min) +} + +// Mean returns the approximate arithmetic mean of the recorded values. +func (h *Histogram) Mean() float64 { + if h.totalCount == 0 { + return 0 + } + var total int64 + i := h.iterator() + for i.next() { + if i.countAtIdx != 0 { + total += i.countAtIdx * h.medianEquivalentValue(i.valueFromIdx) + } + } + return float64(total) / float64(h.totalCount) +} + +// StdDev returns the approximate standard deviation of the recorded values. +func (h *Histogram) StdDev() float64 { + if h.totalCount == 0 { + return 0 + } + + mean := h.Mean() + geometricDevTotal := 0.0 + + i := h.iterator() + for i.next() { + if i.countAtIdx != 0 { + dev := float64(h.medianEquivalentValue(i.valueFromIdx)) - mean + geometricDevTotal += (dev * dev) * float64(i.countAtIdx) + } + } + + return math.Sqrt(geometricDevTotal / float64(h.totalCount)) +} + +// Reset deletes all recorded values and restores the histogram to its original +// state. +func (h *Histogram) Reset() { + h.totalCount = 0 + for i := range h.counts { + h.counts[i] = 0 + } +} + +// RecordValue records the given value, returning an error if the value is out +// of range. +func (h *Histogram) RecordValue(v int64) error { + return h.RecordValues(v, 1) +} + +// RecordCorrectedValue records the given value, correcting for stalls in the +// recording process. This only works for processes which are recording values +// at an expected interval (e.g., doing jitter analysis). Processes which are +// recording ad-hoc values (e.g., latency for incoming requests) can't take +// advantage of this. +func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error { + if err := h.RecordValue(v); err != nil { + return err + } + + if expectedInterval <= 0 || v <= expectedInterval { + return nil + } + + missingValue := v - expectedInterval + for missingValue >= expectedInterval { + if err := h.RecordValue(missingValue); err != nil { + return err + } + missingValue -= expectedInterval + } + + return nil +} + +// RecordValues records n occurrences of the given value, returning an error if +// the value is out of range. +func (h *Histogram) RecordValues(v, n int64) error { + idx := h.countsIndexFor(v) + if idx < 0 || int(h.countsLen) <= idx { + return fmt.Errorf("value %d is too large to be recorded", v) + } + h.counts[idx] += n + h.totalCount += n + + return nil +} + +// ValueAtQuantile returns the recorded value at the given quantile (0..100). +func (h *Histogram) ValueAtQuantile(q float64) int64 { + if q > 100 { + q = 100 + } + + total := int64(0) + countAtPercentile := int64(((q / 100) * float64(h.totalCount)) + 0.5) + + i := h.iterator() + for i.next() { + total += i.countAtIdx + if total >= countAtPercentile { + return h.highestEquivalentValue(i.valueFromIdx) + } + } + + return 0 +} + +// CumulativeDistribution returns an ordered list of brackets of the +// distribution of recorded values. +func (h *Histogram) CumulativeDistribution() []Bracket { + var result []Bracket + + i := h.pIterator(1) + for i.next() { + result = append(result, Bracket{ + Quantile: i.percentile, + Count: i.countToIdx, + ValueAt: i.highestEquivalentValue, + }) + } + + return result +} + +// SignificantFigures returns the significant figures used to create the +// histogram +func (h *Histogram) SignificantFigures() int64 { + return h.significantFigures +} + +// LowestTrackableValue returns the lower bound on values that will be added +// to the histogram +func (h *Histogram) LowestTrackableValue() int64 { + return h.lowestTrackableValue +} + +// HighestTrackableValue returns the upper bound on values that will be added +// to the histogram +func (h *Histogram) HighestTrackableValue() int64 { + return h.highestTrackableValue +} + +// Histogram bar for plotting +type Bar struct { + From, To, Count int64 +} + +// Pretty print as csv for easy plotting +func (b Bar) String() string { + return fmt.Sprintf("%v, %v, %v\n", b.From, b.To, b.Count) +} + +// Distribution returns an ordered list of bars of the +// distribution of recorded values, counts can be normalized to a probability +func (h *Histogram) Distribution() (result []Bar) { + i := h.iterator() + for i.next() { + result = append(result, Bar{ + Count: i.countAtIdx, + From: h.lowestEquivalentValue(i.valueFromIdx), + To: i.highestEquivalentValue, + }) + } + + return result +} + +// Equals returns true if the two Histograms are equivalent, false if not. +func (h *Histogram) Equals(other *Histogram) bool { + switch { + case + h.lowestTrackableValue != other.lowestTrackableValue, + h.highestTrackableValue != other.highestTrackableValue, + h.unitMagnitude != other.unitMagnitude, + h.significantFigures != other.significantFigures, + h.subBucketHalfCountMagnitude != other.subBucketHalfCountMagnitude, + h.subBucketHalfCount != other.subBucketHalfCount, + h.subBucketMask != other.subBucketMask, + h.subBucketCount != other.subBucketCount, + h.bucketCount != other.bucketCount, + h.countsLen != other.countsLen, + h.totalCount != other.totalCount: + return false + default: + for i, c := range h.counts { + if c != other.counts[i] { + return false + } + } + } + return true +} + +// Export returns a snapshot view of the Histogram. This can be later passed to +// Import to construct a new Histogram with the same state. +func (h *Histogram) Export() *Snapshot { + return &Snapshot{ + LowestTrackableValue: h.lowestTrackableValue, + HighestTrackableValue: h.highestTrackableValue, + SignificantFigures: h.significantFigures, + Counts: append([]int64(nil), h.counts...), // copy + } +} + +// Import returns a new Histogram populated from the Snapshot data (which the +// caller must stop accessing). +func Import(s *Snapshot) *Histogram { + h := New(s.LowestTrackableValue, s.HighestTrackableValue, int(s.SignificantFigures)) + h.counts = s.Counts + totalCount := int64(0) + for i := int32(0); i < h.countsLen; i++ { + countAtIndex := h.counts[i] + if countAtIndex > 0 { + totalCount += countAtIndex + } + } + h.totalCount = totalCount + return h +} + +func (h *Histogram) iterator() *iterator { + return &iterator{ + h: h, + subBucketIdx: -1, + } +} + +func (h *Histogram) rIterator() *rIterator { + return &rIterator{ + iterator: iterator{ + h: h, + subBucketIdx: -1, + }, + } +} + +func (h *Histogram) pIterator(ticksPerHalfDistance int32) *pIterator { + return &pIterator{ + iterator: iterator{ + h: h, + subBucketIdx: -1, + }, + ticksPerHalfDistance: ticksPerHalfDistance, + } +} + +func (h *Histogram) sizeOfEquivalentValueRange(v int64) int64 { + bucketIdx := h.getBucketIndex(v) + subBucketIdx := h.getSubBucketIdx(v, bucketIdx) + adjustedBucket := bucketIdx + if subBucketIdx >= h.subBucketCount { + adjustedBucket++ + } + return int64(1) << uint(h.unitMagnitude+int64(adjustedBucket)) +} + +func (h *Histogram) valueFromIndex(bucketIdx, subBucketIdx int32) int64 { + return int64(subBucketIdx) << uint(int64(bucketIdx)+h.unitMagnitude) +} + +func (h *Histogram) lowestEquivalentValue(v int64) int64 { + bucketIdx := h.getBucketIndex(v) + subBucketIdx := h.getSubBucketIdx(v, bucketIdx) + return h.valueFromIndex(bucketIdx, subBucketIdx) +} + +func (h *Histogram) nextNonEquivalentValue(v int64) int64 { + return h.lowestEquivalentValue(v) + h.sizeOfEquivalentValueRange(v) +} + +func (h *Histogram) highestEquivalentValue(v int64) int64 { + return h.nextNonEquivalentValue(v) - 1 +} + +func (h *Histogram) medianEquivalentValue(v int64) int64 { + return h.lowestEquivalentValue(v) + (h.sizeOfEquivalentValueRange(v) >> 1) +} + +func (h *Histogram) getCountAtIndex(bucketIdx, subBucketIdx int32) int64 { + return h.counts[h.countsIndex(bucketIdx, subBucketIdx)] +} + +func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 { + bucketBaseIdx := (bucketIdx + 1) << uint(h.subBucketHalfCountMagnitude) + offsetInBucket := subBucketIdx - h.subBucketHalfCount + return bucketBaseIdx + offsetInBucket +} + +func (h *Histogram) getBucketIndex(v int64) int32 { + pow2Ceiling := bitLen(v | h.subBucketMask) + return int32(pow2Ceiling - int64(h.unitMagnitude) - + int64(h.subBucketHalfCountMagnitude+1)) +} + +func (h *Histogram) getSubBucketIdx(v int64, idx int32) int32 { + return int32(v >> uint(int64(idx)+int64(h.unitMagnitude))) +} + +func (h *Histogram) countsIndexFor(v int64) int { + bucketIdx := h.getBucketIndex(v) + subBucketIdx := h.getSubBucketIdx(v, bucketIdx) + return int(h.countsIndex(bucketIdx, subBucketIdx)) +} + +type iterator struct { + h *Histogram + bucketIdx, subBucketIdx int32 + countAtIdx, countToIdx, valueFromIdx int64 + highestEquivalentValue int64 +} + +func (i *iterator) next() bool { + if i.countToIdx >= i.h.totalCount { + return false + } + + // increment bucket + i.subBucketIdx++ + if i.subBucketIdx >= i.h.subBucketCount { + i.subBucketIdx = i.h.subBucketHalfCount + i.bucketIdx++ + } + + if i.bucketIdx >= i.h.bucketCount { + return false + } + + i.countAtIdx = i.h.getCountAtIndex(i.bucketIdx, i.subBucketIdx) + i.countToIdx += i.countAtIdx + i.valueFromIdx = i.h.valueFromIndex(i.bucketIdx, i.subBucketIdx) + i.highestEquivalentValue = i.h.highestEquivalentValue(i.valueFromIdx) + + return true +} + +type rIterator struct { + iterator + countAddedThisStep int64 +} + +func (r *rIterator) next() bool { + for r.iterator.next() { + if r.countAtIdx != 0 { + r.countAddedThisStep = r.countAtIdx + return true + } + } + return false +} + +type pIterator struct { + iterator + seenLastValue bool + ticksPerHalfDistance int32 + percentileToIteratorTo float64 + percentile float64 +} + +func (p *pIterator) next() bool { + if !(p.countToIdx < p.h.totalCount) { + if p.seenLastValue { + return false + } + + p.seenLastValue = true + p.percentile = 100 + + return true + } + + if p.subBucketIdx == -1 && !p.iterator.next() { + return false + } + + var done = false + for !done { + currentPercentile := (100.0 * float64(p.countToIdx)) / float64(p.h.totalCount) + if p.countAtIdx != 0 && p.percentileToIteratorTo <= currentPercentile { + p.percentile = p.percentileToIteratorTo + halfDistance := math.Trunc(math.Pow(2, math.Trunc(math.Log2(100.0/(100.0-p.percentileToIteratorTo)))+1)) + percentileReportingTicks := float64(p.ticksPerHalfDistance) * halfDistance + p.percentileToIteratorTo += 100.0 / percentileReportingTicks + return true + } + done = !p.iterator.next() + } + + return true +} + +func bitLen(x int64) (n int64) { + for ; x >= 0x8000; x >>= 16 { + n += 16 + } + if x >= 0x80 { + x >>= 8 + n += 8 + } + if x >= 0x8 { + x >>= 4 + n += 4 + } + if x >= 0x2 { + x >>= 2 + n += 2 + } + if x >= 0x1 { + n++ + } + return +} |