Merge pull request #1000 from letsencrypt/gc-stats-fix

Fix GC pause average mixup and add a few extra GC metrics
This commit is contained in:
bifurcation 2015-10-20 22:29:04 -04:00
commit ec0d19c3e2
2 changed files with 38 additions and 15 deletions

View File

@ -375,25 +375,45 @@ func FailOnError(err error, msg string) {
// ProfileCmd runs forever, sending Go runtime statistics to StatsD.
// Once per second it reads runtime.MemStats and reports the goroutine count,
// several heap figures and garbage-collector metrics through stats. Every
// metric name is prefixed with profileName and sent with a sample rate of 1.0.
//
// NOTE(review): this listing appears to be a rendered diff whose +/- markers
// were stripped, so lines from before and after the change are interleaved
// (memoryStats is declared twice, and two separate GC pause calculations are
// present). Confirm which lines belong to the current version before relying
// on the per-line notes below.
func ProfileCmd(profileName string, stats statsd.Statter) {
// NOTE(review): this declaration is shadowed by the one inside the loop;
// presumably only one of the two exists in any single version of the file.
var memoryStats runtime.MemStats
// prevNumGC is the GC count already accounted for, so that each tick can
// report only the increase since the previous tick.
prevNumGC := int64(0)
// The ticker channel drives one reporting pass per second.
c := time.Tick(1 * time.Second)
for range c {
var memoryStats runtime.MemStats
runtime.ReadMemStats(&memoryStats)
// Gather goroutine count
stats.Gauge(fmt.Sprintf("%s.Gostats.Goroutines", profileName), int64(runtime.NumGoroutine()), 1.0)
// Gather various heap metrics
stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.Alloc", profileName), int64(memoryStats.HeapAlloc), 1.0)
stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.Objects", profileName), int64(memoryStats.HeapObjects), 1.0)
stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.Idle", profileName), int64(memoryStats.HeapIdle), 1.0)
stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.InUse", profileName), int64(memoryStats.HeapInuse), 1.0)
stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.Released", profileName), int64(memoryStats.HeapReleased), 1.0)
// Calculate average and last and convert from nanoseconds to milliseconds
// NOTE(review): this divides PauseTotalNs by len(PauseNs), i.e. by the fixed
// length of the pause buffer rather than by the number of GCs that have run.
// That looks like the "average mixup" named in the commit title, so these
// five lines are presumably the pre-change version - confirm.
gcPauseAvg := (int64(memoryStats.PauseTotalNs) / int64(len(memoryStats.PauseNs))) / 1000000
// (NumGC+255)%256 selects the slot of the most recent pause in the buffer.
lastGC := int64(memoryStats.PauseNs[(memoryStats.NumGC+255)%256]) / 1000000
stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), gcPauseAvg, 1.0)
stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPauseLatency", profileName), lastGC, 1.0)
// Gather various GC related metrics
// The pause metrics are skipped until at least one GC has run, which also
// keeps realBufSize non-zero for the division below.
if memoryStats.NumGC > 0 {
totalRecentGC := uint64(0)
// Average over the number of pauses actually recorded: NumGC while fewer
// than 256 GCs have happened, otherwise 256.
realBufSize := uint32(256)
if memoryStats.NumGC < 256 {
realBufSize = memoryStats.NumGC
}
// Sum every slot of PauseNs; slots not yet written are presumably zero and
// so do not affect the total.
for _, pause := range memoryStats.PauseNs {
totalRecentGC += pause
}
gcPauseAvg := totalRecentGC / uint64(realBufSize)
// (NumGC+255)%256 selects the slot of the most recent pause in the buffer.
lastGC := memoryStats.PauseNs[(memoryStats.NumGC+255)%256]
// Unlike the calculation further up, nothing here divides by 1000000, so
// both values are sent in nanoseconds.
// NOTE(review): confirm the unit the consumer of the timing metric expects.
stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), int64(gcPauseAvg), 1.0)
stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPause", profileName), int64(lastGC), 1.0)
}
stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.NextAt", profileName), int64(memoryStats.NextGC), 1.0)
// Send both a counter and a gauge here so that we can much more easily
// observe the GC rate (versus the raw number of GCs) in graphing tools that
// don't like deltas.
stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.Count", profileName), int64(memoryStats.NumGC), 1.0)
// gcInc is the number of GCs completed since the previous tick; prevNumGC is
// then advanced so that it equals the current NumGC.
gcInc := int64(memoryStats.NumGC) - prevNumGC
stats.Inc(fmt.Sprintf("%s.Gostats.Gc.Rate", profileName), gcInc, 1.0)
prevNumGC += gcInc
}
}

View File

@ -97,16 +97,19 @@ This list is split up into metric topics with the names of the clients that subm
* Client performance profiling (`cmd/boulder-*`)
```
[gauge] Boulder.{cmd-name}.Gostats.Goroutines
[gauge] Boulder.{cmd-name}.Gostats.Heap.Alloc
[gauge] Boulder.{cmd-name}.Gostats.Heap.Objects
[gauge] Boulder.{cmd-name}.Gostats.Heap.Idle
[gauge] Boulder.{cmd-name}.Gostats.Heap.InUse
[gauge] Boulder.{cmd-name}.Gostats.Heap.Released
[gauge] Boulder.{cmd-name}.Gostats.Gc.NextAt
[gauge] Boulder.{cmd-name}.Gostats.Gc.LastPauseLatency
[counter] Boulder.{cmd-name}.Gostats.Gc.Rate
[timing] Boulder.{cmd-name}.Gostats.Gc.PauseAvg
[gauge] Boulder.{cmd-name}.Gostats.Goroutines
[gauge] Boulder.{cmd-name}.Gostats.Heap.Alloc
[gauge] Boulder.{cmd-name}.Gostats.Heap.Objects
[gauge] Boulder.{cmd-name}.Gostats.Heap.Idle
[gauge] Boulder.{cmd-name}.Gostats.Heap.InUse
[gauge] Boulder.{cmd-name}.Gostats.Heap.Released
[gauge] Boulder.{cmd-name}.Gostats.Gc.NextAt
[gauge] Boulder.{cmd-name}.Gostats.Gc.Count
[gauge] Boulder.{cmd-name}.Gostats.Gc.LastPause
[counter] Boulder.{cmd-name}.Gostats.Gc.Rate
[timing] Boulder.{cmd-name}.Gostats.Gc.PauseAvg
```
* External certificate store loading (`cmd/external-cert-importer`)