From 2d320f1dd59c25e534037f6e0f415f882df6b836 Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Sun, 18 Oct 2015 18:32:35 -0700 Subject: [PATCH 1/6] Fix GC pause average mixup and add a few extra GC metrics --- cmd/shell.go | 22 ++++++++++++++++------ docs/metrics/README.md | 20 +++++++++++--------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/cmd/shell.go b/cmd/shell.go index f48e06614..1f9d2ab5a 100644 --- a/cmd/shell.go +++ b/cmd/shell.go @@ -375,25 +375,35 @@ func FailOnError(err error, msg string) { // ProfileCmd runs forever, sending Go runtime statistics to StatsD. func ProfileCmd(profileName string, stats statsd.Statter) { + var memoryStats runtime.MemStats + prevNumGC := int64(0) c := time.Tick(1 * time.Second) for range c { - var memoryStats runtime.MemStats runtime.ReadMemStats(&memoryStats) + // Gather goroutine count stats.Gauge(fmt.Sprintf("%s.Gostats.Goroutines", profileName), int64(runtime.NumGoroutine()), 1.0) + // Gather various heap metrics stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.Alloc", profileName), int64(memoryStats.HeapAlloc), 1.0) stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.Objects", profileName), int64(memoryStats.HeapObjects), 1.0) stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.Idle", profileName), int64(memoryStats.HeapIdle), 1.0) stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.InUse", profileName), int64(memoryStats.HeapInuse), 1.0) stats.Gauge(fmt.Sprintf("%s.Gostats.Heap.Released", profileName), int64(memoryStats.HeapReleased), 1.0) - // Calculate average and last and convert from nanoseconds to milliseconds - gcPauseAvg := (int64(memoryStats.PauseTotalNs) / int64(len(memoryStats.PauseNs))) / 1000000 - lastGC := int64(memoryStats.PauseNs[(memoryStats.NumGC+255)%256]) / 1000000 - stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), gcPauseAvg, 1.0) - stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPauseLatency", profileName), lastGC, 1.0) + // Gather various GC related metrics + if memoryStats.NumGC > 0 { + gcPauseAvg := (int64(memoryStats.PauseTotalNs) / int64(memoryStats.NumGC)) + lastGC := int64(memoryStats.PauseNs[(memoryStats.NumGC+255)%256]) + stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), gcPauseAvg, 1.0) + stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPause", profileName), lastGC, 1.0) + } stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.NextAt", profileName), int64(memoryStats.NextGC), 1.0) + // By using a counter instead of a gauge here we can much more easily observe + // the GC rate (versus the raw number of GCs) + gcInc := int64(memoryStats.NumGC) - prevNumGC + prevNumGC += gcInc + stats.Inc(fmt.Sprintf("%s.Gostats.Gc.Count", profileName), gcInc, 1.0) } } diff --git a/docs/metrics/README.md b/docs/metrics/README.md index d219edefd..a8c2e6439 100644 --- a/docs/metrics/README.md +++ b/docs/metrics/README.md @@ -97,16 +97,18 @@ This list is split up into metric topics with the names of the clients that subm * Client performance profiling (`cmd/boulder-*`) ``` - [gauge] Boulder.{cmd-name}.Gostats.Goroutines - [gauge] Boulder.{cmd-name}.Gostats.Heap.Alloc - [gauge] Boulder.{cmd-name}.Gostats.Heap.Objects - [gauge] Boulder.{cmd-name}.Gostats.Heap.Idle - [gauge] Boulder.{cmd-name}.Gostats.Heap.InUse - [gauge] Boulder.{cmd-name}.Gostats.Heap.Released - [gauge] Boulder.{cmd-name}.Gostats.Gc.NextAt - [gauge] Boulder.{cmd-name}.Gostats.Gc.LastPauseLatency + [counter] Boulder.{cmd-name}.Gostats.Gc.Count - [timing] Boulder.{cmd-name}.Gostats.Gc.PauseAvg + [gauge] Boulder.{cmd-name}.Gostats.Goroutines + [gauge] Boulder.{cmd-name}.Gostats.Heap.Alloc + [gauge] Boulder.{cmd-name}.Gostats.Heap.Objects + [gauge] Boulder.{cmd-name}.Gostats.Heap.Idle + [gauge] Boulder.{cmd-name}.Gostats.Heap.InUse + [gauge] Boulder.{cmd-name}.Gostats.Heap.Released + [gauge] Boulder.{cmd-name}.Gostats.Gc.NextAt + [gauge] Boulder.{cmd-name}.Gostats.Gc.LastPause + + [timing] Boulder.{cmd-name}.Gostats.Gc.PauseAvg ``` * External certificate store loading (`cmd/external-cert-importer`) From e08a8f3160d323d70226da5aae1459e3049d671a Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Mon, 19 Oct 2015 13:54:55 -0700 Subject: [PATCH 2/6] Provide both gauge and counter --- cmd/shell.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cmd/shell.go b/cmd/shell.go index 1f9d2ab5a..0c039a6ec 100644 --- a/cmd/shell.go +++ b/cmd/shell.go @@ -399,11 +399,14 @@ func ProfileCmd(profileName string, stats statsd.Statter) { stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPause", profileName), lastGC, 1.0) } stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.NextAt", profileName), int64(memoryStats.NextGC), 1.0) - // By using a counter instead of a gauge here we can much more easily observe - // the GC rate (versus the raw number of GCs) - gcInc := int64(memoryStats.NumGC) - prevNumGC + // Send both a counter and a gauge here we can much more easily observe + // the GC rate (versus the raw number of GCs) in graphing tools that don't + // like deltas + gcInc := int64(memoryStats.NumGC) + stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.Count", profileName), gcInc, 1.0) + gcInc -= prevNumGC + stats.Inc(fmt.Sprintf("%s.Gostats.Gc.Rate", profileName), gcInc, 1.0) prevNumGC += gcInc - stats.Inc(fmt.Sprintf("%s.Gostats.Gc.Count", profileName), gcInc, 1.0) } } From 087b7a21bb6967bf49b6051e838c593ea6fcf2b8 Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Mon, 19 Oct 2015 15:44:36 -0700 Subject: [PATCH 3/6] Collect recent average instead of average since start --- cmd/shell.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cmd/shell.go b/cmd/shell.go index 0c039a6ec..8875db47d 100644 --- a/cmd/shell.go +++ b/cmd/shell.go @@ -393,7 +393,15 @@ func ProfileCmd(profileName string, stats statsd.Statter) { // Gather various GC related metrics if memoryStats.NumGC > 0 { - gcPauseAvg := (int64(memoryStats.PauseTotalNs) / int64(memoryStats.NumGC)) + totalRecentGC := uint64(0) + buffSize := 0 + for _, pause := range memoryStats.PauseNs { + if pause != 0 { + totalRecentGC += pause + buffSize++ + } + } + gcPauseAvg := int64(totalRecentGC) / int64(buffSize) lastGC := int64(memoryStats.PauseNs[(memoryStats.NumGC+255)%256]) stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), gcPauseAvg, 1.0) stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPause", profileName), lastGC, 1.0) From 547348fc487ffd3d0759aee29dc4f2da02f1b59b Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Mon, 19 Oct 2015 15:53:29 -0700 Subject: [PATCH 4/6] Review fix --- cmd/shell.go | 14 +++++--------- docs/metrics/README.md | 3 ++- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/cmd/shell.go b/cmd/shell.go index 8875db47d..ddd755b5f 100644 --- a/cmd/shell.go +++ b/cmd/shell.go @@ -28,6 +28,7 @@ import ( "fmt" "io/ioutil" "log" + "math" "net" "net/http" _ "net/http/pprof" // HTTP performance profiling, added transparently to HTTP APIs @@ -394,14 +395,10 @@ func ProfileCmd(profileName string, stats statsd.Statter) { // Gather various GC related metrics if memoryStats.NumGC > 0 { totalRecentGC := uint64(0) - buffSize := 0 for _, pause := range memoryStats.PauseNs { - if pause != 0 { - totalRecentGC += pause - buffSize++ - } + totalRecentGC += pause } - gcPauseAvg := int64(totalRecentGC) / int64(buffSize) + gcPauseAvg := int64(totalRecentGC) / int64(math.Min(256.0, float64(memoryStats.NumGC))) lastGC := int64(memoryStats.PauseNs[(memoryStats.NumGC+255)%256]) stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), gcPauseAvg, 1.0) stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPause", profileName), lastGC, 1.0) @@ -410,9 +407,8 @@ func ProfileCmd(profileName string, stats statsd.Statter) { // Send both a counter and a gauge here we can much more easily observe // the GC rate (versus the raw number of GCs) in graphing tools that don't // like deltas - gcInc := int64(memoryStats.NumGC) - stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.Count", profileName), gcInc, 1.0) - gcInc -= prevNumGC + stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.Count", profileName), int64(memoryStats.NumGC), 1.0) + gcInc := int64(memoryStats.NumGC) - prevNumGC stats.Inc(fmt.Sprintf("%s.Gostats.Gc.Rate", profileName), gcInc, 1.0) prevNumGC += gcInc } diff --git a/docs/metrics/README.md b/docs/metrics/README.md index a8c2e6439..e3fb41db1 100644 --- a/docs/metrics/README.md +++ b/docs/metrics/README.md @@ -97,7 +97,7 @@ This list is split up into metric topics with the names of the clients that subm * Client performance profiling (`cmd/boulder-*`) ``` - [counter] Boulder.{cmd-name}.Gostats.Gc.Count + [counter] Boulder.{cmd-name}.Gostats.Gc.Rate [gauge] Boulder.{cmd-name}.Gostats.Goroutines [gauge] Boulder.{cmd-name}.Gostats.Heap.Alloc @@ -106,6 +106,7 @@ This list is split up into metric topics with the names of the clients that subm [gauge] Boulder.{cmd-name}.Gostats.Heap.InUse [gauge] Boulder.{cmd-name}.Gostats.Heap.Released [gauge] Boulder.{cmd-name}.Gostats.Gc.NextAt + [gauge] Boulder.{cmd-name}.Gostats.Gc.Count [gauge] Boulder.{cmd-name}.Gostats.Gc.LastPause [timing] Boulder.{cmd-name}.Gostats.Gc.PauseAvg From d7e707849d5816ff82cb2d7bc7d904a07b1ebed1 Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Tue, 20 Oct 2015 19:04:54 -0700 Subject: [PATCH 5/6] Don't bother with all the casting to do math.Min --- cmd/shell.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cmd/shell.go b/cmd/shell.go index ddd755b5f..93ad8853b 100644 --- a/cmd/shell.go +++ b/cmd/shell.go @@ -28,7 +28,6 @@ import ( "fmt" "io/ioutil" "log" - "math" "net" "net/http" _ "net/http/pprof" // HTTP performance profiling, added transparently to HTTP APIs @@ -395,10 +394,14 @@ func ProfileCmd(profileName string, stats statsd.Statter) { // Gather various GC related metrics if memoryStats.NumGC > 0 { totalRecentGC := uint64(0) + numGC := int64(256) + if memoryStats.NumGC < 256 { + numGC = int64(memoryStats.NumGC) + } for _, pause := range memoryStats.PauseNs { totalRecentGC += pause } - gcPauseAvg := int64(totalRecentGC) / int64(math.Min(256.0, float64(memoryStats.NumGC))) + gcPauseAvg := int64(totalRecentGC) / numGC lastGC := int64(memoryStats.PauseNs[(memoryStats.NumGC+255)%256]) stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), gcPauseAvg, 1.0) stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPause", profileName), lastGC, 1.0) From 4164d28916a77c24b4cf445962b4d022a802af25 Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Tue, 20 Oct 2015 19:21:04 -0700 Subject: [PATCH 6/6] Review fixes --- cmd/shell.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/shell.go b/cmd/shell.go index 93ad8853b..5d7c16ac9 100644 --- a/cmd/shell.go +++ b/cmd/shell.go @@ -394,17 +394,17 @@ func ProfileCmd(profileName string, stats statsd.Statter) { // Gather various GC related metrics if memoryStats.NumGC > 0 { totalRecentGC := uint64(0) - numGC := int64(256) + realBufSize := uint32(256) if memoryStats.NumGC < 256 { - numGC = int64(memoryStats.NumGC) + realBufSize = memoryStats.NumGC } for _, pause := range memoryStats.PauseNs { totalRecentGC += pause } - gcPauseAvg := int64(totalRecentGC) / numGC - lastGC := int64(memoryStats.PauseNs[(memoryStats.NumGC+255)%256]) - stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), gcPauseAvg, 1.0) - stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPause", profileName), lastGC, 1.0) + gcPauseAvg := totalRecentGC / uint64(realBufSize) + lastGC := memoryStats.PauseNs[(memoryStats.NumGC+255)%256] + stats.Timing(fmt.Sprintf("%s.Gostats.Gc.PauseAvg", profileName), int64(gcPauseAvg), 1.0) + stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.LastPause", profileName), int64(lastGC), 1.0) } stats.Gauge(fmt.Sprintf("%s.Gostats.Gc.NextAt", profileName), int64(memoryStats.NextGC), 1.0) // Send both a counter and a gauge here we can much more easily observe