Move rpc delivery timing stuff to new metrics lib
This commit is contained in:
parent
2677c4e314
commit
370cd07bc9
|
|
@ -10,11 +10,8 @@ package main
|
||||||
// broker to look for anomalies.
|
// broker to look for anomalies.
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/cactus/go-statsd-client/statsd"
|
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/cactus/go-statsd-client/statsd"
|
||||||
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/streadway/amqp"
|
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/streadway/amqp"
|
||||||
|
|
@ -22,6 +19,7 @@ import (
|
||||||
"github.com/letsencrypt/boulder/analysis"
|
"github.com/letsencrypt/boulder/analysis"
|
||||||
"github.com/letsencrypt/boulder/cmd"
|
"github.com/letsencrypt/boulder/cmd"
|
||||||
blog "github.com/letsencrypt/boulder/log"
|
blog "github.com/letsencrypt/boulder/log"
|
||||||
|
"github.com/letsencrypt/boulder/metrics"
|
||||||
"github.com/letsencrypt/boulder/rpc"
|
"github.com/letsencrypt/boulder/rpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -41,70 +39,6 @@ const (
|
||||||
AmqpImmediate = false
|
AmqpImmediate = false
|
||||||
)
|
)
|
||||||
|
|
||||||
type timings struct {
|
|
||||||
deliveryTimings map[string]time.Time
|
|
||||||
dtMu sync.Mutex
|
|
||||||
|
|
||||||
stats statsd.Statter
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *timings) size() int {
|
|
||||||
t.dtMu.Lock()
|
|
||||||
defer t.dtMu.Unlock()
|
|
||||||
return len(t.deliveryTimings)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *timings) get(id string) time.Time {
|
|
||||||
t.dtMu.Lock()
|
|
||||||
defer t.dtMu.Unlock()
|
|
||||||
return t.deliveryTimings[id]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *timings) add(id string) {
|
|
||||||
t.dtMu.Lock()
|
|
||||||
defer t.dtMu.Unlock()
|
|
||||||
t.deliveryTimings[id] = time.Now()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *timings) delete(id string) {
|
|
||||||
t.dtMu.Lock()
|
|
||||||
defer t.dtMu.Unlock()
|
|
||||||
delete(t.deliveryTimings, id)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *timings) timeDelivery(d amqp.Delivery) {
|
|
||||||
// If d is a call add to deliveryTimings and increment openCalls, if it is a
|
|
||||||
// response then get time.Since original call from deliveryTiming, send timing metric, and
|
|
||||||
// decrement openCalls, in both cases send the gauge RpcCallsWaiting and increment the counter
|
|
||||||
// RpcTraffic with the byte length of the RPC body.
|
|
||||||
t.stats.Inc("RPC.Traffic", int64(len(d.Body)), 1.0)
|
|
||||||
t.stats.Gauge("RPC.CallsWaiting", int64(t.size()), 1.0)
|
|
||||||
|
|
||||||
if d.ReplyTo != "" {
|
|
||||||
t.add(fmt.Sprintf("%s:%s", d.CorrelationId, d.ReplyTo))
|
|
||||||
} else {
|
|
||||||
rpcSent := t.get(fmt.Sprintf("%s:%s", d.CorrelationId, d.RoutingKey))
|
|
||||||
if rpcSent != *new(time.Time) {
|
|
||||||
respTime := time.Since(rpcSent)
|
|
||||||
t.delete(fmt.Sprintf("%s:%s", d.CorrelationId, d.RoutingKey))
|
|
||||||
|
|
||||||
// Check if the call failed
|
|
||||||
state := "Success"
|
|
||||||
var resp struct {
|
|
||||||
Error rpc.RPCError
|
|
||||||
}
|
|
||||||
json.Unmarshal(d.Body, &resp)
|
|
||||||
if resp.Error.Value != "" {
|
|
||||||
state = "Error"
|
|
||||||
}
|
|
||||||
t.stats.Inc(fmt.Sprintf("RPC.Rate.%s", state), 1, 1.0)
|
|
||||||
t.stats.TimingDuration(fmt.Sprintf("RPC.ResponseTime.%s.%s", d.Type, state), respTime, 1.0)
|
|
||||||
} else {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func startMonitor(rpcCh *amqp.Channel, logger *blog.AuditLogger, stats statsd.Statter) {
|
func startMonitor(rpcCh *amqp.Channel, logger *blog.AuditLogger, stats statsd.Statter) {
|
||||||
ae := analysisengine.NewLoggingAnalysisEngine()
|
ae := analysisengine.NewLoggingAnalysisEngine()
|
||||||
|
|
||||||
|
|
@ -162,14 +96,11 @@ func startMonitor(rpcCh *amqp.Channel, logger *blog.AuditLogger, stats statsd.St
|
||||||
cmd.FailOnError(err, "Could not subscribe to queue")
|
cmd.FailOnError(err, "Could not subscribe to queue")
|
||||||
}
|
}
|
||||||
|
|
||||||
timings := timings{
|
rpcMonitor := metrics.NewRPCMonitor(stats)
|
||||||
deliveryTimings: make(map[string]time.Time),
|
|
||||||
stats: stats,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run forever.
|
// Run forever.
|
||||||
for d := range deliveries {
|
for d := range deliveries {
|
||||||
go timings.timeDelivery(d)
|
go rpcMonitor.TimeDelivery(d)
|
||||||
|
|
||||||
// Pass each message to the Analysis Engine
|
// Pass each message to the Analysis Engine
|
||||||
err = ae.ProcessMessage(d)
|
err = ae.ProcessMessage(d)
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,10 @@ import (
|
||||||
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/cactus/go-statsd-client/statsd"
|
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/cactus/go-statsd-client/statsd"
|
||||||
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/codegangsta/cli"
|
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/codegangsta/cli"
|
||||||
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/streadway/amqp"
|
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/streadway/amqp"
|
||||||
"github.com/letsencrypt/boulder/metrics"
|
|
||||||
|
|
||||||
"github.com/letsencrypt/boulder/cmd"
|
"github.com/letsencrypt/boulder/cmd"
|
||||||
blog "github.com/letsencrypt/boulder/log"
|
blog "github.com/letsencrypt/boulder/log"
|
||||||
|
"github.com/letsencrypt/boulder/metrics"
|
||||||
"github.com/letsencrypt/boulder/rpc"
|
"github.com/letsencrypt/boulder/rpc"
|
||||||
"github.com/letsencrypt/boulder/wfe"
|
"github.com/letsencrypt/boulder/wfe"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -6,13 +6,18 @@
|
||||||
package metrics
|
package metrics
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/cactus/go-statsd-client/statsd"
|
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/cactus/go-statsd-client/statsd"
|
||||||
|
"github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/streadway/amqp"
|
||||||
|
|
||||||
|
"github.com/letsencrypt/boulder/rpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
// HTTPMonitor stores some server state
|
// HTTPMonitor stores some server state
|
||||||
|
|
@ -62,3 +67,73 @@ func (h *HTTPMonitor) watchAndServe(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
h.stats.TimingDuration(fmt.Sprintf("%s.HTTP.ResponseTime.%s.%s", h.statsPrefix, endpoint, state), cClosed, 1.0)
|
h.stats.TimingDuration(fmt.Sprintf("%s.HTTP.ResponseTime.%s.%s", h.statsPrefix, endpoint, state), cClosed, 1.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RPCMonitor stores rpc delivery state
|
||||||
|
type RPCMonitor struct {
|
||||||
|
deliveryTimings map[string]time.Time
|
||||||
|
dtMu *sync.Mutex
|
||||||
|
|
||||||
|
stats statsd.Statter
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRPCMonitor returns a new initialized RPCMonitor
|
||||||
|
func NewRPCMonitor(stats statsd.Statter) RPCMonitor {
|
||||||
|
return RPCMonitor{stats: stats, deliveryTimings: make(map[string]time.Time), dtMu: &sync.Mutex{}}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPCMonitor) size() int {
|
||||||
|
r.dtMu.Lock()
|
||||||
|
defer r.dtMu.Unlock()
|
||||||
|
return len(r.deliveryTimings)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPCMonitor) get(id string) time.Time {
|
||||||
|
r.dtMu.Lock()
|
||||||
|
defer r.dtMu.Unlock()
|
||||||
|
return r.deliveryTimings[id]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPCMonitor) add(id string) {
|
||||||
|
r.dtMu.Lock()
|
||||||
|
defer r.dtMu.Unlock()
|
||||||
|
r.deliveryTimings[id] = time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPCMonitor) delete(id string) {
|
||||||
|
r.dtMu.Lock()
|
||||||
|
defer r.dtMu.Unlock()
|
||||||
|
delete(r.deliveryTimings, id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TimeDelivery takes a single RPC delivery and provides metrics to StatsD about it
|
||||||
|
func (r *RPCMonitor) TimeDelivery(d amqp.Delivery) {
|
||||||
|
// If d is a call add to deliveryTimings and increment openCalls, if it is a
|
||||||
|
// response then get time.Since original call from deliveryTiming, send timing metric, and
|
||||||
|
// decrement openCalls, in both cases send the gauge RpcCallsWaiting and increment the counter
|
||||||
|
// RpcTraffic with the byte length of the RPC body.
|
||||||
|
r.stats.Inc("RPC.Traffic", int64(len(d.Body)), 1.0)
|
||||||
|
r.stats.Gauge("RPC.CallsWaiting", int64(r.size()), 1.0)
|
||||||
|
|
||||||
|
if d.ReplyTo != "" {
|
||||||
|
r.add(fmt.Sprintf("%s:%s", d.CorrelationId, d.ReplyTo))
|
||||||
|
} else {
|
||||||
|
rpcSent := r.get(fmt.Sprintf("%s:%s", d.CorrelationId, d.RoutingKey))
|
||||||
|
if rpcSent != *new(time.Time) {
|
||||||
|
respTime := time.Since(rpcSent)
|
||||||
|
r.delete(fmt.Sprintf("%s:%s", d.CorrelationId, d.RoutingKey))
|
||||||
|
|
||||||
|
// Check if the call failed
|
||||||
|
state := "Success"
|
||||||
|
var resp struct {
|
||||||
|
Error rpc.RPCError
|
||||||
|
}
|
||||||
|
json.Unmarshal(d.Body, &resp)
|
||||||
|
if resp.Error.Value != "" {
|
||||||
|
state = "Error"
|
||||||
|
}
|
||||||
|
r.stats.Inc(fmt.Sprintf("RPC.Rate.%s", state), 1, 1.0)
|
||||||
|
r.stats.TimingDuration(fmt.Sprintf("RPC.ResponseTime.%s.%s", d.Type, state), respTime, 1.0)
|
||||||
|
} else {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue