mirror of https://github.com/linkerd/linkerd2.git
Fix race condition in web service (#3883)
Fixes #3859, followup to #3769 The addition of the web service's `statCache` introduced a race condition on the `h.statCache` variable, that is read and written in `handleAPIStat()` without mutext guards. I've moved the `statCache` initialization into `/web/srv/server.go` to avoid this problem. The issue can be easily reproduced with ```bash $ bin/web dev $ for run in {1..2}; do curl 'http://localhost:7777/api/tps-reports?resource_type=deployment&namespace=linkerd&tcp_stats=true&resource_name=linkerd-destination&window=1m' & done [1] 11672 [2] 11673 {"ok":{"statTables":[{"podGroup":{"rows":[{"resource":{"namespace":"linkerd","type":"deployment","name":"linkerd-destination"},"timeWindow":"1m","status":"","meshedPodCount":"1","runningPodCount":"1","failedPodCount":"0","stats":{"successCount":"18","failureCount":"0","latencyMsP50":"1","latencyMsP95":"9","latencyMsP99":"10","actualSuccessCount":"0","actualFailureCount":"0"},"tcpStats":{"openConnections":"7","readBytesTotal":"23174","writeBytesTotal":"22946"},"tsStats":null,"errorsByPod":{}}]}}]}}{"ok":{"statTables":[{"podGroup":{"rows":[{"resource":{"namespace":"linkerd","type":"deployment","name":"linkerd-destination"},"timeWindow":"1m","status":"","meshedPodCount":"1","runningPodCount":"1","failedPodCount":"0","stats":{"successCount":"18","failureCount":"0","latencyMsP50":"1","latencyMsP95":"9","latencyMsP99":"10","actualSuccessCount":"0","actualFailureCount":"0"},"tcpStats":{"openConnections":"7","readBytesTotal":"23174","writeBytesTotal":"22946"},"tsStats":null,"errorsByPod":{}}]}}]}}[1]- Done curl 'http://localhost:7777/api/tps-reports?resource_type=deployment&namespace=linkerd&tcp_stats=true&resource_name=linkerd-destination&window=1m' [2]+ Done curl 'http://localhost:7777/api/tps-reports?resource_type=deployment&namespace=linkerd&tcp_stats=true&resource_name=linkerd-destination&window=1m' ================== WARNING: DATA RACE Read at 0x00c000192308 by goroutine 58: github.com/linkerd/linkerd2/web/srv.(*handler).handleAPIStat() /home/alpeb/src/linkerd2/web/srv/api_handlers.go:140 +0x61 github.com/linkerd/linkerd2/web/srv.(*handler).handleAPIStat-fm() /home/alpeb/src/linkerd2/web/srv/api_handlers.go:138 +0x7d github.com/julienschmidt/httprouter.(*Router).ServeHTTP() /home/alpeb/go/pkg/mod/github.com/julienschmidt/httprouter@v1.2.0/router.go:334 +0x10b7 github.com/linkerd/linkerd2/web/srv.(*Server).ServeHTTP() /home/alpeb/src/linkerd2/web/srv/server.go:69 +0x4c0 github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerCounter.func1() /home/alpeb/go/pkg/mod/github.com/prometheus/client_golang@v1.2.1/prometheus/promhttp/instrument_server.go:100 +0xf8 net/http.HandlerFunc.ServeHTTP() /usr/local/go/src/net/http/server.go:2007 +0x51 github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerResponseSize.func1() /home/alpeb/go/pkg/mod/github.com/prometheus/client_golang@v1.2.1/prometheus/promhttp/instrument_server.go:196 +0x104 net/http.HandlerFunc.ServeHTTP() /usr/local/go/src/net/http/server.go:2007 +0x51 github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerDuration.func1() /home/alpeb/go/pkg/mod/github.com/prometheus/client_golang@v1.2.1/prometheus/promhttp/instrument_server.go:68 +0x13c net/http.HandlerFunc.ServeHTTP() /usr/local/go/src/net/http/server.go:2007 +0x51 go.opencensus.io/plugin/ochttp.(*Handler).ServeHTTP() /home/alpeb/go/pkg/mod/go.opencensus.io@v0.22.0/plugin/ochttp/server.go:86 +0x3f9 net/http.serverHandler.ServeHTTP() /usr/local/go/src/net/http/server.go:2802 +0xce net/http.(*conn).serve() /usr/local/go/src/net/http/server.go:1890 +0x837 Previous write at 0x00c000192308 by goroutine 56: github.com/linkerd/linkerd2/web/srv.(*handler).handleAPIStat() /home/alpeb/src/linkerd2/web/srv/api_handlers.go:141 +0xd5e github.com/linkerd/linkerd2/web/srv.(*handler).handleAPIStat-fm() /home/alpeb/src/linkerd2/web/srv/api_handlers.go:138 +0x7d github.com/julienschmidt/httprouter.(*Router).ServeHTTP() /home/alpeb/go/pkg/mod/github.com/julienschmidt/httprouter@v1.2.0/router.go:334 +0x10b7 github.com/linkerd/linkerd2/web/srv.(*Server).ServeHTTP() /home/alpeb/src/linkerd2/web/srv/server.go:69 +0x4c0 github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerCounter.func1() /home/alpeb/go/pkg/mod/github.com/prometheus/client_golang@v1.2.1/prometheus/promhttp/instrument_server.go:100 +0xf8 net/http.HandlerFunc.ServeHTTP() /usr/local/go/src/net/http/server.go:2007 +0x51 github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerResponseSize.func1() /home/alpeb/go/pkg/mod/github.com/prometheus/client_golang@v1.2.1/prometheus/promhttp/instrument_server.go:196 +0x104 net/http.HandlerFunc.ServeHTTP() /usr/local/go/src/net/http/server.go:2007 +0x51 github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerDuration.func1() /home/alpeb/go/pkg/mod/github.com/prometheus/client_golang@v1.2.1/prometheus/promhttp/instrument_server.go:68 +0x13c net/http.HandlerFunc.ServeHTTP() /usr/local/go/src/net/http/server.go:2007 +0x51 go.opencensus.io/plugin/ochttp.(*Handler).ServeHTTP() /home/alpeb/go/pkg/mod/go.opencensus.io@v0.22.0/plugin/ochttp/server.go:86 +0x3f9 net/http.serverHandler.ServeHTTP() /usr/local/go/src/net/http/server.go:2802 +0xce net/http.(*conn).serve() /usr/local/go/src/net/http/server.go:1890 +0x837 Goroutine 58 (running) created at: net/http.(*Server).Serve() /usr/local/go/src/net/http/server.go:2927 +0x5be net/http.(*Server).ListenAndServe() /usr/local/go/src/net/http/server.go:2825 +0x102 main.main.func1() /home/alpeb/src/linkerd2/web/main.go:105 +0xdd Goroutine 56 (running) created at: net/http.(*Server).Serve() /usr/local/go/src/net/http/server.go:2927 +0x5be net/http.(*Server).ListenAndServe() /usr/local/go/src/net/http/server.go:2825 +0x102 main.main.func1() /home/alpeb/src/linkerd2/web/main.go:105 +0xdd ```
This commit is contained in:
parent
2c0b6efc17
commit
419b9f1502
|
@ -21,24 +21,14 @@ import (
|
|||
"github.com/linkerd/linkerd2/pkg/k8s"
|
||||
"github.com/linkerd/linkerd2/pkg/protohttp"
|
||||
"github.com/linkerd/linkerd2/pkg/tap"
|
||||
"github.com/patrickmn/go-cache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
const (
|
||||
// Control Frame payload size can be no bigger than 125 bytes. 2 bytes are
|
||||
// reserved for the status code when formatting the message.
|
||||
maxControlFrameMsgSize = 123
|
||||
|
||||
// statExpiration indicates when items in the stat cache expire.
|
||||
statExpiration = 1500 * time.Millisecond
|
||||
|
||||
// statCleanupInterval indicates how often expired items in the stat cache
|
||||
// are cleaned up.
|
||||
statCleanupInterval = 5 * time.Minute
|
||||
)
|
||||
// Control Frame payload size can be no bigger than 125 bytes. 2 bytes are
|
||||
// reserved for the status code when formatting the message.
|
||||
const maxControlFrameMsgSize = 123
|
||||
|
||||
type (
|
||||
jsonError struct {
|
||||
|
@ -136,11 +126,6 @@ func (h *handler) handleAPIServices(w http.ResponseWriter, req *http.Request, p
|
|||
}
|
||||
|
||||
func (h *handler) handleAPIStat(w http.ResponseWriter, req *http.Request, p httprouter.Params) {
|
||||
// Initialize stat cache if needed
|
||||
if h.statCache == nil {
|
||||
h.statCache = cache.New(statExpiration, statCleanupInterval)
|
||||
}
|
||||
|
||||
// Try to get stat summary from cache using the query as key
|
||||
cachedResultJSON, ok := h.statCache.Get(req.URL.RawQuery)
|
||||
if ok {
|
||||
|
|
|
@ -17,11 +17,19 @@ import (
|
|||
"github.com/linkerd/linkerd2/pkg/healthcheck"
|
||||
"github.com/linkerd/linkerd2/pkg/k8s"
|
||||
"github.com/linkerd/linkerd2/pkg/prometheus"
|
||||
"github.com/patrickmn/go-cache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
timeout = 10 * time.Second
|
||||
|
||||
// statExpiration indicates when items in the stat cache expire.
|
||||
statExpiration = 1500 * time.Millisecond
|
||||
|
||||
// statCleanupInterval indicates how often expired items in the stat cache
|
||||
// are cleaned up.
|
||||
statCleanupInterval = 5 * time.Minute
|
||||
)
|
||||
|
||||
type (
|
||||
|
@ -108,6 +116,7 @@ func NewServer(
|
|||
clusterDomain: clusterDomain,
|
||||
grafanaProxy: newGrafanaProxy(grafanaAddr),
|
||||
hc: hc,
|
||||
statCache: cache.New(statExpiration, statCleanupInterval),
|
||||
}
|
||||
|
||||
httpServer := &http.Server{
|
||||
|
|
Loading…
Reference in New Issue