add metrics and traces for egress dials
Kubernetes-commit: fbb1fb8902c06cbcce47a025ce22fe260b27a697
This commit is contained in:
parent
b214a49983
commit
079efffdb4
|
@ -22,16 +22,20 @@ import (
|
|||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"fmt"
|
||||
"google.golang.org/grpc"
|
||||
"io/ioutil"
|
||||
utilnet "k8s.io/apimachinery/pkg/util/net"
|
||||
"k8s.io/apiserver/pkg/apis/apiserver"
|
||||
"k8s.io/klog"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
client "sigs.k8s.io/apiserver-network-proxy/konnectivity-client/pkg/client"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
utilnet "k8s.io/apimachinery/pkg/util/net"
|
||||
"k8s.io/apiserver/pkg/apis/apiserver"
|
||||
egressmetrics "k8s.io/apiserver/pkg/server/egressselector/metrics"
|
||||
"k8s.io/klog"
|
||||
utiltrace "k8s.io/utils/trace"
|
||||
client "sigs.k8s.io/apiserver-network-proxy/konnectivity-client/pkg/client"
|
||||
)
|
||||
|
||||
var directDialer utilnet.DialFunc = http.DefaultTransport.(*http.Transport).DialContext
|
||||
|
@ -152,7 +156,10 @@ func createConnectTCPDialer(tcpTransport *apiserver.TCPTransport) (utilnet.DialF
|
|||
certPool = nil
|
||||
}
|
||||
contextDialer := func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
trace := utiltrace.New("Proxy via HTTP Connect over TCP", utiltrace.Field{Key: "address", Value: addr})
|
||||
defer trace.LogIfLong(500 * time.Millisecond)
|
||||
klog.V(4).Infof("Sending request to %q.", addr)
|
||||
start := time.Now()
|
||||
proxyConn, err := tls.Dial("tcp", proxyAddress,
|
||||
&tls.Config{
|
||||
Certificates: []tls.Certificate{clientCerts},
|
||||
|
@ -160,27 +167,46 @@ func createConnectTCPDialer(tcpTransport *apiserver.TCPTransport) (utilnet.DialF
|
|||
},
|
||||
)
|
||||
if err != nil {
|
||||
egressmetrics.Metrics.ObserveDialFailure(egressmetrics.ProtocolHTTPConnect, egressmetrics.TransportTCP, egressmetrics.StageDial)
|
||||
return nil, fmt.Errorf("dialing proxy %q failed: %v", proxyAddress, err)
|
||||
}
|
||||
return tunnelHTTPConnect(proxyConn, proxyAddress, addr)
|
||||
ret, err := tunnelHTTPConnect(proxyConn, proxyAddress, addr)
|
||||
if err != nil {
|
||||
egressmetrics.Metrics.ObserveDialFailure(egressmetrics.ProtocolHTTPConnect, egressmetrics.TransportTCP, egressmetrics.StageProxy)
|
||||
return nil, err
|
||||
}
|
||||
egressmetrics.Metrics.ObserveDialLatency(time.Since(start), egressmetrics.ProtocolHTTPConnect, egressmetrics.TransportTCP)
|
||||
return ret, nil
|
||||
}
|
||||
return contextDialer, nil
|
||||
}
|
||||
|
||||
func createConnectUDSDialer(udsConfig *apiserver.UDSTransport) (utilnet.DialFunc, error) {
|
||||
contextDialer := func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
trace := utiltrace.New("Proxy via HTTP Connect over UDS", utiltrace.Field{Key: "address", Value: addr})
|
||||
defer trace.LogIfLong(500 * time.Millisecond)
|
||||
start := time.Now()
|
||||
proxyConn, err := net.Dial("unix", udsConfig.UDSName)
|
||||
if err != nil {
|
||||
egressmetrics.Metrics.ObserveDialFailure(egressmetrics.ProtocolHTTPConnect, egressmetrics.TransportUDS, egressmetrics.StageDial)
|
||||
return nil, fmt.Errorf("dialing proxy %q failed: %v", udsConfig.UDSName, err)
|
||||
}
|
||||
return tunnelHTTPConnect(proxyConn, udsConfig.UDSName, addr)
|
||||
ret, err := tunnelHTTPConnect(proxyConn, udsConfig.UDSName, addr)
|
||||
if err != nil {
|
||||
egressmetrics.Metrics.ObserveDialFailure(egressmetrics.ProtocolHTTPConnect, egressmetrics.TransportUDS, egressmetrics.StageProxy)
|
||||
return nil, err
|
||||
}
|
||||
egressmetrics.Metrics.ObserveDialLatency(time.Since(start), egressmetrics.ProtocolHTTPConnect, egressmetrics.TransportUDS)
|
||||
return ret, nil
|
||||
}
|
||||
return contextDialer, nil
|
||||
}
|
||||
|
||||
func createGRPCUDSDialer(udsName string) (utilnet.DialFunc, error) {
|
||||
contextDialer := func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
|
||||
trace := utiltrace.New("Proxy via GRPC over UDS", utiltrace.Field{Key: "address", Value: addr})
|
||||
defer trace.LogIfLong(500 * time.Millisecond)
|
||||
start := time.Now()
|
||||
dialOption := grpc.WithContextDialer(func(context.Context, string) (net.Conn, error) {
|
||||
c, err := net.Dial("unix", udsName)
|
||||
if err != nil {
|
||||
|
@ -191,13 +217,16 @@ func createGRPCUDSDialer(udsName string) (utilnet.DialFunc, error) {
|
|||
|
||||
tunnel, err := client.CreateGrpcTunnel(udsName, dialOption, grpc.WithInsecure())
|
||||
if err != nil {
|
||||
egressmetrics.Metrics.ObserveDialFailure(egressmetrics.ProtocolGRPC, egressmetrics.TransportUDS, egressmetrics.StageDial)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
proxyConn, err := tunnel.Dial("tcp", addr)
|
||||
if err != nil {
|
||||
egressmetrics.Metrics.ObserveDialFailure(egressmetrics.ProtocolGRPC, egressmetrics.TransportUDS, egressmetrics.StageProxy)
|
||||
return nil, err
|
||||
}
|
||||
egressmetrics.Metrics.ObserveDialLatency(time.Since(start), egressmetrics.ProtocolGRPC, egressmetrics.TransportUDS)
|
||||
return proxyConn, nil
|
||||
}
|
||||
return contextDialer, nil
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
)
|
||||
|
||||
const (
|
||||
namespace = "apiserver"
|
||||
subsystem = "egress_dialer"
|
||||
|
||||
// ProtocolHTTPConnect means that the proxy protocol is http-connect.
|
||||
ProtocolHTTPConnect = "http_connect"
|
||||
// ProtocolHTTPGRPC means that the proxy protocol is the GRPC protocol.
|
||||
ProtocolGRPC = "grpc"
|
||||
// TransportTCP means that the transport is TCP.
|
||||
TransportTCP = "tcp"
|
||||
// TransportUDS means that the transport is UDS.
|
||||
TransportUDS = "uds"
|
||||
// StageTransport indicates that the dial failed at dialing to the proxy server.
|
||||
StageDial = "dial"
|
||||
// StageProtocol indicates that the dial failed at requesting the proxy server to proxy.
|
||||
StageProxy = "proxy"
|
||||
)
|
||||
|
||||
var (
|
||||
// Use buckets ranging from 5 ms to 12.5 seconds.
|
||||
latencyBuckets = []float64{0.005, 0.025, 0.1, 0.5, 2.5, 12.5}
|
||||
latencySummaryMaxAge = 5 * time.Hour
|
||||
|
||||
// Metrics provides access to all dial metrics.
|
||||
Metrics = newDialMetrics()
|
||||
)
|
||||
|
||||
// DialMetrics instruments dials to proxy server with prometheus metrics.
|
||||
type DialMetrics struct {
|
||||
latencies *metrics.HistogramVec
|
||||
failures *metrics.CounterVec
|
||||
}
|
||||
|
||||
// newDialMetrics create a new DialMetrics, configured with default metric names.
|
||||
func newDialMetrics() *DialMetrics {
|
||||
latencies := metrics.NewHistogramVec(
|
||||
&metrics.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "dial_duration_seconds",
|
||||
Help: "Dial latency histogram in seconds, labeled by the protocol (http-connect or grpc), transport (tcp or uds)",
|
||||
Buckets: latencyBuckets,
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"protocol", "transport"},
|
||||
)
|
||||
|
||||
failures := metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "dial_failure_count",
|
||||
Help: "Dial failure count, labeled by the protocol (http-connect or grpc), transport (tcp or uds), and stage (dial or proxy). The stage indicates at which stage the dial failed",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"protocol", "transport", "stage"},
|
||||
)
|
||||
|
||||
legacyregistry.MustRegister(latencies)
|
||||
legacyregistry.MustRegister(failures)
|
||||
return &DialMetrics{latencies: latencies, failures: failures}
|
||||
}
|
||||
|
||||
// Reset resets the metrics.
|
||||
func (m *DialMetrics) Reset() {
|
||||
m.latencies.Reset()
|
||||
m.failures.Reset()
|
||||
}
|
||||
|
||||
// ObserveDialLatency records the latency of a dial, labeled by protocol, transport.
|
||||
func (m *DialMetrics) ObserveDialLatency(elapsed time.Duration, protocol, transport string) {
|
||||
m.latencies.WithLabelValues(protocol, transport).Observe(elapsed.Seconds())
|
||||
}
|
||||
|
||||
// ObserverDialFailure records a failed dial, labeled by protocol, transport, and the stage the dial failed at.
|
||||
func (m *DialMetrics) ObserveDialFailure(protocol, transport, stage string) {
|
||||
m.failures.WithLabelValues(protocol, transport, stage).Inc()
|
||||
}
|
Loading…
Reference in New Issue