profiling: add internal changes to support profiling of gRPC (#3158)

Adhityaa Chandrasekar 2019-12-11 09:06:38 -08:00 committed by Doug Fawley
parent 505c0d6440
commit 021bd5734e
9 changed files with 984 additions and 0 deletions


@@ -0,0 +1,272 @@
// +build !appengine
/*
*
* Copyright 2019 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package buffer
import (
"errors"
"math/bits"
"runtime"
"sync"
"sync/atomic"
"unsafe"
)
type queue struct {
// An array of pointers as references to the items stored in this queue.
arr []unsafe.Pointer
// The maximum number of elements this queue may store before it wraps around
// and overwrites older values. Must be a power of two.
size uint32
// Always size - 1. The Push operation performs a bitwise AND with this mask
// instead of a modulo operation.
mask uint32
// Each Push operation into this queue increments the acquired counter before
// proceeding with the actual write to arr. This counter is also used by the
// Drain operation's drainWait subroutine to wait for all pushes to complete.
acquired uint32
// After the completion of a Push operation, the written counter is
// incremented. Also used by drainWait to wait for all pushes to complete.
written uint32
}
// Allocates and returns a new *queue. size must be a power of two.
func newQueue(size uint32) *queue {
return &queue{
arr: make([]unsafe.Pointer, size),
size: size,
mask: size - 1,
}
}
// drainWait blocks the caller until all Pushes on this queue are complete.
func (q *queue) drainWait() {
for atomic.LoadUint32(&q.acquired) != atomic.LoadUint32(&q.written) {
runtime.Gosched()
}
}
// A queuePair has two queues. At any given time, Pushes go into the queue
// referenced by queuePair.q. The active queue gets switched when there's a
// drain operation on the circular buffer.
type queuePair struct {
q0 unsafe.Pointer
q1 unsafe.Pointer
q unsafe.Pointer
}
// Allocates and returns a new *queuePair with its internal queues allocated.
func newQueuePair(size uint32) *queuePair {
qp := &queuePair{}
qp.q0 = unsafe.Pointer(newQueue(size))
qp.q1 = unsafe.Pointer(newQueue(size))
qp.q = qp.q0
return qp
}
// Switches the current queue for future Pushes to proceed to the other queue
// so that there's no blocking in Push. Returns a pointer to the old queue that
// was in place before the switch.
func (qp *queuePair) switchQueues() *queue {
// Even though we have mutual exclusion across drainers (thanks to mu.Lock in
// drain), Push operations may access qp.q whilst we're writing to it.
if atomic.CompareAndSwapPointer(&qp.q, qp.q0, qp.q1) {
return (*queue)(qp.q0)
}
atomic.CompareAndSwapPointer(&qp.q, qp.q1, qp.q0)
return (*queue)(qp.q1)
}
// In order to avoid expensive modulo operations, we require the maximum
// number of elements in the circular buffer (N) to be a power of two so that a
// bitwise AND mask can be used instead. Since a CircularBuffer is a collection
// of queuePairs (see below), N must be divided among them; since powers of two
// are only divisible by other powers of two, we use floorCPUCount queuePairs
// within each CircularBuffer.
//
// The floor of the number of CPUs (and not the ceiling) was found to be the
// optimal number through experiments.
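//
// For example (illustrative), on a machine with 6 CPUs, bits.Len32(6) is 3, so
// floorExponent is 2 and floorCPUCount returns 1 << 2 = 4: the largest power
// of two that does not exceed the CPU count.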
func floorCPUCount() uint32 {
floorExponent := bits.Len32(uint32(runtime.NumCPU())) - 1
if floorExponent < 0 {
floorExponent = 0
}
return 1 << uint32(floorExponent)
}
var numCircularBufferPairs = floorCPUCount()
// CircularBuffer is a lock-free data structure that supports Push and Drain
// operations.
//
// Note that CircularBuffer is built for performance more than reliability.
// That is, some Push operations may fail without retries in some situations
// (such as during a Drain operation). Order of pushes is not maintained
// either; that is, if A was pushed before B, the Drain operation may return an
// array with B before A. These restrictions are acceptable within gRPC's
// profiling, but if your use-case does not permit these relaxed constraints
// or if performance is not a primary concern, you should probably use a
// lock-based data structure such as internal/buffer.UnboundedBuffer.
type CircularBuffer struct {
drainMutex sync.Mutex
qp []*queuePair
// qpn is a monotonically increasing counter that's used to determine
// which queuePair a Push operation should write to. This approach's
// performance was found to be better than writing to a random queue.
qpn uint32
qpMask uint32
}
var errInvalidCircularBufferSize = errors.New("buffer size is not a power of two")
// NewCircularBuffer allocates a circular buffer of size size and returns a
// reference to the struct. Only circular buffers whose size is a power of two
// are allowed (this saves us from having to do expensive modulo operations).
func NewCircularBuffer(size uint32) (*CircularBuffer, error) {
if size&(size-1) != 0 {
return nil, errInvalidCircularBufferSize
}
n := numCircularBufferPairs
if size/numCircularBufferPairs < 8 {
// If each circular buffer would hold fewer than a very small number of items
// (say, 8), using multiple circular buffers is very likely wasteful. Instead,
// fall back to one circular buffer holding everything.
n = 1
}
cb := &CircularBuffer{
qp: make([]*queuePair, n),
qpMask: n - 1,
}
for i := uint32(0); i < n; i++ {
cb.qp[i] = newQueuePair(size / n)
}
return cb, nil
}
// Push pushes an element into the circular buffer. Guaranteed to complete in
// a finite number of steps (also lock-free). Does not guarantee that push
// order will be retained. Does not guarantee that the operation will succeed
// if a Drain operation concurrently begins execution.
func (cb *CircularBuffer) Push(x interface{}) {
n := atomic.AddUint32(&cb.qpn, 1) & cb.qpMask
qptr := atomic.LoadPointer(&cb.qp[n].q)
q := (*queue)(qptr)
acquired := atomic.AddUint32(&q.acquired, 1) - 1
// If the queue pointer is unchanged, we incremented acquired before any
// queuePair was switched, and therefore before any drainWait completion, so it
// is safe to proceed with the Push operation on this queue. Otherwise, a Drain
// operation has begun execution, but we don't know how far along it is. If it
// is past the drainWait check, it is not safe to proceed with the Push
// operation. We choose to drop this sample entirely instead of retrying, as
// retrying may potentially send the Push operation into a spin loop (we want
// to guarantee completion of the Push operation within a finite time). Before
// exiting, we increment written so that any existing drainWaits can proceed.
if atomic.LoadPointer(&cb.qp[n].q) != qptr {
atomic.AddUint32(&q.written, 1)
return
}
// At this point, we're definitely writing to the right queue. That is, one
// of the following is true:
// 1. No drainer is in execution on this queue.
// 2. A drainer is in execution on this queue and it is waiting at the
// acquired == written barrier.
//
// Let's say two Pushes A and B happen on the same queue. Say A and B are
// q.size apart; i.e. they get the same index. That is,
//
// index_A = index_B
// acquired_A + q.size = acquired_B
//
// We say "B has wrapped around A" when this happens. In this case, since A
// occurred before B, B's Push should be the final value. However, we
// accommodate A being the final value because wrap-arounds are extremely
// rare and accounting for them requires an additional counter and a
// significant performance penalty. Note that the below approach never leads
// to any data corruption.
index := acquired & q.mask
atomic.StorePointer(&q.arr[index], unsafe.Pointer(&x))
// Allows any drainWait checks to proceed.
atomic.AddUint32(&q.written, 1)
}
// Dereferences non-nil pointers from arr into result. Range of elements from
// arr that are copied is [from, to). Assumes that the result slice is already
// allocated and is large enough to hold all the elements that might be copied.
// Also assumes mutual exclusion on the array of pointers.
func dereferenceAppend(result []interface{}, arr []unsafe.Pointer, from, to uint32) []interface{} {
for i := from; i < to; i++ {
// We have mutual exclusion on arr, there's no need for atomics.
x := (*interface{})(arr[i])
if x != nil {
result = append(result, *x)
}
}
return result
}
// Drain allocates and returns an array of things Pushed into the circular
// buffer. Push order is not maintained; that is, if B was Pushed after A,
// Drain may return B at a lower index than A in the returned array.
func (cb *CircularBuffer) Drain() []interface{} {
cb.drainMutex.Lock()
qs := make([]*queue, len(cb.qp))
for i := 0; i < len(cb.qp); i++ {
qs[i] = cb.qp[i].switchQueues()
}
var wg sync.WaitGroup
wg.Add(len(qs))
for i := 0; i < len(qs); i++ {
go func(qi int) {
qs[qi].drainWait()
wg.Done()
}(i)
}
wg.Wait()
result := make([]interface{}, 0)
for i := 0; i < len(qs); i++ {
if qs[i].acquired < qs[i].size {
result = dereferenceAppend(result, qs[i].arr, 0, qs[i].acquired)
} else {
result = dereferenceAppend(result, qs[i].arr, 0, qs[i].size)
}
}
for i := 0; i < len(qs); i++ {
atomic.StoreUint32(&qs[i].acquired, 0)
atomic.StoreUint32(&qs[i].written, 0)
}
cb.drainMutex.Unlock()
return result
}
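Below is a minimal usage sketch (not part of the commit; for illustration only) of the CircularBuffer above: several goroutines Push concurrently, then a single Drain collects whatever was retained. It assumes the code lives inside the grpc module, since the buffer package is internal.

package main

import (
	"fmt"
	"sync"

	"google.golang.org/grpc/internal/profiling/buffer"
)

func main() {
	// Size must be a power of two; NewCircularBuffer returns an error otherwise.
	cb, err := buffer.NewCircularBuffer(1 << 10)
	if err != nil {
		panic(err)
	}

	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for j := 0; j < 100; j++ {
				// Lock-free; a Push may be dropped if a Drain is in progress.
				cb.Push(id)
			}
		}(i)
	}
	wg.Wait()

	// Drain returns the pushed values in no particular order.
	fmt.Println("drained", len(cb.Drain()), "items")
}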


@@ -0,0 +1,38 @@
// +build appengine
/*
*
* Copyright 2019 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// App Engine does not support stats because it lacks support for unsafe
// pointers, which are necessary to efficiently store items in and retrieve
// them from a circular buffer. As a result, Push does not do anything and
// Drain returns a nil slice.
package buffer
// CircularBuffer is a no-op implementation for App Engine builds.
type CircularBuffer struct{}
// NewCircularBuffer returns a no-op CircularBuffer.
func NewCircularBuffer(size uint32) (*CircularBuffer, error) {
return nil, nil
}
// Push is a no-op.
func (cb *CircularBuffer) Push(x interface{}) {
}
// Drain always returns nil.
func (cb *CircularBuffer) Drain() []interface{} {
return nil
}


@@ -0,0 +1,178 @@
// +build !appengine
/*
*
* Copyright 2019 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package buffer
import (
"fmt"
"sync"
"testing"
"time"
)
func TestCircularBufferSerial(t *testing.T) {
var size, i uint32
var result []interface{}
size = 1 << 15
cb, err := NewCircularBuffer(size)
if err != nil {
t.Fatalf("error allocating CircularBuffer: %v", err)
}
for i = 0; i < size/2; i++ {
cb.Push(i)
}
result = cb.Drain()
if uint32(len(result)) != size/2 {
t.Fatalf("len(result) = %d; want %d", len(result), size/2)
}
// The returned result isn't necessarily sorted.
seen := make(map[uint32]bool)
for _, r := range result {
seen[r.(uint32)] = true
}
for i = 0; i < uint32(len(result)); i++ {
if !seen[i] {
t.Fatalf("seen[%d] = false; want true", i)
}
}
for i = 0; i < size; i++ {
cb.Push(i)
}
result = cb.Drain()
if uint32(len(result)) != size {
t.Fatalf("len(result) = %d; want %d", len(result), size)
}
}
func TestCircularBufferOverflow(t *testing.T) {
var size, i uint32
var result []interface{}
size = 1 << 10
cb, err := NewCircularBuffer(size)
if err != nil {
t.Fatalf("error allocating CircularBuffer: %v", err)
}
for i = 0; i < 10*size; i++ {
cb.Push(i)
}
result = cb.Drain()
if uint32(len(result)) != size {
t.Fatalf("len(result) = %d; want %d", len(result), size)
}
for idx, x := range result {
if x.(uint32) < size {
t.Fatalf("result[%d] = %d; want it to be >= %d", idx, x, size)
}
}
}
func TestCircularBufferConcurrent(t *testing.T) {
for tn := 0; tn < 2; tn++ {
var size uint32
var result []interface{}
size = 1 << 6
cb, err := NewCircularBuffer(size)
if err != nil {
t.Fatalf("error allocating CircularBuffer: %v", err)
}
type item struct {
R uint32
N uint32
T time.Time
}
var wg sync.WaitGroup
for r := uint32(0); r < 1024; r++ {
wg.Add(1)
go func(r uint32) {
for n := uint32(0); n < size; n++ {
cb.Push(item{R: r, N: n, T: time.Now()})
}
wg.Done()
}(r)
}
// Wait for all goroutines to finish only in the first iteration. In the
// second, Drain concurrently while Pushes are still happening to test for
// races against the drain lock.
if tn == 0 {
wg.Wait()
}
result = cb.Drain()
// Can't expect the buffer to be full if the Pushes aren't necessarily done.
if tn == 0 {
if uint32(len(result)) != size {
t.Fatalf("len(result) = %d; want %d", len(result), size)
}
}
// There can be absolutely no expectation on the order of the data returned
// by Drain because: (a) everything is happening concurrently (b) a
// round-robin is used to write to different queues (and therefore
// different cachelines) for less write contention.
// Wait for all goroutines to complete before moving on to other tests. If
// the benchmarks run after this, it might affect performance unfairly.
wg.Wait()
}
}
func BenchmarkCircularBuffer(b *testing.B) {
x := 1
for size := 1 << 16; size <= 1<<20; size <<= 1 {
for routines := 1; routines <= 1<<8; routines <<= 1 {
b.Run(fmt.Sprintf("goroutines:%d/size:%d", routines, size), func(b *testing.B) {
cb, err := NewCircularBuffer(uint32(size))
if err != nil {
b.Fatalf("error allocating CircularBuffer: %v", err)
}
perRoutine := b.N / routines
var wg sync.WaitGroup
for r := 0; r < routines; r++ {
wg.Add(1)
go func() {
for i := 0; i < perRoutine; i++ {
cb.Push(&x)
}
wg.Done()
}()
}
wg.Wait()
})
}
}
}


@@ -0,0 +1,81 @@
// +build grpcgoid
/*
*
* Copyright 2019 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package profiling
import (
"runtime"
)
// This is the variant of goid used when grpc is built with `-tags grpcgoid`.
// It expects a runtime.Goid function, which does not exist in the standard Go
// runtime but can be added with the patch shown below; the default build uses
// the stub in goid_regular.go, which always returns zero. While not necessary,
// visualising grpc profiling data in trace-viewer is much nicer with
// goroutines separated from each other.
//
// Several other approaches were considered before arriving at this:
//
// 1. Using a CGO module: CGO usually has access to some things that regular
// Go does not. Till go1.4, CGO used to have access to the goroutine struct
// because the Go runtime was written in C. However, 1.5+ uses a native Go
// runtime; as a result, CGO does not have access to the goroutine structure
// anymore in modern Go. Besides, CGO interop wasn't fast enough (estimated
// to be ~170ns/op). This would also make building grpc require a C
// compiler, which isn't a requirement currently, breaking a lot of stuff.
//
// 2. Using runtime.Stack stacktrace: While this would remove the need for a
// modified Go runtime, this is ridiculously slow, thanks to all the string
// processing shenanigans required to extract the goroutine ID (about
// 2000 ns/op).
//
// 3. Using Go version-specific build tags: For any given Go version, the
// goroutine struct has a fixed structure. As a result, the goroutine ID
// could be extracted if we know the offset using some assembly. This would
// be faster than #1 and #2, but is harder to maintain. This would require
// special Go code that's both architecture-specific and Go version-specific
// (a quadratic number of variants to maintain).
//
// 4. This approach, which requires a simple modification [1] to the Go runtime
// to expose the current goroutine's ID. This is the chosen approach and it
// takes about 2 ns/op, which is negligible in the face of the tens of
// microseconds that grpc takes to complete an RPC request.
//
// [1] To make the goroutine ID visible to Go programs, apply the following
// change to the runtime2.go file in your Go runtime installation:
//
// diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
// --- a/src/runtime/runtime2.go
// +++ b/src/runtime/runtime2.go
// @@ -392,6 +392,10 @@ type stack struct {
// hi uintptr
// }
//
// +func Goid() int64 {
// + return getg().goid
// +}
// +
// type g struct {
// // Stack parameters.
// // stack describes the actual stack memory: [stack.lo, stack.hi).
//
// The exposed runtime.Goid() function will return an int64 goroutine ID.
func goid() int64 {
return runtime.Goid()
}
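As a point of comparison, here is a self-contained sketch (not part of the commit) of approach #2 from the comment above: recovering the goroutine ID by parsing runtime.Stack output. It works on an unmodified Go runtime but is far slower than the runtime.Goid() patch, which is why gRPC does not use it.

package main

import (
	"fmt"
	"runtime"
	"strconv"
	"strings"
)

// goidFromStack parses the header of the current goroutine's stack trace,
// which has the form "goroutine 123 [running]:", and returns the ID.
func goidFromStack() int64 {
	buf := make([]byte, 64)
	buf = buf[:runtime.Stack(buf, false)]
	fields := strings.Fields(string(buf))
	if len(fields) < 2 {
		return 0
	}
	id, err := strconv.ParseInt(fields[1], 10, 64)
	if err != nil {
		return 0
	}
	return id
}

func main() {
	fmt.Println(goidFromStack()) // typically prints 1 for the main goroutine
}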


@@ -0,0 +1,29 @@
// +build !grpcgoid
/*
*
* Copyright 2019 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package profiling
// This dummy function always returns 0. In some modified dev environments,
// this may be replaced with a call to a function in a modified Go runtime that
// retrieves the goroutine ID efficiently. See goid_modified.go for a different
// version of goid that requires the grpcgoid build tag to compile.
func goid() int64 {
return 0
}


@@ -0,0 +1,221 @@
/*
*
* Copyright 2019 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// The profiling package contains two logical components: buffer.go and
// profiling.go. The former implements a circular buffer (a.k.a. ring buffer)
// in a lock-free manner using atomics. This ring buffer is used by
// profiling.go to store various statistics. For example, StreamStats is a
// circular buffer of Stat objects, each of which is composed of Timers.
//
// This abstraction is designed to accommodate more stats in the future; for
// example, if one wants to profile the load balancing layer, which is
// independent of RPC queries, a separate CircularBuffer can be used.
//
// Note that the circular buffer simply takes any interface{}. In the future,
// more types of measurements (such as the number of memory allocations) could
// be measured, which might require a different type of object being pushed
// into the circular buffer.
package profiling
import (
"sync"
"sync/atomic"
"time"
"google.golang.org/grpc/internal/profiling/buffer"
)
// 0 or 1 representing profiling off and on, respectively. Use IsEnabled and
// Enable to get and set this in a safe manner.
var profilingEnabled uint32
// IsEnabled returns whether or not profiling is enabled.
func IsEnabled() bool {
return atomic.LoadUint32(&profilingEnabled) > 0
}
// Enable turns profiling on and off.
//
// Note that it is impossible to enable profiling for one server and leave it
// turned off for another. This is intentional and by design -- if the status
// of profiling was server-specific, clients wouldn't be able to profile
// themselves. As a result, Enable turns profiling on and off for all servers
// and clients in the binary. Each stat, however, is tagged with whether it's
// a client stat or a server stat, so you can filter for the right type of
// stats in post-processing.
func Enable(enabled bool) {
if enabled {
atomic.StoreUint32(&profilingEnabled, 1)
} else {
atomic.StoreUint32(&profilingEnabled, 0)
}
}
// A Timer represents the wall-clock beginning and ending of a logical
// operation.
type Timer struct {
// Tags is a comma-separated list of strings (usually forward-slash-separated
// hierarchical strings) used to categorize a Timer.
Tags string
// Begin marks the beginning of this timer. The timezone is unspecified, but
// must use the same timezone as End; this is to shave off the small but
// non-zero time required to convert to a standard timezone such as UTC.
Begin time.Time
// End marks the end of a timer.
End time.Time
// Each Timer must be started and ended within the same goroutine; GoID
// captures this goroutine ID. The Go runtime does not typically expose this
// information, so this is set to zero in the typical case. However, a
// trivial patch to the runtime package can make this field useful. See
// goid_modified.go in this package for more details.
GoID int64
}
// NewTimer creates and returns a new Timer object. This is useful when you
// don't already have a Stat object to associate this Timer with; for example,
// before the context of a new RPC query is created, a Timer may be needed to
// measure transport-related operations.
//
// Use AppendTimer to append the returned Timer to a Stat.
func NewTimer(tags string) *Timer {
return &Timer{
Tags: tags,
Begin: time.Now(),
GoID: goid(),
}
}
// Egress sets the End field of a timer to the current time.
func (timer *Timer) Egress() {
if timer == nil {
return
}
timer.End = time.Now()
}
// A Stat is a collection of Timers that represent timing information for
// different components within this Stat. For example, a Stat may be used to
// reference the entire lifetime of an RPC request, with Timers within it
// representing different components such as encoding, compression, and
// transport.
//
// The user is expected to use the included helper functions to do operations
// on the Stat such as creating or appending a new timer. Direct operations on
// the Stat's exported fields (which are exported for encoding reasons) may
// lead to data races.
type Stat struct {
// Tags is a comma-separated list of strings used to categorize a Stat.
Tags string
// Stats may also need to store other unstructured information specific to
// this stat. For example, a StreamStat will use these bytes to encode the
// connection ID and stream ID for each RPC to uniquely identify it. The
// encoding that must be used is unspecified.
Metadata []byte
// A collection of *Timers and a mutex for append operations on the slice.
mu sync.Mutex
Timers []*Timer
}
// A power of two that's large enough to hold all timers within an average RPC
// request (defined to be a unary request) without any reallocation. A typical
// unary RPC creates 80-100 timers for various things. While this number is
// purely anecdotal and may change in the future as the resolution of profiling
// increases or decreases, it serves as a good estimate for what the initial
// allocation size should be.
const defaultStatAllocatedTimers int32 = 128
// NewStat creates and returns a new Stat object.
func NewStat(tags string) *Stat {
return &Stat{
Tags: tags,
Timers: make([]*Timer, 0, defaultStatAllocatedTimers),
}
}
// NewTimer creates a Timer object within the given stat if stat is non-nil.
// The value passed in tags will be attached to the newly created Timer.
// NewTimer also automatically sets the Begin value of the Timer to the current
// time. The user is expected to call Egress on the returned Timer to mark the
// end.
func (stat *Stat) NewTimer(tags string) *Timer {
if stat == nil {
return nil
}
timer := &Timer{
Tags: tags,
GoID: goid(),
Begin: time.Now(),
}
stat.mu.Lock()
stat.Timers = append(stat.Timers, timer)
stat.mu.Unlock()
return timer
}
// AppendTimer appends a given Timer object to the internal slice of timers. No
// copy of the timer is made; the stat retains the provided pointer, so the
// caller should not modify the timer after appending it, other than calling
// Egress to set its End time.
func (stat *Stat) AppendTimer(timer *Timer) {
if stat == nil || timer == nil {
return
}
stat.mu.Lock()
stat.Timers = append(stat.Timers, timer)
stat.mu.Unlock()
}
// statsInitialized is 0 before InitStats has been called. Changed to 1 by
// exactly one call to InitStats.
var statsInitialized int32
// Stats for the last defaultStreamStatsSize RPCs will be stored in memory.
// This can be configured by the registering server at profiling service
// initialization with google.golang.org/grpc/profiling/service.ProfilingConfig.
const defaultStreamStatsSize uint32 = 16 << 10
// StreamStats is a CircularBuffer containing data from the last N RPC calls
// served, where N is set by the user. This will contain both server stats and
// client stats (but each stat will be tagged with whether it's a server or a
// client in its Tags).
var StreamStats *buffer.CircularBuffer
// InitStats initializes all the relevant Stat objects. Only the first call in
// the lifetime of a process takes effect; subsequent calls are ignored.
func InitStats(streamStatsSize uint32) error {
var err error
if !atomic.CompareAndSwapInt32(&statsInitialized, 0, 1) {
// If initialized, do nothing.
return nil
}
if streamStatsSize == 0 {
streamStatsSize = defaultStreamStatsSize
}
StreamStats, err = buffer.NewCircularBuffer(streamStatsSize)
if err != nil {
return err
}
return nil
}
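An end-to-end sketch (not part of the commit; illustrative only, and compilable only from within the grpc module since the package is internal) of how the pieces above fit together: initialize the stats buffer, enable profiling, record a Stat with a Timer, and drain the results.

package main

import (
	"fmt"
	"time"

	"google.golang.org/grpc/internal/profiling"
)

func main() {
	// A size of 0 falls back to defaultStreamStatsSize.
	if err := profiling.InitStats(0); err != nil {
		panic(err)
	}
	profiling.Enable(true)

	// One Stat per logical operation (e.g. per RPC), pushed into StreamStats.
	stat := profiling.NewStat("/service.Method")
	profiling.StreamStats.Push(stat)

	// Timers mark the wall-clock span of sub-operations within the Stat.
	timer := stat.NewTimer("encoding")
	time.Sleep(time.Millisecond) // stand-in for real work
	timer.Egress()

	// A consumer later drains everything collected so far.
	for _, s := range profiling.StreamStats.Drain() {
		st := s.(*profiling.Stat)
		fmt.Println(st.Tags, len(st.Timers))
	}
}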


@@ -0,0 +1,147 @@
// +build !appengine
/*
*
* Copyright 2019 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package profiling
import (
"fmt"
"strconv"
"sync"
"testing"
"time"
"google.golang.org/grpc/internal/profiling/buffer"
)
func TestProfiling(t *testing.T) {
cb, err := buffer.NewCircularBuffer(128)
if err != nil {
t.Fatalf("error creating circular buffer: %v", err)
}
stat := NewStat("foo")
cb.Push(stat)
bar := func(n int) {
if n%2 == 0 {
defer stat.NewTimer(strconv.Itoa(n)).Egress()
} else {
timer := NewTimer(strconv.Itoa(n))
stat.AppendTimer(timer)
defer timer.Egress()
}
time.Sleep(1 * time.Microsecond)
}
numTimers := int(8 * defaultStatAllocatedTimers)
for i := 0; i < numTimers; i++ {
bar(i)
}
results := cb.Drain()
if len(results) != 1 {
t.Fatalf("len(results) = %d; want 1", len(results))
}
statReturned := results[0].(*Stat)
if stat.Tags != "foo" {
t.Fatalf("stat.Tags = %s; want foo", stat.Tags)
}
if len(stat.Timers) != numTimers {
t.Fatalf("len(stat.Timers) = %d; want %d", len(stat.Timers), numTimers)
}
lastIdx := 0
for i, timer := range statReturned.Timers {
// Check that they're in the order of append.
if n, err := strconv.Atoi(timer.Tags); err != nil || n != lastIdx {
t.Fatalf("stat.Timers[%d].Tags = %s; wanted %d", i, timer.Tags, lastIdx)
}
// Check that the timestamps are consistent.
if diff := timer.End.Sub(timer.Begin); diff.Nanoseconds() < 1000 {
t.Fatalf("stat.Timers[%d].End - stat.Timers[%d].Begin = %v; want >= 1000ns", i, i, diff)
}
lastIdx++
}
}
func TestProfilingRace(t *testing.T) {
stat := NewStat("foo")
var wg sync.WaitGroup
numTimers := int(8 * defaultStatAllocatedTimers) // also tests the slice growth code path
wg.Add(numTimers)
for i := 0; i < numTimers; i++ {
go func(n int) {
defer wg.Done()
if n%2 == 0 {
defer stat.NewTimer(strconv.Itoa(n)).Egress()
} else {
timer := NewTimer(strconv.Itoa(n))
stat.AppendTimer(timer)
defer timer.Egress()
}
}(i)
}
wg.Wait()
if len(stat.Timers) != numTimers {
t.Fatalf("len(stat.Timers) = %d; want %d", len(stat.Timers), numTimers)
}
// The timers need not be ordered, so we can't expect them to be consecutive
// like above.
seen := make(map[int]bool)
for i, timer := range stat.Timers {
n, err := strconv.Atoi(timer.Tags)
if err != nil {
t.Fatalf("stat.Timers[%d].Tags = %s; wanted integer", i, timer.Tags)
}
seen[n] = true
}
for i := 0; i < numTimers; i++ {
if _, ok := seen[i]; !ok {
t.Fatalf("seen[%d] = false or does not exist; want it to be true", i)
}
}
}
func BenchmarkProfiling(b *testing.B) {
for routines := 1; routines <= 1<<8; routines <<= 1 {
b.Run(fmt.Sprintf("goroutines:%d", routines), func(b *testing.B) {
perRoutine := b.N / routines
stat := NewStat("foo")
var wg sync.WaitGroup
wg.Add(routines)
for r := 0; r < routines; r++ {
go func() {
for i := 0; i < perRoutine; i++ {
stat.NewTimer("bar").Egress()
}
wg.Done()
}()
}
wg.Wait()
})
}
}


@@ -45,6 +45,11 @@ import (
"google.golang.org/grpc/status" "google.golang.org/grpc/status"
) )
// clientConnectionCounter counts the number of connections a client has
// initiated (equal to the number of http2Clients created). Must be accessed
// atomically.
var clientConnectionCounter uint64
// http2Client implements the ClientTransport interface with HTTP2. // http2Client implements the ClientTransport interface with HTTP2.
type http2Client struct { type http2Client struct {
lastRead int64 // Keep this field 64-bit aligned. Accessed atomically. lastRead int64 // Keep this field 64-bit aligned. Accessed atomically.
@ -126,6 +131,8 @@ type http2Client struct {
onClose func() onClose func()
bufferPool *bufferPool bufferPool *bufferPool
connectionID uint64
} }
func dial(ctx context.Context, fn func(context.Context, string) (net.Conn, error), addr string) (net.Conn, error) { func dial(ctx context.Context, fn func(context.Context, string) (net.Conn, error), addr string) (net.Conn, error) {
@ -329,6 +336,8 @@ func newHTTP2Client(connectCtx, ctx context.Context, addr TargetInfo, opts Conne
} }
} }
t.connectionID = atomic.AddUint64(&clientConnectionCounter, 1)
if err := t.framer.writer.Flush(); err != nil { if err := t.framer.writer.Flush(); err != nil {
return nil, err return nil, err
} }


@@ -62,6 +62,10 @@ var (
statusRawProto = internal.StatusRawProto.(func(*status.Status) *spb.Status)
)
// serverConnectionCounter counts the number of connections a server has seen
// (equal to the number of http2Servers created). Must be accessed atomically.
var serverConnectionCounter uint64
// http2Server implements the ServerTransport interface with HTTP2.
type http2Server struct {
lastRead int64 // Keep this field 64-bit aligned. Accessed atomically.
@@ -121,6 +125,8 @@ type http2Server struct {
channelzID int64 // channelz unique identification number
czData *channelzData
bufferPool *bufferPool
connectionID uint64
}
// newHTTP2Server constructs a ServerTransport based on HTTP2. ConnectionError is
@@ -250,6 +256,9 @@ func newHTTP2Server(conn net.Conn, config *ServerConfig) (_ ServerTransport, err
if channelz.IsOn() {
t.channelzID = channelz.RegisterNormalSocket(t, config.ChannelzParentID, fmt.Sprintf("%s -> %s", t.remoteAddr, t.localAddr))
}
t.connectionID = atomic.AddUint64(&serverConnectionCounter, 1)
t.framer.writer.Flush()
defer func() {
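Finally, a small sketch (not part of the diff; names are illustrative) of the connection-ID pattern the two transport hunks above introduce: every new transport atomically increments a shared package-level counter and keeps the returned value as its unique ID, so IDs start at 1 and never repeat within a process.

package main

import (
	"fmt"
	"sync/atomic"
)

// connectionCounter plays the role of clientConnectionCounter and
// serverConnectionCounter above; it must only be accessed atomically.
var connectionCounter uint64

// newConnectionID atomically reserves the next process-wide connection ID.
func newConnectionID() uint64 {
	return atomic.AddUint64(&connectionCounter, 1)
}

func main() {
	fmt.Println(newConnectionID(), newConnectionID()) // prints: 1 2
}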