caching/vendor/contrib.go.opencensus.io/exporter/stackdriver/metrics_batcher.go

233 lines
6.1 KiB
Go

// Copyright 2019, OpenCensus Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stackdriver
import (
"context"
"fmt"
"regexp"
"strconv"
"strings"
"sync"
"time"
monitoring "cloud.google.com/go/monitoring/apiv3"
monitoringpb "google.golang.org/genproto/googleapis/monitoring/v3"
)
const (
	// minNumWorkers is the lower bound on the number of sender goroutines.
	minNumWorkers = 1
	// minReqsChanSize is the lower bound on the buffered capacity of reqsChan.
	minReqsChanSize = 5
)
// metricsBatcher accumulates TimeSeries, groups them into
// CreateTimeSeriesRequests of at most maxTimeSeriesPerUpload entries,
// and fans the requests out to a pool of worker goroutines.
// It is not safe for concurrent use by multiple callers.
type metricsBatcher struct {
	// projectName is the fully-qualified "projects/<id>" resource name
	// used as the Name of every CreateTimeSeriesRequest.
	projectName string
	// allTss buffers TimeSeries until a full batch is ready to send.
	allTss []*monitoringpb.TimeSeries
	// allErrs collects every error reported by workers and by close().
	allErrs []error
	// Counts all dropped TimeSeries by this metricsBatcher.
	droppedTimeSeries int
	workers []*worker
	// reqsChan, respsChan and wg are shared between metricsBatcher and worker goroutines.
	reqsChan chan *monitoringpb.CreateTimeSeriesRequest
	respsChan chan *response
	wg *sync.WaitGroup
}
// newMetricsBatcher builds a metricsBatcher for the given project and
// starts numWorkers (at least minNumWorkers) sender goroutines, each
// bounded by timeout per request.
func newMetricsBatcher(ctx context.Context, projectID string, numWorkers int, mc *monitoring.MetricClient, timeout time.Duration) *metricsBatcher {
	// Clamp worker count and request-channel capacity to their minimums.
	if numWorkers < minNumWorkers {
		numWorkers = minNumWorkers
	}
	chanSize := numWorkers
	if chanSize < minReqsChanSize {
		chanSize = minReqsChanSize
	}

	reqs := make(chan *monitoringpb.CreateTimeSeriesRequest, chanSize)
	resps := make(chan *response, numWorkers)
	var wg sync.WaitGroup
	wg.Add(numWorkers)

	pool := make([]*worker, numWorkers)
	for i := range pool {
		pool[i] = newWorker(ctx, mc, reqs, resps, &wg, timeout)
		go pool[i].start()
	}

	return &metricsBatcher{
		projectName:       fmt.Sprintf("projects/%s", projectID),
		allTss:            make([]*monitoringpb.TimeSeries, 0, maxTimeSeriesPerUpload),
		droppedTimeSeries: 0,
		workers:           pool,
		wg:                &wg,
		reqsChan:          reqs,
		respsChan:         resps,
	}
}
// recordDroppedTimeseries bumps the dropped-series counter and appends
// every non-nil error in errs to the batcher's error list.
func (mb *metricsBatcher) recordDroppedTimeseries(numTimeSeries int, errs ...error) {
	mb.droppedTimeSeries += numTimeSeries
	for _, e := range errs {
		if e == nil {
			continue
		}
		mb.allErrs = append(mb.allErrs, e)
	}
}
// addTimeSeries buffers ts and, once a full batch of
// maxTimeSeriesPerUpload entries has accumulated, ships it to the
// workers and starts a fresh buffer.
func (mb *metricsBatcher) addTimeSeries(ts *monitoringpb.TimeSeries) {
	mb.allTss = append(mb.allTss, ts)
	if len(mb.allTss) != maxTimeSeriesPerUpload {
		return
	}
	mb.sendReqToChan()
	mb.allTss = make([]*monitoringpb.TimeSeries, 0, maxTimeSeriesPerUpload)
}
// close flushes any buffered TimeSeries, shuts down the worker pool,
// gathers each worker's result, and returns the accumulated errors:
// nil if there were none, the error itself if there was exactly one,
// or a single error joining all messages otherwise.
func (mb *metricsBatcher) close(ctx context.Context) error {
	// Flush the partial batch, if any (always < maxTimeSeriesPerUpload here).
	if len(mb.allTss) > 0 {
		mb.sendReqToChan()
	}

	// Closing reqsChan makes every worker drain and exit its loop.
	close(mb.reqsChan)
	mb.wg.Wait()

	// Each worker sends exactly one response before calling wg.Done.
	for range mb.workers {
		r := <-mb.respsChan
		mb.recordDroppedTimeseries(r.droppedTimeSeries, r.errs...)
	}
	close(mb.respsChan)

	switch n := len(mb.allErrs); n {
	case 0:
		return nil
	case 1:
		return mb.allErrs[0]
	default:
		msgs := make([]string, 0, n)
		for _, e := range mb.allErrs {
			msgs = append(msgs, e.Error())
		}
		return fmt.Errorf("[%s]", strings.Join(msgs, "; "))
	}
}
// sendReqToChan wraps all TimeSeries currently buffered in this
// metricsBatcher into a CreateTimeSeriesRequest and pushes the request
// onto reqsChan for a worker to pick up.
func (mb *metricsBatcher) sendReqToChan() {
	mb.reqsChan <- &monitoringpb.CreateTimeSeriesRequest{
		Name:       mb.projectName,
		TimeSeries: mb.allTss,
	}
}
// regex to extract min-max ranges from error response strings in the format "timeSeries[(min-max,...)] ..." (max is optional)
// Capture group 1 holds the comma-separated list of indices/ranges, e.g. "0-3,7,9-10".
var timeSeriesErrRegex = regexp.MustCompile(`: timeSeries\[([0-9]+(?:-[0-9]+)?(?:,[0-9]+(?:-[0-9]+)?)*)\]`)
// sendReq sends create time series requests to Stackdriver,
// and returns the count of dropped time series and error.
// On a partial failure whose message matches timeSeriesErrRegex, only
// the indices named in the message are counted as dropped; on any other
// error the whole request is counted as dropped.
func sendReq(ctx context.Context, c *monitoring.MetricClient, req *monitoringpb.CreateTimeSeriesRequest) (int, error) {
	// A nil client only occurs in unit tests that avoid real Stackdriver calls.
	if c == nil {
		return 0, nil
	}

	err := createTimeSeries(ctx, c, req)
	if err == nil {
		return 0, nil
	}

	msg := err.Error()
	matches := timeSeriesErrRegex.FindAllStringSubmatch(msg, -1)
	if !strings.HasPrefix(msg, "One or more TimeSeries could not be written:") || len(matches) == 0 {
		// Unrecognized error shape: assume the entire batch was rejected.
		return len(req.TimeSeries), err
	}

	dropped := 0
	for _, m := range matches {
		for _, group := range m[1:] {
			for _, rng := range strings.Split(group, ",") {
				// The regex guarantees "N" or "N-M" with valid digits,
				// so Atoi cannot fail here.
				bounds := strings.Split(rng, "-")
				lo, _ := strconv.Atoi(bounds[0])
				hi := lo
				if len(bounds) == 2 {
					hi, _ = strconv.Atoi(bounds[1])
				}
				dropped += hi - lo + 1
			}
		}
	}
	return dropped, err
}
// worker consumes CreateTimeSeriesRequests from reqsChan, sends each to
// Stackdriver with a per-request timeout, and accumulates the outcome
// into resp, which it delivers on respsChan when reqsChan is closed.
type worker struct {
	// ctx is the parent context for every request this worker sends.
	ctx context.Context
	// timeout bounds each individual CreateTimeSeries call.
	timeout time.Duration
	mc *monitoring.MetricClient
	// resp accumulates this worker's dropped-series count and errors.
	resp *response
	respsChan chan *response
	reqsChan chan *monitoringpb.CreateTimeSeriesRequest
	wg *sync.WaitGroup
}
// newWorker constructs a worker that reads requests from reqsChan,
// applies timeout to each Stackdriver call, reports its aggregate
// result on respsChan, and marks wg done when it exits.
//
// Bug fix: the timeout parameter was previously discarded — the
// worker's timeout field was never assigned, so sendReqWithTimeout
// always built its context with the zero Duration. It is now stored.
func newWorker(
	ctx context.Context,
	mc *monitoring.MetricClient,
	reqsChan chan *monitoringpb.CreateTimeSeriesRequest,
	respsChan chan *response,
	wg *sync.WaitGroup,
	timeout time.Duration) *worker {
	return &worker{
		ctx:       ctx,
		mc:        mc,
		resp:      &response{},
		reqsChan:  reqsChan,
		respsChan: respsChan,
		wg:        wg,
		timeout:   timeout,
	}
}
// start runs the worker loop: it handles requests until reqsChan is
// closed, then publishes its accumulated response and signals wg.
func (w *worker) start() {
	defer w.wg.Done()
	for req := range w.reqsChan {
		w.sendReqWithTimeout(req)
	}
	// respsChan is buffered to hold one response per worker, so this
	// send cannot block; Done fires afterwards via the deferred call.
	w.respsChan <- w.resp
}
// sendReqWithTimeout sends one request under a context bounded by the
// worker's timeout and folds the outcome into the worker's response.
func (w *worker) sendReqWithTimeout(req *monitoringpb.CreateTimeSeriesRequest) {
	ctx, cancel := newContextWithTimeout(w.ctx, w.timeout)
	defer cancel()
	dropped, err := sendReq(ctx, w.mc, req)
	w.recordDroppedTimeseries(dropped, err)
}
// recordDroppedTimeseries adds numTimeSeries to this worker's dropped
// count and records err when it is non-nil.
func (w *worker) recordDroppedTimeseries(numTimeSeries int, err error) {
	w.resp.droppedTimeSeries += numTimeSeries
	if err == nil {
		return
	}
	w.resp.errs = append(w.resp.errs, err)
}
// response is a worker's aggregate result, handed back to the
// metricsBatcher over respsChan during close().
type response struct {
	// droppedTimeSeries counts TimeSeries this worker failed to write.
	droppedTimeSeries int
	// errs holds every error this worker encountered while sending.
	errs []error
}