mirror of https://github.com/knative/caching.git
233 lines
6.1 KiB
Go
233 lines
6.1 KiB
Go
// Copyright 2019, OpenCensus Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package stackdriver
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
monitoring "cloud.google.com/go/monitoring/apiv3"
|
|
monitoringpb "google.golang.org/genproto/googleapis/monitoring/v3"
|
|
)
|
|
|
|
const (
	// minNumWorkers is the floor applied to the number of worker goroutines
	// that newMetricsBatcher starts.
	minNumWorkers = 1

	// minReqsChanSize is the floor applied to the buffer size of the request
	// channel created by newMetricsBatcher.
	minReqsChanSize = 5
)
|
|
|
|
type metricsBatcher struct {
|
|
projectName string
|
|
allTss []*monitoringpb.TimeSeries
|
|
allErrs []error
|
|
|
|
// Counts all dropped TimeSeries by this metricsBatcher.
|
|
droppedTimeSeries int
|
|
|
|
workers []*worker
|
|
// reqsChan, respsChan and wg are shared between metricsBatcher and worker goroutines.
|
|
reqsChan chan *monitoringpb.CreateTimeSeriesRequest
|
|
respsChan chan *response
|
|
wg *sync.WaitGroup
|
|
}
|
|
|
|
func newMetricsBatcher(ctx context.Context, projectID string, numWorkers int, mc *monitoring.MetricClient, timeout time.Duration) *metricsBatcher {
|
|
if numWorkers < minNumWorkers {
|
|
numWorkers = minNumWorkers
|
|
}
|
|
workers := make([]*worker, 0, numWorkers)
|
|
reqsChanSize := numWorkers
|
|
if reqsChanSize < minReqsChanSize {
|
|
reqsChanSize = minReqsChanSize
|
|
}
|
|
reqsChan := make(chan *monitoringpb.CreateTimeSeriesRequest, reqsChanSize)
|
|
respsChan := make(chan *response, numWorkers)
|
|
var wg sync.WaitGroup
|
|
wg.Add(numWorkers)
|
|
for i := 0; i < numWorkers; i++ {
|
|
w := newWorker(ctx, mc, reqsChan, respsChan, &wg, timeout)
|
|
workers = append(workers, w)
|
|
go w.start()
|
|
}
|
|
return &metricsBatcher{
|
|
projectName: fmt.Sprintf("projects/%s", projectID),
|
|
allTss: make([]*monitoringpb.TimeSeries, 0, maxTimeSeriesPerUpload),
|
|
droppedTimeSeries: 0,
|
|
workers: workers,
|
|
wg: &wg,
|
|
reqsChan: reqsChan,
|
|
respsChan: respsChan,
|
|
}
|
|
}
|
|
|
|
func (mb *metricsBatcher) recordDroppedTimeseries(numTimeSeries int, errs ...error) {
|
|
mb.droppedTimeSeries += numTimeSeries
|
|
for _, err := range errs {
|
|
if err != nil {
|
|
mb.allErrs = append(mb.allErrs, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (mb *metricsBatcher) addTimeSeries(ts *monitoringpb.TimeSeries) {
|
|
mb.allTss = append(mb.allTss, ts)
|
|
if len(mb.allTss) == maxTimeSeriesPerUpload {
|
|
mb.sendReqToChan()
|
|
mb.allTss = make([]*monitoringpb.TimeSeries, 0, maxTimeSeriesPerUpload)
|
|
}
|
|
}
|
|
|
|
func (mb *metricsBatcher) close(ctx context.Context) error {
|
|
// Send any remaining time series, must be <200
|
|
if len(mb.allTss) > 0 {
|
|
mb.sendReqToChan()
|
|
}
|
|
|
|
close(mb.reqsChan)
|
|
mb.wg.Wait()
|
|
for i := 0; i < len(mb.workers); i++ {
|
|
resp := <-mb.respsChan
|
|
mb.recordDroppedTimeseries(resp.droppedTimeSeries, resp.errs...)
|
|
}
|
|
close(mb.respsChan)
|
|
|
|
numErrors := len(mb.allErrs)
|
|
if numErrors == 0 {
|
|
return nil
|
|
}
|
|
|
|
if numErrors == 1 {
|
|
return mb.allErrs[0]
|
|
}
|
|
|
|
errMsgs := make([]string, 0, numErrors)
|
|
for _, err := range mb.allErrs {
|
|
errMsgs = append(errMsgs, err.Error())
|
|
}
|
|
return fmt.Errorf("[%s]", strings.Join(errMsgs, "; "))
|
|
}
|
|
|
|
// sendReqToChan grabs all the timeseies in this metricsBatcher, puts them
|
|
// to a CreateTimeSeriesRequest and sends the request to reqsChan.
|
|
func (mb *metricsBatcher) sendReqToChan() {
|
|
req := &monitoringpb.CreateTimeSeriesRequest{
|
|
Name: mb.projectName,
|
|
TimeSeries: mb.allTss,
|
|
}
|
|
mb.reqsChan <- req
|
|
}
|
|
|
|
// timeSeriesErrRegex pulls the comma-separated index ranges out of error
// strings shaped like ": timeSeries[min-max,...]". The "-max" part of each
// range is optional, and the whole list is captured as submatch 1.
var timeSeriesErrRegex = regexp.MustCompile(
	`: timeSeries\[([0-9]+(?:-[0-9]+)?(?:,[0-9]+(?:-[0-9]+)?)*)\]`,
)
|
|
|
|
// sendReq sends create time series requests to Stackdriver,
|
|
// and returns the count of dropped time series and error.
|
|
func sendReq(ctx context.Context, c *monitoring.MetricClient, req *monitoringpb.CreateTimeSeriesRequest) (int, error) {
|
|
// c == nil only happens in unit tests where we don't make real calls to Stackdriver server
|
|
if c == nil {
|
|
return 0, nil
|
|
}
|
|
|
|
err := createTimeSeries(ctx, c, req)
|
|
if err == nil {
|
|
return 0, nil
|
|
}
|
|
|
|
droppedTimeSeriesRangeMatches := timeSeriesErrRegex.FindAllStringSubmatch(err.Error(), -1)
|
|
if !strings.HasPrefix(err.Error(), "One or more TimeSeries could not be written:") || len(droppedTimeSeriesRangeMatches) == 0 {
|
|
return len(req.TimeSeries), err
|
|
}
|
|
|
|
dropped := 0
|
|
for _, submatches := range droppedTimeSeriesRangeMatches {
|
|
for i := 1; i < len(submatches); i++ {
|
|
for _, rng := range strings.Split(submatches[i], ",") {
|
|
rngSlice := strings.Split(rng, "-")
|
|
|
|
// strconv errors not possible due to regex above
|
|
min, _ := strconv.Atoi(rngSlice[0])
|
|
max := min
|
|
if len(rngSlice) > 1 {
|
|
max, _ = strconv.Atoi(rngSlice[1])
|
|
}
|
|
|
|
dropped += max - min + 1
|
|
}
|
|
}
|
|
}
|
|
return dropped, err
|
|
}
|
|
|
|
type worker struct {
|
|
ctx context.Context
|
|
timeout time.Duration
|
|
mc *monitoring.MetricClient
|
|
|
|
resp *response
|
|
|
|
respsChan chan *response
|
|
reqsChan chan *monitoringpb.CreateTimeSeriesRequest
|
|
|
|
wg *sync.WaitGroup
|
|
}
|
|
|
|
func newWorker(
|
|
ctx context.Context,
|
|
mc *monitoring.MetricClient,
|
|
reqsChan chan *monitoringpb.CreateTimeSeriesRequest,
|
|
respsChan chan *response,
|
|
wg *sync.WaitGroup,
|
|
timeout time.Duration) *worker {
|
|
return &worker{
|
|
ctx: ctx,
|
|
mc: mc,
|
|
resp: &response{},
|
|
reqsChan: reqsChan,
|
|
respsChan: respsChan,
|
|
wg: wg,
|
|
}
|
|
}
|
|
|
|
func (w *worker) start() {
|
|
for req := range w.reqsChan {
|
|
w.sendReqWithTimeout(req)
|
|
}
|
|
w.respsChan <- w.resp
|
|
w.wg.Done()
|
|
}
|
|
|
|
func (w *worker) sendReqWithTimeout(req *monitoringpb.CreateTimeSeriesRequest) {
|
|
ctx, cancel := newContextWithTimeout(w.ctx, w.timeout)
|
|
defer cancel()
|
|
|
|
w.recordDroppedTimeseries(sendReq(ctx, w.mc, req))
|
|
}
|
|
|
|
func (w *worker) recordDroppedTimeseries(numTimeSeries int, err error) {
|
|
w.resp.droppedTimeSeries += numTimeSeries
|
|
if err != nil {
|
|
w.resp.errs = append(w.resp.errs, err)
|
|
}
|
|
}
|
|
|
|
// response is the result a worker accumulates over all the requests it
// handled and delivers on respsChan when it finishes.
type response struct {
	// droppedTimeSeries is the total number of time series reported dropped.
	droppedTimeSeries int
	// errs holds the non-nil errors returned while sending requests.
	errs []error
}
|