// Copyright 2019, OpenCensus Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package stackdriver import ( "context" "fmt" "regexp" "strconv" "strings" "sync" "time" monitoring "cloud.google.com/go/monitoring/apiv3" monitoringpb "google.golang.org/genproto/googleapis/monitoring/v3" ) const ( minNumWorkers = 1 minReqsChanSize = 5 ) type metricsBatcher struct { projectName string allTss []*monitoringpb.TimeSeries allErrs []error // Counts all dropped TimeSeries by this metricsBatcher. droppedTimeSeries int workers []*worker // reqsChan, respsChan and wg are shared between metricsBatcher and worker goroutines. reqsChan chan *monitoringpb.CreateTimeSeriesRequest respsChan chan *response wg *sync.WaitGroup } func newMetricsBatcher(ctx context.Context, projectID string, numWorkers int, mc *monitoring.MetricClient, timeout time.Duration) *metricsBatcher { if numWorkers < minNumWorkers { numWorkers = minNumWorkers } workers := make([]*worker, 0, numWorkers) reqsChanSize := numWorkers if reqsChanSize < minReqsChanSize { reqsChanSize = minReqsChanSize } reqsChan := make(chan *monitoringpb.CreateTimeSeriesRequest, reqsChanSize) respsChan := make(chan *response, numWorkers) var wg sync.WaitGroup wg.Add(numWorkers) for i := 0; i < numWorkers; i++ { w := newWorker(ctx, mc, reqsChan, respsChan, &wg, timeout) workers = append(workers, w) go w.start() } return &metricsBatcher{ projectName: fmt.Sprintf("projects/%s", projectID), allTss: make([]*monitoringpb.TimeSeries, 0, maxTimeSeriesPerUpload), droppedTimeSeries: 0, workers: workers, wg: &wg, reqsChan: reqsChan, respsChan: respsChan, } } func (mb *metricsBatcher) recordDroppedTimeseries(numTimeSeries int, errs ...error) { mb.droppedTimeSeries += numTimeSeries for _, err := range errs { if err != nil { mb.allErrs = append(mb.allErrs, err) } } } func (mb *metricsBatcher) addTimeSeries(ts *monitoringpb.TimeSeries) { mb.allTss = append(mb.allTss, ts) if len(mb.allTss) == maxTimeSeriesPerUpload { mb.sendReqToChan() mb.allTss = make([]*monitoringpb.TimeSeries, 0, maxTimeSeriesPerUpload) } } func (mb *metricsBatcher) close(ctx context.Context) error { // Send any remaining time series, must be <200 if len(mb.allTss) > 0 { mb.sendReqToChan() } close(mb.reqsChan) mb.wg.Wait() for i := 0; i < len(mb.workers); i++ { resp := <-mb.respsChan mb.recordDroppedTimeseries(resp.droppedTimeSeries, resp.errs...) } close(mb.respsChan) numErrors := len(mb.allErrs) if numErrors == 0 { return nil } if numErrors == 1 { return mb.allErrs[0] } errMsgs := make([]string, 0, numErrors) for _, err := range mb.allErrs { errMsgs = append(errMsgs, err.Error()) } return fmt.Errorf("[%s]", strings.Join(errMsgs, "; ")) } // sendReqToChan grabs all the timeseies in this metricsBatcher, puts them // to a CreateTimeSeriesRequest and sends the request to reqsChan. func (mb *metricsBatcher) sendReqToChan() { req := &monitoringpb.CreateTimeSeriesRequest{ Name: mb.projectName, TimeSeries: mb.allTss, } mb.reqsChan <- req } // regex to extract min-max ranges from error response strings in the format "timeSeries[(min-max,...)] ..." (max is optional) var timeSeriesErrRegex = regexp.MustCompile(`: timeSeries\[([0-9]+(?:-[0-9]+)?(?:,[0-9]+(?:-[0-9]+)?)*)\]`) // sendReq sends create time series requests to Stackdriver, // and returns the count of dropped time series and error. func sendReq(ctx context.Context, c *monitoring.MetricClient, req *monitoringpb.CreateTimeSeriesRequest) (int, error) { // c == nil only happens in unit tests where we don't make real calls to Stackdriver server if c == nil { return 0, nil } err := createTimeSeries(ctx, c, req) if err == nil { return 0, nil } droppedTimeSeriesRangeMatches := timeSeriesErrRegex.FindAllStringSubmatch(err.Error(), -1) if !strings.HasPrefix(err.Error(), "One or more TimeSeries could not be written:") || len(droppedTimeSeriesRangeMatches) == 0 { return len(req.TimeSeries), err } dropped := 0 for _, submatches := range droppedTimeSeriesRangeMatches { for i := 1; i < len(submatches); i++ { for _, rng := range strings.Split(submatches[i], ",") { rngSlice := strings.Split(rng, "-") // strconv errors not possible due to regex above min, _ := strconv.Atoi(rngSlice[0]) max := min if len(rngSlice) > 1 { max, _ = strconv.Atoi(rngSlice[1]) } dropped += max - min + 1 } } } return dropped, err } type worker struct { ctx context.Context timeout time.Duration mc *monitoring.MetricClient resp *response respsChan chan *response reqsChan chan *monitoringpb.CreateTimeSeriesRequest wg *sync.WaitGroup } func newWorker( ctx context.Context, mc *monitoring.MetricClient, reqsChan chan *monitoringpb.CreateTimeSeriesRequest, respsChan chan *response, wg *sync.WaitGroup, timeout time.Duration) *worker { return &worker{ ctx: ctx, mc: mc, resp: &response{}, reqsChan: reqsChan, respsChan: respsChan, wg: wg, } } func (w *worker) start() { for req := range w.reqsChan { w.sendReqWithTimeout(req) } w.respsChan <- w.resp w.wg.Done() } func (w *worker) sendReqWithTimeout(req *monitoringpb.CreateTimeSeriesRequest) { ctx, cancel := newContextWithTimeout(w.ctx, w.timeout) defer cancel() w.recordDroppedTimeseries(sendReq(ctx, w.mc, req)) } func (w *worker) recordDroppedTimeseries(numTimeSeries int, err error) { w.resp.droppedTimeSeries += numTimeSeries if err != nil { w.resp.errs = append(w.resp.errs, err) } } type response struct { droppedTimeSeries int errs []error }