ocsp/responder: fix goroutine leak in multiSource (#6116)

In multiSource, if the primary source returned before the secondary source, we would leak the goroutine waiting on results from the secondary source. This was because that goroutine was writing to a channel that no longer had any readers. Fix that by making the channel in getResponse buffered, so getResponse can always write to its channel, regardless of whether there are readers. I've confirmed that the unittest fails when run without the fix.
2022-05-16 11:41:58 -07:00 · 2022-05-16 11:41:58 -07:00 · 81f724ba53
parent 8e156f4dd9
commit 81f724ba53
2 changed files with 40 additions and 1 deletions
--- a/ocsp/responder/multi_source.go
+++ b/ocsp/responder/multi_source.go
@ -109,7 +109,10 @@ type responseResult struct {
 // getResponse provides a thin wrapper around an underlying Source's Response
 // method, calling it in a goroutine and passing the result back on a channel.
 func getResponse(ctx context.Context, src Source, req *ocsp.Request) chan responseResult {
-	responseChan := make(chan responseResult)
+	// Use a buffer so the following goroutine can exit as soon as it's done,
+	// rather than blocking on a reader (which would introduce a risk that the
+	// nother ever reads, leaking the goroutine).
+	responseChan := make(chan responseResult, 1)

 	go func() {
 		defer close(responseChan)
--- a/ocsp/responder/multi_source_test.go
+++ b/ocsp/responder/multi_source_test.go
@ -3,7 +3,9 @@ package responder
 import (
 	"context"
 	"errors"
+	"runtime"
 	"testing"
+	"time"

 	blog "github.com/letsencrypt/boulder/log"
 	"github.com/letsencrypt/boulder/metrics"
@ -30,6 +32,40 @@ func (src *failSource) Response(context.Context, *ocsp.Request) (*Response, erro
 	return nil, errors.New("failure")
 }

+// timeoutSource is a Source that will not return until its chan is closed.
+type timeoutSource struct {
+	ch <-chan struct{}
+}
+
+func (src *timeoutSource) Response(context.Context, *ocsp.Request) (*Response, error) {
+	<-src.ch
+	return nil, errors.New("failure")
+}
+
+func TestSecondaryTimeout(t *testing.T) {
+	ch := make(chan struct{})
+	src, err := NewMultiSource(&succeedSource{}, &timeoutSource{ch: ch}, metrics.NoopRegisterer, blog.NewMock())
+	test.AssertNotError(t, err, "failed to create multiSource")
+
+	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
+	defer cancel()
+
+	starting_goroutines := runtime.NumGoroutine()
+
+	for i := 0; i < 1000; i++ {
+		_, err = src.Response(ctx, &ocsp.Request{})
+		test.AssertNotError(t, err, "unexpected error")
+	}
+
+	close(ch)
+	// Wait for the goroutines to exit
+	time.Sleep(40 * time.Millisecond)
+	goroutine_diff := runtime.NumGoroutine() - starting_goroutines
+	if goroutine_diff > 0 {
+		t.Fatalf("expected no lingering goroutines. found %d", goroutine_diff)
+	}
+}
+
 func TestBothGood(t *testing.T) {
 	src, err := NewMultiSource(&succeedSource{}, &succeedSource{}, metrics.NoopRegisterer, blog.NewMock())
 	test.AssertNotError(t, err, "failed to create multiSource")