From ec2d624ac9eebaeacb5ff3be128b3d6e0c4f9fec Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Wed, 23 Apr 2025 11:23:29 +0530 Subject: [PATCH] xds: generic xds client resource watching e2e (#8183) --- xds/internal/clients/config.go | 10 + .../clients/grpctransport/examples_test.go | 39 + .../clients/grpctransport/grpc_transport.go | 104 +- .../grpctransport/grpc_transport_ext_test.go | 316 ++++ .../grpctransport/grpc_transport_test.go | 101 +- xds/internal/clients/internal/internal.go | 13 - .../clients/internal/internal_test.go | 2 - .../clients/internal/testutils/channel.go | 28 + .../internal/testutils/e2e/clientresources.go | 9 - .../clients/internal/testutils/wrappers.go | 74 + xds/internal/clients/transport_builder.go | 2 +- xds/internal/clients/xdsclient/authority.go | 876 +++++++++++ .../clients/xdsclient/channel_test.go | 16 +- .../clients/xdsclient/clientimpl_watchers.go | 102 ++ .../clients/xdsclient/helpers_test.go | 9 - xds/internal/clients/xdsclient/logging.go | 36 + .../clients/xdsclient/test/authority_test.go | 335 ++++ .../clients/xdsclient/test/dump_test.go | 478 ++++++ .../clients/xdsclient/test/helpers_test.go | 264 ++++ .../xdsclient/test/lds_watchers_test.go | 1385 +++++++++++++++++ .../xdsclient/test/misc_watchers_test.go | 530 +++++++ xds/internal/clients/xdsclient/xdsclient.go | 398 ++++- .../clients/xdsclient/xdsclient_test.go | 124 ++ xds/internal/clients/xdsclient/xdsconfig.go | 4 + 24 files changed, 5146 insertions(+), 109 deletions(-) create mode 100644 xds/internal/clients/grpctransport/examples_test.go create mode 100644 xds/internal/clients/grpctransport/grpc_transport_ext_test.go create mode 100644 xds/internal/clients/internal/testutils/wrappers.go create mode 100644 xds/internal/clients/xdsclient/authority.go create mode 100644 xds/internal/clients/xdsclient/clientimpl_watchers.go create mode 100644 xds/internal/clients/xdsclient/logging.go create mode 100644 xds/internal/clients/xdsclient/test/authority_test.go create mode 100644 xds/internal/clients/xdsclient/test/dump_test.go create mode 100644 xds/internal/clients/xdsclient/test/helpers_test.go create mode 100644 xds/internal/clients/xdsclient/test/lds_watchers_test.go create mode 100644 xds/internal/clients/xdsclient/test/misc_watchers_test.go create mode 100644 xds/internal/clients/xdsclient/xdsclient_test.go diff --git a/xds/internal/clients/config.go b/xds/internal/clients/config.go index a2220f995..0ededfae7 100644 --- a/xds/internal/clients/config.go +++ b/xds/internal/clients/config.go @@ -59,6 +59,16 @@ type ServerIdentifier struct { // // For example, a custom TransportBuilder might use this field to // configure a specific security credentials. + // + // Extensions may be any type that is comparable, as they are used as map + // keys internally. If Extensions are not able to be used as a map key, + // the client may panic. + // + // See: https://go.dev/ref/spec#Comparison_operators + // + // Any equivalent extensions in all ServerIdentifiers present in a single + // client's configuration should have the same value. Not following this + // restriction may result in excess resource usage. Extensions any } diff --git a/xds/internal/clients/grpctransport/examples_test.go b/xds/internal/clients/grpctransport/examples_test.go new file mode 100644 index 000000000..40b0cb271 --- /dev/null +++ b/xds/internal/clients/grpctransport/examples_test.go @@ -0,0 +1,39 @@ +/* + * + * Copyright 2025 gRPC authors. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package grpctransport_test
+
+import (
+	"fmt"
+
+	"google.golang.org/grpc/xds/internal/clients"
+	"google.golang.org/grpc/xds/internal/clients/grpctransport"
+)
+
+// ExampleServerIdentifierExtension demonstrates how to create a
+// clients.ServerIdentifier with grpctransport.ServerIdentifierExtension as
+// its Extensions field.
+//
+// This example creates a clients.ServerIdentifier to connect to the server at
+// localhost:5678 using the credentials named "local". Note that "local" must
+// exist as an entry in the credentials map provided to grpctransport.Builder.
+func ExampleServerIdentifierExtension() {
+	// Note the Extensions field is set by value and not by pointer.
+	fmt.Printf("%+v", clients.ServerIdentifier{ServerURI: "localhost:5678", Extensions: grpctransport.ServerIdentifierExtension{Credentials: "local"}})
+	// Output: {ServerURI:localhost:5678 Extensions:{Credentials:local}}
+}
diff --git a/xds/internal/clients/grpctransport/grpc_transport.go b/xds/internal/clients/grpctransport/grpc_transport.go
index 9040be3cd..e13c982a5 100644
--- a/xds/internal/clients/grpctransport/grpc_transport.go
+++ b/xds/internal/clients/grpctransport/grpc_transport.go
@@ -23,25 +23,55 @@ package grpctransport
 import (
 	"context"
 	"fmt"
+	"sync"
 	"time"
 
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/grpclog"
 	"google.golang.org/grpc/keepalive"
 	"google.golang.org/grpc/xds/internal/clients"
 )
 
+var (
+	logger = grpclog.Component("grpctransport")
+)
+
 // ServerIdentifierExtension holds settings for connecting to a gRPC server,
 // such as an xDS management or an LRS server.
+//
+// It must be set by value (not pointer) in the
+// clients.ServerIdentifier.Extensions field (see the example).
 type ServerIdentifierExtension struct {
-	// Credentials will be used for all gRPC transports. If it is unset,
-	// transport creation will fail.
-	Credentials credentials.Bundle
+	// Credentials is the name of the credentials to use for this transport
+	// to the server. It must be present in the map passed to NewBuilder.
+	Credentials string
 }
 
 // Builder creates gRPC-based Transports. It must be paired with ServerIdentifiers
 // that contain an Extension field of type ServerIdentifierExtension.
-type Builder struct{}
+type Builder struct {
+	// credentials is a map of credential names to credentials.Bundle which
+	// can be used to connect to the server.
+	credentials map[string]credentials.Bundle
+
+	mu sync.Mutex
+	// connections is a map of clients.ServerIdentifiers in use by the Builder
+	// to connect to different servers.
+	connections map[clients.ServerIdentifier]*grpc.ClientConn
+	refs        map[clients.ServerIdentifier]int
+}
+
+// NewBuilder provides a builder for creating gRPC-based Transports using the
+// credentials from the provided map of credential names to
+// credentials.Bundle.
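To make the new builder API concrete, here is a hypothetical usage sketch, not part of this patch: the address and credential name are invented, error handling is reduced to panics, and the import paths are shown for illustration only (this package is internal to gRPC).

```go
package main

import (
	"fmt"

	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/xds/internal/clients"
	"google.golang.org/grpc/xds/internal/clients/grpctransport"
)

func main() {
	// Credential names used in ServerIdentifierExtension are resolved
	// against this map when Build is called.
	creds := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
	b := grpctransport.NewBuilder(creds)

	si := clients.ServerIdentifier{
		ServerURI:  "localhost:5678", // illustrative address
		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
	}

	// Repeated Build calls with an equal ServerIdentifier share a single
	// underlying grpc.ClientConn; each returned Transport holds one ref.
	tr1, err := b.Build(si)
	if err != nil {
		panic(err)
	}
	tr2, err := b.Build(si)
	if err != nil {
		panic(err)
	}

	tr1.Close() // releases tr1's reference; the ClientConn stays open
	tr2.Close() // last reference released; the ClientConn is closed
	fmt.Println("transports closed")
}
```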
+func NewBuilder(credentials map[string]credentials.Bundle) *Builder { + return &Builder{ + credentials: credentials, + connections: make(map[clients.ServerIdentifier]*grpc.ClientConn), + refs: make(map[clients.ServerIdentifier]int), + } +} // Build returns a gRPC-based clients.Transport. // @@ -57,14 +87,24 @@ func (b *Builder) Build(si clients.ServerIdentifier) (clients.Transport, error) if !ok { return nil, fmt.Errorf("grpctransport: Extensions field is %T, but must be %T in ServerIdentifier", si.Extensions, ServerIdentifierExtension{}) } - if sce.Credentials == nil { - return nil, fmt.Errorf("grptransport: Credentials field is not set in ServerIdentifierExtension") + + creds, ok := b.credentials[sce.Credentials] + if !ok { + return nil, fmt.Errorf("grpctransport: unknown credentials type %q specified in extensions", sce.Credentials) } - // TODO: Incorporate reference count map for existing transports and - // deduplicate transports based on the provided ServerIdentifier so that - // transport channel to same server can be shared between xDS and LRS - // client. + b.mu.Lock() + defer b.mu.Unlock() + + if cc, ok := b.connections[si]; ok { + if logger.V(2) { + logger.Info("Reusing existing connection to the server for ServerIdentifier: %v", si) + } + b.refs[si]++ + tr := &grpcTransport{cc: cc} + tr.cleanup = b.cleanupFunc(si, tr) + return tr, nil + } // Create a new gRPC client/channel for the server with the provided // credentials, server URI, and a byte codec to send and receive messages. @@ -74,16 +114,50 @@ func (b *Builder) Build(si clients.ServerIdentifier) (clients.Transport, error) Time: 5 * time.Minute, Timeout: 20 * time.Second, }) - cc, err := grpc.NewClient(si.ServerURI, kpCfg, grpc.WithCredentialsBundle(sce.Credentials), grpc.WithDefaultCallOptions(grpc.ForceCodec(&byteCodec{}))) + cc, err := grpc.NewClient(si.ServerURI, kpCfg, grpc.WithCredentialsBundle(creds), grpc.WithDefaultCallOptions(grpc.ForceCodec(&byteCodec{}))) if err != nil { - return nil, fmt.Errorf("grpctransport: failed to create transport to server %q: %v", si.ServerURI, err) + return nil, fmt.Errorf("grpctransport: failed to create connection to server %q: %v", si.ServerURI, err) } + tr := &grpcTransport{cc: cc} + // Register a cleanup function that decrements the refs to the gRPC + // transport each time Close() is called to close it and remove from + // transports and connections map if last reference is being released. + tr.cleanup = b.cleanupFunc(si, tr) - return &grpcTransport{cc: cc}, nil + // Add the newly created connection to the maps to re-use the transport + // channel and track references. + b.connections[si] = cc + b.refs[si] = 1 + + if logger.V(2) { + logger.Info("Created a new transport to the server for ServerIdentifier: %v", si) + } + return tr, nil +} + +func (b *Builder) cleanupFunc(si clients.ServerIdentifier, tr *grpcTransport) func() { + return sync.OnceFunc(func() { + b.mu.Lock() + defer b.mu.Unlock() + + b.refs[si]-- + if b.refs[si] != 0 { + return + } + + tr.cc.Close() + tr.cc = nil + delete(b.connections, si) + delete(b.refs, si) + }) } type grpcTransport struct { cc *grpc.ClientConn + + // cleanup is the function to be invoked for releasing the references to + // the gRPC transport each time Close() is called. + cleanup func() } // NewStream creates a new gRPC stream to the server for the specified method. @@ -96,8 +170,8 @@ func (g *grpcTransport) NewStream(ctx context.Context, method string) (clients.S } // Close closes the gRPC channel to the server. 
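The reference counting above pairs each Build() with an idempotent cleanup created by sync.OnceFunc. A self-contained sketch of that pattern, independent of this package:

```go
package main

import (
	"fmt"
	"sync"
)

// refCounted sketches the builder's pattern: every handle's close func
// decrements a shared count at most once, and the underlying resource is
// released only when the count drops to zero.
type refCounted struct {
	mu      sync.Mutex
	refs    int
	release func()
}

// newHandle increments the count and returns an idempotent close func.
func (r *refCounted) newHandle() func() {
	r.mu.Lock()
	r.refs++
	r.mu.Unlock()
	// sync.OnceFunc makes repeated calls on the same handle a no-op.
	return sync.OnceFunc(func() {
		r.mu.Lock()
		defer r.mu.Unlock()
		if r.refs--; r.refs == 0 {
			r.release()
		}
	})
}

func main() {
	rc := &refCounted{release: func() { fmt.Println("released") }}
	close1, close2 := rc.newHandle(), rc.newHandle()
	close1()
	close1() // no-op: OnceFunc swallows the second call
	close2() // last reference gone: prints "released"
}
```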
-func (g *grpcTransport) Close() error { - return g.cc.Close() +func (g *grpcTransport) Close() { + g.cleanup() } type stream struct { diff --git a/xds/internal/clients/grpctransport/grpc_transport_ext_test.go b/xds/internal/clients/grpctransport/grpc_transport_ext_test.go new file mode 100644 index 000000000..b34a20570 --- /dev/null +++ b/xds/internal/clients/grpctransport/grpc_transport_ext_test.go @@ -0,0 +1,316 @@ +/* + * + * Copyright 2025 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package grpctransport_test + +import ( + "context" + "sync" + "testing" + "time" + + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/credentials/local" + "google.golang.org/grpc/internal/grpctest" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/grpctransport" + "google.golang.org/grpc/xds/internal/clients/internal/testutils" + "google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e" +) + +const ( + defaultTestTimeout = 10 * time.Second + defaultTestShortTimeout = 10 * time.Millisecond // For events expected to *not* happen. +) + +type s struct { + grpctest.Tester +} + +func Test(t *testing.T) { + grpctest.RunSubTests(t, s{}) +} + +type testCredentials struct { + credentials.Bundle + transportCredentials credentials.TransportCredentials +} + +func (tc *testCredentials) TransportCredentials() credentials.TransportCredentials { + return tc.transportCredentials +} +func (tc *testCredentials) PerRPCCredentials() credentials.PerRPCCredentials { + return nil +} + +// TestBuild_Single tests that multiple calls to Build() with the same +// clients.ServerIdentifier returns the same transport. Also verifies that +// only when all references to the newly created transport are released, +// the underlying transport is closed. +func (s) TestBuild_Single(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + lis := testutils.NewListenerWrapper(t, nil) + mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lis}) + + serverID := clients.ServerIdentifier{ + ServerURI: mgmtServer.Address, + Extensions: grpctransport.ServerIdentifierExtension{Credentials: "local"}, + } + credentials := map[string]credentials.Bundle{ + "local": &testCredentials{transportCredentials: local.NewCredentials()}, + } + + // Calling Build() first time should create new gRPC transport. + builder := grpctransport.NewBuilder(credentials) + tr, err := builder.Build(serverID) + if err != nil { + t.Fatalf("Failed to build transport: %v", err) + } + // Create a new stream to the server and verify that a new transport is + // created. 
+ if _, err = tr.NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources"); err != nil { + t.Fatalf("Failed to create stream: %v", err) + } + val, err := lis.NewConnCh.Receive(ctx) + if err != nil { + t.Fatalf("Timed out when waiting for a new transport to be created to the management server: %v", err) + } + conn := val.(*testutils.ConnWrapper) + + // Calling Build() again should not create new gRPC transport. + const count = 9 + transports := make([]clients.Transport, count) + for i := 0; i < count; i++ { + func() { + transports[i], err = builder.Build(serverID) + if err != nil { + t.Fatalf("Failed to build transport: %v", err) + } + // Create a new stream to the server and verify that no connection + // is established to the management server at this point. A new + // transport is created only when an existing connection for + // serverID does not exist. + if _, err = tr.NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources"); err != nil { + t.Fatalf("Failed to create stream: %v", err) + } + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := lis.NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { + t.Fatal("Unexpected new transport created to management server") + } + }() + } + + // Call Close() multiple times on each of the transport received in the + // above for loop. Close() calls are idempotent. The underlying gRPC + // transport is removed after the Close() call but calling close second + // time should not panic and underlying gRPC transport should not be + // closed. + for i := 0; i < count; i++ { + func() { + transports[i].Close() + transports[i].Close() + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := conn.CloseCh.Receive(sCtx); err != context.DeadlineExceeded { + t.Fatal("Unexpected transport closure to management server") + } + }() + } + + // Call the last Close(). The underlying gRPC transport should be closed + // because calls in the above for loop have released all references. + tr.Close() + if _, err := conn.CloseCh.Receive(ctx); err != nil { + t.Fatal("Timeout when waiting for connection to management server to be closed") + } + + // Calling Build() again, after the previous transport was actually closed, + // should create a new one. + tr2, err := builder.Build(serverID) + if err != nil { + t.Fatalf("Failed to create xDS client: %v", err) + } + defer tr2.Close() + // Create a new stream to the server and verify that a new transport is + // created. + if _, err = tr2.NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources"); err != nil { + t.Fatalf("Failed to create stream: %v", err) + } + if _, err := lis.NewConnCh.Receive(ctx); err != nil { + t.Fatalf("Timed out when waiting for a new transport to be created to the management server: %v", err) + } +} + +// TestBuild_Multiple tests the scenario where there are multiple calls to +// Build() with different clients.ServerIdentifier. Verifies that reference +// counts are tracked correctly for each transport and that only when all +// references are released for a transport, it is closed. 
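The sharing these tests exercise works because clients.ServerIdentifier is comparable and is used directly as a map key: identifiers with equal fields collapse to one entry. A small stand-alone illustration (the types below are local stand-ins for clients.ServerIdentifier and ServerIdentifierExtension, not the real ones):

```go
package main

import "fmt"

// serverIdentifierExtension mirrors the grpctransport extension type.
type serverIdentifierExtension struct{ Credentials string }

// serverIdentifier mirrors clients.ServerIdentifier; Extensions must hold a
// comparable value, or using it as a map key panics at runtime.
type serverIdentifier struct {
	ServerURI  string
	Extensions any
}

func main() {
	a := serverIdentifier{ServerURI: "srv:1", Extensions: serverIdentifierExtension{Credentials: "local"}}
	b := serverIdentifier{ServerURI: "srv:1", Extensions: serverIdentifierExtension{Credentials: "local"}}
	c := serverIdentifier{ServerURI: "srv:1", Extensions: serverIdentifierExtension{Credentials: "insecure"}}

	conns := map[serverIdentifier]int{}
	for _, si := range []serverIdentifier{a, b, c} {
		conns[si]++
	}
	fmt.Println(len(conns))    // 2: a and b collapse into one key
	fmt.Println(conns[a] == 2) // true: both refs landed on the same entry
}
```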
+func (s) TestBuild_Multiple(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + lis := testutils.NewListenerWrapper(t, nil) + mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lis}) + + serverID1 := clients.ServerIdentifier{ + ServerURI: mgmtServer.Address, + Extensions: grpctransport.ServerIdentifierExtension{Credentials: "local"}, + } + serverID2 := clients.ServerIdentifier{ + ServerURI: mgmtServer.Address, + Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"}, + } + credentials := map[string]credentials.Bundle{ + "local": &testCredentials{transportCredentials: local.NewCredentials()}, + "insecure": insecure.NewBundle(), + } + + // Create two gRPC transports. + builder := grpctransport.NewBuilder(credentials) + + tr1, err := builder.Build(serverID1) + if err != nil { + t.Fatalf("Failed to build transport: %v", err) + } + // Create a new stream to the server and verify that a new transport is + // created. + if _, err = tr1.NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources"); err != nil { + t.Fatalf("Failed to create stream: %v", err) + } + val, err := lis.NewConnCh.Receive(ctx) + if err != nil { + t.Fatalf("Timed out when waiting for a new transport to be created to the management server: %v", err) + } + conn1 := val.(*testutils.ConnWrapper) + + tr2, err := builder.Build(serverID2) + if err != nil { + t.Fatalf("Failed to build transport: %v", err) + } + // Create a new stream to the server and verify that a new transport is + // created because credentials are different. + if _, err = tr2.NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources"); err != nil { + t.Fatalf("Failed to create stream: %v", err) + } + val, err = lis.NewConnCh.Receive(ctx) + if err != nil { + t.Fatalf("Timed out when waiting for a new transport to be created to the management server: %v", err) + } + conn2 := val.(*testutils.ConnWrapper) + + // Create N more references to each of the two transports. + const count = 9 + transports1 := make([]clients.Transport, count) + transports2 := make([]clients.Transport, count) + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + for i := 0; i < count; i++ { + var err error + transports1[i], err = builder.Build(serverID1) + if err != nil { + t.Errorf("Failed to build transport: %v", err) + } + // Create a new stream to the server and verify that no connection + // is established to the management server at this point. A new + // transport is created only when an existing connection for + // serverID does not exist. + if _, err = transports1[i].NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources"); err != nil { + t.Errorf("Failed to create stream: %v", err) + } + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := lis.NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { + t.Error("Unexpected new transport created to management server") + } + } + }() + go func() { + defer wg.Done() + for i := 0; i < count; i++ { + var err error + transports2[i], err = builder.Build(serverID2) + if err != nil { + t.Errorf("%d-th call to Build() failed with error: %v", i, err) + } + // Create a new stream to the server and verify that no connection + // is established to the management server at this point. 
A new
+			// transport is created only when an existing connection for
+			// serverID does not exist.
+			if _, err = transports2[i].NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources"); err != nil {
+				t.Errorf("Failed to create stream: %v", err)
+			}
+			sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+			defer sCancel()
+			if _, err := lis.NewConnCh.Receive(sCtx); err != context.DeadlineExceeded {
+				t.Error("Unexpected new transport created to management server")
+			}
+		}
+	}()
+	wg.Wait()
+	if t.Failed() {
+		t.FailNow()
+	}
+
+	// Call Close() multiple times on each of the transports received in the
+	// above for loop. Close() calls are idempotent. The reference to the
+	// underlying gRPC transport is released on the first Close() call, but
+	// calling Close() a second time should not panic and should not close
+	// the underlying gRPC transport.
+	for i := 0; i < count; i++ {
+		transports1[i].Close()
+		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+		defer sCancel()
+		if _, err := conn1.CloseCh.Receive(sCtx); err != context.DeadlineExceeded {
+			t.Fatal("Unexpected transport closure to management server")
+		}
+		transports1[i].Close()
+	}
+	// Call the last Close(). The underlying gRPC transport should be closed
+	// because the above for loop has released all other references.
+	tr1.Close()
+	if _, err := conn1.CloseCh.Receive(ctx); err != nil {
+		t.Fatal("Timeout when waiting for connection to management server to be closed")
+	}
+
+	// Call Close() multiple times on each of the transports received in the
+	// above for loop. Close() calls are idempotent. The reference to the
+	// underlying gRPC transport is released on the first Close() call, but
+	// calling Close() a second time should not panic and should not close
+	// the underlying gRPC transport.
+	for i := 0; i < count; i++ {
+		transports2[i].Close()
+		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+		defer sCancel()
+		if _, err := conn2.CloseCh.Receive(sCtx); err != context.DeadlineExceeded {
+			t.Fatal("Unexpected transport closure to management server")
+		}
+		transports2[i].Close()
+	}
+	// Call the last Close(). The underlying gRPC transport should be closed
+	// because the above for loop has released all other references.
+	tr2.Close()
+	if _, err := conn2.CloseCh.Receive(ctx); err != nil {
+		t.Fatal("Timeout when waiting for connection to management server to be closed")
+	}
+}
diff --git a/xds/internal/clients/grpctransport/grpc_transport_test.go b/xds/internal/clients/grpctransport/grpc_transport_test.go
index cb308d8e6..ab2cd7c1b 100644
--- a/xds/internal/clients/grpctransport/grpc_transport_test.go
+++ b/xds/internal/clients/grpctransport/grpc_transport_test.go
@@ -40,7 +40,8 @@ import (
 )
 
 const (
-	defaultTestTimeout = 10 * time.Second
+	defaultTestTimeout      = 10 * time.Second
+	defaultTestShortTimeout = 10 * time.Millisecond // For events expected to *not* happen.
 )
 
 type s struct {
@@ -128,28 +129,49 @@ type testCredentials struct {
 func (tc *testCredentials) TransportCredentials() credentials.TransportCredentials {
 	return tc.transportCredentials
 }
+func (tc *testCredentials) PerRPCCredentials() credentials.PerRPCCredentials {
+	return nil
+}
 
 // TestBuild_Success verifies that the Builder successfully creates a new
-// Transport with a non-nil grpc.ClientConn.
+// Transport both when the provided clients.ServerIdentifier matches that of
+// an existing transport and when a new one is required.
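A related detail exercised by TestBuild_Failure below: Build() type-asserts the Extensions field to the value type, so a *ServerIdentifierExtension is rejected. A minimal sketch of why (ext is a local stand-in):

```go
package main

import "fmt"

// ext mirrors ServerIdentifierExtension; Build type-asserts to the value
// type, so a pointer stored in the Extensions field never matches.
type ext struct{ Credentials string }

func main() {
	var byPointer any = &ext{Credentials: "local"}
	if _, ok := byPointer.(ext); !ok {
		fmt.Println("pointer extension rejected") // this prints
	}

	var byValue any = ext{Credentials: "local"}
	if _, ok := byValue.(ext); ok {
		fmt.Println("value extension accepted") // this prints
	}
}
```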
func (s) TestBuild_Success(t *testing.T) { - serverCfg := clients.ServerIdentifier{ + credentials := map[string]credentials.Bundle{ + "local": &testCredentials{transportCredentials: local.NewCredentials()}, + "insecure": insecure.NewBundle(), + } + b := NewBuilder(credentials) + + serverID1 := clients.ServerIdentifier{ ServerURI: "server-address", - Extensions: ServerIdentifierExtension{Credentials: &testCredentials{transportCredentials: local.NewCredentials()}}, + Extensions: ServerIdentifierExtension{Credentials: "local"}, } - - b := &Builder{} - tr, err := b.Build(serverCfg) + tr1, err := b.Build(serverID1) if err != nil { - t.Fatalf("Build() failed: %v", err) + t.Fatalf("Build(serverID1) call failed: %v", err) } - defer tr.Close() + defer tr1.Close() - if tr == nil { - t.Fatalf("Got nil transport from Build(), want non-nil") + serverID2 := clients.ServerIdentifier{ + ServerURI: "server-address", + Extensions: ServerIdentifierExtension{Credentials: "local"}, } - if tr.(*grpcTransport).cc == nil { - t.Fatalf("Got nil grpc.ClientConn in transport, want non-nil") + tr2, err := b.Build(serverID2) + if err != nil { + t.Fatalf("Build(serverID2) call failed: %v", err) } + defer tr2.Close() + + serverID3 := clients.ServerIdentifier{ + ServerURI: "server-address", + Extensions: ServerIdentifierExtension{Credentials: "insecure"}, + } + tr3, err := b.Build(serverID3) + if err != nil { + t.Fatalf("Build(serverID3) call failed: %v", err) + } + defer tr3.Close() } // TestBuild_Failure verifies that the Builder returns error when incorrect @@ -162,39 +184,49 @@ func (s) TestBuild_Success(t *testing.T) { // - Credentials are nil. func (s) TestBuild_Failure(t *testing.T) { tests := []struct { - name string - serverCfg clients.ServerIdentifier + name string + serverID clients.ServerIdentifier }{ { name: "ServerURI is empty", - serverCfg: clients.ServerIdentifier{ + serverID: clients.ServerIdentifier{ ServerURI: "", - Extensions: ServerIdentifierExtension{Credentials: insecure.NewBundle()}, + Extensions: ServerIdentifierExtension{Credentials: "local"}, }, }, { - name: "Extensions is nil", - serverCfg: clients.ServerIdentifier{ServerURI: "server-address"}, + name: "Extensions is nil", + serverID: clients.ServerIdentifier{ServerURI: "server-address"}, }, { name: "Extensions is not a ServerIdentifierExtension", - serverCfg: clients.ServerIdentifier{ + serverID: clients.ServerIdentifier{ ServerURI: "server-address", Extensions: 1, }, }, { name: "ServerIdentifierExtension Credentials is nil", - serverCfg: clients.ServerIdentifier{ + serverID: clients.ServerIdentifier{ ServerURI: "server-address", Extensions: ServerIdentifierExtension{}, }, }, + { + name: "ServerIdentifierExtension is added as pointer", + serverID: clients.ServerIdentifier{ + ServerURI: "server-address", + Extensions: &ServerIdentifierExtension{Credentials: "local"}, + }, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - b := &Builder{} - tr, err := b.Build(test.serverCfg) + credentials := map[string]credentials.Bundle{ + "local": &testCredentials{transportCredentials: local.NewCredentials()}, + } + b := NewBuilder(credentials) + tr, err := b.Build(test.serverID) if err == nil { t.Fatalf("Build() succeeded, want error") } @@ -212,9 +244,12 @@ func (s) TestNewStream_Success(t *testing.T) { serverCfg := clients.ServerIdentifier{ ServerURI: ts.address, - Extensions: ServerIdentifierExtension{Credentials: insecure.NewBundle()}, + Extensions: ServerIdentifierExtension{Credentials: "local"}, } - builder := Builder{} + 
credentials := map[string]credentials.Bundle{ + "local": &testCredentials{transportCredentials: local.NewCredentials()}, + } + builder := NewBuilder(credentials) transport, err := builder.Build(serverCfg) if err != nil { t.Fatalf("Failed to build transport: %v", err) @@ -233,9 +268,12 @@ func (s) TestNewStream_Success(t *testing.T) { func (s) TestNewStream_Error(t *testing.T) { serverCfg := clients.ServerIdentifier{ ServerURI: "invalid-server-uri", - Extensions: ServerIdentifierExtension{Credentials: insecure.NewBundle()}, + Extensions: ServerIdentifierExtension{Credentials: "local"}, } - builder := Builder{} + credentials := map[string]credentials.Bundle{ + "local": &testCredentials{transportCredentials: local.NewCredentials()}, + } + builder := NewBuilder(credentials) transport, err := builder.Build(serverCfg) if err != nil { t.Fatalf("Failed to build transport: %v", err) @@ -258,7 +296,7 @@ func (s) TestNewStream_Error(t *testing.T) { // testDiscoverResponse from the server and verifies that the received // discovery response is same as sent from the server. func (s) TestStream_SendAndRecv(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout*2000) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() ts := setupTestServer(t, &v3discoverypb.DiscoveryResponse{VersionInfo: "1"}) @@ -266,9 +304,12 @@ func (s) TestStream_SendAndRecv(t *testing.T) { // Build a grpc-based transport to the above server. serverCfg := clients.ServerIdentifier{ ServerURI: ts.address, - Extensions: ServerIdentifierExtension{Credentials: insecure.NewBundle()}, + Extensions: ServerIdentifierExtension{Credentials: "local"}, } - builder := Builder{} + credentials := map[string]credentials.Bundle{ + "local": &testCredentials{transportCredentials: local.NewCredentials()}, + } + builder := NewBuilder(credentials) transport, err := builder.Build(serverCfg) if err != nil { t.Fatalf("Failed to build transport: %v", err) diff --git a/xds/internal/clients/internal/internal.go b/xds/internal/clients/internal/internal.go index d71212984..371b4d19d 100644 --- a/xds/internal/clients/internal/internal.go +++ b/xds/internal/clients/internal/internal.go @@ -20,9 +20,6 @@ package internal import ( - "fmt" - "strings" - "google.golang.org/grpc/xds/internal/clients" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/structpb" @@ -30,16 +27,6 @@ import ( v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" ) -// ServerIdentifierString returns a string representation of the -// clients.ServerIdentifier si. -// -// WARNING: This method is primarily intended for logging and testing -// purposes. The output returned by this method is not guaranteed to be stable -// and may change at any time. Do not rely on it for production use. -func ServerIdentifierString(si clients.ServerIdentifier) string { - return strings.Join([]string{si.ServerURI, fmt.Sprintf("%v", si.Extensions)}, "-") -} - // NodeProto returns a protobuf representation of clients.Node n. 
// // This function is intended to be used by the client implementation to convert diff --git a/xds/internal/clients/internal/internal_test.go b/xds/internal/clients/internal/internal_test.go index 8815b9251..5b7889b7e 100644 --- a/xds/internal/clients/internal/internal_test.go +++ b/xds/internal/clients/internal/internal_test.go @@ -22,10 +22,8 @@ import ( "testing" "github.com/google/go-cmp/cmp" - "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/xds/internal/clients" - "google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/types/known/structpb" diff --git a/xds/internal/clients/internal/testutils/channel.go b/xds/internal/clients/internal/testutils/channel.go index 00e75b520..509adeae0 100644 --- a/xds/internal/clients/internal/testutils/channel.go +++ b/xds/internal/clients/internal/testutils/channel.go @@ -18,6 +18,8 @@ // Package testutils contains testing helpers for xDS and LRS clients. package testutils +import "context" + // Channel wraps a generic channel and provides a timed receive operation. type Channel struct { // C is the underlying channel on which values sent using the SendXxx() @@ -31,6 +33,32 @@ func (c *Channel) Send(value any) { c.C <- value } +// Receive returns the value received on the underlying channel, or the error +// returned by ctx if it is closed or cancelled. +func (c *Channel) Receive(ctx context.Context) (any, error) { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case got := <-c.C: + return got, nil + } +} + +// Replace clears the value on the underlying channel, and sends the new value. +// +// It's expected to be used with a size-1 channel, to only keep the most +// up-to-date item. This method is inherently racy when invoked concurrently +// from multiple goroutines. +func (c *Channel) Replace(value any) { + for { + select { + case c.C <- value: + return + case <-c.C: + } + } +} + // NewChannelWithSize returns a new Channel with a buffer of bufSize. func NewChannelWithSize(bufSize int) *Channel { return &Channel{C: make(chan any, bufSize)} diff --git a/xds/internal/clients/internal/testutils/e2e/clientresources.go b/xds/internal/clients/internal/testutils/e2e/clientresources.go index 9f17c7627..6724f1aae 100644 --- a/xds/internal/clients/internal/testutils/e2e/clientresources.go +++ b/xds/internal/clients/internal/testutils/e2e/clientresources.go @@ -21,7 +21,6 @@ package e2e import ( "fmt" - "github.com/envoyproxy/go-control-plane/pkg/wellknown" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -50,18 +49,10 @@ func DefaultClientListener(target, routeName string) *v3listenerpb.Listener { }, RouteConfigName: routeName, }}, - HttpFilters: []*v3httppb.HttpFilter{HTTPFilter("router", &v3routerpb.Router{})}, // router fields are unused by grpc }) return &v3listenerpb.Listener{ Name: target, ApiListener: &v3listenerpb.ApiListener{ApiListener: hcm}, - FilterChains: []*v3listenerpb.FilterChain{{ - Name: "filter-chain-name", - Filters: []*v3listenerpb.Filter{{ - Name: wellknown.HTTPConnectionManager, - ConfigType: &v3listenerpb.Filter_TypedConfig{TypedConfig: hcm}, - }}, - }}, } } diff --git a/xds/internal/clients/internal/testutils/wrappers.go b/xds/internal/clients/internal/testutils/wrappers.go new file mode 100644 index 000000000..c7d569041 --- /dev/null +++ b/xds/internal/clients/internal/testutils/wrappers.go @@ -0,0 +1,74 @@ +/* + * + * Copyright 2022 gRPC authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package testutils + +import ( + "net" + "testing" +) + +// ConnWrapper wraps a net.Conn and pushes on a channel when closed. +type ConnWrapper struct { + net.Conn + CloseCh *Channel +} + +// Close closes the connection and sends a value on the close channel. +func (cw *ConnWrapper) Close() error { + err := cw.Conn.Close() + cw.CloseCh.Replace(nil) + return err +} + +// ListenerWrapper wraps a net.Listener and the returned net.Conn. +// +// It pushes on a channel whenever it accepts a new connection. +type ListenerWrapper struct { + net.Listener + NewConnCh *Channel +} + +// Accept wraps the Listener Accept and sends the accepted connection on a +// channel. +func (l *ListenerWrapper) Accept() (net.Conn, error) { + c, err := l.Listener.Accept() + if err != nil { + return nil, err + } + closeCh := NewChannelWithSize(1) + conn := &ConnWrapper{Conn: c, CloseCh: closeCh} + l.NewConnCh.Replace(conn) + return conn, nil +} + +// NewListenerWrapper returns a ListenerWrapper. +func NewListenerWrapper(t *testing.T, lis net.Listener) *ListenerWrapper { + if lis == nil { + var err error + lis, err = net.Listen("tcp", "localhost:0") + if err != nil { + t.Fatal(err) + } + } + + return &ListenerWrapper{ + Listener: lis, + NewConnCh: NewChannelWithSize(1), + } +} diff --git a/xds/internal/clients/transport_builder.go b/xds/internal/clients/transport_builder.go index 10a25fcab..f940675d9 100644 --- a/xds/internal/clients/transport_builder.go +++ b/xds/internal/clients/transport_builder.go @@ -39,7 +39,7 @@ type Transport interface { NewStream(context.Context, string) (Stream, error) // Close closes the Transport. - Close() error + Close() } // Stream provides methods to send and receive messages on a stream. Messages diff --git a/xds/internal/clients/xdsclient/authority.go b/xds/internal/clients/xdsclient/authority.go new file mode 100644 index 000000000..3ada1465c --- /dev/null +++ b/xds/internal/clients/xdsclient/authority.go @@ -0,0 +1,876 @@ +/* + * + * Copyright 2025 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package xdsclient + +import ( + "context" + "fmt" + "sync" + "sync/atomic" + + "google.golang.org/grpc/grpclog" + igrpclog "google.golang.org/grpc/internal/grpclog" + "google.golang.org/grpc/xds/internal/clients/internal/syncutil" + "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/timestamppb" + + v3adminpb "github.com/envoyproxy/go-control-plane/envoy/admin/v3" + v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3" +) + +type resourceState struct { + watchers map[ResourceWatcher]bool // Set of watchers for this resource. + cache ResourceData // Most recent ACKed update for this resource. + md xdsresource.UpdateMetadata // Metadata for the most recent update. + deletionIgnored bool // True, if resource deletion was ignored for a prior update. + xdsChannelConfigs map[*xdsChannelWithConfig]bool // Set of xdsChannels where this resource is subscribed. +} + +// xdsChannelForADS is used to acquire a reference to an xdsChannel. This +// functionality is provided by the xdsClient. +// +// The arguments to the function are as follows: +// - the server config for the xdsChannel +// - the calling authority on which a set of callbacks are invoked by the +// xdsChannel on ADS stream events +// +// Returns a reference to the xdsChannel and a function to release the same. A +// non-nil error is returned if the channel creation fails and the first two +// return values are meaningless in this case. +type xdsChannelForADS func(*ServerConfig, *authority) (*xdsChannel, func(), error) + +// xdsChannelWithConfig is a struct that holds an xdsChannel and its associated +// ServerConfig, along with a cleanup function to release the xdsChannel. +type xdsChannelWithConfig struct { + channel *xdsChannel + serverConfig *ServerConfig + cleanup func() +} + +// authority provides the functionality required to communicate with a +// management server corresponding to an authority name specified in the +// xDS client configuration. +// +// It holds references to one or more xdsChannels, one for each server +// configuration in the config, to allow fallback from a primary management +// server to a secondary management server. Authorities that contain similar +// server configuration entries will end up sharing the xdsChannel for that +// server configuration. The xdsChannels are owned and managed by the xdsClient. +// +// It also contains a cache of resource state for resources requested from +// management server(s). This cache contains the list of registered watchers and +// the most recent resource configuration received from the management server. +type authority struct { + // The following fields are initialized at creation time and are read-only + // afterwards, and therefore don't need to be protected with a mutex. + name string // Name of the authority from xDS client configuration. + watcherCallbackSerializer *syncutil.CallbackSerializer // Serializer to run watcher callbacks, owned by the xDS client implementation. + getChannelForADS xdsChannelForADS // Function to get an xdsChannel for ADS, provided by the xDS client implementation. + xdsClientSerializer *syncutil.CallbackSerializer // Serializer to run call ins from the xDS client, owned by this authority. + xdsClientSerializerClose func() // Function to close the above serializer. + logger *igrpclog.PrefixLogger // Logger for this authority. + target string // The gRPC Channel target. 
+ + // The below defined fields must only be accessed in the context of the + // serializer callback, owned by this authority. + + // A two level map containing the state of all the resources being watched. + // + // The first level map key is the ResourceType (Listener, Route etc). This + // allows us to have a single map for all resources instead of having per + // resource-type maps. + // + // The second level map key is the resource name, with the value being the + // actual state of the resource. + resources map[ResourceType]map[string]*resourceState + + // An ordered list of xdsChannels corresponding to the list of server + // configurations specified for this authority in the config. The + // ordering specifies the order in which these channels are preferred for + // fallback. + xdsChannelConfigs []*xdsChannelWithConfig + + // The current active xdsChannel. Here, active does not mean that the + // channel has a working connection to the server. It simply points to the + // channel that we are trying to work with, based on fallback logic. + activeXDSChannel *xdsChannelWithConfig +} + +// authorityBuildOptions wraps arguments required to create a new authority. +type authorityBuildOptions struct { + serverConfigs []ServerConfig // Server configs for the authority + name string // Name of the authority + serializer *syncutil.CallbackSerializer // Callback serializer for invoking watch callbacks + getChannelForADS xdsChannelForADS // Function to acquire a reference to an xdsChannel + logPrefix string // Prefix for logging + target string // Target for the gRPC Channel that owns xDS Client/Authority +} + +// newAuthority creates a new authority instance with the provided +// configuration. The authority is responsible for managing the state of +// resources requested from the management server, as well as acquiring and +// releasing references to channels used to communicate with the management +// server. +// +// Note that no channels to management servers are created at this time. Instead +// a channel to the first server configuration is created when the first watch +// is registered, and more channels are created as needed by the fallback logic. +func newAuthority(args authorityBuildOptions) *authority { + ctx, cancel := context.WithCancel(context.Background()) + l := grpclog.Component("xds") + logPrefix := args.logPrefix + fmt.Sprintf("[authority %q] ", args.name) + ret := &authority{ + name: args.name, + watcherCallbackSerializer: args.serializer, + getChannelForADS: args.getChannelForADS, + xdsClientSerializer: syncutil.NewCallbackSerializer(ctx), + xdsClientSerializerClose: cancel, + logger: igrpclog.NewPrefixLogger(l, logPrefix), + resources: make(map[ResourceType]map[string]*resourceState), + target: args.target, + } + + // Create an ordered list of xdsChannels with their server configs. The + // actual channel to the first server configuration is created when the + // first watch is registered, and channels to other server configurations + // are created as needed to support fallback. + for _, sc := range args.serverConfigs { + ret.xdsChannelConfigs = append(ret.xdsChannelConfigs, &xdsChannelWithConfig{serverConfig: &sc}) + } + return ret +} + +// adsStreamFailure is called to notify the authority about an ADS stream +// failure on an xdsChannel to the management server identified by the provided +// server config. The error is forwarded to all the resource watchers. 
+// +// This method is called by the xDS client implementation (on all interested +// authorities) when a stream error is reported by an xdsChannel. +// +// Errors of type xdsresource.ErrTypeStreamFailedAfterRecv are ignored. +func (a *authority) adsStreamFailure(serverConfig *ServerConfig, err error) { + a.xdsClientSerializer.TrySchedule(func(context.Context) { + a.handleADSStreamFailure(serverConfig, err) + }) +} + +// Handles ADS stream failure by invoking watch callbacks and triggering +// fallback if the associated conditions are met. +// +// Only executed in the context of a serializer callback. +func (a *authority) handleADSStreamFailure(serverConfig *ServerConfig, err error) { + if a.logger.V(2) { + a.logger.Infof("Connection to server %s failed with error: %v", serverConfig, err) + } + + // We do not consider it an error if the ADS stream was closed after having + // received a response on the stream. This is because there are legitimate + // reasons why the server may need to close the stream during normal + // operations, such as needing to rebalance load or the underlying + // connection hitting its max connection age limit. See gRFC A57 for more + // details. + if xdsresource.ErrType(err) == xdsresource.ErrTypeStreamFailedAfterRecv { + a.logger.Warningf("Watchers not notified since ADS stream failed after having received at least one response: %v", err) + return + } + + // Two conditions need to be met for fallback to be triggered: + // 1. There is a connectivity failure on the ADS stream, as described in + // gRFC A57. For us, this means that the ADS stream was closed before the + // first server response was received. We already checked that condition + // earlier in this method. + // 2. There is at least one watcher for a resource that is not cached. + // Cached resources include ones that + // - have been successfully received and can be used. + // - are considered non-existent according to xDS Protocol Specification. + if !a.watcherExistsForUncachedResource() { + if a.logger.V(2) { + a.logger.Infof("No watchers for uncached resources. Not triggering fallback") + } + // Since we are not triggering fallback, propagate the connectivity + // error to all watchers and return early. + a.propagateConnectivityErrorToAllWatchers(err) + return + } + + // Attempt to fallback to servers with lower priority than the failing one. + currentServerIdx := a.serverIndexForConfig(serverConfig) + for i := currentServerIdx + 1; i < len(a.xdsChannelConfigs); i++ { + if a.fallbackToServer(a.xdsChannelConfigs[i]) { + // Since we have successfully triggered fallback, we don't have to + // notify watchers about the connectivity error. + return + } + } + + // Having exhausted all available servers, we must notify watchers of the + // connectivity error - A71. + a.propagateConnectivityErrorToAllWatchers(err) +} + +// propagateConnectivityErrorToAllWatchers propagates the given connection error +// to all watchers of all resources. +// +// Only executed in the context of a serializer callback. 
+func (a *authority) propagateConnectivityErrorToAllWatchers(err error) {
+	for _, rType := range a.resources {
+		for _, state := range rType {
+			for watcher := range state.watchers {
+				if state.cache == nil {
+					a.watcherCallbackSerializer.TrySchedule(func(context.Context) {
+						watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {})
+					})
+				} else {
+					a.watcherCallbackSerializer.TrySchedule(func(context.Context) {
+						watcher.AmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {})
+					})
+				}
+			}
+		}
+	}
+}
+
+// serverIndexForConfig returns the index of the xdsChannelConfig matching the
+// provided server config, panicking if no match is found (which indicates a
+// programming error).
+func (a *authority) serverIndexForConfig(sc *ServerConfig) int {
+	for i, cfg := range a.xdsChannelConfigs {
+		if isServerConfigEqual(sc, cfg.serverConfig) {
+			return i
+		}
+	}
+	panic(fmt.Sprintf("no server config matching %v found", sc))
+}
+
+// Determines the server to fall back to and triggers fallback to it. If
+// required, creates an xdsChannel to that server, and re-subscribes to all
+// existing resources.
+//
+// Only executed in the context of a serializer callback.
+func (a *authority) fallbackToServer(xc *xdsChannelWithConfig) bool {
+	if a.logger.V(2) {
+		a.logger.Infof("Attempting to initiate fallback to server %q", xc.serverConfig)
+	}
+
+	if xc.channel != nil {
+		if a.logger.V(2) {
+			a.logger.Infof("Channel to the next server in the list %q already exists", xc.serverConfig)
+		}
+		return false
+	}
+
+	channel, cleanup, err := a.getChannelForADS(xc.serverConfig, a)
+	if err != nil {
+		a.logger.Errorf("Failed to create xDS channel: %v", err)
+		return false
+	}
+	xc.channel = channel
+	xc.cleanup = cleanup
+	a.activeXDSChannel = xc
+
+	// Subscribe to all existing resources from the new management server.
+	for typ, resources := range a.resources {
+		for name, state := range resources {
+			if a.logger.V(2) {
+				a.logger.Infof("Resubscribing to resource of type %q and name %q", typ.TypeName, name)
+			}
+			xc.channel.subscribe(typ, name)
+
+			// Add the new channel to the list of xdsChannels from which this
+			// resource has been requested. Retain the cached resource and
+			// the set of existing watchers (and other metadata fields) in the
+			// resource state.
+			state.xdsChannelConfigs[xc] = true
+		}
+	}
+	return true
+}
+
+// adsResourceUpdate is called to notify the authority about a resource update
+// received on the ADS stream.
+//
+// This method is called by the xDS client implementation (on all interested
+// authorities) when a resource update is received from an xdsChannel.
+func (a *authority) adsResourceUpdate(serverConfig *ServerConfig, rType ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) {
+	a.xdsClientSerializer.TrySchedule(func(context.Context) {
+		a.handleADSResourceUpdate(serverConfig, rType, updates, md, onDone)
+	})
+}
+
+// handleADSResourceUpdate processes an update from the xDS client, updating the
+// resource cache and notifying any registered watchers of the update.
+//
+// If the update is received from a higher priority xdsChannel that was
+// previously down, we revert to it and close all lower priority xdsChannels.
+//
+// Once the update has been processed by all watchers, the authority is expected
+// to invoke the onDone callback.
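The "serializer callback" constraint that recurs in these comments is what lets authority state be mutated without a mutex: callbacks run one at a time, in order, on a single goroutine. A minimal sketch of the idea (this is not the real syncutil.CallbackSerializer; the bounded, drop-on-full queue is a simplification):

```go
package main

import (
	"context"
	"fmt"
)

// serializer runs scheduled callbacks one at a time, in order, on a single
// goroutine, until the context is cancelled.
type serializer struct{ q chan func(context.Context) }

func newSerializer(ctx context.Context) *serializer {
	s := &serializer{q: make(chan func(context.Context), 16)}
	go func() {
		for {
			select {
			case f := <-s.q:
				f(ctx)
			case <-ctx.Done():
				return
			}
		}
	}()
	return s
}

// TrySchedule enqueues f without blocking; a full queue drops the callback
// (a simplification relative to the real implementation).
func (s *serializer) TrySchedule(f func(context.Context)) {
	select {
	case s.q <- f:
	default:
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	done := make(chan struct{})
	s := newSerializer(ctx)
	s.TrySchedule(func(context.Context) { fmt.Println("first") })
	s.TrySchedule(func(context.Context) { fmt.Println("second") })
	s.TrySchedule(func(context.Context) { close(done) })
	<-done
}
```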
+// +// Only executed in the context of a serializer callback. +func (a *authority) handleADSResourceUpdate(serverConfig *ServerConfig, rType ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) { + a.handleRevertingToPrimaryOnUpdate(serverConfig) + + // We build a list of callback funcs to invoke, and invoke them at the end + // of this method instead of inline (when handling the update for a + // particular resource), because we want to make sure that all calls to + // increment watcherCnt happen before any callbacks are invoked. This will + // ensure that the onDone callback is never invoked before all watcher + // callbacks are invoked, and the watchers have processed the update. + watcherCnt := new(atomic.Int64) + done := func() { + if watcherCnt.Add(-1) == 0 { + onDone() + } + } + funcsToSchedule := []func(context.Context){} + defer func() { + if len(funcsToSchedule) == 0 { + // When there are no watchers for the resources received as part of + // this update, invoke onDone explicitly to unblock the next read on + // the ADS stream. + onDone() + return + } + for _, f := range funcsToSchedule { + a.watcherCallbackSerializer.ScheduleOr(f, onDone) + } + }() + + resourceStates := a.resources[rType] + for name, uErr := range updates { + state, ok := resourceStates[name] + if !ok { + continue + } + + // On error, keep previous version of the resource. But update status + // and error. + if uErr.Err != nil { + state.md.ErrState = md.ErrState + state.md.Status = md.Status + for watcher := range state.watchers { + watcher := watcher + err := uErr.Err + watcherCnt.Add(1) + if state.cache == nil { + funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceError(err, done) }) + } else { + funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.AmbientError(err, done) }) + } + } + continue + } + + if state.deletionIgnored { + state.deletionIgnored = false + a.logger.Infof("A valid update was received for resource %q of type %q after previously ignoring a deletion", name, rType.TypeName) + } + // Notify watchers if any of these conditions are met: + // - this is the first update for this resource + // - this update is different from the one currently cached + // - the previous update for this resource was NACKed, but the update + // before that was the same as this update. + if state.cache == nil || !state.cache.Equal(uErr.Resource) || state.md.ErrState != nil { + // Update the resource cache. + if a.logger.V(2) { + a.logger.Infof("Resource type %q with name %q added to cache", rType.TypeName, name) + } + state.cache = uErr.Resource + + for watcher := range state.watchers { + watcher := watcher + resource := uErr.Resource + watcherCnt.Add(1) + funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceChanged(resource, done) }) + } + } + + // Set status to ACK, and clear error state. The metadata might be a + // NACK metadata because some other resources in the same response + // are invalid. + state.md = md + state.md.ErrState = nil + state.md.Status = xdsresource.ServiceStatusACKed + if md.ErrState != nil { + state.md.Version = md.ErrState.Version + } + } + + // If this resource type requires that all resources be present in every + // SotW response from the server, a response that does not include a + // previously seen resource will be interpreted as a deletion of that + // resource unless ignore_resource_deletion option was set in the server + // config. 
+ if !rType.AllResourcesRequiredInSotW { + return + } + for name, state := range resourceStates { + if state.cache == nil { + // If the resource state does not contain a cached update, which can + // happen when: + // - resource was newly requested but has not yet been received, or, + // - resource was removed as part of a previous update, + // we don't want to generate an error for the watchers. + // + // For the first of the above two conditions, this ADS response may + // be in reaction to an earlier request that did not yet request the + // new resource, so its absence from the response does not + // necessarily indicate that the resource does not exist. For that + // case, we rely on the request timeout instead. + // + // For the second of the above two conditions, we already generated + // an error when we received the first response which removed this + // resource. So, there is no need to generate another one. + continue + } + if _, ok := updates[name]; ok { + // If the resource was present in the response, move on. + continue + } + if state.md.Status == xdsresource.ServiceStatusNotExist { + // The metadata status is set to "ServiceStatusNotExist" if a + // previous update deleted this resource, in which case we do not + // want to repeatedly call the watch callbacks with a + // "resource-not-found" error. + continue + } + if serverConfig.IgnoreResourceDeletion { + // Per A53, resource deletions are ignored if the + // `ignore_resource_deletion` server feature is enabled through the + // xDS client configuration. If the resource deletion is to be + // ignored, the resource is not removed from the cache and the + // corresponding OnResourceDoesNotExist() callback is not invoked on + // the watchers. + if !state.deletionIgnored { + state.deletionIgnored = true + a.logger.Warningf("Ignoring resource deletion for resource %q of type %q", name, rType.TypeName) + } + continue + } + + // If we get here, it means that the resource exists in cache, but not + // in the new update. Delete the resource from cache, and send a + // resource not found error to indicate that the resource has been + // removed. Metadata for the resource is still maintained, as this is + // required by CSDS. + state.cache = nil + state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist} + for watcher := range state.watchers { + watcher := watcher + watcherCnt.Add(1) + funcsToSchedule = append(funcsToSchedule, func(context.Context) { + watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", name, rType.TypeName), done) + }) + } + } +} + +// adsResourceDoesNotExist is called by the xDS client implementation (on all +// interested authorities) to notify the authority that a subscribed resource +// does not exist. +func (a *authority) adsResourceDoesNotExist(rType ResourceType, resourceName string) { + a.xdsClientSerializer.TrySchedule(func(context.Context) { + a.handleADSResourceDoesNotExist(rType, resourceName) + }) +} + +// handleADSResourceDoesNotExist is called when a subscribed resource does not +// exist. It removes the resource from the cache, updates the metadata status +// to ServiceStatusNotExist, and notifies all watchers that the resource does +// not exist. 
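Stripped of the caching and watcher bookkeeping, the deletion rule in the loop above amounts to a set difference between the cache and the latest state-of-the-world response, gated by the ignore_resource_deletion setting from gRFC A53. A simplified sketch:

```go
package main

import "fmt"

// detectDeletions sketches the state-of-the-world deletion rule: any cached
// resource missing from the latest full-state response is treated as
// deleted, unless deletions are configured to be ignored (gRFC A53).
func detectDeletions(cache, update map[string]string, ignoreDeletion bool) []string {
	var deleted []string
	for name := range cache {
		if _, ok := update[name]; ok {
			continue // still present in the response
		}
		if ignoreDeletion {
			continue // keep the cached resource; the client only logs
		}
		deleted = append(deleted, name)
	}
	return deleted
}

func main() {
	cache := map[string]string{"ldsA": "v1", "ldsB": "v1"}
	update := map[string]string{"ldsA": "v2"}
	fmt.Println(detectDeletions(cache, update, false)) // [ldsB]
	fmt.Println(detectDeletions(cache, update, true))  // []
}
```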
+func (a *authority) handleADSResourceDoesNotExist(rType ResourceType, resourceName string) { + if a.logger.V(2) { + a.logger.Infof("Watch for resource %q of type %s timed out", resourceName, rType.TypeName) + } + + resourceStates := a.resources[rType] + if resourceStates == nil { + if a.logger.V(2) { + a.logger.Infof("Resource %q of type %s currently not being watched", resourceName, rType.TypeName) + } + return + } + state, ok := resourceStates[resourceName] + if !ok { + if a.logger.V(2) { + a.logger.Infof("Resource %q of type %s currently not being watched", resourceName, rType.TypeName) + } + return + } + + state.cache = nil + state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist} + for watcher := range state.watchers { + watcher := watcher + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { + watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", resourceName, rType.TypeName), func() {}) + }) + } +} + +// handleRevertingToPrimaryOnUpdate is called when a resource update is received +// from the xDS client. +// +// If the update is from the currently active server, nothing is done. Else, all +// lower priority servers are closed and the active server is reverted to the +// highest priority server that sent the update. +// +// This method is only executed in the context of a serializer callback. +func (a *authority) handleRevertingToPrimaryOnUpdate(serverConfig *ServerConfig) { + if a.activeXDSChannel != nil && isServerConfigEqual(serverConfig, a.activeXDSChannel.serverConfig) { + // If the resource update is from the current active server, nothing + // needs to be done from fallback point of view. + return + } + + if a.logger.V(2) { + a.logger.Infof("Received update from non-active server %q", serverConfig) + } + + // If the resource update is not from the current active server, it means + // that we have received an update from a higher priority server and we need + // to revert back to it. This method guarantees that when an update is + // received from a server, all lower priority servers are closed. + serverIdx := a.serverIndexForConfig(serverConfig) + a.activeXDSChannel = a.xdsChannelConfigs[serverIdx] + + // Close all lower priority channels. + // + // But before closing any channel, we need to unsubscribe from any resources + // that were subscribed to on this channel. Resources could be subscribed to + // from multiple channels as we fallback to lower priority servers. But when + // a higher priority one comes back up, we need to unsubscribe from all + // lower priority ones before releasing the reference to them. + for i := serverIdx + 1; i < len(a.xdsChannelConfigs); i++ { + cfg := a.xdsChannelConfigs[i] + + for rType, rState := range a.resources { + for resourceName, state := range rState { + for xcc := range state.xdsChannelConfigs { + if xcc != cfg { + continue + } + // If the current resource is subscribed to on this channel, + // unsubscribe, and remove the channel from the list of + // channels that this resource is subscribed to. + xcc.channel.unsubscribe(rType, resourceName) + delete(state.xdsChannelConfigs, xcc) + } + } + } + + // Release the reference to the channel. + if cfg.cleanup != nil { + if a.logger.V(2) { + a.logger.Infof("Closing lower priority server %q", cfg.serverConfig) + } + cfg.cleanup() + cfg.cleanup = nil + } + cfg.channel = nil + } +} + +// watchResource registers a new watcher for the specified resource type and +// name. 
It returns a function that can be called to cancel the watch.
+//
+// If this is the first watch for any resource on this authority, an xdsChannel
+// to the first management server (from the list of server configurations) will
+// be created.
+//
+// If this is the first watch for the given resource name, it will subscribe to
+// the resource with the xdsChannel. If a cached copy of the resource exists, it
+// will immediately notify the new watcher. When the last watcher for a resource
+// is removed, it will unsubscribe the resource from the xdsChannel.
+func (a *authority) watchResource(rType ResourceType, resourceName string, watcher ResourceWatcher) func() {
+ cleanup := func() {}
+ done := make(chan struct{})
+
+ a.xdsClientSerializer.ScheduleOr(func(context.Context) {
+ defer close(done)
+
+ if a.logger.V(2) {
+ a.logger.Infof("New watch for type %q, resource name %q", rType.TypeName, resourceName)
+ }
+
+ xdsChannel, err := a.xdsChannelToUse()
+ if err != nil {
+ a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) })
+ return
+ }
+
+ // Lookup the entry for the resource type in the top-level map. If there is
+ // no entry for this resource type, create one.
+ resources := a.resources[rType]
+ if resources == nil {
+ resources = make(map[string]*resourceState)
+ a.resources[rType] = resources
+ }
+
+ // Lookup the resource state for the particular resource name that the watch
+ // is being registered for. If this is the first watch for this resource
+ // name, request it from the management server.
+ state := resources[resourceName]
+ if state == nil {
+ if a.logger.V(2) {
+ a.logger.Infof("First watch for type %q, resource name %q", rType.TypeName, resourceName)
+ }
+ state = &resourceState{
+ watchers: make(map[ResourceWatcher]bool),
+ md: xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusRequested},
+ xdsChannelConfigs: map[*xdsChannelWithConfig]bool{xdsChannel: true},
+ }
+ resources[resourceName] = state
+ xdsChannel.channel.subscribe(rType, resourceName)
+ }
+ // Always add the new watcher to the set of watchers.
+ state.watchers[watcher] = true
+
+ // If we have a cached copy of the resource, notify the new watcher
+ // immediately.
+ if state.cache != nil {
+ if a.logger.V(2) {
+ a.logger.Infof("Resource type %q with resource name %q found in cache: %v", rType.TypeName, resourceName, state.cache)
+ }
+ // state can only be accessed in the context of an
+ // xdsClientSerializer callback. Hence making a copy of the cached
+ // resource here for watcherCallbackSerializer.
+ resource := state.cache
+ a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceChanged(resource, func() {}) })
+ }
+ // If the last update was NACKed, notify the new watcher of the error
+ // immediately as well.
+ if state.md.Status == xdsresource.ServiceStatusNACKed {
+ if a.logger.V(2) {
+ a.logger.Infof("Resource type %q with resource name %q was NACKed", rType.TypeName, resourceName)
+ }
+ // state can only be accessed in the context of an
+ // xdsClientSerializer callback. Hence making a copy of the error
+ // here for watcherCallbackSerializer.
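+ // Whether the error is surfaced as a resource error or an ambient error
+ // depends on whether a cached resource exists for this watch (see the
+ // branch below).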
+ err := state.md.ErrState.Err
+ if state.cache == nil {
+ a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) })
+ } else {
+ a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.AmbientError(err, func() {}) })
+ }
+ }
+ // If the metadata field is updated to indicate that the management
+ // server does not have this resource, notify the new watcher.
+ if state.md.Status == xdsresource.ServiceStatusNotExist {
+ a.watcherCallbackSerializer.TrySchedule(func(context.Context) {
+ watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", resourceName, rType.TypeName), func() {})
+ })
+ }
+ cleanup = a.unwatchResource(rType, resourceName, watcher)
+ }, func() {
+ if a.logger.V(2) {
+ a.logger.Infof("Failed to schedule a watch for type %q, resource name %q, because the xDS client is closed", rType.TypeName, resourceName)
+ }
+ close(done)
+ })
+ <-done
+ return cleanup
+}
+
+func (a *authority) unwatchResource(rType ResourceType, resourceName string, watcher ResourceWatcher) func() {
+ return sync.OnceFunc(func() {
+ done := make(chan struct{})
+ a.xdsClientSerializer.ScheduleOr(func(context.Context) {
+ defer close(done)
+
+ if a.logger.V(2) {
+ a.logger.Infof("Canceling a watch for type %q, resource name %q", rType.TypeName, resourceName)
+ }
+
+ // Lookup the resource type from the resource cache. The entry is
+ // guaranteed to be present, since *we* were the ones who added it in
+ // there when the watch was registered.
+ resources := a.resources[rType]
+ state := resources[resourceName]
+
+ // Delete this particular watcher from the list of watchers, so that its
+ // callback will not be invoked in the future.
+ delete(state.watchers, watcher)
+ if len(state.watchers) > 0 {
+ if a.logger.V(2) {
+ a.logger.Infof("Other watchers exist for type %q, resource name %q", rType.TypeName, resourceName)
+ }
+ return
+ }
+
+ // There are no more watchers for this resource. Unsubscribe this
+ // resource from all channels where it was subscribed to and delete
+ // the state associated with it.
+ if a.logger.V(2) {
+ a.logger.Infof("Removing last watch for resource name %q", resourceName)
+ }
+ for xcc := range state.xdsChannelConfigs {
+ xcc.channel.unsubscribe(rType, resourceName)
+ }
+ delete(resources, resourceName)
+
+ // If there are no more watchers for this resource type, delete the
+ // resource type from the top-level map.
+ if len(resources) == 0 {
+ if a.logger.V(2) {
+ a.logger.Infof("Removing last watch for resource type %q", rType.TypeName)
+ }
+ delete(a.resources, rType)
+ }
+ // If there are no more watchers for any resource type, release the
+ // reference to the xdsChannels.
+ if len(a.resources) == 0 {
+ if a.logger.V(2) {
+ a.logger.Infof("Removing last watch for any resource type, releasing reference to the xdsChannel")
+ }
+ a.closeXDSChannels()
+ }
+ }, func() { close(done) })
+ <-done
+ })
+}
+
+// xdsChannelToUse returns the xdsChannel to use for communicating with the
+// management server. If an active channel is available, it returns that.
+// Otherwise, it creates a new channel using the first server configuration in
+// the list of configurations, and returns that.
+//
+// A non-nil error is returned if the channel creation fails.
+//
+// Only executed in the context of a serializer callback.
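+//
+// For example (illustrative): the very first watch on an authority reaches
+// this method with activeXDSChannel set to nil, so a channel to the highest
+// priority server (xdsChannelConfigs[0]) is created and becomes the active
+// channel for all subsequent watches.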
+func (a *authority) xdsChannelToUse() (*xdsChannelWithConfig, error) { + if a.activeXDSChannel != nil { + return a.activeXDSChannel, nil + } + + sc := a.xdsChannelConfigs[0].serverConfig + xc, cleanup, err := a.getChannelForADS(sc, a) + if err != nil { + return nil, err + } + a.xdsChannelConfigs[0].channel = xc + a.xdsChannelConfigs[0].cleanup = cleanup + a.activeXDSChannel = a.xdsChannelConfigs[0] + return a.activeXDSChannel, nil +} + +// closeXDSChannels closes all the xDS channels associated with this authority, +// when there are no more watchers for any resource type. +// +// Only executed in the context of a serializer callback. +func (a *authority) closeXDSChannels() { + for _, xcc := range a.xdsChannelConfigs { + if xcc.cleanup != nil { + xcc.cleanup() + xcc.cleanup = nil + } + xcc.channel = nil + } + a.activeXDSChannel = nil +} + +// watcherExistsForUncachedResource returns true if there is at least one +// watcher for a resource that has not yet been cached. +// +// Only executed in the context of a serializer callback. +func (a *authority) watcherExistsForUncachedResource() bool { + for _, resourceStates := range a.resources { + for _, state := range resourceStates { + if state.md.Status == xdsresource.ServiceStatusRequested { + return true + } + } + } + return false +} + +// dumpResources returns a dump of the resource configuration cached by this +// authority, for CSDS purposes. +func (a *authority) dumpResources() []*v3statuspb.ClientConfig_GenericXdsConfig { + var ret []*v3statuspb.ClientConfig_GenericXdsConfig + done := make(chan struct{}) + + a.xdsClientSerializer.ScheduleOr(func(context.Context) { + defer close(done) + ret = a.resourceConfig() + }, func() { close(done) }) + <-done + return ret +} + +// resourceConfig returns a slice of GenericXdsConfig objects representing the +// current state of all resources managed by this authority. This is used for +// reporting the current state of the xDS client. +// +// Only executed in the context of a serializer callback. 
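+//
+// The returned entries are what a CSDS consumer would see for this authority;
+// see dumpResources above for how this is invoked on the serializer.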
+func (a *authority) resourceConfig() []*v3statuspb.ClientConfig_GenericXdsConfig { + var ret []*v3statuspb.ClientConfig_GenericXdsConfig + for rType, resourceStates := range a.resources { + typeURL := rType.TypeURL + for name, state := range resourceStates { + var raw *anypb.Any + if state.cache != nil { + raw = &anypb.Any{TypeUrl: typeURL, Value: state.cache.Bytes()} + } + config := &v3statuspb.ClientConfig_GenericXdsConfig{ + TypeUrl: typeURL, + Name: name, + VersionInfo: state.md.Version, + XdsConfig: raw, + LastUpdated: timestamppb.New(state.md.Timestamp), + ClientStatus: serviceStatusToProto(state.md.Status), + } + if errState := state.md.ErrState; errState != nil { + config.ErrorState = &v3adminpb.UpdateFailureState{ + LastUpdateAttempt: timestamppb.New(errState.Timestamp), + Details: errState.Err.Error(), + VersionInfo: errState.Version, + } + } + ret = append(ret, config) + } + } + return ret +} + +func (a *authority) close() { + a.xdsClientSerializerClose() + <-a.xdsClientSerializer.Done() + if a.logger.V(2) { + a.logger.Infof("Closed") + } +} + +func serviceStatusToProto(serviceStatus xdsresource.ServiceStatus) v3adminpb.ClientResourceStatus { + switch serviceStatus { + case xdsresource.ServiceStatusUnknown: + return v3adminpb.ClientResourceStatus_UNKNOWN + case xdsresource.ServiceStatusRequested: + return v3adminpb.ClientResourceStatus_REQUESTED + case xdsresource.ServiceStatusNotExist: + return v3adminpb.ClientResourceStatus_DOES_NOT_EXIST + case xdsresource.ServiceStatusACKed: + return v3adminpb.ClientResourceStatus_ACKED + case xdsresource.ServiceStatusNACKed: + return v3adminpb.ClientResourceStatus_NACKED + default: + return v3adminpb.ClientResourceStatus_UNKNOWN + } +} diff --git a/xds/internal/clients/xdsclient/channel_test.go b/xds/internal/clients/xdsclient/channel_test.go index 79b5d826a..d7417f315 100644 --- a/xds/internal/clients/xdsclient/channel_test.go +++ b/xds/internal/clients/xdsclient/channel_test.go @@ -26,10 +26,10 @@ import ( "testing" "time" - "github.com/envoyproxy/go-control-plane/pkg/wellknown" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/google/uuid" + "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/clients/grpctransport" @@ -43,7 +43,6 @@ import ( v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" - v3routerpb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/http/router/v3" v3httppb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" ) @@ -56,9 +55,10 @@ func xdsChannelForTest(t *testing.T, serverURI, nodeID string, watchExpiryTimeou // Create a grpc transport to the above management server. 
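+ // Note: the credentials name ("insecure") used in the extension below
+ // must match a key in the credentials map passed to
+ // grpctransport.NewBuilder a few lines down.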
si := clients.ServerIdentifier{ ServerURI: serverURI, - Extensions: grpctransport.ServerIdentifierExtension{Credentials: insecure.NewBundle()}, + Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"}, } - tr, err := (&grpctransport.Builder{}).Build(si) + credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()} + tr, err := (grpctransport.NewBuilder(credentials)).Build(si) if err != nil { t.Fatalf("Failed to create a transport for server config %v: %v", si, err) } @@ -547,18 +547,10 @@ func (s) TestChannel_ADS_StreamFailure(t *testing.T) { }, RouteConfigName: routeConfigurationName, }}, - HttpFilters: []*v3httppb.HttpFilter{e2e.HTTPFilter("router", &v3routerpb.Router{})}, }) listenerResource, err := anypb.New(&v3listenerpb.Listener{ Name: listenerResourceName, ApiListener: &v3listenerpb.ApiListener{ApiListener: hcm}, - FilterChains: []*v3listenerpb.FilterChain{{ - Name: "filter-chain-name", - Filters: []*v3listenerpb.Filter{{ - Name: wellknown.HTTPConnectionManager, - ConfigType: &v3listenerpb.Filter_TypedConfig{TypedConfig: hcm}, - }}, - }}, }) if err != nil { t.Fatalf("Failed to create listener resource: %v", err) diff --git a/xds/internal/clients/xdsclient/clientimpl_watchers.go b/xds/internal/clients/xdsclient/clientimpl_watchers.go new file mode 100644 index 000000000..913e313e1 --- /dev/null +++ b/xds/internal/clients/xdsclient/clientimpl_watchers.go @@ -0,0 +1,102 @@ +/* + * + * Copyright 2025 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package xdsclient + +import ( + "fmt" + + "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" +) + +// wrappingWatcher is a wrapper around an xdsresource.ResourceWatcher that adds +// the node ID to the error messages reported to the watcher. +type wrappingWatcher struct { + ResourceWatcher + nodeID string +} + +func (w *wrappingWatcher) AmbientError(err error, done func()) { + w.ResourceWatcher.AmbientError(fmt.Errorf("[xDS node id: %v]: %w", w.nodeID, err), done) +} + +func (w *wrappingWatcher) ResourceError(err error, done func()) { + w.ResourceWatcher.ResourceError(fmt.Errorf("[xDS node id: %v]: %w", w.nodeID, err), done) +} + +// WatchResource starts watching the specified resource. +// +// typeURL specifies the resource type implementation to use. The watch fails +// if there is no resource type implementation for the given typeURL. See the +// ResourceTypes field in the Config struct used to create the XDSClient. +// +// The returned function cancels the watch and prevents future calls to the +// watcher. +func (c *XDSClient) WatchResource(typeURL, resourceName string, watcher ResourceWatcher) (cancel func()) { + // Return early if the client is already closed. 
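+ // Illustrative usage (hypothetical watcher value, not part of this change):
+ //
+ //	cancel := client.WatchResource(xdsresource.V3ListenerURL, "example-listener", watcher)
+ //	defer cancel()
+ //
+ // A watch registered after Close returns the no-op cancel func below.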
+ if c.done.HasFired() {
+ logger.Warningf("Watch registered for type %q, but client is closed", typeURL)
+ return func() {}
+ }
+
+ watcher = &wrappingWatcher{
+ ResourceWatcher: watcher,
+ nodeID: c.config.Node.ID,
+ }
+
+ rType, ok := c.config.ResourceTypes[typeURL]
+ if !ok {
+ logger.Warningf("ResourceType implementation for resource type url %v is not found", typeURL)
+ watcher.ResourceError(fmt.Errorf("ResourceType implementation for resource type url %v is not found", typeURL), func() {})
+ return func() {}
+ }
+
+ n := xdsresource.ParseName(resourceName)
+ a := c.getAuthorityForResource(n)
+ if a == nil {
+ logger.Warningf("Watch registered for name %q of type %q, authority %q is not found", resourceName, rType.TypeName, n.Authority)
+ watcher.ResourceError(fmt.Errorf("authority %q not found in bootstrap config for resource %q", n.Authority, resourceName), func() {})
+ return func() {}
+ }
+ // The watchResource method on the authority is invoked with n.String()
+ // instead of resourceName because n.String() canonicalizes the given name.
+ // So, two resource names that differ only in the ordering of their context
+ // params will result in the same resource being watched by the authority.
+ return a.watchResource(rType, n.String(), watcher)
+}
+
+// Gets the authority for the given resource name.
+//
+// See examples in this section of the gRFC:
+// https://github.com/grpc/proposal/blob/master/A47-xds-federation.md#bootstrap-config-changes
+func (c *XDSClient) getAuthorityForResource(name *xdsresource.Name) *authority {
+ // For new-style resource names, always lookup the authorities map. If the
+ // name does not specify an authority, we will end up looking for an entry
+ // in the map with the empty string as the key.
+ if name.Scheme == xdsresource.FederationScheme {
+ return c.authorities[name.Authority]
+ }
+
+ // For old-style resource names, we use the top-level authority if the name
+ // does not specify an authority.
+ if name.Authority == "" {
+ return c.topLevelAuthority
+ }
+ return c.authorities[name.Authority]
+}
diff --git a/xds/internal/clients/xdsclient/helpers_test.go b/xds/internal/clients/xdsclient/helpers_test.go
index b37c635a2..03b40fd4e 100644
--- a/xds/internal/clients/xdsclient/helpers_test.go
+++ b/xds/internal/clients/xdsclient/helpers_test.go
@@ -96,15 +96,6 @@ func processClientSideListener(lis *v3listenerpb.Listener) (*listenerUpdate, err
 if err := proto.Unmarshal(apiLisAny.GetValue(), apiLis); err != nil {
 return nil, fmt.Errorf("failed to unmarshal api_listener: %v", err)
 }
- // "HttpConnectionManager.xff_num_trusted_hops must be unset or zero and
- // HttpConnectionManager.original_ip_detection_extensions must be empty. If
- // either field has an incorrect value, the Listener must be NACKed." - A41
- if apiLis.XffNumTrustedHops != 0 {
- return nil, fmt.Errorf("xff_num_trusted_hops must be unset or zero %+v", apiLis)
- }
- if len(apiLis.OriginalIpDetectionExtensions) != 0 {
- return nil, fmt.Errorf("original_ip_detection_extensions must be empty %+v", apiLis)
- }
 
 switch apiLis.RouteSpecifier.(type) {
 case *v3httppb.HttpConnectionManager_Rds:
diff --git a/xds/internal/clients/xdsclient/logging.go b/xds/internal/clients/xdsclient/logging.go
new file mode 100644
index 000000000..c7ba21c49
--- /dev/null
+++ b/xds/internal/clients/xdsclient/logging.go
@@ -0,0 +1,36 @@
+/*
+ *
+ * Copyright 2025 gRPC authors.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package xdsclient + +import ( + "fmt" + + "google.golang.org/grpc/grpclog" + internalgrpclog "google.golang.org/grpc/internal/grpclog" +) + +var logger = grpclog.Component("xds") + +func prefixLogger(p *XDSClient) *internalgrpclog.PrefixLogger { + return internalgrpclog.NewPrefixLogger(logger, clientPrefix(p)) +} + +func clientPrefix(p *XDSClient) string { + return fmt.Sprintf("[xds-client %p] ", p) +} diff --git a/xds/internal/clients/xdsclient/test/authority_test.go b/xds/internal/clients/xdsclient/test/authority_test.go new file mode 100644 index 000000000..27fd32efa --- /dev/null +++ b/xds/internal/clients/xdsclient/test/authority_test.go @@ -0,0 +1,335 @@ +/* + * + * Copyright 2022 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package xdsclient_test + +import ( + "context" + "net" + "testing" + + "github.com/google/uuid" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/grpctransport" + "google.golang.org/grpc/xds/internal/clients/internal/testutils" + "google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e" + "google.golang.org/grpc/xds/internal/clients/xdsclient" + "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" + + v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" +) + +const ( + testAuthority1 = "test-authority1" + testAuthority2 = "test-authority2" + testAuthority3 = "test-authority3" +) + +var ( + // These two resources use `testAuthority1`, which contains an empty server + // config and therefore will use the default management server. + authorityTestResourceName11 = buildResourceName(listenerResourceTypeName, testAuthority1, ldsName, nil) + authorityTestResourceName12 = buildResourceName(listenerResourceTypeName, testAuthority1, ldsName+"2", nil) + // This resource uses `testAuthority2`, which contains an empty server + // config and therefore will use the default management server. + authorityTestResourceName2 = buildResourceName(listenerResourceTypeName, testAuthority2, ldsName+"3", nil) + // This resource uses `testAuthority3`, which contains a non-empty server + // config, and therefore will use the non-default management server. 
+ authorityTestResourceName3 = buildResourceName(listenerResourceTypeName, testAuthority3, ldsName+"3", nil)
+)
+
+// setupForAuthorityTests spins up two management servers, one to act as the
+// default and the other to act as the non-default. It also creates an xDS
+// client configuration with three authorities (the first two pointing to the
+// default and the third one pointing to the non-default).
+//
+// Returns two listeners used by the default and non-default management servers
+// respectively, and the xDS client.
+func setupForAuthorityTests(ctx context.Context, t *testing.T) (*testutils.ListenerWrapper, *testutils.ListenerWrapper, *xdsclient.XDSClient) {
+ // Create listener wrappers which notify on a channel whenever a new
+ // connection is accepted. We use this to track the number of transports
+ // used by the xDS client.
+ lisDefault := testutils.NewListenerWrapper(t, nil)
+ lisNonDefault := testutils.NewListenerWrapper(t, nil)
+
+ // Start a management server to act as the default authority.
+ defaultAuthorityServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lisDefault})
+
+ // Start a management server to act as the non-default authority.
+ nonDefaultAuthorityServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lisNonDefault})
+
+ // Create an xDS client configuration in which the first two authorities
+ // have empty server configs, and therefore end up using the default server
+ // config, which points to the default management server above.
+ nodeID := uuid.New().String()
+
+ resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+ ext := grpctransport.ServerIdentifierExtension{Credentials: "insecure"}
+ siDefault := clients.ServerIdentifier{
+ ServerURI: defaultAuthorityServer.Address,
+ Extensions: ext,
+ }
+ siNonDefault := clients.ServerIdentifier{
+ ServerURI: nonDefaultAuthorityServer.Address,
+ Extensions: ext,
+ }
+
+ credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+ xdsClientConfig := xdsclient.Config{
+ Servers: []xdsclient.ServerConfig{{ServerIdentifier: siDefault}},
+ Node: clients.Node{ID: nodeID},
+ TransportBuilder: grpctransport.NewBuilder(credentials),
+ ResourceTypes: resourceTypes,
+ // testAuthority1 and testAuthority2 have empty server configs and
+ // therefore fall back to the top-level server configuration, while
+ // testAuthority3 points to the non-default management server.
+ Authorities: map[string]xdsclient.Authority{
+ testAuthority1: {XDSServers: []xdsclient.ServerConfig{}},
+ testAuthority2: {XDSServers: []xdsclient.ServerConfig{}},
+ testAuthority3: {XDSServers: []xdsclient.ServerConfig{{ServerIdentifier: siNonDefault}}},
+ },
+ }
+
+ // Create an xDS client with the above config.
+ client, err := xdsclient.New(xdsClientConfig) + if err != nil { + t.Fatalf("Failed to create xDS client: %v", err) + } + client.SetWatchExpiryTimeoutForTesting(defaultTestWatchExpiryTimeout) + + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Listeners: []*v3listenerpb.Listener{ + e2e.DefaultClientListener(authorityTestResourceName11, rdsName), + e2e.DefaultClientListener(authorityTestResourceName12, rdsName), + e2e.DefaultClientListener(authorityTestResourceName2, rdsName), + e2e.DefaultClientListener(authorityTestResourceName3, rdsName), + }, + SkipValidation: true, + } + if err := defaultAuthorityServer.Update(ctx, resources); err != nil { + t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) + } + return lisDefault, lisNonDefault, client +} + +// Tests the xdsChannel sharing logic among authorities. The test verifies the +// following scenarios: +// - A watch for a resource name with an authority matching an existing watch +// should not result in a new transport being created. +// - A watch for a resource name with different authority name but same +// authority config as an existing watch should not result in a new transport +// being created. +func (s) TestAuthority_XDSChannelSharing(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + lis, _, client := setupForAuthorityTests(ctx, t) + defer client.Close() + + // Verify that no connection is established to the management server at this + // point. A transport is created only when a resource (which belongs to that + // authority) is requested. + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := lis.NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { + t.Fatal("Unexpected new transport created to management server") + } + + // Request the first resource. Verify that a new transport is created. + watcher := noopListenerWatcher{} + ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, authorityTestResourceName11, watcher) + defer ldsCancel1() + if _, err := lis.NewConnCh.Receive(ctx); err != nil { + t.Fatalf("Timed out when waiting for a new transport to be created to the management server: %v", err) + } + + // Request the second resource. Verify that no new transport is created. + ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, authorityTestResourceName12, watcher) + defer ldsCancel2() + sCtx, sCancel = context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := lis.NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { + t.Fatal("Unexpected new transport created to management server") + } + + // Request the third resource. Verify that no new transport is created. + ldsCancel3 := client.WatchResource(xdsresource.V3ListenerURL, authorityTestResourceName2, watcher) + defer ldsCancel3() + sCtx, sCancel = context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := lis.NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { + t.Fatal("Unexpected new transport created to management server") + } +} + +// Test the xdsChannel close logic. The test verifies that the xDS client +// closes an xdsChannel immediately after the last watch is canceled. +func (s) TestAuthority_XDSChannelClose(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + lis, _, client := setupForAuthorityTests(ctx, t) + defer client.Close() + + // Request the first resource. 
Verify that a new transport is created. + watcher := noopListenerWatcher{} + ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, authorityTestResourceName11, watcher) + val, err := lis.NewConnCh.Receive(ctx) + if err != nil { + t.Fatalf("Timed out when waiting for a new transport to be created to the management server: %v", err) + } + conn := val.(*testutils.ConnWrapper) + + // Request the second resource. Verify that no new transport is created. + ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, authorityTestResourceName12, watcher) + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := lis.NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { + t.Fatal("Unexpected new transport created to management server") + } + + // Cancel both watches, and verify that the connection to the management + // server is closed. + ldsCancel1() + ldsCancel2() + if _, err := conn.CloseCh.Receive(ctx); err != nil { + t.Fatal("Timeout when waiting for connection to management server to be closed") + } +} + +// Tests the scenario where the primary management server is unavailable at +// startup and the xDS client falls back to the secondary. The test verifies +// that the resource watcher is not notified of the connectivity failure until +// all servers have failed. +func (s) TestAuthority_Fallback(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + // Create primary and secondary management servers with restartable + // listeners. + l, err := net.Listen("tcp", "localhost:0") + if err != nil { + t.Fatalf("net.Listen() failed: %v", err) + } + primaryLis := testutils.NewRestartableListener(l) + primaryMgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: primaryLis}) + l, err = net.Listen("tcp", "localhost:0") + if err != nil { + t.Fatalf("net.Listen() failed: %v", err) + } + secondaryLis := testutils.NewRestartableListener(l) + secondaryMgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: secondaryLis}) + + nodeID := uuid.New().String() + + resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType} + psi := clients.ServerIdentifier{ + ServerURI: primaryMgmtServer.Address, + Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"}, + } + ssi := clients.ServerIdentifier{ + ServerURI: secondaryMgmtServer.Address, + Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"}, + } + + // Create config with the above primary and fallback management servers, + // and an xDS client with that configuration. + credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()} + xdsClientConfig := xdsclient.Config{ + Servers: []xdsclient.ServerConfig{{ServerIdentifier: psi}, {ServerIdentifier: ssi}}, + Node: clients.Node{ID: nodeID}, + TransportBuilder: grpctransport.NewBuilder(credentials), + ResourceTypes: resourceTypes, + // Xdstp resource names used in this test do not specify an + // authority. These will end up looking up an entry with the + // empty key in the authorities map. Having an entry with an + // empty key and empty configuration, results in these + // resources also using the top-level configuration. + Authorities: map[string]xdsclient.Authority{ + "": {XDSServers: []xdsclient.ServerConfig{}}, + }, + } + + // Create an xDS client with the above config. 
+ client, err := xdsclient.New(xdsClientConfig)
+ if err != nil {
+ t.Fatalf("Failed to create xDS client: %v", err)
+ }
+ defer client.Close()
+
+ const listenerName = "listener"
+ const rdsPrimaryName = "rds-primary"
+ const rdsSecondaryName = "rds-secondary"
+
+ // Create a Listener resource on the primary.
+ resources := e2e.UpdateOptions{
+ NodeID: nodeID,
+ Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerName, rdsPrimaryName)},
+ SkipValidation: true,
+ }
+ if err := primaryMgmtServer.Update(ctx, resources); err != nil {
+ t.Fatalf("Failed to update primary management server with resources: %v, err: %v", resources, err)
+ }
+
+ // Create a Listener resource on the secondary.
+ resources = e2e.UpdateOptions{
+ NodeID: nodeID,
+ Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerName, rdsSecondaryName)},
+ SkipValidation: true,
+ }
+ if err := secondaryMgmtServer.Update(ctx, resources); err != nil {
+ t.Fatalf("Failed to update secondary management server with resources: %v, err: %v", resources, err)
+ }
+
+ // Stop the primary.
+ primaryLis.Close()
+
+ // Register a watch.
+ watcher := newListenerWatcher()
+ ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, listenerName, watcher)
+ defer ldsCancel()
+
+ // Ensure that the resource error callback is not invoked. Since this is
+ // the first watch and there is no cached resource, an error would be
+ // delivered via resourceErrCh.
+ sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+ defer sCancel()
+ if v, err := watcher.resourceErrCh.Receive(sCtx); err != context.DeadlineExceeded {
+ t.Fatalf("Resource error callback invoked on the watcher with error: %v", v.(error))
+ }
+
+ // Ensure that the resource update callback is invoked.
+ wantUpdate := listenerUpdateErrTuple{
+ update: listenerUpdate{
+ RouteConfigName: rdsSecondaryName,
+ },
+ }
+ if err := verifyListenerUpdate(ctx, watcher.updateCh, wantUpdate); err != nil {
+ t.Fatal(err)
+ }
+
+ // Stop the secondary.
+ secondaryLis.Close()
+
+ // Ensure that the connectivity error is delivered as an ambient error,
+ // since a cached resource exists.
+ if _, err := watcher.ambientErrCh.Receive(ctx); err != nil {
+ t.Fatal("Timeout when waiting for ambient error callback on the watcher")
+ }
+}
diff --git a/xds/internal/clients/xdsclient/test/dump_test.go b/xds/internal/clients/xdsclient/test/dump_test.go
new file mode 100644
index 000000000..9bf34e953
--- /dev/null
+++ b/xds/internal/clients/xdsclient/test/dump_test.go
@@ -0,0 +1,478 @@
+/*
+ *
+ * Copyright 2022 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package xdsclient_test
+
+import (
+ "context"
+ "fmt"
+ "slices"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/uuid"
+ "google.golang.org/grpc/credentials"
+ "google.golang.org/grpc/credentials/insecure"
+ "google.golang.org/grpc/xds/internal/clients"
+ "google.golang.org/grpc/xds/internal/clients/grpctransport"
+ "google.golang.org/grpc/xds/internal/clients/internal/pretty"
+ "google.golang.org/grpc/xds/internal/clients/internal/testutils"
+ "google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e"
+ "google.golang.org/grpc/xds/internal/clients/xdsclient"
+ "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
+ "google.golang.org/protobuf/proto"
+ "google.golang.org/protobuf/testing/protocmp"
+ "google.golang.org/protobuf/types/known/anypb"
+
+ v3adminpb "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
+ v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+ v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
+ v3httppb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3"
+ v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3"
+)
+
+func makeGenericXdsConfig(typeURL, name, version string, status v3adminpb.ClientResourceStatus, config *anypb.Any, failure *v3adminpb.UpdateFailureState) *v3statuspb.ClientConfig_GenericXdsConfig {
+ return &v3statuspb.ClientConfig_GenericXdsConfig{
+ TypeUrl: typeURL,
+ Name: name,
+ VersionInfo: version,
+ ClientStatus: status,
+ XdsConfig: config,
+ ErrorState: failure,
+ }
+}
+
+func checkResourceDump(ctx context.Context, want *v3statuspb.ClientStatusResponse, client *xdsclient.XDSClient) error {
+ var cmpOpts = cmp.Options{
+ protocmp.Transform(),
+ protocmp.IgnoreFields((*v3statuspb.ClientConfig_GenericXdsConfig)(nil), "last_updated"),
+ protocmp.IgnoreFields((*v3statuspb.ClientConfig)(nil), "client_scope"),
+ protocmp.IgnoreFields((*v3adminpb.UpdateFailureState)(nil), "last_update_attempt", "details"),
+ }
+
+ var lastErr error
+ for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
+ b, err := client.DumpResources()
+ if err != nil {
+ lastErr = err
+ continue
+ }
+ got := &v3statuspb.ClientStatusResponse{}
+ if err := proto.Unmarshal(b, got); err != nil {
+ lastErr = err
+ continue
+ }
+ // Sort the client configs based on the `client_scope` field.
+ slices.SortFunc(got.GetConfig(), func(a, b *v3statuspb.ClientConfig) int {
+ return strings.Compare(a.ClientScope, b.ClientScope)
+ })
+ // Sort the resource configs based on the type_url and name fields.
+ for _, cc := range got.GetConfig() {
+ slices.SortFunc(cc.GetGenericXdsConfigs(), func(a, b *v3statuspb.ClientConfig_GenericXdsConfig) int {
+ if strings.Compare(a.TypeUrl, b.TypeUrl) == 0 {
+ return strings.Compare(a.Name, b.Name)
+ }
+ return strings.Compare(a.TypeUrl, b.TypeUrl)
+ })
+ }
+ diff := cmp.Diff(want, got, cmpOpts)
+ if diff == "" {
+ return nil
+ }
+ lastErr = fmt.Errorf("received unexpected resource dump, diff (-want, +got):\n%s, got: %s\n want:%s", diff, pretty.ToJSON(got), pretty.ToJSON(want))
+ }
+ return fmt.Errorf("timeout when waiting for resource dump to reach expected state: %v", lastErr)
+}
+
+// Tests the scenario where there are multiple xDS clients talking to the same
+// management server, and requesting the same set of resources. Verifies that
+// under all circumstances, both xDS clients receive the same configuration from
+// the server.
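+//
+// The dump is obtained via client.DumpResources, which returns a serialized
+// v3statuspb.ClientStatusResponse; see checkResourceDump above for how it is
+// unmarshaled and compared against the expected configuration.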
+func (s) TestDumpResources_ManyToOne(t *testing.T) { + // Initialize the xDS resources to be used in this test. + ldsTargets := []string{"lds.target.good:0000", "lds.target.good:1111"} + rdsTargets := []string{"route-config-0", "route-config-1"} + listeners := make([]*v3listenerpb.Listener, len(ldsTargets)) + listenerAnys := make([]*anypb.Any, len(ldsTargets)) + for i := range ldsTargets { + listeners[i] = e2e.DefaultClientListener(ldsTargets[i], rdsTargets[i]) + listenerAnys[i] = testutils.MarshalAny(t, listeners[i]) + } + + // Spin up an xDS management server on a local port. + mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) + + nodeID := uuid.New().String() + + resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType} + si := clients.ServerIdentifier{ + ServerURI: mgmtServer.Address, + Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"}, + } + + credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()} + xdsClientConfig := xdsclient.Config{ + Servers: []xdsclient.ServerConfig{{ServerIdentifier: si}}, + Node: clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"}, + TransportBuilder: grpctransport.NewBuilder(credentials), + ResourceTypes: resourceTypes, + // Xdstp resource names used in this test do not specify an + // authority. These will end up looking up an entry with the + // empty key in the authorities map. Having an entry with an + // empty key and empty configuration, results in these + // resources also using the top-level configuration. + Authorities: map[string]xdsclient.Authority{ + "": {XDSServers: []xdsclient.ServerConfig{}}, + }, + } + + // Create two xDS clients with the above config. + client1, err := xdsclient.New(xdsClientConfig) + if err != nil { + t.Fatalf("Failed to create xDS client: %v", err) + } + defer client1.Close() + client2, err := xdsclient.New(xdsClientConfig) + if err != nil { + t.Fatalf("Failed to create xDS client: %v", err) + } + defer client2.Close() + + // Dump resources and expect empty configs. + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + wantNode := &v3corepb.Node{ + Id: nodeID, + UserAgentName: "user-agent", + UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"}, + ClientFeatures: []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"}, + } + wantResp := &v3statuspb.ClientStatusResponse{ + Config: []*v3statuspb.ClientConfig{ + { + Node: wantNode, + }, + }, + } + if err := checkResourceDump(ctx, wantResp, client1); err != nil { + t.Fatal(err) + } + if err := checkResourceDump(ctx, wantResp, client2); err != nil { + t.Fatal(err) + } + + // Register watches, dump resources and expect configs in requested state. 
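+ // noopListenerWatcher ignores all watcher callbacks; only the resource
+ // dump is being verified here, not the watcher-facing updates.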
+ for _, xdsC := range []*xdsclient.XDSClient{client1, client2} {
+ for _, target := range ldsTargets {
+ xdsC.WatchResource(xdsresource.V3ListenerURL, target, noopListenerWatcher{})
+ }
+ }
+ wantConfigs := []*v3statuspb.ClientConfig_GenericXdsConfig{
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[0], "", v3adminpb.ClientResourceStatus_REQUESTED, nil, nil),
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[1], "", v3adminpb.ClientResourceStatus_REQUESTED, nil, nil),
+ }
+ wantResp = &v3statuspb.ClientStatusResponse{
+ Config: []*v3statuspb.ClientConfig{
+ {
+ Node: wantNode,
+ GenericXdsConfigs: wantConfigs,
+ },
+ },
+ }
+ if err := checkResourceDump(ctx, wantResp, client1); err != nil {
+ t.Fatal(err)
+ }
+ if err := checkResourceDump(ctx, wantResp, client2); err != nil {
+ t.Fatal(err)
+ }
+
+ // Configure the resources on the management server.
+ if err := mgmtServer.Update(ctx, e2e.UpdateOptions{
+ NodeID: nodeID,
+ Listeners: listeners,
+ SkipValidation: true,
+ }); err != nil {
+ t.Fatal(err)
+ }
+
+ // Dump resources and expect ACK configs.
+ wantConfigs = []*v3statuspb.ClientConfig_GenericXdsConfig{
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[0], "1", v3adminpb.ClientResourceStatus_ACKED, listenerAnys[0], nil),
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[1], "1", v3adminpb.ClientResourceStatus_ACKED, listenerAnys[1], nil),
+ }
+ wantResp = &v3statuspb.ClientStatusResponse{
+ Config: []*v3statuspb.ClientConfig{
+ {
+ Node: wantNode,
+ GenericXdsConfigs: wantConfigs,
+ },
+ },
+ }
+ if err := checkResourceDump(ctx, wantResp, client1); err != nil {
+ t.Fatal(err)
+ }
+ if err := checkResourceDump(ctx, wantResp, client2); err != nil {
+ t.Fatal(err)
+ }
+
+ // Update the first resource of each type in the management server to a
+ // value which is expected to be NACK'ed by the xDS client.
+ listeners[0] = func() *v3listenerpb.Listener {
+ hcm := testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{})
+ return &v3listenerpb.Listener{
+ Name: ldsTargets[0],
+ ApiListener: &v3listenerpb.ApiListener{ApiListener: hcm},
+ }
+ }()
+ if err := mgmtServer.Update(ctx, e2e.UpdateOptions{
+ NodeID: nodeID,
+ Listeners: listeners,
+ SkipValidation: true,
+ }); err != nil {
+ t.Fatal(err)
+ }
+
+ wantConfigs = []*v3statuspb.ClientConfig_GenericXdsConfig{
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[0], "1", v3adminpb.ClientResourceStatus_NACKED, listenerAnys[0], &v3adminpb.UpdateFailureState{VersionInfo: "2"}),
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[1], "2", v3adminpb.ClientResourceStatus_ACKED, listenerAnys[1], nil),
+ }
+ wantResp = &v3statuspb.ClientStatusResponse{
+ Config: []*v3statuspb.ClientConfig{
+ {
+ Node: wantNode,
+ GenericXdsConfigs: wantConfigs,
+ },
+ },
+ }
+ if err := checkResourceDump(ctx, wantResp, client1); err != nil {
+ t.Fatal(err)
+ }
+ if err := checkResourceDump(ctx, wantResp, client2); err != nil {
+ t.Fatal(err)
+ }
+}
+
+// Tests the scenario where there are multiple xDS clients talking to different
+// management servers, and requesting different sets of resources.
+func (s) TestDumpResources_ManyToMany(t *testing.T) {
+ // Initialize the xDS resources to be used in this test:
+ // - The first xDS client watches old style resource names, and thereby
+ // requests these resources from the top-level xDS server.
+ // - The second xDS client watches new style resource names with a non-empty
+ // authority, and thereby requests these resources from the server
+ // configuration for that authority.
+ authority := strings.Join(strings.Split(t.Name(), "/"), "")
+ ldsTargets := []string{
+ "lds.target.good:0000",
+ fmt.Sprintf("xdstp://%s/envoy.config.listener.v3.Listener/lds.target.good:1111", authority),
+ }
+ rdsTargets := []string{
+ "route-config-0",
+ fmt.Sprintf("xdstp://%s/envoy.config.route.v3.RouteConfiguration/route-config-1", authority),
+ }
+ listeners := make([]*v3listenerpb.Listener, len(ldsTargets))
+ listenerAnys := make([]*anypb.Any, len(ldsTargets))
+ for i := range ldsTargets {
+ listeners[i] = e2e.DefaultClientListener(ldsTargets[i], rdsTargets[i])
+ listenerAnys[i] = testutils.MarshalAny(t, listeners[i])
+ }
+
+ // Start two management servers.
+ mgmtServer1 := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+ mgmtServer2 := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+ // The first of the above management servers will be the top-level xDS
+ // server in the bootstrap configuration, and the second will be the xDS
+ // server corresponding to the test authority.
+ nodeID := uuid.New().String()
+
+ resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+ si1 := clients.ServerIdentifier{
+ ServerURI: mgmtServer1.Address,
+ Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+ }
+ si2 := clients.ServerIdentifier{
+ ServerURI: mgmtServer2.Address,
+ Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+ }
+
+ credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+ xdsClientConfig := xdsclient.Config{
+ Servers: []xdsclient.ServerConfig{{ServerIdentifier: si1}},
+ Node: clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
+ TransportBuilder: grpctransport.NewBuilder(credentials),
+ ResourceTypes: resourceTypes,
+ // Xdstp style resource names used in this test use a slash-removed
+ // version of t.Name as their authority; that authority is mapped to
+ // the second management server below.
+ Authorities: map[string]xdsclient.Authority{
+ authority: {XDSServers: []xdsclient.ServerConfig{{ServerIdentifier: si2}}},
+ },
+ }
+
+ // Create two xDS clients with the above config.
+ client1, err := xdsclient.New(xdsClientConfig)
+ if err != nil {
+ t.Fatalf("Failed to create xDS client: %v", err)
+ }
+ defer client1.Close()
+ client2, err := xdsclient.New(xdsClientConfig)
+ if err != nil {
+ t.Fatalf("Failed to create xDS client: %v", err)
+ }
+ defer client2.Close()
+
+ // Check the resource dump before configuring resources on the management
+ // servers, and expect empty configs.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ wantNode := &v3corepb.Node{
+ Id: nodeID,
+ UserAgentName: "user-agent",
+ UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
+ ClientFeatures: []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"},
+ }
+ wantResp := &v3statuspb.ClientStatusResponse{
+ Config: []*v3statuspb.ClientConfig{
+ {
+ Node: wantNode,
+ },
+ },
+ }
+ if err := checkResourceDump(ctx, wantResp, client1); err != nil {
+ t.Fatal(err)
+ }
+ if err := checkResourceDump(ctx, wantResp, client2); err != nil {
+ t.Fatal(err)
+ }
+
+ // Register watches: the first xDS client watches old style resource names,
+ // while the second xDS client watches new style resource names.
+ client1.WatchResource(xdsresource.V3ListenerURL, ldsTargets[0], noopListenerWatcher{})
+ client2.WatchResource(xdsresource.V3ListenerURL, ldsTargets[1], noopListenerWatcher{})
+
+ // Check the resource dump. Both clients should have all resources in
+ // REQUESTED state.
+ wantConfigs1 := []*v3statuspb.ClientConfig_GenericXdsConfig{
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[0], "", v3adminpb.ClientResourceStatus_REQUESTED, nil, nil),
+ }
+ wantConfigs2 := []*v3statuspb.ClientConfig_GenericXdsConfig{
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[1], "", v3adminpb.ClientResourceStatus_REQUESTED, nil, nil),
+ }
+ wantResp = &v3statuspb.ClientStatusResponse{
+ Config: []*v3statuspb.ClientConfig{
+ {
+ Node: wantNode,
+ GenericXdsConfigs: wantConfigs1,
+ },
+ },
+ }
+ if err := checkResourceDump(ctx, wantResp, client1); err != nil {
+ t.Fatal(err)
+ }
+ wantResp = &v3statuspb.ClientStatusResponse{
+ Config: []*v3statuspb.ClientConfig{
+ {
+ Node: wantNode,
+ GenericXdsConfigs: wantConfigs2,
+ },
+ },
+ }
+ if err := checkResourceDump(ctx, wantResp, client2); err != nil {
+ t.Fatal(err)
+ }
+
+ // Configure resources on the first management server.
+ if err := mgmtServer1.Update(ctx, e2e.UpdateOptions{
+ NodeID: nodeID,
+ Listeners: listeners[:1],
+ SkipValidation: true,
+ }); err != nil {
+ t.Fatal(err)
+ }
+
+ // Check the resource dump. One client should have resources in ACKED state,
+ // while the other should still have resources in REQUESTED state.
+ wantConfigs1 = []*v3statuspb.ClientConfig_GenericXdsConfig{
+ makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[0], "1", v3adminpb.ClientResourceStatus_ACKED, listenerAnys[0], nil),
+ }
+ wantResp = &v3statuspb.ClientStatusResponse{
+ Config: []*v3statuspb.ClientConfig{
+ {
+ Node: wantNode,
+ GenericXdsConfigs: wantConfigs1,
+ },
+ },
+ }
+ if err := checkResourceDump(ctx, wantResp, client1); err != nil {
+ t.Fatal(err)
+ }
+ wantResp = &v3statuspb.ClientStatusResponse{
+ Config: []*v3statuspb.ClientConfig{
+ {
+ Node: wantNode,
+ GenericXdsConfigs: wantConfigs2,
+ },
+ },
+ }
+ if err := checkResourceDump(ctx, wantResp, client2); err != nil {
+ t.Fatal(err)
+ }
+
+ // Configure resources on the second management server.
+ if err := mgmtServer2.Update(ctx, e2e.UpdateOptions{
+ NodeID: nodeID,
+ Listeners: listeners[1:],
+ SkipValidation: true,
+ }); err != nil {
+ t.Fatal(err)
+ }
+
+ // Check the resource dump. Both clients should have appropriate resources
+ // in ACKED state.
+ wantConfigs2 = []*v3statuspb.ClientConfig_GenericXdsConfig{ + makeGenericXdsConfig("type.googleapis.com/envoy.config.listener.v3.Listener", ldsTargets[1], "1", v3adminpb.ClientResourceStatus_ACKED, listenerAnys[1], nil), + } + wantResp = &v3statuspb.ClientStatusResponse{ + Config: []*v3statuspb.ClientConfig{ + { + Node: wantNode, + GenericXdsConfigs: wantConfigs1, + }, + }, + } + if err := checkResourceDump(ctx, wantResp, client1); err != nil { + t.Fatal(err) + } + wantResp = &v3statuspb.ClientStatusResponse{ + Config: []*v3statuspb.ClientConfig{ + { + Node: wantNode, + GenericXdsConfigs: wantConfigs2, + }, + }, + } + if err := checkResourceDump(ctx, wantResp, client2); err != nil { + t.Fatal(err) + } +} diff --git a/xds/internal/clients/xdsclient/test/helpers_test.go b/xds/internal/clients/xdsclient/test/helpers_test.go new file mode 100644 index 000000000..e911ec595 --- /dev/null +++ b/xds/internal/clients/xdsclient/test/helpers_test.go @@ -0,0 +1,264 @@ +/* + * + * Copyright 2025 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package xdsclient_test + +import ( + "bytes" + "errors" + "fmt" + "strconv" + "strings" + "testing" + "time" + + "google.golang.org/grpc/internal/grpctest" + "google.golang.org/grpc/xds/internal/clients/internal/pretty" + "google.golang.org/grpc/xds/internal/clients/xdsclient" + "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" + "google.golang.org/protobuf/proto" + + v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" + v3httppb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" +) + +type s struct { + grpctest.Tester +} + +func Test(t *testing.T) { + grpctest.RunSubTests(t, s{}) +} + +const ( + defaultTestWatchExpiryTimeout = 500 * time.Millisecond + defaultTestTimeout = 10 * time.Second + defaultTestShortTimeout = 10 * time.Millisecond // For events expected to *not* happen. + + // ListenerResourceTypeName represents the transport agnostic name for the + // listener resource. + listenerResourceTypeName = "ListenerResource" + + ldsName = "xdsclient-test-lds-resource" + rdsName = "xdsclient-test-rds-resource" + ldsNameNewStyle = "xdstp:///envoy.config.listener.v3.Listener/xdsclient-test-lds-resource" + rdsNameNewStyle = "xdstp:///envoy.config.route.v3.RouteConfiguration/xdsclient-test-rds-resource" +) + +var ( + // Singleton instantiation of the resource type implementation. 
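+ //
+ // AllResourcesRequiredInSotW is set because Listener responses are
+ // state-of-the-world: a previously seen resource missing from a response
+ // is treated as deleted (see the deletion handling in authority.go).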
+	listenerType = xdsclient.ResourceType{
+		TypeURL:                    xdsresource.V3ListenerURL,
+		TypeName:                   listenerResourceTypeName,
+		AllResourcesRequiredInSotW: true,
+		Decoder:                    listenerDecoder{},
+	}
+)
+
+func unmarshalListenerResource(r []byte) (string, listenerUpdate, error) {
+	lis := &v3listenerpb.Listener{}
+	if err := proto.Unmarshal(r, lis); err != nil {
+		return "", listenerUpdate{}, fmt.Errorf("failed to unmarshal resource: %v", err)
+	}
+
+	lu, err := processListener(lis)
+	if err != nil {
+		return lis.GetName(), listenerUpdate{}, err
+	}
+	lu.Raw = r
+	return lis.GetName(), *lu, nil
+}
+
+func processListener(lis *v3listenerpb.Listener) (*listenerUpdate, error) {
+	if lis.GetApiListener() != nil {
+		return processClientSideListener(lis)
+	}
+	return processServerSideListener(lis)
+}
+
+// processClientSideListener checks if the provided Listener proto meets
+// the expected criteria. On success, it returns a listenerUpdate with a
+// non-empty RouteConfigName.
+func processClientSideListener(lis *v3listenerpb.Listener) (*listenerUpdate, error) {
+	update := &listenerUpdate{}
+
+	apiLisAny := lis.GetApiListener().GetApiListener()
+	if !xdsresource.IsHTTPConnManagerResource(apiLisAny.GetTypeUrl()) {
+		return nil, fmt.Errorf("unexpected http connection manager resource type: %q", apiLisAny.GetTypeUrl())
+	}
+	apiLis := &v3httppb.HttpConnectionManager{}
+	if err := proto.Unmarshal(apiLisAny.GetValue(), apiLis); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal api_listener: %v", err)
+	}
+
+	switch apiLis.RouteSpecifier.(type) {
+	case *v3httppb.HttpConnectionManager_Rds:
+		if configsource := apiLis.GetRds().GetConfigSource(); configsource.GetAds() == nil && configsource.GetSelf() == nil {
+			return nil, fmt.Errorf("LDS's RDS configSource is not ADS or Self: %+v", lis)
+		}
+		name := apiLis.GetRds().GetRouteConfigName()
+		if name == "" {
+			return nil, fmt.Errorf("empty route_config_name: %+v", lis)
+		}
+		update.RouteConfigName = name
+	case *v3httppb.HttpConnectionManager_RouteConfig:
+		routeU := apiLis.GetRouteConfig()
+		if routeU == nil {
+			return nil, fmt.Errorf("empty inline RDS resp: %+v", lis)
+		}
+		if routeU.Name == "" {
+			return nil, fmt.Errorf("empty route_config_name in inline RDS resp: %+v", lis)
+		}
+		update.RouteConfigName = routeU.Name
+	case nil:
+		return nil, fmt.Errorf("no RouteSpecifier: %+v", apiLis)
+	default:
+		return nil, fmt.Errorf("unsupported type %T for RouteSpecifier", apiLis.RouteSpecifier)
+	}
+
+	return update, nil
+}
+
+func processServerSideListener(lis *v3listenerpb.Listener) (*listenerUpdate, error) {
+	if n := len(lis.ListenerFilters); n != 0 {
+		return nil, fmt.Errorf("unsupported field 'listener_filters' contains %d entries", n)
+	}
+	if useOrigDst := lis.GetUseOriginalDst(); useOrigDst != nil && useOrigDst.GetValue() {
+		return nil, errors.New("unsupported field 'use_original_dst' is present and set to true")
+	}
+	addr := lis.GetAddress()
+	if addr == nil {
+		return nil, fmt.Errorf("no address field in LDS response: %+v", lis)
+	}
+	sockAddr := addr.GetSocketAddress()
+	if sockAddr == nil {
+		return nil, fmt.Errorf("no socket_address field in LDS response: %+v", lis)
+	}
+	lu := &listenerUpdate{
+		InboundListenerCfg: &inboundListenerConfig{
+			Address: sockAddr.GetAddress(),
+			Port:    strconv.Itoa(int(sockAddr.GetPortValue())),
+		},
+	}
+
+	return lu, nil
+}
+
+type listenerDecoder struct{}
+
+// Decode deserializes and validates an xDS resource from the provided
+// serialized bytes, as received from the xDS management server.
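+//
+// The returned DecodeResult reports the resource name separately from the
+// decode error, which lets the client NACK a response while still knowing
+// which resource the failure belongs to. A sketch of the contract, as
+// implemented below:
+//
+//	res, err := listenerDecoder{}.Decode(raw, xdsclient.DecodeOptions{})
+//	switch {
+//	case err != nil && res == nil:
+//		// Proto unmarshaling failed; the resource name is unknown.
+//	case err != nil:
+//		// Validation failed; res.Name identifies the offending resource.
+//	default:
+//		// res.Resource holds the decoded listener update.
+//	}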
+func (listenerDecoder) Decode(resource []byte, _ xdsclient.DecodeOptions) (*xdsclient.DecodeResult, error) {
+	name, listener, err := unmarshalListenerResource(resource)
+	switch {
+	case name == "":
+		// Name is unset only when protobuf deserialization fails.
+		return nil, err
+	case err != nil:
+		// Protobuf deserialization succeeded, but resource validation failed.
+		return &xdsclient.DecodeResult{Name: name, Resource: &listenerResourceData{Resource: listenerUpdate{}}}, err
+	}
+
+	return &xdsclient.DecodeResult{Name: name, Resource: &listenerResourceData{Resource: listener}}, nil
+}
+
+// listenerResourceData wraps the configuration of a Listener resource as
+// received from the management server.
+//
+// Implements the ResourceData interface.
+type listenerResourceData struct {
+	xdsclient.ResourceData
+
+	Resource listenerUpdate
+}
+
+// Equal returns true if other is equal to l.
+func (l *listenerResourceData) Equal(other xdsclient.ResourceData) bool {
+	if l == nil && other == nil {
+		return true
+	}
+	if (l == nil) != (other == nil) {
+		return false
+	}
+	return bytes.Equal(l.Resource.Raw, other.Bytes())
+}
+
+// ToJSON returns a JSON string representation of the resource data.
+func (l *listenerResourceData) ToJSON() string {
+	return pretty.ToJSON(l.Resource)
+}
+
+// Bytes returns the underlying raw protobuf form of the listener resource.
+func (l *listenerResourceData) Bytes() []byte {
+	return l.Resource.Raw
+}
+
+// listenerUpdate contains information received in an LDS response, which is of
+// interest to the registered LDS watcher.
+type listenerUpdate struct {
+	// RouteConfigName is the route configuration name corresponding to the
+	// target which is being watched through LDS.
+	RouteConfigName string
+
+	// InboundListenerCfg contains inbound listener configuration.
+	InboundListenerCfg *inboundListenerConfig
+
+	// Raw is the resource from the xds response.
+	Raw []byte
+}
+
+// inboundListenerConfig contains information about the inbound listener,
+// i.e., the server-side listener.
+type inboundListenerConfig struct {
+	// Address is the local address on which the inbound listener is expected to
+	// accept incoming connections.
+	Address string
+	// Port is the local port on which the inbound listener is expected to
+	// accept incoming connections.
+	Port string
+}
+
+func makeAuthorityName(name string) string {
+	segs := strings.Split(name, "/")
+	return strings.Join(segs, "")
+}
+
+func makeNewStyleLDSName(authority string) string {
+	return fmt.Sprintf("xdstp://%s/envoy.config.listener.v3.Listener/xdsclient-test-lds-resource", authority)
+}
+
+// buildResourceName returns the resource name in the format of an xdstp://
+// resource.
+func buildResourceName(typeName, auth, id string, ctxParams map[string]string) string {
+	var typS string
+	switch typeName {
+	case listenerResourceTypeName:
+		typS = "envoy.config.listener.v3.Listener"
+	default:
+		// If the name doesn't match any of the standard resources, fall back
+		// to the type name.
+		typS = typeName
+	}
+	return (&xdsresource.Name{
+		Scheme:        "xdstp",
+		Authority:     auth,
+		Type:          typS,
+		ID:            id,
+		ContextParams: ctxParams,
+	}).String()
+}
diff --git a/xds/internal/clients/xdsclient/test/lds_watchers_test.go b/xds/internal/clients/xdsclient/test/lds_watchers_test.go
new file mode 100644
index 000000000..6bf32e73a
--- /dev/null
+++ b/xds/internal/clients/xdsclient/test/lds_watchers_test.go
@@ -0,0 +1,1385 @@
+/*
+ *
+ * Copyright 2022 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package xdsclient_test
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"github.com/google/uuid"
+	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/grpc/xds/internal/clients"
+	"google.golang.org/grpc/xds/internal/clients/grpctransport"
+	"google.golang.org/grpc/xds/internal/clients/internal/syncutil"
+	"google.golang.org/grpc/xds/internal/clients/internal/testutils"
+	"google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e"
+	"google.golang.org/grpc/xds/internal/clients/xdsclient"
+	"google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
+
+	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
+	v3httppb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3"
+	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
+)
+
+type noopListenerWatcher struct{}
+
+func (noopListenerWatcher) ResourceChanged(update xdsclient.ResourceData, onDone func()) {
+	onDone()
+}
+func (noopListenerWatcher) ResourceError(err error, onDone func()) {
+	onDone()
+}
+func (noopListenerWatcher) AmbientError(err error, onDone func()) {
+	onDone()
+}
+
+type listenerUpdateErrTuple struct {
+	update      listenerUpdate
+	resourceErr error
+	ambientErr  error
+}
+
+type listenerWatcher struct {
+	updateCh      *testutils.Channel // Messages of type listenerUpdateErrTuple holding updates.
+	resourceErrCh *testutils.Channel // Messages of type listenerUpdateErrTuple holding resource errors.
+	ambientErrCh  *testutils.Channel // Messages of type listenerUpdateErrTuple holding ambient errors.
+}
+
+func newListenerWatcher() *listenerWatcher {
+	return &listenerWatcher{
+		updateCh:      testutils.NewChannelWithSize(1),
+		resourceErrCh: testutils.NewChannelWithSize(1),
+		ambientErrCh:  testutils.NewChannelWithSize(1),
+	}
+}
+
+func (lw *listenerWatcher) ResourceChanged(update xdsclient.ResourceData, onDone func()) {
+	lisData, ok := update.(*listenerResourceData)
+	if !ok {
+		lw.resourceErrCh.Send(listenerUpdateErrTuple{resourceErr: fmt.Errorf("unexpected resource type: %T", update)})
+		onDone()
+		return
+	}
+	// Drain any update previously buffered on the channel before sending the
+	// most recent one.
+	select {
+	case <-lw.updateCh.C:
+	default:
+	}
+	lw.updateCh.Send(listenerUpdateErrTuple{update: lisData.Resource})
+	onDone()
+}
+
+func (lw *listenerWatcher) AmbientError(err error, onDone func()) {
+	// When used with a go-control-plane management server that continuously
+	// resends resources which are NACKed by the xDS client, using a `Replace()`
+	// here and in ResourceError() simplifies tests which will have
+	// access to the most recently received error.
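+	//
+	// Replace behaves roughly like a drain-then-send on a channel of
+	// capacity one, i.e. a sketch of its effect:
+	//
+	//	select {
+	//	case <-ch.C:
+	//	default:
+	//	}
+	//	ch.Send(value)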
+ lw.ambientErrCh.Replace(listenerUpdateErrTuple{ambientErr: err}) + onDone() +} + +func (lw *listenerWatcher) ResourceError(err error, onDone func()) { + lw.resourceErrCh.Replace(listenerUpdateErrTuple{resourceErr: err}) + onDone() +} + +// badListenerResource returns a listener resource for the given name which does +// not contain the `RouteSpecifier` field in the HTTPConnectionManager, and +// hence is expected to be NACKed by the client. +func badListenerResource(t *testing.T, name string) *v3listenerpb.Listener { + hcm := testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{}) + return &v3listenerpb.Listener{ + Name: name, + ApiListener: &v3listenerpb.ApiListener{ApiListener: hcm}, + } +} + +// verifyNoListenerUpdate verifies that no listener update is received on the +// provided update channel, and returns an error if an update is received. +// +// A very short deadline is used while waiting for the update, as this function +// is intended to be used when an update is not expected. +func verifyNoListenerUpdate(ctx context.Context, updateCh *testutils.Channel) error { + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if u, err := updateCh.Receive(sCtx); err != context.DeadlineExceeded { + return fmt.Errorf("unexpected ListenerUpdate: %v", u) + } + return nil +} + +// verifyListenerUpdate waits for a listenerUpdateErrTuple from the provided +// updateCh (typically the updateCh, resourceErrCh, or ambientErrCh of +// listenerWatcher) and verifies that it matches the expected wantUpdate tuple. +// +// It performs the following checks: +// - Waits for an item on updateCh until the context deadline. +// - If wantUpdate contains a resourceErr or ambientErr, it compares the +// xdsresource.ErrorType of the received error with the expected error +// type. +// - If wantUpdate contains an update, it compares the received update with +// the expected update, ignoring the Raw field. +// +// Returns an error if the context expires, or if the received tuple does not +// match the expected tuple according to the comparison logic. 
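+//
+// A typical call site in the tests below looks like:
+//
+//	wantUpdate := listenerUpdateErrTuple{update: listenerUpdate{RouteConfigName: rdsName}}
+//	if err := verifyListenerUpdate(ctx, lw.updateCh, wantUpdate); err != nil {
+//		t.Fatal(err)
+//	}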
+func verifyListenerUpdate(ctx context.Context, updateCh *testutils.Channel, wantUpdate listenerUpdateErrTuple) error {
+	u, err := updateCh.Receive(ctx)
+	if err != nil {
+		return fmt.Errorf("timeout when waiting for a listener resource from the management server: %v", err)
+	}
+	got := u.(listenerUpdateErrTuple)
+	if wantUpdate.resourceErr != nil {
+		if gotType, wantType := xdsresource.ErrType(got.resourceErr), xdsresource.ErrType(wantUpdate.resourceErr); gotType != wantType {
+			return fmt.Errorf("received update with resource error type %v, want %v", gotType, wantType)
+		}
+	}
+	if wantUpdate.ambientErr != nil {
+		if gotType, wantType := xdsresource.ErrType(got.ambientErr), xdsresource.ErrType(wantUpdate.ambientErr); gotType != wantType {
+			return fmt.Errorf("received update with ambient error type %v, want %v", gotType, wantType)
+		}
+	}
+	cmpOpts := []cmp.Option{
+		cmpopts.EquateEmpty(),
+		cmpopts.IgnoreFields(listenerUpdate{}, "Raw"),
+	}
+	if diff := cmp.Diff(wantUpdate.update, got.update, cmpOpts...); diff != "" {
+		return fmt.Errorf("received unexpected diff in the listener resource update: (-want, +got):\n%s", diff)
+	}
+	return nil
+}
+
+func verifyAmbientErrorType(ctx context.Context, updateCh *testutils.Channel, wantErrType xdsresource.ErrorType, wantNodeID string) error {
+	u, err := updateCh.Receive(ctx)
+	if err != nil {
+		return fmt.Errorf("timeout when waiting for a listener error from the management server: %v", err)
+	}
+	gotErr := u.(listenerUpdateErrTuple).ambientErr
+	return verifyErrorType(gotErr, wantErrType, wantNodeID)
+}
+
+func verifyResourceErrorType(ctx context.Context, updateCh *testutils.Channel, wantErrType xdsresource.ErrorType, wantNodeID string) error {
+	u, err := updateCh.Receive(ctx)
+	if err != nil {
+		return fmt.Errorf("timeout when waiting for a listener error from the management server: %v", err)
+	}
+	gotErr := u.(listenerUpdateErrTuple).resourceErr
+	return verifyErrorType(gotErr, wantErrType, wantNodeID)
+}
+
+func verifyErrorType(gotErr error, wantErrType xdsresource.ErrorType, wantNodeID string) error {
+	if got, want := xdsresource.ErrType(gotErr), wantErrType; got != want {
+		return fmt.Errorf("update received with error %v of type: %v, want %v", gotErr, got, want)
+	}
+	if !strings.Contains(gotErr.Error(), wantNodeID) {
+		return fmt.Errorf("update received with error: %v, want error with node ID: %q", gotErr, wantNodeID)
+	}
+	return nil
+}
+
+// TestLDSWatch covers the case where a single watcher exists for a single
+// listener resource. The test verifies the following scenarios:
+// 1. An update from the management server containing the resource being
+//    watched should result in the invocation of the watch callback.
+// 2. An update from the management server containing a resource *not* being
+//    watched should not result in the invocation of the watch callback.
+// 3. After the watch is cancelled, an update from the management server
+//    containing the resource that was being watched should not result in the
+//    invocation of the watch callback.
+//
+// The test is run for old and new style names.
+func (s) TestLDSWatch(t *testing.T) {
+	tests := []struct {
+		desc                   string
+		resourceName           string
+		watchedResource        *v3listenerpb.Listener // The resource being watched.
+		updatedWatchedResource *v3listenerpb.Listener // The watched resource after an update.
+		notWatchedResource     *v3listenerpb.Listener // A resource which is not being watched.
+		wantUpdate             listenerUpdateErrTuple
+	}{
+		{
+			desc:                   "old style resource",
+			resourceName:           ldsName,
+			watchedResource:        e2e.DefaultClientListener(ldsName, rdsName),
+			updatedWatchedResource: e2e.DefaultClientListener(ldsName, "new-rds-resource"),
+			notWatchedResource:     e2e.DefaultClientListener("unsubscribed-lds-resource", rdsName),
+			wantUpdate: listenerUpdateErrTuple{
+				update: listenerUpdate{
+					RouteConfigName: rdsName,
+				},
+			},
+		},
+		{
+			desc:                   "new style resource",
+			resourceName:           ldsNameNewStyle,
+			watchedResource:        e2e.DefaultClientListener(ldsNameNewStyle, rdsNameNewStyle),
+			updatedWatchedResource: e2e.DefaultClientListener(ldsNameNewStyle, "new-rds-resource"),
+			notWatchedResource:     e2e.DefaultClientListener("unsubscribed-lds-resource", rdsNameNewStyle),
+			wantUpdate: listenerUpdateErrTuple{
+				update: listenerUpdate{
+					RouteConfigName: rdsNameNewStyle,
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.desc, func(t *testing.T) {
+			mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+			nodeID := uuid.New().String()
+
+			resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+			si := clients.ServerIdentifier{
+				ServerURI:  mgmtServer.Address,
+				Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+			}
+
+			credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+			xdsClientConfig := xdsclient.Config{
+				Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+				Node:             clients.Node{ID: nodeID},
+				TransportBuilder: grpctransport.NewBuilder(credentials),
+				ResourceTypes:    resourceTypes,
+				// Xdstp resource names used in this test do not specify an
+				// authority. These will end up looking up an entry with the
+				// empty key in the authorities map. Having an entry with an
+				// empty key and an empty configuration results in these
+				// resources also using the top-level configuration.
+				Authorities: map[string]xdsclient.Authority{
+					"": {XDSServers: []xdsclient.ServerConfig{}},
+				},
+			}
+
+			// Create an xDS client with the above config.
+			client, err := xdsclient.New(xdsClientConfig)
+			if err != nil {
+				t.Fatalf("Failed to create xDS client: %v", err)
+			}
+			defer client.Close()
+
+			// Register a watch for a listener resource and have the watch
+			// callback push the received update on to a channel.
+			lw := newListenerWatcher()
+			ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, test.resourceName, lw)
+
+			// Configure the management server to return a single listener
+			// resource, corresponding to the one we registered a watch for.
+			resources := e2e.UpdateOptions{
+				NodeID:         nodeID,
+				Listeners:      []*v3listenerpb.Listener{test.watchedResource},
+				SkipValidation: true,
+			}
+			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+			defer cancel()
+			if err := mgmtServer.Update(ctx, resources); err != nil {
+				t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+			}
+
+			// Verify the contents of the received update.
+			if err := verifyListenerUpdate(ctx, lw.updateCh, test.wantUpdate); err != nil {
+				t.Fatal(err)
+			}
+
+			// Configure the management server to return an additional listener
+			// resource, one that we are not interested in.
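+			// LDS is a state-of-the-world protocol, so the update below
+			// resends the watched resource (unchanged) alongside the new
+			// one. The client compares each received resource against its
+			// cache and only notifies watchers whose resource actually
+			// changed, which is why verifyNoListenerUpdate is expected to
+			// succeed here.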
+ resources = e2e.UpdateOptions{ + NodeID: nodeID, + Listeners: []*v3listenerpb.Listener{test.watchedResource, test.notWatchedResource}, + SkipValidation: true, + } + if err := mgmtServer.Update(ctx, resources); err != nil { + t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) + } + if err := verifyNoListenerUpdate(ctx, lw.updateCh); err != nil { + t.Fatal(err) + } + + // Cancel the watch and update the resource corresponding to the original + // watch. Ensure that the cancelled watch callback is not invoked. + ldsCancel() + resources = e2e.UpdateOptions{ + NodeID: nodeID, + Listeners: []*v3listenerpb.Listener{test.updatedWatchedResource, test.notWatchedResource}, + SkipValidation: true, + } + if err := mgmtServer.Update(ctx, resources); err != nil { + t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) + } + if err := verifyNoListenerUpdate(ctx, lw.updateCh); err != nil { + t.Fatal(err) + } + }) + } +} + +// TestLDSWatch_TwoWatchesForSameResourceName covers the case where two watchers +// exist for a single listener resource. The test verifies the following +// scenarios: +// 1. An update from the management server containing the resource being +// watched should result in the invocation of both watch callbacks. +// 2. After one of the watches is cancelled, a redundant update from the +// management server should not result in the invocation of either of the +// watch callbacks. +// 3. An update from the management server containing the resource being +// watched should result in the invocation of the un-cancelled watch +// callback. +// +// The test is run for old and new style names. +func (s) TestLDSWatch_TwoWatchesForSameResourceName(t *testing.T) { + tests := []struct { + desc string + resourceName string + watchedResource *v3listenerpb.Listener // The resource being watched. + updatedWatchedResource *v3listenerpb.Listener // The watched resource after an update. 
+		wantUpdateV1           listenerUpdateErrTuple
+		wantUpdateV2           listenerUpdateErrTuple
+	}{
+		{
+			desc:                   "old style resource",
+			resourceName:           ldsName,
+			watchedResource:        e2e.DefaultClientListener(ldsName, rdsName),
+			updatedWatchedResource: e2e.DefaultClientListener(ldsName, "new-rds-resource"),
+			wantUpdateV1: listenerUpdateErrTuple{
+				update: listenerUpdate{
+					RouteConfigName: rdsName,
+				},
+			},
+			wantUpdateV2: listenerUpdateErrTuple{
+				update: listenerUpdate{
+					RouteConfigName: "new-rds-resource",
+				},
+			},
+		},
+		{
+			desc:                   "new style resource",
+			resourceName:           ldsNameNewStyle,
+			watchedResource:        e2e.DefaultClientListener(ldsNameNewStyle, rdsNameNewStyle),
+			updatedWatchedResource: e2e.DefaultClientListener(ldsNameNewStyle, "new-rds-resource"),
+			wantUpdateV1: listenerUpdateErrTuple{
+				update: listenerUpdate{
+					RouteConfigName: rdsNameNewStyle,
+				},
+			},
+			wantUpdateV2: listenerUpdateErrTuple{
+				update: listenerUpdate{
+					RouteConfigName: "new-rds-resource",
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.desc, func(t *testing.T) {
+			mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+			nodeID := uuid.New().String()
+
+			resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+			si := clients.ServerIdentifier{
+				ServerURI:  mgmtServer.Address,
+				Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+			}
+
+			credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+			xdsClientConfig := xdsclient.Config{
+				Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+				Node:             clients.Node{ID: nodeID},
+				TransportBuilder: grpctransport.NewBuilder(credentials),
+				ResourceTypes:    resourceTypes,
+				// Xdstp resource names used in this test do not specify an
+				// authority. These will end up looking up an entry with the
+				// empty key in the authorities map. Having an entry with an
+				// empty key and an empty configuration results in these
+				// resources also using the top-level configuration.
+				Authorities: map[string]xdsclient.Authority{
+					"": {XDSServers: []xdsclient.ServerConfig{}},
+				},
+			}
+
+			// Create an xDS client with the above config.
+			client, err := xdsclient.New(xdsClientConfig)
+			if err != nil {
+				t.Fatalf("Failed to create xDS client: %v", err)
+			}
+			defer client.Close()
+
+			// Register two watches for the same listener resource and have the
+			// callbacks push the received updates on to a channel.
+			lw1 := newListenerWatcher()
+			ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, test.resourceName, lw1)
+			defer ldsCancel1()
+			lw2 := newListenerWatcher()
+			ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, test.resourceName, lw2)
+
+			// Configure the management server to return a single listener
+			// resource, corresponding to the one we registered watches for.
+			resources := e2e.UpdateOptions{
+				NodeID:         nodeID,
+				Listeners:      []*v3listenerpb.Listener{test.watchedResource},
+				SkipValidation: true,
+			}
+			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+			defer cancel()
+			if err := mgmtServer.Update(ctx, resources); err != nil {
+				t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+			}
+
+			// Verify the contents of the received update.
+			if err := verifyListenerUpdate(ctx, lw1.updateCh, test.wantUpdateV1); err != nil {
+				t.Fatal(err)
+			}
+			if err := verifyListenerUpdate(ctx, lw2.updateCh, test.wantUpdateV1); err != nil {
+				t.Fatal(err)
+			}
+
+			// Cancel the second watch and force the management server to push a
+			// redundant update for the resource being watched. Neither of the
+			// two watch callbacks should be invoked.
+			ldsCancel2()
+			if err := mgmtServer.Update(ctx, resources); err != nil {
+				t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+			}
+			if err := verifyNoListenerUpdate(ctx, lw1.updateCh); err != nil {
+				t.Fatal(err)
+			}
+			if err := verifyNoListenerUpdate(ctx, lw2.updateCh); err != nil {
+				t.Fatal(err)
+			}
+
+			// Update to the resource being watched. The un-cancelled callback
+			// should be invoked while the cancelled one should not be.
+			resources = e2e.UpdateOptions{
+				NodeID:         nodeID,
+				Listeners:      []*v3listenerpb.Listener{test.updatedWatchedResource},
+				SkipValidation: true,
+			}
+			if err := mgmtServer.Update(ctx, resources); err != nil {
+				t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+			}
+			if err := verifyListenerUpdate(ctx, lw1.updateCh, test.wantUpdateV2); err != nil {
+				t.Fatal(err)
+			}
+			if err := verifyNoListenerUpdate(ctx, lw2.updateCh); err != nil {
+				t.Fatal(err)
+			}
+		})
+	}
+}
+
+// TestLDSWatch_ThreeWatchesForDifferentResourceNames covers the case where
+// three watchers (two watchers for one resource, and a third watcher for
+// another resource) exist across two listener resources. The test verifies
+// that an update from the management server containing both resources results
+// in the invocation of all watch callbacks.
+//
+// The test is run with both old and new style names.
+func (s) TestLDSWatch_ThreeWatchesForDifferentResourceNames(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	nodeID := uuid.New().String()
+	authority := makeAuthorityName(t.Name())
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+		// Xdstp style resource names used in this test use a version of
+		// t.Name with slashes removed as their authority, and the empty
+		// config results in the top-level xds server configuration being
+		// used for this authority.
+		Authorities: map[string]xdsclient.Authority{
+			authority: {XDSServers: []xdsclient.ServerConfig{}},
+		},
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	// Register two watches for the same listener resource and have the
+	// callbacks push the received updates on to a channel.
+	lw1 := newListenerWatcher()
+	ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw1)
+	defer ldsCancel1()
+	lw2 := newListenerWatcher()
+	ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw2)
+	defer ldsCancel2()
+
+	// Register the third watch for a different listener resource.
+	ldsNameNewStyle := makeNewStyleLDSName(authority)
+	lw3 := newListenerWatcher()
+	ldsCancel3 := client.WatchResource(xdsresource.V3ListenerURL, ldsNameNewStyle, lw3)
+	defer ldsCancel3()
+
+	// Configure the management server to return two listener resources,
+	// corresponding to the registered watches.
+	resources := e2e.UpdateOptions{
+		NodeID: nodeID,
+		Listeners: []*v3listenerpb.Listener{
+			e2e.DefaultClientListener(ldsName, rdsName),
+			e2e.DefaultClientListener(ldsNameNewStyle, rdsName),
+		},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify the contents of the received update for all the watchers. The
+	// two resources returned differ only in the resource name. Therefore the
+	// expected update is the same for all the watchers.
+	wantUpdate := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw1.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+	if err := verifyListenerUpdate(ctx, lw2.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+	if err := verifyListenerUpdate(ctx, lw3.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestLDSWatch_ResourceCaching covers the case where a watch is registered for
+// a resource which is already present in the cache. The test verifies that the
+// watch callback is invoked with the contents from the cache, instead of a
+// request being sent to the management server.
+func (s) TestLDSWatch_ResourceCaching(t *testing.T) {
+	firstRequestReceived := false
+	firstAckReceived := syncutil.NewEvent()
+	secondRequestReceived := syncutil.NewEvent()
+
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
+		OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error {
+			// The first request has an empty version string.
+			if !firstRequestReceived && req.GetVersionInfo() == "" {
+				firstRequestReceived = true
+				return nil
+			}
+			// The first ack has a non-empty version string.
+			if !firstAckReceived.HasFired() && req.GetVersionInfo() != "" {
+				firstAckReceived.Fire()
+				return nil
+			}
+			// Any requests after the first request and ack are not expected.
+			secondRequestReceived.Fire()
+			return nil
+		},
+	})
+
+	nodeID := uuid.New().String()
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+	}
+
+	// Create an xDS client with the above config.
+ client, err := xdsclient.New(xdsClientConfig) + if err != nil { + t.Fatalf("Failed to create xDS client: %v", err) + } + defer client.Close() + + // Register a watch for a listener resource and have the watch + // callback push the received update on to a channel. + lw1 := newListenerWatcher() + ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw1) + defer ldsCancel1() + + // Configure the management server to return a single listener + // resource, corresponding to the one we registered a watch for. + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(ldsName, rdsName)}, + SkipValidation: true, + } + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := mgmtServer.Update(ctx, resources); err != nil { + t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) + } + + // Verify the contents of the received update. + wantUpdate := listenerUpdateErrTuple{ + update: listenerUpdate{ + RouteConfigName: rdsName, + }, + } + if err := verifyListenerUpdate(ctx, lw1.updateCh, wantUpdate); err != nil { + t.Fatal(err) + } + select { + case <-ctx.Done(): + t.Fatal("timeout when waiting for receipt of ACK at the management server") + case <-firstAckReceived.Done(): + } + + // Register another watch for the same resource. This should get the update + // from the cache. + lw2 := newListenerWatcher() + ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw2) + defer ldsCancel2() + if err := verifyListenerUpdate(ctx, lw2.updateCh, wantUpdate); err != nil { + t.Fatal(err) + } + // No request should get sent out as part of this watch. + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + select { + case <-sCtx.Done(): + case <-secondRequestReceived.Done(): + t.Fatal("xdsClient sent out request instead of using update from cache") + } +} + +// TestLDSWatch_ExpiryTimerFiresBeforeResponse tests the case where the client +// does not receive an LDS response for the request that it sends. The test +// verifies that the watch callback is invoked with an error once the +// watchExpiryTimer fires. +func (s) TestLDSWatch_ExpiryTimerFiresBeforeResponse(t *testing.T) { + mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) + + nodeID := uuid.New().String() + + resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType} + si := clients.ServerIdentifier{ + ServerURI: mgmtServer.Address, + Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"}, + } + + credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()} + xdsClientConfig := xdsclient.Config{ + Servers: []xdsclient.ServerConfig{{ServerIdentifier: si}}, + Node: clients.Node{ID: nodeID}, + TransportBuilder: grpctransport.NewBuilder(credentials), + ResourceTypes: resourceTypes, + } + + // Create an xDS client with the above config and override the default + // watch expiry timeout. + client, err := xdsclient.New(xdsClientConfig) + if err != nil { + t.Fatalf("Failed to create xDS client: %v", err) + } + defer client.Close() + client.SetWatchExpiryTimeoutForTesting(defaultTestWatchExpiryTimeout) + + // Register a watch for a resource which is expected to fail with an error + // after the watch expiry timer fires. 
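+	// The timer is needed because, in the state-of-the-world protocol, a
+	// management server that does not know a resource simply omits it from
+	// its responses; only once the timer fires can the client conclude that
+	// the resource does not exist. The test-only override used above is:
+	//
+	//	client.SetWatchExpiryTimeoutForTesting(defaultTestWatchExpiryTimeout)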
+	lw := newListenerWatcher()
+	ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw)
+	defer ldsCancel()
+
+	// Wait for the watch expiry timer to fire.
+	<-time.After(defaultTestWatchExpiryTimeout)
+
+	// Verify that an empty update with the expected resource error is
+	// received.
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	wantErr := xdsresource.NewError(xdsresource.ErrorTypeResourceNotFound, "")
+	if err := verifyListenerUpdate(ctx, lw.resourceErrCh, listenerUpdateErrTuple{resourceErr: wantErr}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestLDSWatch_ValidResponseCancelsExpiryTimerBehavior tests the case where the
+// client receives a valid LDS response for the request that it sends. The test
+// verifies that the behavior associated with the expiry timer (i.e., callback
+// invocation with an error) does not take place.
+func (s) TestLDSWatch_ValidResponseCancelsExpiryTimerBehavior(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	nodeID := uuid.New().String()
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+	}
+
+	// Create an xDS client with the above config and override the default
+	// watch expiry timeout.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+	client.SetWatchExpiryTimeoutForTesting(defaultTestWatchExpiryTimeout)
+
+	// Register a watch for a listener resource and have the watch
+	// callback push the received update on to a channel.
+	lw := newListenerWatcher()
+	ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw)
+	defer ldsCancel()
+
+	// Configure the management server to return a single listener
+	// resource, corresponding to the one we registered a watch for.
+	resources := e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(ldsName, rdsName)},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify the contents of the received update.
+	wantUpdate := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+
+	// Wait for the watch expiry timer to fire, and verify that the callback is
+	// not invoked.
+	<-time.After(defaultTestWatchExpiryTimeout)
+	if err := verifyNoListenerUpdate(ctx, lw.updateCh); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestLDSWatch_ResourceRemoved covers the cases where a resource being watched
+// is removed from the management server. The test verifies the following
+// scenarios:
+// 1. Removing a resource should trigger the watch callback with a resource
+//    removed error. It should not trigger the watch callback for an
+//    unrelated resource.
+// 2. An update to another resource should result in the invocation of the
+//    watch callback associated with that resource. It should not result in
+//    the invocation of the watch callback associated with the deleted
+//    resource.
+//
+// The test is run with both old and new style names.
+func (s) TestLDSWatch_ResourceRemoved(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	nodeID := uuid.New().String()
+	authority := makeAuthorityName(t.Name())
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+		// Xdstp style resource names used in this test use a version of
+		// t.Name with slashes removed as their authority, and the empty
+		// config results in the top-level xds server configuration being
+		// used for this authority.
+		Authorities: map[string]xdsclient.Authority{
+			authority: {XDSServers: []xdsclient.ServerConfig{}},
+		},
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	// Register two watches for two listener resources and have the
+	// callbacks push the received updates on to a channel.
+	resourceName1 := ldsName
+	lw1 := newListenerWatcher()
+	ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, resourceName1, lw1)
+	defer ldsCancel1()
+
+	resourceName2 := makeNewStyleLDSName(authority)
+	lw2 := newListenerWatcher()
+	ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, resourceName2, lw2)
+	defer ldsCancel2()
+
+	// Configure the management server to return two listener resources,
+	// corresponding to the registered watches.
+	resources := e2e.UpdateOptions{
+		NodeID: nodeID,
+		Listeners: []*v3listenerpb.Listener{
+			e2e.DefaultClientListener(resourceName1, rdsName),
+			e2e.DefaultClientListener(resourceName2, rdsName),
+		},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify the contents of the received update for both watchers. The two
+	// resources returned differ only in the resource name. Therefore the
+	// expected update is the same for both watchers.
+	wantUpdate := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw1.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+	if err := verifyListenerUpdate(ctx, lw2.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+
+	// Remove the first listener resource on the management server.
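+	// Because listenerType sets AllResourcesRequiredInSotW (see
+	// helpers_test.go), the client treats the absence of a previously seen
+	// listener in a new response as a deletion, and reports it to watchers
+	// as a resource error of type xdsresource.ErrorTypeResourceNotFound.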
+	resources = e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(resourceName2, rdsName)},
+		SkipValidation: true,
+	}
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// The first watcher should receive a resource error for resource removal,
+	// while the second watcher should not see an update.
+	if err := verifyListenerUpdate(ctx, lw1.resourceErrCh, listenerUpdateErrTuple{
+		resourceErr: xdsresource.NewError(xdsresource.ErrorTypeResourceNotFound, ""),
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if err := verifyNoListenerUpdate(ctx, lw2.updateCh); err != nil {
+		t.Fatal(err)
+	}
+
+	// Update the second listener resource on the management server. The first
+	// watcher should not see an update, while the second watcher should.
+	resources = e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(resourceName2, "new-rds-resource")},
+		SkipValidation: true,
+	}
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+	if err := verifyNoListenerUpdate(ctx, lw1.updateCh); err != nil {
+		t.Fatal(err)
+	}
+	wantUpdate = listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: "new-rds-resource",
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw2.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestLDSWatch_NewWatcherForRemovedResource covers the case where a new
+// watcher registers for a resource that has been removed. The test verifies
+// the following scenarios:
+// 1. When a resource is deleted by the management server, any active
+//    watchers of that resource should be notified with a "resource removed"
+//    error through their watch callback.
+// 2. If a new watcher attempts to register for a resource that has already
+//    been deleted, its watch callback should be immediately invoked with a
+//    "resource removed" error.
+func (s) TestLDSWatch_NewWatcherForRemovedResource(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	nodeID := uuid.New().String()
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	// Register a watch for the listener resource and have the watch callback
+	// push the received updates on to a channel.
+	lw1 := newListenerWatcher()
+	ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw1)
+	defer ldsCancel1()
+
+	// Configure the management server to return a listener resource,
+	// corresponding to the registered watch.
+	resource := e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(ldsName, rdsName)},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := mgmtServer.Update(ctx, resource); err != nil {
+		t.Fatalf("Failed to update management server with resource: %v, err: %v", resource, err)
+	}
+
+	// Verify the contents of the received update for existing watch.
+	wantUpdate := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw1.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+
+	// Remove the listener resource on the management server.
+	resource = e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Listeners:      []*v3listenerpb.Listener{},
+		SkipValidation: true,
+	}
+	if err := mgmtServer.Update(ctx, resource); err != nil {
+		t.Fatalf("Failed to update management server with resource: %v, err: %v", resource, err)
+	}
+
+	// The existing watcher should receive a resource error for resource
+	// removal.
+	updateError := listenerUpdateErrTuple{resourceErr: xdsresource.NewError(xdsresource.ErrorTypeResourceNotFound, "")}
+	if err := verifyListenerUpdate(ctx, lw1.resourceErrCh, updateError); err != nil {
+		t.Fatal(err)
+	}
+
+	// New watchers attempting to register for a deleted resource should also
+	// receive a "resource removed" error.
+	lw2 := newListenerWatcher()
+	ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw2)
+	defer ldsCancel2()
+	if err := verifyListenerUpdate(ctx, lw2.resourceErrCh, updateError); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestLDSWatch_NACKError covers the case where an update from the management
+// server is NACKed by the xdsclient. The test verifies that the error is
+// propagated to the existing watcher. After NACK, if a new watcher registers
+// for the resource, the error is propagated to the new watcher as well.
+func (s) TestLDSWatch_NACKError(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	nodeID := uuid.New().String()
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	// Register a watch for a listener resource and have the watch
+	// callback push the received update on to a channel.
+	lw := newListenerWatcher()
+	ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw)
+	defer ldsCancel()
+
+	// Configure the management server to return a single listener resource
+	// which is expected to be NACKed by the client.
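+	// badListenerResource wraps an empty HttpConnectionManager, so the
+	// decoder in helpers_test.go fails with a "no RouteSpecifier" error and
+	// the client NACKs the response: it keeps its previously ACKed version
+	// and reports an error whose text includes the node ID, which is what
+	// verifyResourceErrorType matches on below.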
+	resources := e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Listeners:      []*v3listenerpb.Listener{badListenerResource(t, ldsName)},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify that the expected error is propagated to the existing watcher.
+	// Since the resource is not cached, it should be received as a resource
+	// error.
+	if err := verifyResourceErrorType(ctx, lw.resourceErrCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify that the expected error is propagated to the new watcher as well.
+	// Since the resource is not cached, it should be received as a resource
+	// error.
+	lw2 := newListenerWatcher()
+	ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw2)
+	defer ldsCancel2()
+	if err := verifyResourceErrorType(ctx, lw2.resourceErrCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Tests the scenario where a watch registered for a resource results in a good
+// update followed by a bad update. This results in the resource cache
+// containing both the old good update and the latest NACK error. The test
+// verifies that when a new watch is registered for the same resource, the new
+// watcher receives the good update followed by the NACK error.
+func (s) TestLDSWatch_ResourceCaching_NACKError(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	nodeID := uuid.New().String()
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	// Register a watch for a listener resource and have the watch
+	// callback push the received update on to a channel.
+	lw1 := newListenerWatcher()
+	ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw1)
+	defer ldsCancel1()
+
+	// Configure the management server to return a single listener
+	// resource, corresponding to the one we registered a watch for.
+	resources := e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(ldsName, rdsName)},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify the contents of the received update.
+	wantUpdate := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw1.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+
+	// Configure the management server to return a single listener resource
+	// which is expected to be NACKed by the client.
+	resources = e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Listeners:      []*v3listenerpb.Listener{badListenerResource(t, ldsName)},
+		SkipValidation: true,
+	}
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify that the expected error is propagated to the existing watcher.
+	// Since the resource is cached, it should be received as an ambient error.
+	if err := verifyAmbientErrorType(ctx, lw1.ambientErrCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil {
+		t.Fatal(err)
+	}
+
+	// Register another watch for the same resource. This should get the update
+	// and error from the cache.
+	lw2 := newListenerWatcher()
+	ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw2)
+	defer ldsCancel2()
+	if err := verifyListenerUpdate(ctx, lw2.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+	// Verify that the expected error is propagated to the new watcher as well.
+	// Since the resource is cached, it should be received as an ambient error.
+	if err := verifyAmbientErrorType(ctx, lw2.ambientErrCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestLDSWatch_PartialValid covers the case where a response from the
+// management server contains both valid and invalid resources and is expected
+// to be NACKed by the xdsclient. The test verifies that watchers corresponding
+// to the valid resource receive the update, while watchers corresponding to the
+// invalid resource receive an error.
+func (s) TestLDSWatch_PartialValid(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	nodeID := uuid.New().String()
+	authority := makeAuthorityName(t.Name())
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+		// Xdstp style resource names used in this test use a version of
+		// t.Name with slashes removed as their authority, and the empty
+		// config results in the top-level xds server configuration being
+		// used for this authority.
+		Authorities: map[string]xdsclient.Authority{
+			authority: {XDSServers: []xdsclient.ServerConfig{}},
+		},
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	// Register two watches for listener resources. The first watch is expected
+	// to receive an error because the received resource is NACKed. The second
+	// watch is expected to get a good update.
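+	// This split is possible because Decode (see helpers_test.go) returns
+	// the resource name even when validation fails: the response as a whole
+	// is NACKed, but resources that decoded cleanly are still delivered to
+	// their watchers.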
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	badResourceName := ldsName
+	lw1 := newListenerWatcher()
+	ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, badResourceName, lw1)
+	defer ldsCancel1()
+	goodResourceName := makeNewStyleLDSName(authority)
+	lw2 := newListenerWatcher()
+	ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, goodResourceName, lw2)
+	defer ldsCancel2()
+
+	// Configure the management server with two listener resources. One of these
+	// is a bad resource causing the update to be NACKed.
+	resources := e2e.UpdateOptions{
+		NodeID: nodeID,
+		Listeners: []*v3listenerpb.Listener{
+			badListenerResource(t, badResourceName),
+			e2e.DefaultClientListener(goodResourceName, rdsName),
+		},
+		SkipValidation: true,
+	}
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify that the expected error is propagated to the watcher which
+	// requested the bad resource. Since the resource is not cached, it should
+	// be received as a resource error.
+	if err := verifyResourceErrorType(ctx, lw1.resourceErrCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify that the watcher watching the good resource receives a good
+	// update.
+	wantUpdate := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw2.updateCh, wantUpdate); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestLDSWatch_PartialResponse covers the case where a response from the
+// management server does not contain all requested resources. LDS responses are
+// supposed to contain all requested resources, and the absence of one usually
+// indicates that the management server does not know about it. In cases where
+// the server has never responded with this resource before, the xDS client is
+// expected to wait for the watch timeout to expire before concluding that the
+// resource does not exist on the server.
+func (s) TestLDSWatch_PartialResponse(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	nodeID := uuid.New().String()
+	authority := makeAuthorityName(t.Name())
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+		// Xdstp style resource names used in this test use a version of
+		// t.Name with slashes removed as their authority, and the empty
+		// config results in the top-level xds server configuration being
+		// used for this authority.
+		Authorities: map[string]xdsclient.Authority{
+			authority: {XDSServers: []xdsclient.ServerConfig{}},
+		},
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	// Register two watches for two listener resources and have the
+	// callbacks push the received updates onto a channel.
+	resourceName1 := ldsName
+	lw1 := newListenerWatcher()
+	ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, resourceName1, lw1)
+	defer ldsCancel1()
+
+	resourceName2 := makeNewStyleLDSName(authority)
+	lw2 := newListenerWatcher()
+	ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, resourceName2, lw2)
+	defer ldsCancel2()
+
+	// Configure the management server to return only one of the two listener
+	// resources, corresponding to the registered watches.
+	resources := e2e.UpdateOptions{
+		NodeID: nodeID,
+		Listeners: []*v3listenerpb.Listener{
+			e2e.DefaultClientListener(resourceName1, rdsName),
+		},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify the contents of the received update for the first watcher.
+	wantUpdate1 := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw1.updateCh, wantUpdate1); err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify that the second watcher does not get an update.
+	if err := verifyNoListenerUpdate(ctx, lw2.updateCh); err != nil {
+		t.Fatal(err)
+	}
+
+	// Configure the management server to return two listener resources,
+	// corresponding to the registered watches.
+	resources = e2e.UpdateOptions{
+		NodeID: nodeID,
+		Listeners: []*v3listenerpb.Listener{
+			e2e.DefaultClientListener(resourceName1, rdsName),
+			e2e.DefaultClientListener(resourceName2, rdsName),
+		},
+		SkipValidation: true,
+	}
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Verify the contents of the received update for the second watcher.
+	wantUpdate2 := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw2.updateCh, wantUpdate2); err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify that the first watcher gets no update, as the first resource did
+	// not change.
+	if err := verifyNoListenerUpdate(ctx, lw1.updateCh); err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/xds/internal/clients/xdsclient/test/misc_watchers_test.go b/xds/internal/clients/xdsclient/test/misc_watchers_test.go
new file mode 100644
index 000000000..397022725
--- /dev/null
+++ b/xds/internal/clients/xdsclient/test/misc_watchers_test.go
@@ -0,0 +1,530 @@
+/*
+ *
+ * Copyright 2022 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package xdsclient_test
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"strings"
+	"testing"
+
+	"github.com/google/uuid"
+	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/grpc/xds/internal/clients"
+	"google.golang.org/grpc/xds/internal/clients/grpctransport"
+	"google.golang.org/grpc/xds/internal/clients/internal/testutils"
+	"google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e"
+	"google.golang.org/grpc/xds/internal/clients/internal/testutils/fakeserver"
+	"google.golang.org/grpc/xds/internal/clients/xdsclient"
+	"google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
+	"google.golang.org/protobuf/types/known/anypb"
+
+	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
+	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
+)
+
+// testLDSWatcher is a test watcher that, upon receipt of a valid update,
+// registers two more watches for the names passed in at creation time.
+type testLDSWatcher struct {
+	client           *xdsclient.XDSClient
+	name1, name2     string
+	lw1, lw2         *listenerWatcher
+	cancel1, cancel2 func()
+	updateCh         *testutils.Channel
+}
+
+func newTestLDSWatcher(client *xdsclient.XDSClient, name1, name2 string) *testLDSWatcher {
+	return &testLDSWatcher{
+		client:   client,
+		name1:    name1,
+		name2:    name2,
+		lw1:      newListenerWatcher(),
+		lw2:      newListenerWatcher(),
+		updateCh: testutils.NewChannelWithSize(1),
+	}
+}
+
+func (lw *testLDSWatcher) ResourceChanged(update xdsclient.ResourceData, onDone func()) {
+	lisData, ok := update.(*listenerResourceData)
+	if !ok {
+		lw.updateCh.Send(listenerUpdateErrTuple{resourceErr: fmt.Errorf("unexpected resource type: %T", update)})
+		onDone()
+		return
+	}
+	lw.updateCh.Send(listenerUpdateErrTuple{update: lisData.Resource})
+
+	lw.cancel1 = lw.client.WatchResource(xdsresource.V3ListenerURL, lw.name1, lw.lw1)
+	lw.cancel2 = lw.client.WatchResource(xdsresource.V3ListenerURL, lw.name2, lw.lw2)
+	onDone()
+}
+
+func (lw *testLDSWatcher) AmbientError(err error, onDone func()) {
+	// When used with a go-control-plane management server that continuously
+	// resends resources which are NACKed by the xDS client, using a `Replace()`
+	// here and in ResourceError() simplifies tests which will have
+	// access to the most recently received error.
+	lw.updateCh.Replace(listenerUpdateErrTuple{ambientErr: err})
+	onDone()
+}
+
+func (lw *testLDSWatcher) ResourceError(err error, onDone func()) {
+	lw.updateCh.Replace(listenerUpdateErrTuple{resourceErr: xdsresource.NewError(xdsresource.ErrorTypeResourceNotFound, "Listener not found in received response")})
+	onDone()
+}
+
+func (lw *testLDSWatcher) cancel() {
+	lw.cancel1()
+	lw.cancel2()
+}
+
+// TestWatchCallAnotherWatch tests the scenario where a watch is registered for
+// a resource, and more watches are registered from the first watch's callback.
+// The test verifies that this scenario does not lead to a deadlock.
+func (s) TestWatchCallAnotherWatch(t *testing.T) {
+	// Start an xDS management server and set the option to allow it to respond
+	// to requests which only specify a subset of the configured resources.
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
+
+	nodeID := uuid.New().String()
+	authority := makeAuthorityName(t.Name())
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+		// Xdstp style resource names used in this test use a slash removed
+		// version of t.Name as their authority, and the empty config
+		// results in the top-level xds server configuration being used for
+		// this authority.
+		Authorities: map[string]xdsclient.Authority{
+			authority: {XDSServers: []xdsclient.ServerConfig{}},
+		},
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	// Configure the management server to return two listener resources,
+	// corresponding to the registered watches.
+	ldsNameNewStyle := makeNewStyleLDSName(authority)
+	resources := e2e.UpdateOptions{
+		NodeID: nodeID,
+		Listeners: []*v3listenerpb.Listener{
+			e2e.DefaultClientListener(ldsName, rdsName),
+			e2e.DefaultClientListener(ldsNameNewStyle, rdsNameNewStyle),
+		},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := mgmtServer.Update(ctx, resources); err != nil {
+		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
+	}
+
+	// Create a listener watcher that registers two more watches from
+	// its ResourceChanged callback:
+	// - one for the same resource name as this watch, which would be
+	//   satisfied from the xDS client cache
+	// - the other for a different resource name, which would be
+	//   satisfied from the server
+	lw := newTestLDSWatcher(client, ldsName, ldsNameNewStyle)
+	defer lw.cancel()
+	ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, ldsName, lw)
+	defer ldsCancel()
+
+	// Verify the contents of the received updates for all three watchers. The
+	// outer watcher and its first nested watcher watch the same old-style
+	// resource, and therefore expect the same update, pointing to rdsName.
+	wantUpdate12 := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsName,
+		},
+	}
+	// The second nested watcher watches the new-style resource, whose
+	// listener points to rdsNameNewStyle.
+	wantUpdate3 := listenerUpdateErrTuple{
+		update: listenerUpdate{
+			RouteConfigName: rdsNameNewStyle,
+		},
+	}
+	if err := verifyListenerUpdate(ctx, lw.updateCh, wantUpdate12); err != nil {
+		t.Fatal(err)
+	}
+	if err := verifyListenerUpdate(ctx, lw.lw1.updateCh, wantUpdate12); err != nil {
+		t.Fatal(err)
+	}
+	if err := verifyListenerUpdate(ctx, lw.lw2.updateCh, wantUpdate3); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestNodeProtoSentOnlyInFirstRequest verifies that a non-empty node proto gets
+// sent only on the first discovery request message on the ADS stream.
+//
+// It also verifies the same behavior holds after a stream restart.
+func (s) TestNodeProtoSentOnlyInFirstRequest(t *testing.T) {
+	// Create a restartable listener which can close existing connections.
+	l, err := net.Listen("tcp", "localhost:0")
+	if err != nil {
+		t.Fatalf("Error while listening. Err: %v", err)
+	}
+	lis := testutils.NewRestartableListener(l)
+
+	// Start a fake xDS management server with the above restartable listener.
+	//
+	// We are unable to use the go-control-plane server here, because it caches
+	// the node proto received in the first request message and adds it to
+	// subsequent requests before invoking the OnStreamRequest() callback.
+	// Therefore we cannot verify what is sent by the xDS client.
+	mgmtServer, cleanup, err := fakeserver.StartServer(lis)
+	if err != nil {
+		t.Fatalf("Failed to start fake xDS server: %v", err)
+	}
+	defer cleanup()
+
+	// Create an xDS client configuration pointing to the above management
+	// server.
+	nodeID := uuid.New().String()
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+		// Xdstp resource names used in this test do not specify an
+		// authority. These will end up looking up an entry with the
+		// empty key in the authorities map. Having an entry with an
+		// empty key and empty configuration results in these
+		// resources also using the top-level configuration.
+		Authorities: map[string]xdsclient.Authority{
+			"": {XDSServers: []xdsclient.ServerConfig{}},
+		},
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	const (
+		serviceName     = "my-service-client-side-xds"
+		routeConfigName = "route-" + serviceName
+		clusterName     = "cluster-" + serviceName
+		serviceName2    = "my-service-client-side-xds-2"
+	)
+
+	// Register a watch for the Listener resource.
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	watcher := newListenerWatcher()
+	ldsCancel1 := client.WatchResource(xdsresource.V3ListenerURL, serviceName, watcher)
+	defer ldsCancel1()
+
+	// Ensure the watch results in a discovery request with a non-empty node
+	// proto, since this is the first request on the stream.
+	if err := readDiscoveryResponseAndCheckForNonEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil {
+		t.Fatal(err)
+	}
+
+	// Configure a listener resource on the fake xDS server.
+	lisAny, err := anypb.New(e2e.DefaultClientListener(serviceName, routeConfigName))
+	if err != nil {
+		t.Fatalf("Failed to marshal listener resource into an Any proto: %v", err)
+	}
+	mgmtServer.XDSResponseChan <- &fakeserver.Response{
+		Resp: &v3discoverypb.DiscoveryResponse{
+			TypeUrl:     "type.googleapis.com/envoy.config.listener.v3.Listener",
+			VersionInfo: "1",
+			Resources:   []*anypb.Any{lisAny},
+		},
+	}
+
+	// The xDS client is expected to ACK the Listener resource. The discovery
+	// request corresponding to the ACK must contain an empty node proto.
+	if err := readDiscoveryResponseAndCheckForEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil {
+		t.Fatal(err)
+	}
+
+	// Register a watch for another Listener resource.
+	ldsCancel2 := client.WatchResource(xdsresource.V3ListenerURL, serviceName2, watcher)
+	defer ldsCancel2()
+
+	// Ensure the watch results in a discovery request with an empty node proto.
+	if err := readDiscoveryResponseAndCheckForEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil {
+		t.Fatal(err)
+	}
+
+	// Configure the other listener resource on the fake xDS server.
+	lisAny2, err := anypb.New(e2e.DefaultClientListener(serviceName2, routeConfigName))
+	if err != nil {
+		t.Fatalf("Failed to marshal listener resource into an Any proto: %v", err)
+	}
+	mgmtServer.XDSResponseChan <- &fakeserver.Response{
+		Resp: &v3discoverypb.DiscoveryResponse{
+			TypeUrl:     "type.googleapis.com/envoy.config.listener.v3.Listener",
+			VersionInfo: "1",
+			Resources:   []*anypb.Any{lisAny2},
+		},
+	}
+
+	// Ensure the discovery request for the ACK contains an empty node proto.
+	if err := readDiscoveryResponseAndCheckForEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil {
+		t.Fatal(err)
+	}
+
+	// Stop the management server and expect the error callback to be invoked.
+	lis.Stop()
+	select {
+	case <-ctx.Done():
+		t.Fatal("Timeout when waiting for the connection error to be propagated to the watcher")
+	case <-watcher.ambientErrCh.C:
+	}
+	// Restart the management server.
+	lis.Restart()
+
+	// The xDS client is expected to re-request previously requested resources.
+	// Here, we expect a single DiscoveryRequest message containing both
+	// listener resources. The message should contain a non-nil node proto
+	// (since it's the first request after the restart).
+	//
+	// And since we don't push any responses on the response channel of the fake
+	// server, we do not expect any ACKs here.
+	if err := readDiscoveryResponseAndCheckForNonEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// readDiscoveryResponseAndCheckForEmptyNodeProto reads a discovery request
+// message out of the provided reqCh. It returns an error if it fails to read a
+// message before the context deadline expires, or if the read message contains
+// a non-empty node proto.
+func readDiscoveryResponseAndCheckForEmptyNodeProto(ctx context.Context, reqCh *testutils.Channel) error {
+	v, err := reqCh.Receive(ctx)
+	if err != nil {
+		return fmt.Errorf("timeout when waiting for a DiscoveryRequest message")
+	}
+	req := v.(*fakeserver.Request).Req.(*v3discoverypb.DiscoveryRequest)
+	if node := req.GetNode(); node != nil {
+		return fmt.Errorf("node proto received in DiscoveryRequest message is %v, want empty node proto", node)
+	}
+	return nil
+}
+
+// readDiscoveryResponseAndCheckForNonEmptyNodeProto reads a discovery request
+// message out of the provided reqCh. It returns an error if it fails to read a
+// message before the context deadline expires, or if the read message contains
+// an empty node proto.
+func readDiscoveryResponseAndCheckForNonEmptyNodeProto(ctx context.Context, reqCh *testutils.Channel) error {
+	v, err := reqCh.Receive(ctx)
+	if err != nil {
+		return fmt.Errorf("timeout when waiting for a DiscoveryRequest message")
+	}
+	req := v.(*fakeserver.Request).Req.(*v3discoverypb.DiscoveryRequest)
+	if node := req.GetNode(); node == nil {
+		return fmt.Errorf("empty node proto received in DiscoveryRequest message, want non-empty node proto")
+	}
+	return nil
+}
+
+// Tests that the errors returned by the xDS client when watching a resource
+// contain the node ID that was used to create the client. This test covers two
+// scenarios:
+//
+//  1. When a valid watch has already been registered for a supported resource
+//     type, and a new watch is registered with a type URL that is not present
+//     in the provided resource types.
+//  2. When a watch is registered for a resource name whose authority is not
+//     found in the xDS client config.
+func (s) TestWatchErrorsContainNodeID(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	// Create an xDS client configuration pointing to the above management
+	// server.
+	nodeID := uuid.New().String()
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()}
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: grpctransport.NewBuilder(credentials),
+		ResourceTypes:    resourceTypes,
+		// Xdstp resource names used in this test do not specify an
+		// authority. These will end up looking up an entry with the
+		// empty key in the authorities map. Having an entry with an
+		// empty key and empty configuration results in these
+		// resources also using the top-level configuration.
+		Authorities: map[string]xdsclient.Authority{
+			"": {XDSServers: []xdsclient.ServerConfig{}},
+		},
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+
+	t.Run("Right_Wrong_ResourceType_Implementations", func(t *testing.T) {
+		const listenerName = "listener-name"
+		watcher := newListenerWatcher()
+		client.WatchResource(xdsresource.V3ListenerURL, listenerName, watcher)
+
+		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+		defer sCancel()
+		select {
+		case <-sCtx.Done():
+		case <-watcher.updateCh.C:
+			t.Fatal("Unexpected resource update")
+		case <-watcher.resourceErrCh.C:
+			t.Fatal("Unexpected resource error")
+		}
+
+		client.WatchResource("non-existent-type-url", listenerName, watcher)
+		select {
+		case <-ctx.Done():
+			t.Fatal("Timeout when waiting for error callback to be invoked")
+		case u, ok := <-watcher.resourceErrCh.C:
+			if !ok {
+				t.Fatalf("got no update, wanted listener resource error from the management server")
+			}
+			gotErr := u.(listenerUpdateErrTuple).resourceErr
+			if !strings.Contains(gotErr.Error(), nodeID) {
+				t.Fatalf("Unexpected error: %v, want error with node ID: %q", gotErr, nodeID)
+			}
+		}
+	})
+
+	t.Run("Missing_Authority", func(t *testing.T) {
+		const listenerName = "xdstp://nonexistent-authority/envoy.config.listener.v3.Listener/listener-name"
+		watcher := newListenerWatcher()
+		client.WatchResource(xdsresource.V3ListenerURL, listenerName, watcher)
+
+		select {
+		case <-ctx.Done():
+			t.Fatal("Timeout when waiting for error callback to be invoked")
+		case u, ok := <-watcher.resourceErrCh.C:
+			if !ok {
+				t.Fatalf("got no update, wanted listener resource error from the management server")
+			}
+			gotErr := u.(listenerUpdateErrTuple).resourceErr
+			if !strings.Contains(gotErr.Error(), nodeID) {
+				t.Fatalf("Unexpected error: %v, want error with node ID: %q", gotErr, nodeID)
+			}
+		}
+	})
+}
+
+// erroringTransportBuilder is a transport builder which always returns a nil
+// transport along with an error.
+type erroringTransportBuilder struct{}
+
+func (*erroringTransportBuilder) Build(_ clients.ServerIdentifier) (clients.Transport, error) {
+	return nil, fmt.Errorf("failed to create transport")
+}
+
+// Tests that the errors returned by the xDS client when watching a resource
+// contain the node ID when channel creation to the management server fails.
+func (s) TestWatchErrorsContainNodeID_ChannelCreationFailure(t *testing.T) {
+	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
+
+	// Create an xDS client configuration pointing to the above management
+	// server.
+	nodeID := uuid.New().String()
+
+	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
+	si := clients.ServerIdentifier{
+		ServerURI:  mgmtServer.Address,
+		Extensions: grpctransport.ServerIdentifierExtension{Credentials: "insecure"},
+	}
+
+	xdsClientConfig := xdsclient.Config{
+		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
+		Node:             clients.Node{ID: nodeID},
+		TransportBuilder: &erroringTransportBuilder{},
+		ResourceTypes:    resourceTypes,
+		// Xdstp resource names used in this test do not specify an
+		// authority. These will end up looking up an entry with the
+		// empty key in the authorities map. Having an entry with an
+		// empty key and empty configuration results in these
+		// resources also using the top-level configuration.
+		Authorities: map[string]xdsclient.Authority{
+			"": {XDSServers: []xdsclient.ServerConfig{}},
+		},
+	}
+
+	// Create an xDS client with the above config.
+	client, err := xdsclient.New(xdsClientConfig)
+	if err != nil {
+		t.Fatalf("Failed to create xDS client: %v", err)
+	}
+	defer client.Close()
+
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+
+	const listenerName = "listener-name"
+	watcher := newListenerWatcher()
+	client.WatchResource(xdsresource.V3ListenerURL, listenerName, watcher)
+
+	select {
+	case <-ctx.Done():
+		t.Fatal("Timeout when waiting for error callback to be invoked")
+	case u, ok := <-watcher.resourceErrCh.C:
+		if !ok {
+			t.Fatalf("got no update, wanted listener resource error from the management server")
+		}
+		gotErr := u.(listenerUpdateErrTuple).resourceErr
+		if !strings.Contains(gotErr.Error(), nodeID) {
+			t.Fatalf("Unexpected error: %v, want error with node ID: %q", gotErr, nodeID)
+		}
+	}
+}
diff --git a/xds/internal/clients/xdsclient/xdsclient.go b/xds/internal/clients/xdsclient/xdsclient.go
index f893e8c92..f0f4a6784 100644
--- a/xds/internal/clients/xdsclient/xdsclient.go
+++ b/xds/internal/clients/xdsclient/xdsclient.go
@@ -1,5 +1,3 @@
-//revive:disable:unused-parameter
-
 /*
  *
  * Copyright 2025 gRPC authors.
@@ -32,36 +30,400 @@
 // server.
 package xdsclient
 
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"google.golang.org/grpc/internal/grpclog"
+	"google.golang.org/grpc/xds/internal/clients"
+	clientsinternal "google.golang.org/grpc/xds/internal/clients/internal"
+	"google.golang.org/grpc/xds/internal/clients/internal/backoff"
+	"google.golang.org/grpc/xds/internal/clients/internal/syncutil"
+	"google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
+	"google.golang.org/protobuf/proto"
+
+	v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3"
+)
+
+const (
+	// NameForServer represents the value to be passed as name when creating an xDS
+	// client from xDS-enabled gRPC servers. This is a well-known dedicated key
+	// value, and is defined in gRFC A71.
+	NameForServer = "#server"
+
+	defaultWatchExpiryTimeout = 15 * time.Second
+	name                      = "xds-client"
+)
+
+var (
+	// ErrClientClosed is returned when the xDS client is closed.
+	ErrClientClosed = errors.New("xds: the xDS client is closed")
+
+	defaultExponentialBackoff = backoff.DefaultExponential.Backoff
+)
+
 // XDSClient is a client which queries a set of discovery APIs (collectively
 // termed as xDS) on a remote management server, to discover
 // various dynamic resources.
 type XDSClient struct {
+	// The following fields are initialized at creation time and are read-only
+	// after that, and therefore can be accessed without a mutex.
+	done               *syncutil.Event              // Fired when the client is closed.
+	topLevelAuthority  *authority                   // The top-level authority, used only for old-style names without an authority.
+	authorities        map[string]*authority        // Map from authority names in config to authority struct.
+	config             *Config                      // Complete xDS client configuration.
+	watchExpiryTimeout time.Duration                // Expiry timeout for ADS watch.
+	backoff            func(int) time.Duration      // Backoff for ADS and LRS stream failures.
+	transportBuilder   clients.TransportBuilder     // Builder to create transports to xDS server.
+	resourceTypes      map[string]ResourceType      // Registry of resource types, for parsing incoming ADS responses.
+	serializer         *syncutil.CallbackSerializer // Serializer for invoking resource watcher callbacks.
+	serializerClose    func()                       // Function to close the serializer.
+	logger             *grpclog.PrefixLogger        // Logger for this client.
+	target             string                       // The gRPC target for this client.
+
+	// The XDSClient owns a bunch of channels to individual xDS servers
+	// specified in the xDS client configuration. Authorities acquire references
+	// to these channels based on server configs within the authority config.
+	// The XDSClient maintains a list of interested authorities for each of
+	// these channels, and forwards updates from the channels to each of these
+	// authorities.
+	//
+	// Once all references to a channel are dropped, the channel is closed.
+	channelsMu        sync.Mutex
+	xdsActiveChannels map[ServerConfig]*channelState // Map from server config to in-use xdsChannels.
 }
 
 // New returns a new xDS Client configured with the provided config.
 func New(config Config) (*XDSClient, error) {
-	panic("unimplemented")
+	switch {
+	case config.Node.ID == "":
+		return nil, errors.New("xdsclient: node ID is empty")
+	case config.ResourceTypes == nil:
+		return nil, errors.New("xdsclient: resource types map is nil")
+	case config.TransportBuilder == nil:
+		return nil, errors.New("xdsclient: transport builder is nil")
+	case config.Authorities == nil && config.Servers == nil:
+		return nil, errors.New("xdsclient: no servers or authorities specified")
+	}
+
+	client, err := newClient(&config, defaultWatchExpiryTimeout, defaultExponentialBackoff, name)
+	if err != nil {
+		return nil, err
+	}
+	return client, nil
 }
 
-// WatchResource starts watching the specified resource.
-//
-// typeURL specifies the resource type implementation to use. The watch fails
-// if there is no resource type implementation for the given typeURL. See the
-// ResourceTypes field in the Config struct used to create the XDSClient.
-//
-// The returned function cancels the watch and prevents future calls to the
-// watcher.
-func (c *XDSClient) WatchResource(typeURL, name string, watcher ResourceWatcher) (cancel func()) {
-	panic("unimplemented")
+// SetWatchExpiryTimeoutForTesting overrides the default watch expiry timeout
+// with the provided timeout value.
+func (c *XDSClient) SetWatchExpiryTimeoutForTesting(watchExpiryTimeout time.Duration) {
+	c.watchExpiryTimeout = watchExpiryTimeout
 }
 
-// Close closes the xDS client.
-func (c *XDSClient) Close() error {
-	panic("unimplemented")
+// newClient returns a new XDSClient with the given config.
+func newClient(config *Config, watchExpiryTimeout time.Duration, streamBackoff func(int) time.Duration, target string) (*XDSClient, error) {
+	ctx, cancel := context.WithCancel(context.Background())
+	c := &XDSClient{
+		target:             target,
+		done:               syncutil.NewEvent(),
+		authorities:        make(map[string]*authority),
+		config:             config,
+		watchExpiryTimeout: watchExpiryTimeout,
+		backoff:            streamBackoff,
+		serializer:         syncutil.NewCallbackSerializer(ctx),
+		serializerClose:    cancel,
+		transportBuilder:   config.TransportBuilder,
+		resourceTypes:      config.ResourceTypes,
+		xdsActiveChannels:  make(map[ServerConfig]*channelState),
+	}
+
+	for name, cfg := range config.Authorities {
+		// If server configs are specified in the authorities map, use that.
+		// Else, use the top-level server configs.
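+		// For example, an authority configured as {XDSServers: nil} (or with
+		// an empty slice) ends up using config.Servers below.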
+		serverCfg := config.Servers
+		if len(cfg.XDSServers) >= 1 {
+			serverCfg = cfg.XDSServers
+		}
+		c.authorities[name] = newAuthority(authorityBuildOptions{
+			serverConfigs:    serverCfg,
+			name:             name,
+			serializer:       c.serializer,
+			getChannelForADS: c.getChannelForADS,
+			logPrefix:        clientPrefix(c),
+			target:           target,
+		})
+	}
+	c.topLevelAuthority = newAuthority(authorityBuildOptions{
+		serverConfigs:    config.Servers,
+		name:             "",
+		serializer:       c.serializer,
+		getChannelForADS: c.getChannelForADS,
+		logPrefix:        clientPrefix(c),
+		target:           target,
+	})
+	c.logger = prefixLogger(c)
+	return c, nil
+}
+
+// Close closes the xDS client and releases all resources.
+func (c *XDSClient) Close() {
+	if c.done.HasFired() {
+		return
+	}
+	c.done.Fire()
+
+	c.topLevelAuthority.close()
+	for _, a := range c.authorities {
+		a.close()
+	}
+
+	// Channel close cannot be invoked with the lock held, because it can race
+	// with a stream failure happening at the same time. The latter will call
+	// back into the XDSClient and will attempt to grab the lock. This will
+	// result in a deadlock. So instead, we release the lock and wait for all
+	// active channels to be closed.
+	var channelsToClose []*xdsChannel
+	c.channelsMu.Lock()
+	for _, cs := range c.xdsActiveChannels {
+		channelsToClose = append(channelsToClose, cs.channel)
+	}
+	c.xdsActiveChannels = nil
+	c.channelsMu.Unlock()
+	for _, ch := range channelsToClose {
+		ch.close()
+	}
+
+	c.serializerClose()
+	<-c.serializer.Done()
+
+	c.logger.Infof("Shutdown")
+}
+
+// getChannelForADS returns an xdsChannel for the given server configuration.
+//
+// If an xdsChannel exists for the given server configuration, it is returned.
+// Else a new one is created. It also ensures that the calling authority is
+// added to the set of interested authorities for the returned channel.
+//
+// It returns the xdsChannel and a function to release the calling authority's
+// reference on the channel. The caller must call the cancel function when it is
+// no longer interested in this channel.
+//
+// A non-nil error is returned if an xdsChannel was not created.
+func (c *XDSClient) getChannelForADS(serverConfig *ServerConfig, callingAuthority *authority) (*xdsChannel, func(), error) {
+	if c.done.HasFired() {
+		return nil, nil, ErrClientClosed
+	}
+
+	initLocked := func(s *channelState) {
+		if c.logger.V(2) {
+			c.logger.Infof("Adding authority %q to the set of interested authorities for channel [%p]", callingAuthority.name, s.channel)
+		}
+		s.interestedAuthorities[callingAuthority] = true
+	}
+	deInitLocked := func(s *channelState) {
+		if c.logger.V(2) {
+			c.logger.Infof("Removing authority %q from the set of interested authorities for channel [%p]", callingAuthority.name, s.channel)
+		}
+		delete(s.interestedAuthorities, callingAuthority)
+	}
+
+	return c.getOrCreateChannel(serverConfig, initLocked, deInitLocked)
+}
+
+// getOrCreateChannel returns an xdsChannel for the given server configuration.
+//
+// If an xdsChannel already exists for the given server configuration, it is
+// returned. Else a new one is created.
+//
+// The initLocked function runs some initialization logic before the channel is
+// returned. This includes adding the calling authority to the set of interested
+// authorities for the channel or incrementing the count of the number of LRS
+// calls on the channel.
+// +// The deInitLocked function runs some cleanup logic when the returned cleanup +// function is called. This involves removing the calling authority from the set +// of interested authorities for the channel or decrementing the count of the +// number of LRS calls on the channel. +// +// Both initLocked and deInitLocked are called with the c.channelsMu held. +// +// Returns the xdsChannel and a cleanup function to be invoked when the channel +// is no longer required. A non-nil error is returned if an xdsChannel was not +// created. +func (c *XDSClient) getOrCreateChannel(serverConfig *ServerConfig, initLocked, deInitLocked func(*channelState)) (*xdsChannel, func(), error) { + c.channelsMu.Lock() + defer c.channelsMu.Unlock() + + if c.logger.V(2) { + c.logger.Infof("Received request for a reference to an xdsChannel for server config %q", serverConfig) + } + + // Use an existing channel, if one exists for this server config. + if st, ok := c.xdsActiveChannels[*serverConfig]; ok { + if c.logger.V(2) { + c.logger.Infof("Reusing an existing xdsChannel for server config %q", serverConfig) + } + initLocked(st) + return st.channel, c.releaseChannel(serverConfig, st, deInitLocked), nil + } + + if c.logger.V(2) { + c.logger.Infof("Creating a new xdsChannel for server config %q", serverConfig) + } + + // Create a new transport and create a new xdsChannel, and add it to the + // map of xdsChannels. + tr, err := c.transportBuilder.Build(serverConfig.ServerIdentifier) + if err != nil { + return nil, func() {}, fmt.Errorf("xds: failed to create transport for server config %v: %v", serverConfig, err) + } + state := &channelState{ + parent: c, + serverConfig: serverConfig, + interestedAuthorities: make(map[*authority]bool), + } + channel, err := newXDSChannel(xdsChannelOpts{ + transport: tr, + serverConfig: serverConfig, + clientConfig: c.config, + eventHandler: state, + backoff: c.backoff, + watchExpiryTimeout: c.watchExpiryTimeout, + logPrefix: clientPrefix(c), + }) + if err != nil { + return nil, func() {}, fmt.Errorf("xds: failed to create a new channel for server config %v: %v", serverConfig, err) + } + state.channel = channel + c.xdsActiveChannels[*serverConfig] = state + initLocked(state) + return state.channel, c.releaseChannel(serverConfig, state, deInitLocked), nil +} + +// releaseChannel is a function that is called when a reference to an xdsChannel +// needs to be released. It handles closing channels with no active references. +// +// The function takes the following parameters: +// - serverConfig: the server configuration for the xdsChannel +// - state: the state of the xdsChannel +// - deInitLocked: a function that performs any necessary cleanup for the xdsChannel +// +// The function returns another function that can be called to release the +// reference to the xdsChannel. This returned function is idempotent, meaning +// it can be called multiple times without any additional effect. +func (c *XDSClient) releaseChannel(serverConfig *ServerConfig, state *channelState, deInitLocked func(*channelState)) func() { + return sync.OnceFunc(func() { + c.channelsMu.Lock() + + if c.logger.V(2) { + c.logger.Infof("Received request to release a reference to an xdsChannel for server config %q", serverConfig) + } + deInitLocked(state) + + // The channel has active users. Do nothing and return. 
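+		// (Note that interestedAuthorities is a set keyed by authority, so an
+		// authority with multiple watches against this channel still occupies
+		// a single entry here.)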
+		if len(state.interestedAuthorities) != 0 {
+			if c.logger.V(2) {
+				c.logger.Infof("xdsChannel %p has other active references", state.channel)
+			}
+			c.channelsMu.Unlock()
+			return
+		}
+
+		delete(c.xdsActiveChannels, *serverConfig)
+		if c.logger.V(2) {
+			c.logger.Infof("Closing xdsChannel [%p] for server config %s", state.channel, serverConfig)
+		}
+		channelToClose := state.channel
+		c.channelsMu.Unlock()
+
+		channelToClose.close()
+	})
 }
 
 // DumpResources returns the status and contents of all xDS resources being
 // watched by the xDS client.
-func (c *XDSClient) DumpResources() []byte {
-	panic("unimplemented")
+func (c *XDSClient) DumpResources() ([]byte, error) {
+	retCfg := c.topLevelAuthority.dumpResources()
+	for _, a := range c.authorities {
+		retCfg = append(retCfg, a.dumpResources()...)
+	}
+
+	nodeProto := clientsinternal.NodeProto(c.config.Node)
+	nodeProto.ClientFeatures = []string{clientFeatureNoOverprovisioning, clientFeatureResourceWrapper}
+	resp := &v3statuspb.ClientStatusResponse{}
+	resp.Config = append(resp.Config, &v3statuspb.ClientConfig{
+		Node:              nodeProto,
+		GenericXdsConfigs: retCfg,
+	})
+	return proto.Marshal(resp)
+}
+
+// channelState represents the state of an xDS channel. It tracks the
+// authorities interested in the channel, and the server configuration used
+// for the channel.
+//
+// It receives callbacks for events on the underlying ADS stream and invokes
+// corresponding callbacks on interested authorities.
+type channelState struct {
+	parent       *XDSClient
+	serverConfig *ServerConfig
+
+	// Access to the following fields should be protected by the parent's
+	// channelsMu.
+	channel               *xdsChannel
+	interestedAuthorities map[*authority]bool
+}
+
+func (cs *channelState) adsStreamFailure(err error) {
+	if cs.parent.done.HasFired() {
+		return
+	}
+
+	cs.parent.channelsMu.Lock()
+	defer cs.parent.channelsMu.Unlock()
+	for authority := range cs.interestedAuthorities {
+		authority.adsStreamFailure(cs.serverConfig, err)
+	}
+}
+
+func (cs *channelState) adsResourceUpdate(typ ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) {
+	if cs.parent.done.HasFired() {
+		return
+	}
+
+	cs.parent.channelsMu.Lock()
+	defer cs.parent.channelsMu.Unlock()
+
+	if len(cs.interestedAuthorities) == 0 {
+		onDone()
+		return
+	}
+
+	authorityCnt := new(atomic.Int64)
+	authorityCnt.Add(int64(len(cs.interestedAuthorities)))
+	done := func() {
+		if authorityCnt.Add(-1) == 0 {
+			onDone()
+		}
+	}
+	for authority := range cs.interestedAuthorities {
+		authority.adsResourceUpdate(cs.serverConfig, typ, updates, md, done)
+	}
+}
+
+func (cs *channelState) adsResourceDoesNotExist(typ ResourceType, resourceName string) {
+	if cs.parent.done.HasFired() {
+		return
+	}
+
+	cs.parent.channelsMu.Lock()
+	defer cs.parent.channelsMu.Unlock()
+	for authority := range cs.interestedAuthorities {
+		authority.adsResourceDoesNotExist(typ, resourceName)
+	}
+}
diff --git a/xds/internal/clients/xdsclient/xdsclient_test.go b/xds/internal/clients/xdsclient/xdsclient_test.go
new file mode 100644
index 000000000..6c5c7b59a
--- /dev/null
+++ b/xds/internal/clients/xdsclient/xdsclient_test.go
@@ -0,0 +1,124 @@
+/*
+ *
+ * Copyright 2025 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package xdsclient + +import ( + "strings" + "testing" + + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/grpctransport" + "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" +) + +func (s) TestXDSClient_New(t *testing.T) { + credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()} + + tests := []struct { + name string + config Config + wantErr string + }{ + { + name: "empty node ID", + config: Config{}, + wantErr: "node ID is empty", + }, + { + name: "nil resource types", + config: Config{ + Node: clients.Node{ID: "node-id"}, + }, + wantErr: "resource types map is nil", + }, + { + name: "nil transport builder", + config: Config{ + Node: clients.Node{ID: "node-id"}, + ResourceTypes: map[string]ResourceType{xdsresource.V3ListenerURL: listenerType}, + }, + wantErr: "transport builder is nil", + }, + { + name: "no servers or authorities", + config: Config{ + Node: clients.Node{ID: "node-id"}, + ResourceTypes: map[string]ResourceType{xdsresource.V3ListenerURL: listenerType}, + TransportBuilder: grpctransport.NewBuilder(credentials), + }, + wantErr: "no servers or authorities specified", + }, + { + name: "success with servers", + config: Config{ + Node: clients.Node{ID: "node-id"}, + ResourceTypes: map[string]ResourceType{xdsresource.V3ListenerURL: listenerType}, + TransportBuilder: grpctransport.NewBuilder(credentials), + Servers: []ServerConfig{{ServerIdentifier: clients.ServerIdentifier{ServerURI: "dummy-server"}}}, + }, + wantErr: "", + }, + { + name: "success with authorities", + config: Config{ + Node: clients.Node{ID: "node-id"}, + ResourceTypes: map[string]ResourceType{xdsresource.V3ListenerURL: listenerType}, + TransportBuilder: grpctransport.NewBuilder(credentials), + Authorities: map[string]Authority{"authority-name": {XDSServers: []ServerConfig{{ServerIdentifier: clients.ServerIdentifier{ServerURI: "dummy-server"}}}}}, + }, + wantErr: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := New(tt.config) + if tt.wantErr == "" { + if err != nil { + t.Fatalf("New(%+v) failed: %v", tt.config, err) + } + } else { + if err == nil || !strings.Contains(err.Error(), tt.wantErr) { + t.Fatalf("New(%+v) returned error %v, want error %q", tt.config, err, tt.wantErr) + } + } + if c != nil { + c.Close() + } + }) + } +} + +func (s) TestXDSClient_Close(t *testing.T) { + credentials := map[string]credentials.Bundle{"insecure": insecure.NewBundle()} + config := Config{ + Node: clients.Node{ID: "node-id"}, + ResourceTypes: map[string]ResourceType{xdsresource.V3ListenerURL: listenerType}, + TransportBuilder: grpctransport.NewBuilder(credentials), + Servers: []ServerConfig{{ServerIdentifier: clients.ServerIdentifier{ServerURI: "dummy-server"}}}, + } + c, err := New(config) + if err != nil { + t.Fatalf("New(%+v) failed: %v", config, err) + } + c.Close() + // Calling close again should not panic. 
+	c.Close()
+}
diff --git a/xds/internal/clients/xdsclient/xdsconfig.go b/xds/internal/clients/xdsclient/xdsconfig.go
index bfbe41679..4c7d090df 100644
--- a/xds/internal/clients/xdsclient/xdsconfig.go
+++ b/xds/internal/clients/xdsclient/xdsconfig.go
@@ -83,3 +83,10 @@ type Authority struct {
 	// See Config.Servers for more details.
 	XDSServers []ServerConfig
 }
+
+// isServerConfigEqual reports whether a and b represent the same server, i.e.
+// whether they have the same server identifier and the same resource deletion
+// behavior.
+func isServerConfigEqual(a, b *ServerConfig) bool {
+	return a.ServerIdentifier == b.ServerIdentifier && a.IgnoreResourceDeletion == b.IgnoreResourceDeletion
+}
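
For reviewers who want to try the new watch surface, the shape of a caller-side watcher is sketched below. This is an illustrative sketch only, distilled from the e2e tests in this patch: it assumes the ResourceWatcher interface consists of the three callbacks exercised above (ResourceChanged, AmbientError, ResourceError), each of which must invoke onDone when it finishes processing. The package name, loggingWatcher, watchListener, and the listenerTypeURL parameter are hypothetical; the type URL must match an entry in the Config.ResourceTypes map used to build the client. Since these packages are internal, the sketch only compiles inside the grpc-go module.

package xdsclientexample

import (
	"log"

	"google.golang.org/grpc/xds/internal/clients/xdsclient"
)

// loggingWatcher is an illustrative xdsclient.ResourceWatcher implementation
// that simply logs every callback.
type loggingWatcher struct{}

// ResourceChanged is invoked with a new version of the resource, or with the
// cached version when a watch is registered for an already-cached resource.
func (w *loggingWatcher) ResourceChanged(data xdsclient.ResourceData, onDone func()) {
	defer onDone() // Signals the client that this callback has been processed.
	log.Printf("resource updated: %v", data)
}

// AmbientError is invoked for errors that do not invalidate a previously
// delivered resource, e.g. a NACKed update for a resource that is cached.
func (w *loggingWatcher) AmbientError(err error, onDone func()) {
	defer onDone()
	log.Printf("ambient error (cached resource still valid): %v", err)
}

// ResourceError is invoked for errors that leave the watcher without a valid
// resource, e.g. resource-not-found or a NACKed update with nothing cached.
func (w *loggingWatcher) ResourceError(err error, onDone func()) {
	defer onDone()
	log.Printf("resource error: %v", err)
}

// watchListener registers a watch for the named resource and returns the
// cancel function for that watch.
func watchListener(c *xdsclient.XDSClient, listenerTypeURL, name string) func() {
	return c.WatchResource(listenerTypeURL, name, &loggingWatcher{})
}

The onDone discipline matters: as channelState.adsResourceUpdate above shows, the client counts outstanding callbacks per update and only acknowledges completion once every interested authority's watchers have called onDone, so a watcher that forgets to invoke it stalls delivery of subsequent updates.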