diff --git a/xds/src/generated/thirdparty/grpc/io/envoyproxy/envoy/service/rate_limit_quota/v3/RateLimitQuotaServiceGrpc.java b/xds/src/generated/thirdparty/grpc/io/envoyproxy/envoy/service/rate_limit_quota/v3/RateLimitQuotaServiceGrpc.java
new file mode 100644
index 0000000000..2cbb7536d4
--- /dev/null
+++ b/xds/src/generated/thirdparty/grpc/io/envoyproxy/envoy/service/rate_limit_quota/v3/RateLimitQuotaServiceGrpc.java
@@ -0,0 +1,303 @@
+package io.envoyproxy.envoy.service.rate_limit_quota.v3;
+
+import static io.grpc.MethodDescriptor.generateFullMethodName;
+
+/**
+ *
+ * Defines the Rate Limit Quota Service (RLQS).
+ *
+ */
+@javax.annotation.Generated(
+ value = "by gRPC proto compiler",
+ comments = "Source: envoy/service/rate_limit_quota/v3/rlqs.proto")
+@io.grpc.stub.annotations.GrpcGenerated
+public final class RateLimitQuotaServiceGrpc {
+
+ private RateLimitQuotaServiceGrpc() {}
+
+ public static final java.lang.String SERVICE_NAME = "envoy.service.rate_limit_quota.v3.RateLimitQuotaService";
+
+ // Static method descriptors that strictly reflect the proto.
+  private static volatile io.grpc.MethodDescriptor<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports,
+      io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse> getStreamRateLimitQuotasMethod;
+
+ @io.grpc.stub.annotations.RpcMethod(
+ fullMethodName = SERVICE_NAME + '/' + "StreamRateLimitQuotas",
+ requestType = io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports.class,
+ responseType = io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse.class,
+ methodType = io.grpc.MethodDescriptor.MethodType.BIDI_STREAMING)
+  public static io.grpc.MethodDescriptor<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports,
+      io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse> getStreamRateLimitQuotasMethod() {
+    io.grpc.MethodDescriptor<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports, io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse> getStreamRateLimitQuotasMethod;
+ if ((getStreamRateLimitQuotasMethod = RateLimitQuotaServiceGrpc.getStreamRateLimitQuotasMethod) == null) {
+ synchronized (RateLimitQuotaServiceGrpc.class) {
+ if ((getStreamRateLimitQuotasMethod = RateLimitQuotaServiceGrpc.getStreamRateLimitQuotasMethod) == null) {
+ RateLimitQuotaServiceGrpc.getStreamRateLimitQuotasMethod = getStreamRateLimitQuotasMethod =
+              io.grpc.MethodDescriptor.<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports, io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse>newBuilder()
+ .setType(io.grpc.MethodDescriptor.MethodType.BIDI_STREAMING)
+ .setFullMethodName(generateFullMethodName(SERVICE_NAME, "StreamRateLimitQuotas"))
+ .setSampledToLocalTracing(true)
+ .setRequestMarshaller(io.grpc.protobuf.ProtoUtils.marshaller(
+ io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports.getDefaultInstance()))
+ .setResponseMarshaller(io.grpc.protobuf.ProtoUtils.marshaller(
+ io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse.getDefaultInstance()))
+ .setSchemaDescriptor(new RateLimitQuotaServiceMethodDescriptorSupplier("StreamRateLimitQuotas"))
+ .build();
+ }
+ }
+ }
+ return getStreamRateLimitQuotasMethod;
+ }
+
+ /**
+ * Creates a new async stub that supports all call types for the service
+ */
+ public static RateLimitQuotaServiceStub newStub(io.grpc.Channel channel) {
+    io.grpc.stub.AbstractStub.StubFactory<RateLimitQuotaServiceStub> factory =
+      new io.grpc.stub.AbstractStub.StubFactory<RateLimitQuotaServiceStub>() {
+ @java.lang.Override
+ public RateLimitQuotaServiceStub newStub(io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ return new RateLimitQuotaServiceStub(channel, callOptions);
+ }
+ };
+ return RateLimitQuotaServiceStub.newStub(factory, channel);
+ }
+
+ /**
+ * Creates a new blocking-style stub that supports unary and streaming output calls on the service
+ */
+ public static RateLimitQuotaServiceBlockingStub newBlockingStub(
+ io.grpc.Channel channel) {
+    io.grpc.stub.AbstractStub.StubFactory<RateLimitQuotaServiceBlockingStub> factory =
+      new io.grpc.stub.AbstractStub.StubFactory<RateLimitQuotaServiceBlockingStub>() {
+ @java.lang.Override
+ public RateLimitQuotaServiceBlockingStub newStub(io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ return new RateLimitQuotaServiceBlockingStub(channel, callOptions);
+ }
+ };
+ return RateLimitQuotaServiceBlockingStub.newStub(factory, channel);
+ }
+
+ /**
+ * Creates a new ListenableFuture-style stub that supports unary calls on the service
+ */
+ public static RateLimitQuotaServiceFutureStub newFutureStub(
+ io.grpc.Channel channel) {
+    io.grpc.stub.AbstractStub.StubFactory<RateLimitQuotaServiceFutureStub> factory =
+      new io.grpc.stub.AbstractStub.StubFactory<RateLimitQuotaServiceFutureStub>() {
+ @java.lang.Override
+ public RateLimitQuotaServiceFutureStub newStub(io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ return new RateLimitQuotaServiceFutureStub(channel, callOptions);
+ }
+ };
+ return RateLimitQuotaServiceFutureStub.newStub(factory, channel);
+ }
+
+ /**
+ *
+ * Defines the Rate Limit Quota Service (RLQS).
+ *
+ */
+ public interface AsyncService {
+
+ /**
+ *
+ * Main communication channel: the data plane sends usage reports to the RLQS server,
+   * and the server asynchronously responds with the assignments.
+ *
+ */
+    default io.grpc.stub.StreamObserver<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports> streamRateLimitQuotas(
+        io.grpc.stub.StreamObserver<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse> responseObserver) {
+ return io.grpc.stub.ServerCalls.asyncUnimplementedStreamingCall(getStreamRateLimitQuotasMethod(), responseObserver);
+ }
+ }
+
+ /**
+ * Base class for the server implementation of the service RateLimitQuotaService.
+ *
+ * Defines the Rate Limit Quota Service (RLQS).
+ *
+ */
+ public static abstract class RateLimitQuotaServiceImplBase
+ implements io.grpc.BindableService, AsyncService {
+
+ @java.lang.Override public final io.grpc.ServerServiceDefinition bindService() {
+ return RateLimitQuotaServiceGrpc.bindService(this);
+ }
+ }
+
+ /**
+ * A stub to allow clients to do asynchronous rpc calls to service RateLimitQuotaService.
+ *
+ * Defines the Rate Limit Quota Service (RLQS).
+ *
+ */
+ public static final class RateLimitQuotaServiceStub
+      extends io.grpc.stub.AbstractAsyncStub<RateLimitQuotaServiceStub> {
+ private RateLimitQuotaServiceStub(
+ io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ super(channel, callOptions);
+ }
+
+ @java.lang.Override
+ protected RateLimitQuotaServiceStub build(
+ io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ return new RateLimitQuotaServiceStub(channel, callOptions);
+ }
+
+ /**
+ *
+ * Main communication channel: the data plane sends usage reports to the RLQS server,
+     * and the server asynchronously responds with the assignments.
+ *
+ */
+    public io.grpc.stub.StreamObserver<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports> streamRateLimitQuotas(
+        io.grpc.stub.StreamObserver<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse> responseObserver) {
+ return io.grpc.stub.ClientCalls.asyncBidiStreamingCall(
+ getChannel().newCall(getStreamRateLimitQuotasMethod(), getCallOptions()), responseObserver);
+ }
+ }
+
+ /**
+ * A stub to allow clients to do synchronous rpc calls to service RateLimitQuotaService.
+ *
+ * Defines the Rate Limit Quota Service (RLQS).
+ *
+ */
+ public static final class RateLimitQuotaServiceBlockingStub
+      extends io.grpc.stub.AbstractBlockingStub<RateLimitQuotaServiceBlockingStub> {
+ private RateLimitQuotaServiceBlockingStub(
+ io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ super(channel, callOptions);
+ }
+
+ @java.lang.Override
+ protected RateLimitQuotaServiceBlockingStub build(
+ io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ return new RateLimitQuotaServiceBlockingStub(channel, callOptions);
+ }
+ }
+
+ /**
+ * A stub to allow clients to do ListenableFuture-style rpc calls to service RateLimitQuotaService.
+ *
+ * Defines the Rate Limit Quota Service (RLQS).
+ *
+ */
+ public static final class RateLimitQuotaServiceFutureStub
+      extends io.grpc.stub.AbstractFutureStub<RateLimitQuotaServiceFutureStub> {
+ private RateLimitQuotaServiceFutureStub(
+ io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ super(channel, callOptions);
+ }
+
+ @java.lang.Override
+ protected RateLimitQuotaServiceFutureStub build(
+ io.grpc.Channel channel, io.grpc.CallOptions callOptions) {
+ return new RateLimitQuotaServiceFutureStub(channel, callOptions);
+ }
+ }
+
+ private static final int METHODID_STREAM_RATE_LIMIT_QUOTAS = 0;
+
+  private static final class MethodHandlers<Req, Resp> implements
+      io.grpc.stub.ServerCalls.UnaryMethod<Req, Resp>,
+      io.grpc.stub.ServerCalls.ServerStreamingMethod<Req, Resp>,
+      io.grpc.stub.ServerCalls.ClientStreamingMethod<Req, Resp>,
+      io.grpc.stub.ServerCalls.BidiStreamingMethod<Req, Resp> {
+ private final AsyncService serviceImpl;
+ private final int methodId;
+
+ MethodHandlers(AsyncService serviceImpl, int methodId) {
+ this.serviceImpl = serviceImpl;
+ this.methodId = methodId;
+ }
+
+ @java.lang.Override
+ @java.lang.SuppressWarnings("unchecked")
+    public void invoke(Req request, io.grpc.stub.StreamObserver<Resp> responseObserver) {
+ switch (methodId) {
+ default:
+ throw new AssertionError();
+ }
+ }
+
+ @java.lang.Override
+ @java.lang.SuppressWarnings("unchecked")
+    public io.grpc.stub.StreamObserver<Req> invoke(
+        io.grpc.stub.StreamObserver<Resp> responseObserver) {
+ switch (methodId) {
+ case METHODID_STREAM_RATE_LIMIT_QUOTAS:
+          return (io.grpc.stub.StreamObserver<Req>) serviceImpl.streamRateLimitQuotas(
+              (io.grpc.stub.StreamObserver<io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse>) responseObserver);
+ default:
+ throw new AssertionError();
+ }
+ }
+ }
+
+ public static final io.grpc.ServerServiceDefinition bindService(AsyncService service) {
+ return io.grpc.ServerServiceDefinition.builder(getServiceDescriptor())
+ .addMethod(
+ getStreamRateLimitQuotasMethod(),
+ io.grpc.stub.ServerCalls.asyncBidiStreamingCall(
+ new MethodHandlers<
+ io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports,
+ io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse>(
+ service, METHODID_STREAM_RATE_LIMIT_QUOTAS)))
+ .build();
+ }
+
+ private static abstract class RateLimitQuotaServiceBaseDescriptorSupplier
+ implements io.grpc.protobuf.ProtoFileDescriptorSupplier, io.grpc.protobuf.ProtoServiceDescriptorSupplier {
+ RateLimitQuotaServiceBaseDescriptorSupplier() {}
+
+ @java.lang.Override
+ public com.google.protobuf.Descriptors.FileDescriptor getFileDescriptor() {
+ return io.envoyproxy.envoy.service.rate_limit_quota.v3.RlqsProto.getDescriptor();
+ }
+
+ @java.lang.Override
+ public com.google.protobuf.Descriptors.ServiceDescriptor getServiceDescriptor() {
+ return getFileDescriptor().findServiceByName("RateLimitQuotaService");
+ }
+ }
+
+ private static final class RateLimitQuotaServiceFileDescriptorSupplier
+ extends RateLimitQuotaServiceBaseDescriptorSupplier {
+ RateLimitQuotaServiceFileDescriptorSupplier() {}
+ }
+
+ private static final class RateLimitQuotaServiceMethodDescriptorSupplier
+ extends RateLimitQuotaServiceBaseDescriptorSupplier
+ implements io.grpc.protobuf.ProtoMethodDescriptorSupplier {
+ private final java.lang.String methodName;
+
+ RateLimitQuotaServiceMethodDescriptorSupplier(java.lang.String methodName) {
+ this.methodName = methodName;
+ }
+
+ @java.lang.Override
+ public com.google.protobuf.Descriptors.MethodDescriptor getMethodDescriptor() {
+ return getServiceDescriptor().findMethodByName(methodName);
+ }
+ }
+
+ private static volatile io.grpc.ServiceDescriptor serviceDescriptor;
+
+ public static io.grpc.ServiceDescriptor getServiceDescriptor() {
+ io.grpc.ServiceDescriptor result = serviceDescriptor;
+ if (result == null) {
+ synchronized (RateLimitQuotaServiceGrpc.class) {
+ result = serviceDescriptor;
+ if (result == null) {
+ serviceDescriptor = result = io.grpc.ServiceDescriptor.newBuilder(SERVICE_NAME)
+ .setSchemaDescriptor(new RateLimitQuotaServiceFileDescriptorSupplier())
+ .addMethod(getStreamRateLimitQuotasMethod())
+ .build();
+ }
+ }
+ }
+ return result;
+ }
+}
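For reviewers, a minimal usage sketch of the generated stub above: it opens the bidirectional StreamRateLimitQuotas stream, sends one usage report (which, per the RLQS protocol, also subscribes the client to future assignments for that bucket), and prints incoming responses. The server address, domain, and bucket key are made-up example values, and real data-plane code would manage the stream lifecycle, retries, and report cadence.

// Hypothetical client sketch (not part of this change).
import io.envoyproxy.envoy.service.rate_limit_quota.v3.BucketId;
import io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse;
import io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaServiceGrpc;
import io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaUsageReports;
import io.grpc.Grpc;
import io.grpc.InsecureChannelCredentials;
import io.grpc.ManagedChannel;
import io.grpc.stub.StreamObserver;

public final class RlqsClientSketch {
  public static void main(String[] args) throws InterruptedException {
    // "localhost:18081" is an assumed RLQS server address.
    ManagedChannel channel =
        Grpc.newChannelBuilder("localhost:18081", InsecureChannelCredentials.create()).build();
    RateLimitQuotaServiceGrpc.RateLimitQuotaServiceStub stub =
        RateLimitQuotaServiceGrpc.newStub(channel);

    // Open the bidi stream; the observer receives assignments pushed by the server.
    StreamObserver<RateLimitQuotaUsageReports> reports = stub.streamRateLimitQuotas(
        new StreamObserver<RateLimitQuotaResponse>() {
          @Override public void onNext(RateLimitQuotaResponse response) {
            System.out.println("Received " + response.getBucketActionCount() + " bucket action(s)");
          }
          @Override public void onError(Throwable t) { t.printStackTrace(); }
          @Override public void onCompleted() {}
        });

    // The first report for a BucketId doubles as the subscription for that bucket.
    reports.onNext(RateLimitQuotaUsageReports.newBuilder()
        .setDomain("acme-ingress")  // assumed domain, shared by all reports on this stream
        .addBucketQuotaUsages(RateLimitQuotaUsageReports.BucketQuotaUsage.newBuilder()
            .setBucketId(BucketId.newBuilder().putBucket("name", "my_bucket"))
            .setTimeElapsed(com.google.protobuf.Duration.newBuilder().setSeconds(60))
            .setNumRequestsAllowed(100)
            .setNumRequestsDenied(3))
        .build());

    Thread.sleep(5_000);  // wait briefly for an assignment before tearing down
    reports.onCompleted();
    channel.shutdownNow();
  }
}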
diff --git a/xds/third_party/envoy/import.sh b/xds/third_party/envoy/import.sh
index 3eeb46cf66..b5b59b5fb4 100755
--- a/xds/third_party/envoy/import.sh
+++ b/xds/third_party/envoy/import.sh
@@ -91,6 +91,7 @@ envoy/extensions/transport_sockets/tls/v3/tls.proto
envoy/service/discovery/v3/ads.proto
envoy/service/discovery/v3/discovery.proto
envoy/service/load_stats/v3/lrs.proto
+envoy/service/rate_limit_quota/v3/rlqs.proto
envoy/service/status/v3/csds.proto
envoy/type/http/v3/path_transformation.proto
envoy/type/matcher/v3/filter_state.proto
@@ -107,7 +108,10 @@ envoy/type/tracing/v3/custom_tag.proto
envoy/type/v3/http.proto
envoy/type/v3/percent.proto
envoy/type/v3/range.proto
+envoy/type/v3/ratelimit_strategy.proto
+envoy/type/v3/ratelimit_unit.proto
envoy/type/v3/semantic_version.proto
+envoy/type/v3/token_bucket.proto
)
pushd "$(git rev-parse --show-toplevel)/xds/third_party/envoy" > /dev/null
diff --git a/xds/third_party/envoy/src/main/proto/envoy/service/rate_limit_quota/v3/rlqs.proto b/xds/third_party/envoy/src/main/proto/envoy/service/rate_limit_quota/v3/rlqs.proto
new file mode 100644
index 0000000000..b8fa2cd898
--- /dev/null
+++ b/xds/third_party/envoy/src/main/proto/envoy/service/rate_limit_quota/v3/rlqs.proto
@@ -0,0 +1,258 @@
+syntax = "proto3";
+
+package envoy.service.rate_limit_quota.v3;
+
+import "envoy/type/v3/ratelimit_strategy.proto";
+
+import "google/protobuf/duration.proto";
+
+import "xds/annotations/v3/status.proto";
+
+import "udpa/annotations/status.proto";
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.service.rate_limit_quota.v3";
+option java_outer_classname = "RlqsProto";
+option java_multiple_files = true;
+option go_package = "github.com/envoyproxy/go-control-plane/envoy/service/rate_limit_quota/v3;rate_limit_quotav3";
+option (udpa.annotations.file_status).package_version_status = ACTIVE;
+option (xds.annotations.v3.file_status).work_in_progress = true;
+
+// [#protodoc-title: Rate Limit Quota Service (RLQS)]
+
+// The Rate Limit Quota Service (RLQS) is an Envoy global rate limiting service that allows
+// delegating rate limit decisions to a remote service. The service will aggregate the usage reports
+// from multiple data plane instances, and distribute Rate Limit Assignments to each instance
+// based on its business logic. The logic is outside of the scope of the protocol API.
+//
+// The protocol is designed as a streaming-first API. It utilizes a watch-like subscription model.
+// The data plane groups requests into Quota Buckets as directed by the filter config,
+// and periodically reports them to the RLQS server along with the Bucket identifier, :ref:`BucketId
+// `. Once the RLQS server has collected enough
+// reports to make a decision, it'll send back the assignment with the rate limiting instructions.
+//
+// The first report sent by the data plane is interpreted by the RLQS server as a "watch" request,
+// indicating that the data plane instance is interested in receiving further updates for the
+// ``BucketId``. From then on, the RLQS server may push assignments to this instance at will, even if
+// the instance is not sending usage reports. It's the responsibility of the RLQS server
+// to determine when the data plane instance hasn't sent ``BucketId`` reports for too long,
+// and to respond with the :ref:`AbandonAction
+// `,
+// indicating that the server has now stopped sending quota assignments for the ``BucketId`` bucket,
+// and the data plane instance should :ref:`abandon
+// `
+// it.
+//
+// If for any reason the RLQS client doesn't receive the initial assignment for the reported bucket,
+// in order to prevent memory exhaustion, the data plane will limit the time such bucket
+// is retained. The exact time to wait for the initial assignment is chosen by the filter,
+// and may vary based on the implementation.
+// Once the duration ends, the data plane will stop reporting bucket usage, reject any enqueued
+// requests, and purge the bucket from the memory. Subsequent requests matched into the bucket
+// will re-initialize the bucket in the "no assignment" state, restarting the reports.
+//
+// Refer to Rate Limit Quota :ref:`configuration overview `
+// for further details.
+
+// Defines the Rate Limit Quota Service (RLQS).
+service RateLimitQuotaService {
+ // Main communication channel: the data plane sends usage reports to the RLQS server,
+  // and the server asynchronously responds with the assignments.
+ rpc StreamRateLimitQuotas(stream RateLimitQuotaUsageReports)
+ returns (stream RateLimitQuotaResponse) {
+ }
+}
+
+message RateLimitQuotaUsageReports {
+ // The usage report for a bucket.
+ //
+ // .. note::
+ // Note that the first report sent for a ``BucketId`` indicates to the RLQS server that
+ // the RLQS client is subscribing for the future assignments for this ``BucketId``.
+ message BucketQuotaUsage {
+ // ``BucketId`` for which request quota usage is reported.
+ BucketId bucket_id = 1 [(validate.rules).message = {required: true}];
+
+ // Time elapsed since the last report.
+ google.protobuf.Duration time_elapsed = 2 [(validate.rules).duration = {
+ required: true
+ gt {}
+ }];
+
+ // Requests the data plane has allowed through.
+ uint64 num_requests_allowed = 3;
+
+ // Requests throttled.
+ uint64 num_requests_denied = 4;
+ }
+
+ // All quota requests must specify the domain. This enables sharing the quota
+ // server between different applications without fear of overlap.
+ // E.g., "envoy".
+ //
+  // Should only be provided in the first report; all subsequent messages on the same
+  // stream are considered to be in the same domain. In case the domain needs to be
+  // changed, close the stream, and reopen a new one with the different domain.
+ string domain = 1 [(validate.rules).string = {min_len: 1}];
+
+ // A list of quota usage reports. The list is processed by the RLQS server in the same order
+ // it's provided by the client.
+ repeated BucketQuotaUsage bucket_quota_usages = 2 [(validate.rules).repeated = {min_items: 1}];
+}
+
+message RateLimitQuotaResponse {
+ // Commands the data plane to apply one of the actions to the bucket with the
+ // :ref:`bucket_id `.
+ message BucketAction {
+ // Quota assignment for the bucket. Configures the rate limiting strategy and the duration
+ // for the given :ref:`bucket_id
+ // `.
+ //
+ // **Applying the first assignment to the bucket**
+ //
+ // Once the data plane receives the ``QuotaAssignmentAction``, it must send the current usage
+ // report for the bucket, and start rate limiting requests matched into the bucket
+ // using the strategy configured in the :ref:`rate_limit_strategy
+ // `
+    // field. The assignment becomes the bucket's ``active`` assignment.
+ //
+ // **Expiring the assignment**
+ //
+    // The duration of the assignment is defined in the :ref:`assignment_time_to_live
+    // `
+    // field. When the duration runs out, the assignment is ``expired``, and no longer ``active``.
+ // The data plane should stop applying the rate limiting strategy to the bucket, and transition
+ // the bucket to the "expired assignment" state. This activates the behavior configured in the
+ // :ref:`expired_assignment_behavior `
+ // field.
+ //
+ // **Replacing the assignment**
+ //
+    // * If the rate limiting strategy is different from the bucket's ``active`` assignment, or
+    //   the current bucket assignment is ``expired``, the data plane must immediately
+    //   end the current assignment, report the bucket usage, and apply the new assignment.
+    //   The new assignment becomes the bucket's ``active`` assignment.
+ // * If the rate limiting strategy is the same as the bucket's ``active`` (not ``expired``)
+ // assignment, the data plane should extend the duration of the ``active`` assignment
+ // for the duration of the new assignment provided in the :ref:`assignment_time_to_live
+ // `
+ // field. The ``active`` assignment is considered unchanged.
+ message QuotaAssignmentAction {
+      // A duration after which the assignment is considered ``expired``. The process of the
+ // expiration is described :ref:`above
+ // `.
+ //
+ // * If unset, the assignment has no expiration date.
+ // * If set to ``0``, the assignment expires immediately, forcing the client into the
+ // :ref:`"expired assignment"
+ // `
+ // state. This may be used by the RLQS server in cases when it needs clients to proactively
+ // fall back to the pre-configured :ref:`ExpiredAssignmentBehavior
+ // `,
+      // f.e. before the server goes into restart.
+ //
+ // .. attention::
+ // Note that :ref:`expiring
+ // `
+ // the assignment is not the same as :ref:`abandoning
+ // `
+ // the assignment. While expiring the assignment just transitions the bucket to
+ // the "expired assignment" state; abandoning the assignment completely erases
+ // the bucket from the data plane memory, and stops the usage reports.
+ google.protobuf.Duration assignment_time_to_live = 2 [(validate.rules).duration = {gte {}}];
+
+ // Configures the local rate limiter for the request matched to the bucket.
+ // If not set, allow all requests.
+ type.v3.RateLimitStrategy rate_limit_strategy = 3;
+ }
+
+ // Abandon action for the bucket. Indicates that the RLQS server will no longer be
+ // sending updates for the given :ref:`bucket_id
+ // `.
+ //
+ // If no requests are reported for a bucket, after some time the server considers the bucket
+    // inactive. The server stops tracking the bucket, and instructs the data plane to abandon
+ // the bucket via this message.
+ //
+ // **Abandoning the assignment**
+ //
+ // The data plane is to erase the bucket (including its usage data) from the memory.
+ // It should stop tracking the bucket, and stop reporting its usage. This effectively resets
+ // the data plane to the state prior to matching the first request into the bucket.
+ //
+ // **Restarting the subscription**
+ //
+ // If a new request is matched into a bucket previously abandoned, the data plane must behave
+ // as if it has never tracked the bucket, and it's the first request matched into it:
+ //
+ // 1. The process of :ref:`subscription and reporting
+ // `
+ // starts from the beginning.
+ //
+ // 2. The bucket transitions to the :ref:`"no assignment"
+ // `
+ // state.
+ //
+ // 3. Once the new assignment is received, it's applied per
+ // "Applying the first assignment to the bucket" section of the :ref:`QuotaAssignmentAction
+ // `.
+ message AbandonAction {
+ }
+
+    // ``BucketId`` for which the action is applied.
+ BucketId bucket_id = 1 [(validate.rules).message = {required: true}];
+
+ oneof bucket_action {
+ option (validate.required) = true;
+
+ // Apply the quota assignment to the bucket.
+ //
+ // Commands the data plane to apply a rate limiting strategy to the bucket.
+ // The process of applying and expiring the rate limiting strategy is detailed in the
+ // :ref:`QuotaAssignmentAction
+ // `
+ // message.
+ QuotaAssignmentAction quota_assignment_action = 2;
+
+ // Abandon the bucket.
+ //
+ // Commands the data plane to abandon the bucket.
+ // The process of abandoning the bucket is described in the :ref:`AbandonAction
+ // `
+ // message.
+ AbandonAction abandon_action = 3;
+ }
+ }
+
+ // An ordered list of actions to be applied to the buckets. The actions are applied in the
+ // given order, from top to bottom.
+ repeated BucketAction bucket_action = 1 [(validate.rules).repeated = {min_items: 1}];
+}
+
+// The identifier for the bucket. Used to match the bucket between the control plane (RLQS server),
+// and the data plane (RLQS client), f.e.:
+//
+// * the data plane sends a usage report for requests matched into the bucket with ``BucketId``
+// to the control plane
+// * the control plane sends an assignment for the bucket with ``BucketId`` to the data plane
+// Bucket ID.
+//
+// Example:
+//
+// .. validated-code-block:: yaml
+// :type-name: envoy.service.rate_limit_quota.v3.BucketId
+//
+// bucket:
+// name: my_bucket
+// env: staging
+//
+// .. note::
+//   The order of ``BucketId`` keys does not matter. Buckets ``{ a: 'A', b: 'B' }`` and
+// ``{ b: 'B', a: 'A' }`` are identical.
+message BucketId {
+  map<string, string> bucket = 1 [(validate.rules).map = {
+ min_pairs: 1
+ keys {string {min_len: 1}}
+ values {string {min_len: 1}}
+ }];
+}
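To make the response handling concrete, here is a hypothetical dispatch sketch over the ``bucket_action`` oneof defined above; the generated class and accessor names follow standard protobuf Java codegen for this file, and applyAssignment/abandonBucket are placeholder hooks for the data plane's own bucket bookkeeping, not part of this change.

// Hypothetical sketch (not part of this change): interpreting RateLimitQuotaResponse.
import io.envoyproxy.envoy.service.rate_limit_quota.v3.BucketId;
import io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse;
import io.envoyproxy.envoy.service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction;

final class BucketActionDispatcher {
  void onResponse(RateLimitQuotaResponse response) {
    // Actions are applied in the order the server listed them.
    for (BucketAction action : response.getBucketActionList()) {
      switch (action.getBucketActionCase()) {
        case QUOTA_ASSIGNMENT_ACTION:
          // Apply (or extend) the assignment; an unset rate_limit_strategy means "allow all".
          applyAssignment(action.getBucketId(), action.getQuotaAssignmentAction());
          break;
        case ABANDON_ACTION:
          // Erase the bucket and stop reporting its usage.
          abandonBucket(action.getBucketId());
          break;
        default:
          throw new IllegalArgumentException("Unexpected bucket_action: " + action.getBucketActionCase());
      }
    }
  }

  // Placeholder hooks; a real data plane would update its bucket cache here.
  void applyAssignment(BucketId id, BucketAction.QuotaAssignmentAction assignment) {}
  void abandonBucket(BucketId id) {}
}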
diff --git a/xds/third_party/envoy/src/main/proto/envoy/type/v3/ratelimit_strategy.proto b/xds/third_party/envoy/src/main/proto/envoy/type/v3/ratelimit_strategy.proto
new file mode 100644
index 0000000000..a86da55b85
--- /dev/null
+++ b/xds/third_party/envoy/src/main/proto/envoy/type/v3/ratelimit_strategy.proto
@@ -0,0 +1,79 @@
+syntax = "proto3";
+
+package envoy.type.v3;
+
+import "envoy/type/v3/ratelimit_unit.proto";
+import "envoy/type/v3/token_bucket.proto";
+
+import "xds/annotations/v3/status.proto";
+
+import "udpa/annotations/status.proto";
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.type.v3";
+option java_outer_classname = "RatelimitStrategyProto";
+option java_multiple_files = true;
+option go_package = "github.com/envoyproxy/go-control-plane/envoy/type/v3;typev3";
+option (udpa.annotations.file_status).package_version_status = ACTIVE;
+option (xds.annotations.v3.file_status).work_in_progress = true;
+
+// [#protodoc-title: Rate Limit Strategies]
+
+message RateLimitStrategy {
+ // Choose between allow all and deny all.
+ enum BlanketRule {
+ ALLOW_ALL = 0;
+ DENY_ALL = 1;
+ }
+
+ // Best-effort limit of the number of requests per time unit.
+ //
+  // Allows specifying the desired requests per second (RPS, QPS), requests per minute (QPM, RPM),
+ // etc., without specifying a rate limiting algorithm implementation.
+ //
+ // ``RequestsPerTimeUnit`` strategy does not demand any specific rate limiting algorithm to be
+ // used (in contrast to the :ref:`TokenBucket `,
+ // for example). It implies that the implementation details of rate limiting algorithm are
+ // irrelevant as long as the configured number of "requests per time unit" is achieved.
+ //
+ // Note that the ``TokenBucket`` is still a valid implementation of the ``RequestsPerTimeUnit``
+ // strategy, and may be chosen to enforce the rate limit. However, there's no guarantee it will be
+ // the ``TokenBucket`` in particular, and not the Leaky Bucket, the Sliding Window, or any other
+ // rate limiting algorithm that fulfills the requirements.
+ message RequestsPerTimeUnit {
+ // The desired number of requests per :ref:`time_unit
+ // ` to allow.
+ // If set to ``0``, deny all (equivalent to ``BlanketRule.DENY_ALL``).
+ //
+ // .. note::
+ // Note that the algorithm implementation determines the course of action for the requests
+ // over the limit. As long as the ``requests_per_time_unit`` converges on the desired value,
+ // it's allowed to treat this field as a soft-limit: allow bursts, redistribute the allowance
+ // over time, etc.
+ //
+ uint64 requests_per_time_unit = 1;
+
+ // The unit of time. Ignored when :ref:`requests_per_time_unit
+ // `
+ // is ``0`` (deny all).
+ RateLimitUnit time_unit = 2 [(validate.rules).enum = {defined_only: true}];
+ }
+
+ oneof strategy {
+ option (validate.required) = true;
+
+ // Allow or Deny the requests.
+ // If unset, allow all.
+ BlanketRule blanket_rule = 1 [(validate.rules).enum = {defined_only: true}];
+
+ // Best-effort limit of the number of requests per time unit, f.e. requests per second.
+ // Does not prescribe any specific rate limiting algorithm, see :ref:`RequestsPerTimeUnit
+ // ` for details.
+ RequestsPerTimeUnit requests_per_time_unit = 2;
+
+ // Limit the requests by consuming tokens from the Token Bucket.
+ // Allow the same number of requests as the number of tokens available in
+ // the token bucket.
+ TokenBucket token_bucket = 3;
+ }
+}
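For reference, a hedged sketch of how the three branches of the ``strategy`` oneof could be populated from the Java classes generated for this file; the builder and enum names follow standard protobuf Java codegen, and the concrete numbers are examples only.

// Hypothetical sketch (not part of this change): the three RateLimitStrategy variants.
import io.envoyproxy.envoy.type.v3.RateLimitStrategy;
import io.envoyproxy.envoy.type.v3.RateLimitUnit;
import io.envoyproxy.envoy.type.v3.TokenBucket;

final class RateLimitStrategyExamples {
  // Blanket rule: reject every request matched into the bucket.
  static final RateLimitStrategy DENY_ALL = RateLimitStrategy.newBuilder()
      .setBlanketRule(RateLimitStrategy.BlanketRule.DENY_ALL)
      .build();

  // "10 requests per second", leaving the enforcement algorithm to the implementation.
  static final RateLimitStrategy TEN_RPS = RateLimitStrategy.newBuilder()
      .setRequestsPerTimeUnit(RateLimitStrategy.RequestsPerTimeUnit.newBuilder()
          .setRequestsPerTimeUnit(10)
          .setTimeUnit(RateLimitUnit.SECOND))
      .build();

  // Token bucket: bursts of up to 100 requests, refilled at 10 tokens per second.
  static final RateLimitStrategy BURSTY_TEN_RPS = RateLimitStrategy.newBuilder()
      .setTokenBucket(TokenBucket.newBuilder()
          .setMaxTokens(100)
          .setTokensPerFill(com.google.protobuf.UInt32Value.of(10))
          .setFillInterval(com.google.protobuf.Duration.newBuilder().setSeconds(1)))
      .build();
}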
diff --git a/xds/third_party/envoy/src/main/proto/envoy/type/v3/ratelimit_unit.proto b/xds/third_party/envoy/src/main/proto/envoy/type/v3/ratelimit_unit.proto
new file mode 100644
index 0000000000..1a96497926
--- /dev/null
+++ b/xds/third_party/envoy/src/main/proto/envoy/type/v3/ratelimit_unit.proto
@@ -0,0 +1,37 @@
+syntax = "proto3";
+
+package envoy.type.v3;
+
+import "udpa/annotations/status.proto";
+
+option java_package = "io.envoyproxy.envoy.type.v3";
+option java_outer_classname = "RatelimitUnitProto";
+option java_multiple_files = true;
+option go_package = "github.com/envoyproxy/go-control-plane/envoy/type/v3;typev3";
+option (udpa.annotations.file_status).package_version_status = ACTIVE;
+
+// [#protodoc-title: Ratelimit Time Unit]
+
+// Identifies the unit of time for rate limit.
+enum RateLimitUnit {
+ // The time unit is not known.
+ UNKNOWN = 0;
+
+ // The time unit representing a second.
+ SECOND = 1;
+
+ // The time unit representing a minute.
+ MINUTE = 2;
+
+ // The time unit representing an hour.
+ HOUR = 3;
+
+ // The time unit representing a day.
+ DAY = 4;
+
+ // The time unit representing a month.
+ MONTH = 5;
+
+ // The time unit representing a year.
+ YEAR = 6;
+}
diff --git a/xds/third_party/envoy/src/main/proto/envoy/type/v3/token_bucket.proto b/xds/third_party/envoy/src/main/proto/envoy/type/v3/token_bucket.proto
new file mode 100644
index 0000000000..157a271efc
--- /dev/null
+++ b/xds/third_party/envoy/src/main/proto/envoy/type/v3/token_bucket.proto
@@ -0,0 +1,39 @@
+syntax = "proto3";
+
+package envoy.type.v3;
+
+import "google/protobuf/duration.proto";
+import "google/protobuf/wrappers.proto";
+
+import "udpa/annotations/status.proto";
+import "udpa/annotations/versioning.proto";
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.type.v3";
+option java_outer_classname = "TokenBucketProto";
+option java_multiple_files = true;
+option go_package = "github.com/envoyproxy/go-control-plane/envoy/type/v3;typev3";
+option (udpa.annotations.file_status).package_version_status = ACTIVE;
+
+// [#protodoc-title: Token bucket]
+
+// Configures a token bucket, typically used for rate limiting.
+message TokenBucket {
+ option (udpa.annotations.versioning).previous_message_type = "envoy.type.TokenBucket";
+
+ // The maximum tokens that the bucket can hold. This is also the number of tokens that the bucket
+ // initially contains.
+ uint32 max_tokens = 1 [(validate.rules).uint32 = {gt: 0}];
+
+ // The number of tokens added to the bucket during each fill interval. If not specified, defaults
+ // to a single token.
+ google.protobuf.UInt32Value tokens_per_fill = 2 [(validate.rules).uint32 = {gt: 0}];
+
+ // The fill interval that tokens are added to the bucket. During each fill interval
+ // ``tokens_per_fill`` are added to the bucket. The bucket will never contain more than
+ // ``max_tokens`` tokens.
+ google.protobuf.Duration fill_interval = 3 [(validate.rules).duration = {
+ required: true
+ gt {}
+ }];
+}
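The comments above fully determine the bucket's behavior: it starts full at ``max_tokens``, gains ``tokens_per_fill`` tokens (1 if unset) every ``fill_interval``, and never exceeds ``max_tokens``; for example, ``max_tokens: 100, tokens_per_fill: 10, fill_interval: 1s`` sustains roughly 10 requests per second while allowing bursts of up to 100. A minimal illustrative sketch of that refill logic follows; it is not Envoy's implementation and ignores thread safety, monotonic-clock concerns, and fractional-interval refills.

// Illustrative-only token bucket matching the documented semantics.
final class SimpleTokenBucket {
  private final long maxTokens;
  private final long tokensPerFill;
  private final long fillIntervalNanos;
  private long tokens;
  private long lastFillNanos;

  SimpleTokenBucket(long maxTokens, long tokensPerFill, java.time.Duration fillInterval) {
    this.maxTokens = maxTokens;
    this.tokensPerFill = tokensPerFill;
    this.fillIntervalNanos = fillInterval.toNanos();
    this.tokens = maxTokens;            // the bucket initially contains max_tokens
    this.lastFillNanos = System.nanoTime();
  }

  /** Returns true if a request may proceed, consuming one token. */
  boolean tryAcquire() {
    long now = System.nanoTime();
    long elapsedIntervals = (now - lastFillNanos) / fillIntervalNanos;
    if (elapsedIntervals > 0) {
      // Add tokens_per_fill for each whole elapsed interval, capped at max_tokens.
      tokens = Math.min(maxTokens, tokens + elapsedIntervals * tokensPerFill);
      lastFillNanos += elapsedIntervals * fillIntervalNanos;
    }
    if (tokens >= 1) {
      tokens -= 1;
      return true;
    }
    return false;
  }
}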