From 521d59db895e98f7dd4a417258cff6e5f6fba858 Mon Sep 17 00:00:00 2001 From: Joe Betz Date: Wed, 20 Aug 2025 17:51:05 -0400 Subject: [PATCH] Add doc.go, example_test.go and ARCHITECTURE.md to apiserver Kubernetes-commit: e1c586eb00f185c05afb82d2292ddb6bf179709d --- ARCHITECTURE.md | 251 ++++++++++++++++++++++++++++++++++++++++++++++++ doc.go | 61 +++++++++++- example_test.go | 67 +++++++++++++ 3 files changed, 377 insertions(+), 2 deletions(-) create mode 100644 ARCHITECTURE.md create mode 100644 example_test.go diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 000000000..dce3bc5a7 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,251 @@ +# apiserver Architecture + +## 1. Server Composition + +The `kube-apiserver` binary is not one server, but a **server chain** of three distinct +`GenericAPIServer` instances: the core Kubernetes API, API extensions (CRDs), and the aggregation +layer. + +This composition is managed by a layered configuration system, starting with command-line +flags parsed by `options` structs (e.g., `RecommendedOptions` in `pkg/server/options`), which +populate a `Config` object that is then used to instantiate the `GenericAPIServer` +instances. The construction of this delegation chain can be found in the `CreateServerChain` +function in `cmd/kube-apiserver/app/server.go`. + +```mermaid +graph TD + subgraph Incoming Request + direction LR + A[User/Client] --> B{/apis/apps/v1/deployments}; + end + + subgraph kube-apiserver process + direction LR + B --> C[Aggregator Server]; + C -- Not an APIService --> D[Kube API Server]; + D -- Handles Request --> E[REST Storage]; + C -- Is an APIService --> F[Proxy to Extension API Server]; + D -- Not a Core API --> G[API Extensions Server]; + G -- Handles CRD --> E; + end +``` + +1. **Aggregator Server (`kube-aggregator`):** + * **Purpose:** Handles the `apiregistration.k8s.io` API and acts as a reverse proxy for + extension API servers. 
This functionality was designed to allow third-party APIs to be + "aggregated" into the main Kubernetes API server seamlessly. + * **Mechanism:** It watches `APIService` objects. When a request arrives (e.g., for + `/apis/mycompany.com/v1/myresources`), it checks if an `APIService` has "claimed" + that path. If so, it uses a `ServiceResolver` to find the IP of the backing + `Service` and proxies the request. + * **Delegation:** If no `APIService` matches, it delegates the request to the next + server in the chain. + +2. **Kube API Server (Core):** + * **Purpose:** Serves all the built-in Kubernetes APIs (`core/v1`, `apps/v1`, etc.). + * **Mechanism:** This is the main server, configured with all the core REST storage + strategies. + * **Delegation:** If a request is for a path that is not a core API (e.g., for a CRD), + it delegates the request to the next server in the chain. + +3. **API Extensions Server (`apiextensions-apiserver`):** + * **Purpose:** Handles the `apiextensions.k8s.io` API, which manages + `CustomResourceDefinition` objects. The evolution of CRDs from a simple extension + mechanism to a feature-rich system with validation, versioning, and defaulting is + documented in a series of KEPs, starting with the graduation to GA in Kubernetes v1.16. + * **Mechanism:** When a CRD is created, this server dynamically creates and installs a + new REST storage handler for the new resource, making it immediately available. + * **Delegation:** It is the end of the chain. If it cannot handle a request, a `404 Not + Found` is returned. + +## 2. Handler Chain + +Every request flows through a standard chain of HTTP handlers (filters). The request body +is not deserialized until it has passed authentication and authorization. The default handler +chain is constructed by the `DefaultBuildHandlerChain` function in +`staging/src/k8s.io/apiserver/pkg/server/config.go`. 
+ +```mermaid +sequenceDiagram + participant Client + participant Handler Chain + participant Authentication + participant Authorization + participant Priority and Fairness + participant Admission Control + participant REST Endpoint + + Client->>Handler Chain: Request + Handler Chain->>Authentication: Authenticate + Authentication-->>Handler Chain: User Info + Handler Chain->>Authorization: Authorize + Authorization-->>Handler Chain: Allowed/Denied + Handler Chain->>Priority and Fairness: Classify & Queue + Priority and Fairness-->>Handler Chain: Proceed + Handler Chain->>Admission Control: Mutate & Validate + Admission Control-->>Handler Chain: Object OK + Handler Chain->>REST Endpoint: Handle + REST Endpoint-->>Handler Chain: Response + Handler Chain-->>Client: Response +``` + +The handler chain consists of the following stages: + +1. **Authentication (`pkg/authentication`):** This filter identifies the user. The system is + pluggable and composed of multiple authenticators (e.g., client certs, bearer tokens, OIDC). + The identity of the user is determined by the first authenticator in the chain that successfully + identifies the user. +2. **Authorization (`pkg/authorization`):** This filter checks if the user is permitted to + perform the action. This system is also pluggable and composed of multiple authorizers + (e.g., RBAC, Node, Webhook). Each authorizer may respond with either allow, deny, or no opinion. + If the response is no opinion, the request is passed to the next authorizer in the chain. +3. **Priority and Fairness (`pkg/util/flowcontrol`):** This subsystem manages request + concurrency, classifying requests into `FlowSchema`s and `PriorityLevel`s to prevent + overload. This feature was introduced to prevent high traffic from overwhelming the API + server and to ensure that critical cluster operations are not starved. +4. **Admission Control (`pkg/admission`):** This is the primary mechanism for policy + enforcement. 
It is only at this stage that the request body is deserialized into an + object. It is a chain of plugins that can mutate or validate an object. The built-in Pod + Security admission controller is a key example of this, enforcing Pod Security Standards + at the namespace level. +5. **REST Endpoint Handling (`pkg/endpoints`):** The request is finally dispatched to the + appropriate REST handler, which is installed by the `APIInstaller`. + +## 3. API Group Registration + +The high-level steps for introducing an API are: + +1. **Define Types:** Create or modify the Go structs in the `types.go` file for the API group. +2. **Generate Code:** Use the code generators provided by the Kubernetes project to create the required + boilerplate methods for deep-copy, conversion, and defaulting. +3. **Implement the `Strategy`:** Write the custom business logic and validation for the + resource in its `Strategy` object. +4. **Register and Install:** Create the `APIGroupInfo` struct, bundling the `Scheme` and the + `Strategy`-configured storage, and pass it to the `GenericAPIServer`'s `InstallAPIGroup` + method. + +### The API Group Registry + +The `runtime.Scheme` acts as a central registry for an API group's type information. A single +`Scheme` object is created for each API group and is responsible for the following key +capabilities: + +* **Type Registration and Mapping:** The `Scheme`'s primary role is to map a GroupVersionKind + (GVK) to its corresponding Go type and back. This process also relies on the `deepcopy-gen` + tool to create `DeepCopy()` methods for each type, which is critical for ensuring that + objects returned from caches are never modified directly. + +* **API Conversion:** The `Scheme` stores the conversion functions that translate objects + between different API versions. These functions are typically generated by the + `conversion-gen` tool and enable the **hub-and-spoke** model. 
+ +* **Defaulting:** The `Scheme` registers defaulting functions that populate optional fields in + an object. These are usually generated by the `defaulter-gen` tool. + +* **Declarative Validation:** The `Scheme` can store and execute code-generated validation + functions, providing a baseline level of validation. This is distinct from the primary, + handwritten business logic validation, which is handled by the `Strategy` object. + +### The `APIGroupInfo` Struct and `Strategy` Object + +With a populated `Scheme`, the API group is registered with the `GenericAPIServer` by bundling +the `Scheme` with the storage backend and versioning information into an `APIGroupInfo` struct. + +```mermaid +graph TD + subgraph Server Configuration + A[APIGroupInfo for apps v1]; + A --> B{Scheme: Knows Deployment v1}; + A --> C{Storage: deployments RESTStorage}; + A --> D{Version Priority: v1, v1beta1}; + end + + subgraph RESTStorage Implementation + C --> E[genericregistry.Store]; + E --> F[etcd client]; + E --> G[Deployment Strategy]; + end + + subgraph Server Runtime + H[GenericAPIServer] -- InstallAPIGroup --> I[APIInstaller]; + I -- Uses --> A; + I --> J{Register /apis/apps/v1/deployments}; + J --> K[HTTP Handler]; + K -- On Request --> C; + end +``` + +The registration process follows these steps: + +1. **`APIGroupInfo` Construction:** For each API group, an `APIGroupInfo` struct is created, + which contains the populated `Scheme`, a map of resources to their storage + implementations, and an ordered list of **Version Priority**. + +2. **REST Storage Instantiation:** For each resource, a `genericregistry.Store` is created. It + is configured with a resource-specific `Strategy` object that contains the core business + logic (e.g., handwritten validation). + +3. **API Group Installation:** The `GenericAPIServer`'s `InstallAPIGroup` method takes the + `APIGroupInfo` and uses an `APIInstaller` to expose the resources as HTTP endpoints. + +## 4. 
Watch Cache + +To handle the high volume of watch requests from controllers without overwhelming etcd, the +apiserver uses a **watch cache**. The implementation can be found in +`staging/src/k8s.io/apiserver/pkg/storage/cacher/`. + +* **Initialization:** The cacher first performs a `LIST` to get the current state of all + objects and a `ResourceVersion` for that point-in-time. It then starts a `WATCH` from + that version to ensure a consistent stream of events. +* **Serving from Cache:** Most list and watch requests are served from this in-memory cache, which + dramatically reduces the load on etcd. Consistent reads are also served from the + cache. This is achieved by first fetching the revision number of the latest write from + etcd. The server then ensures the cache is at least that recent—waiting for it to + refresh if necessary—before serving the request. +* **Fallback to Storage:** If a client request cannot be served from the + cache's buffer, the request "falls through" to the underlying etcd storage. +* **Bookmarks:** The cacher uses bookmark events to track the latest `ResourceVersion` for + unchanged objects. This prevents the cache's `ResourceVersion` from becoming too old, + which avoids the need for expensive relist operations from etcd when the objects have + not been modified. + +## 5. Conflict Resolution + +* **Optimistic Concurrency via `resourceVersion`:** Clients are expected to perform updates using a + read-modify-write workflow. The apiserver uses the `resourceVersion` field of every + object to enforce optimistic concurrency. When a client submits an update (`PUT` or `PATCH`), + it must provide the `resourceVersion` of the object it based its modifications on. If the + `resourceVersion` on the server does not match, the server rejects the request with a + `409 Conflict` error. This forces the client to re-read the object, resolve the conflict, + and resubmit with the new `resourceVersion`. 
+* **Server-Side Apply:** A declarative, "intent-based" patch. The server maintains a + `managedFields` section in the object's metadata to track which "manager" (e.g., a + controller) owns each field. This allows multiple actors to manage different parts of the + same object without overwriting each other's changes. + +## 6. Discovery and OpenAPI + +Apiservers serve the `/apis` discovery endpoints and the `/openapi/v2` and `/openapi/v3` +specifications. The generation of the OpenAPI specification is a multi-stage process. + +* **`openapi-gen`**: This tool reflects on Go structs, reads godoc comments, and looks at + validation struct tags to generate a map of all API definitions. +* **`zz_generated.openapi.go`**: The output is a large Go file containing a + `GetOpenAPIDefinitions` function. +* **Runtime**: The `GenericAPIServer` calls this generated function to build the final OpenAPI + JSON spec that it serves to clients. + +## 7. Security & Observability + +* **Audit (`pkg/audit`):** The apiserver has a policy-driven event logging pipeline. The audit + policy controls what is logged and at which stage of a request. +* **Security:** + * **mTLS:** The primary authentication mechanism for system components. + * **Service Account Token Issuance:** The `kube-apiserver` acts as an OIDC provider, + issuing and validating JWTs for `ServiceAccount`s. + +## 8. Streaming Protocols + +* **Websockets:** The apiserver upgrades HTTP connections to websockets for + interactive, streaming protocols like `exec`, `attach`, and + `port-forward`. The `UpgradeAwareHandler` manages this process. diff --git a/doc.go b/doc.go index 573d9e39b..d6334e073 100644 --- a/doc.go +++ b/doc.go @@ -1,5 +1,5 @@ /* -Copyright 2021 The Kubernetes Authors. +Copyright 2016 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -9,9 +9,66 @@ You may obtain a copy of the License at Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +// Package apiserver provides the machinery for building Kubernetes-style API servers. +// +// This library is the foundation for the Kubernetes API server (`kube-apiserver`), +// and is also the primary framework for developers building custom API servers to extend +// the Kubernetes API. +// +// An extension API server is a user-provided, standalone web server that registers itself +// with the main kube-apiserver to handle specific API groups. This allows developers to +// extend Kubernetes with their own APIs that behave like core Kubernetes APIs, complete +// with typed clients, authentication, authorization, and discovery. +// +// # Key Packages +// +// The `apiserver` library is composed of several key packages: +// +// - `pkg/server`: This is the core of the library, providing the `GenericAPIServer` +// and the main machinery for building the server. +// - `pkg/admission`: This package contains the admission control framework. Developers +// can use this to build custom admission plugins that can validate or mutate +// requests to enforce custom policies. This is a common way to extend Kubernetes +// behavior without adding a full API server. +// - `pkg/authentication`: This package provides the framework for authenticating +// requests. +// - `pkg/authorization`: This package provides the framework for authorizing +// requests. +// - `pkg/endpoints`: This package contains the machinery for building the REST +// endpoints for the API server. 
+// - `pkg/registry`: This package provides the storage interface for the API server. +// +// # Instantiating a GenericAPIServer +// +// The `GenericAPIServer` struct is the heart of any extension server. It is responsible +// for assembling and running the HTTP serving stack. See the runnable example for a +// demonstration of how to instantiate a `GenericAPIServer`. +// +// # Building an Extension API Server (API Aggregation) +// +// The mechanism that enables extension API servers is API aggregation. The +// primary apiserver (typically the kube-apiserver) acts as a proxy, forwarding +// requests for a specific API group (e.g., /apis/myextension.io/v1) to a +// registered extension server. This forwarding is configured using +// APIService objects. +// +// For most use cases, custom resources (CustomResourceDefinitions) are the +// preferred way to extend the Kubernetes API. +// +// # Building an Admission Plugin +// +// The `pkg/admission` package provides a way to add admission policies directly +// into an apiserver. Admission plugins can be used to validate or mutate objects +// during write operations. The kube-apiserver uses admission plugins to provide +// a variety of core system capabilities. +// +// For most extension use cases, dynamic admission control using policies +// (ValidatingAdmissionPolicies or MutatingAdmissionPolicies) or +// webhooks (ValidatingWebhookConfiguration and MutatingWebhookConfiguration) is the +// preferred way to extend admission control. package apiserver diff --git a/example_test.go b/example_test.go new file mode 100644 index 000000000..80622e3f0 --- /dev/null +++ b/example_test.go @@ -0,0 +1,67 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apiserver_test + +import ( + "fmt" + + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/apiserver/pkg/registry/rest" + "k8s.io/apiserver/pkg/server" +) + +func Example_instantiation() { + // 1. Create a scheme and a codec factory built from that scheme. + // In a real application, you would register your API types with the scheme. + scheme := runtime.NewScheme() + codecs := serializer.NewCodecFactory(scheme) + + // 2. Create a new server configuration. + // This creates a recommended configuration with default values. + serverConfig := server.NewRecommendedConfig(codecs) + + // 3. Complete the configuration and create a new GenericAPIServer from it. + // The second argument is the "delegate", used to chain to another API server. + // A standalone server has nothing to chain to, so an empty delegate is used. + genericServer, err := serverConfig.Complete().New("my-api-server", server.NewEmptyDelegate()) + if err != nil { + fmt.Printf("Error creating generic API server: %v", err) + return + } + + // 4. Install an API group. + // This is where you would define your API resources and their storage. + // For this example, the group lists one version and an empty storage map. 
+ apiGroupInfo := &server.APIGroupInfo{ + PrioritizedVersions: []schema.GroupVersion{{Group: "mygroup.example.com", Version: "v1"}}, + VersionedResourcesStorageMap: make(map[string]map[string]rest.Storage), + Scheme: scheme, + ParameterCodec: runtime.NewParameterCodec(scheme), + NegotiatedSerializer: codecs, + } + if err := genericServer.InstallAPIGroup(apiGroupInfo); err != nil { + fmt.Printf("Error installing API group: %v", err) + return + } + + fmt.Println("GenericAPIServer created successfully.") + + // Output: + // GenericAPIServer created successfully. +}