feat(kubelet): Add ResourceHealthStatus for DRA pods

This change introduces the ability for the Kubelet to monitor and report
the health of devices allocated via Dynamic Resource Allocation (DRA).
This addresses a key part of KEP-4680 by providing visibility into
device failures, which helps users and controllers diagnose pod failures.

The implementation includes:
- A new `v1alpha1.NodeHealth` gRPC service with a `WatchResources`
  stream that DRA plugins can optionally implement.
- A health information cache within the Kubelet's DRA manager to track
  the last known health of each device and handle plugin disconnections.
- An asynchronous update mechanism that triggers a pod sync when a
  device's health changes.
- A new `allocatedResourcesStatus` field in `v1.ContainerStatus` to
  expose the device health information to users via the Pod API.

Update vendor

KEP-4680: Fix lint, boilerplate, and codegen issues

Add another e2e test, add TODO for KEP4680 & update test infra helpers

Add Feature Gate e2e test

Fixing presubmits

Fix var names, feature gating, and nits

Fix DRA Health gRPC API according to review feedback

Kubernetes-commit: b7de71f9ce74e99dde61ee138608df8edc5486bd
This commit is contained in:
John-Paul Sassine 2025-06-17 20:49:12 +00:00 committed by Kubernetes Publisher
parent 28e34b2552
commit e6033dfbf2
3 changed files with 641 additions and 0 deletions

View File

@ -0,0 +1,414 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Copyright 2025 The Kubernetes Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.4
// protoc v4.23.4
// source: staging/src/k8s.io/kubelet/pkg/apis/dra-health/v1alpha1/api.proto
package v1alpha1
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
// HealthStatus defines the possible health states of a device.
type HealthStatus int32
const (
// UNKNOWN indicates that the health of the device cannot be determined.
HealthStatus_UNKNOWN HealthStatus = 0
// HEALTHY indicates that the device is operating normally.
HealthStatus_HEALTHY HealthStatus = 1
// UNHEALTHY indicates that the device has reported a problem.
HealthStatus_UNHEALTHY HealthStatus = 2
)
// Enum value maps for HealthStatus.
var (
HealthStatus_name = map[int32]string{
0: "UNKNOWN",
1: "HEALTHY",
2: "UNHEALTHY",
}
HealthStatus_value = map[string]int32{
"UNKNOWN": 0,
"HEALTHY": 1,
"UNHEALTHY": 2,
}
)
func (x HealthStatus) Enum() *HealthStatus {
p := new(HealthStatus)
*p = x
return p
}
func (x HealthStatus) String() string {
return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
}
func (HealthStatus) Descriptor() protoreflect.EnumDescriptor {
return file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_enumTypes[0].Descriptor()
}
func (HealthStatus) Type() protoreflect.EnumType {
return &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_enumTypes[0]
}
func (x HealthStatus) Number() protoreflect.EnumNumber {
return protoreflect.EnumNumber(x)
}
// Deprecated: Use HealthStatus.Descriptor instead.
func (HealthStatus) EnumDescriptor() ([]byte, []int) {
return file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescGZIP(), []int{0}
}
type NodeWatchResourcesRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *NodeWatchResourcesRequest) Reset() {
*x = NodeWatchResourcesRequest{}
mi := &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *NodeWatchResourcesRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*NodeWatchResourcesRequest) ProtoMessage() {}
func (x *NodeWatchResourcesRequest) ProtoReflect() protoreflect.Message {
mi := &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use NodeWatchResourcesRequest.ProtoReflect.Descriptor instead.
func (*NodeWatchResourcesRequest) Descriptor() ([]byte, []int) {
return file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescGZIP(), []int{0}
}
// DeviceIdentifier uniquely identifies a device within the scope of a driver.
type DeviceIdentifier struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The pool which contains the device.
PoolName string `protobuf:"bytes,1,opt,name=pool_name,json=poolName,proto3" json:"pool_name,omitempty"`
// The unique name of the device within the pool.
DeviceName string `protobuf:"bytes,2,opt,name=device_name,json=deviceName,proto3" json:"device_name,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *DeviceIdentifier) Reset() {
*x = DeviceIdentifier{}
mi := &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *DeviceIdentifier) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*DeviceIdentifier) ProtoMessage() {}
func (x *DeviceIdentifier) ProtoReflect() protoreflect.Message {
mi := &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use DeviceIdentifier.ProtoReflect.Descriptor instead.
func (*DeviceIdentifier) Descriptor() ([]byte, []int) {
return file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescGZIP(), []int{1}
}
func (x *DeviceIdentifier) GetPoolName() string {
if x != nil {
return x.PoolName
}
return ""
}
func (x *DeviceIdentifier) GetDeviceName() string {
if x != nil {
return x.DeviceName
}
return ""
}
// DeviceHealth represents the health of a single device.
type DeviceHealth struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The identifier for the device.
Device *DeviceIdentifier `protobuf:"bytes,1,opt,name=device,proto3" json:"device,omitempty"`
// The health status of the device.
Health HealthStatus `protobuf:"varint,2,opt,name=health,proto3,enum=v1alpha1.HealthStatus" json:"health,omitempty"`
// The Unix time (in seconds) of when this health status was last determined by the plugin.
LastUpdatedTime int64 `protobuf:"varint,3,opt,name=last_updated_time,json=lastUpdatedTime,proto3" json:"last_updated_time,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *DeviceHealth) Reset() {
*x = DeviceHealth{}
mi := &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *DeviceHealth) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*DeviceHealth) ProtoMessage() {}
func (x *DeviceHealth) ProtoReflect() protoreflect.Message {
mi := &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use DeviceHealth.ProtoReflect.Descriptor instead.
func (*DeviceHealth) Descriptor() ([]byte, []int) {
return file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescGZIP(), []int{2}
}
func (x *DeviceHealth) GetDevice() *DeviceIdentifier {
if x != nil {
return x.Device
}
return nil
}
func (x *DeviceHealth) GetHealth() HealthStatus {
if x != nil {
return x.Health
}
return HealthStatus_UNKNOWN
}
func (x *DeviceHealth) GetLastUpdatedTime() int64 {
if x != nil {
return x.LastUpdatedTime
}
return 0
}
// NodeWatchResourcesResponse contains a list of devices and their current health.
// This should be a complete list for the driver; Kubelet will reconcile this
// state with its internal cache. Any devices managed by the driver that are
// not in this list will be considered to have an "Unknown" health status after a timeout.
type NodeWatchResourcesResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
Devices []*DeviceHealth `protobuf:"bytes,1,rep,name=devices,proto3" json:"devices,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *NodeWatchResourcesResponse) Reset() {
*x = NodeWatchResourcesResponse{}
mi := &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *NodeWatchResourcesResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*NodeWatchResourcesResponse) ProtoMessage() {}
func (x *NodeWatchResourcesResponse) ProtoReflect() protoreflect.Message {
mi := &file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use NodeWatchResourcesResponse.ProtoReflect.Descriptor instead.
func (*NodeWatchResourcesResponse) Descriptor() ([]byte, []int) {
return file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescGZIP(), []int{3}
}
func (x *NodeWatchResourcesResponse) GetDevices() []*DeviceHealth {
if x != nil {
return x.Devices
}
return nil
}
var File_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto protoreflect.FileDescriptor
var file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDesc = string([]byte{
0x0a, 0x41, 0x73, 0x74, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x6b, 0x38,
0x73, 0x2e, 0x69, 0x6f, 0x2f, 0x6b, 0x75, 0x62, 0x65, 0x6c, 0x65, 0x74, 0x2f, 0x70, 0x6b, 0x67,
0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x64, 0x72, 0x61, 0x2d, 0x68, 0x65, 0x61, 0x6c, 0x74, 0x68,
0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x61, 0x70, 0x69, 0x2e, 0x70, 0x72,
0x6f, 0x74, 0x6f, 0x12, 0x08, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x22, 0x1b, 0x0a,
0x19, 0x4e, 0x6f, 0x64, 0x65, 0x57, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72,
0x63, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x50, 0x0a, 0x10, 0x44, 0x65,
0x76, 0x69, 0x63, 0x65, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x12, 0x1b,
0x0a, 0x09, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28,
0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x64,
0x65, 0x76, 0x69, 0x63, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09,
0x52, 0x0a, 0x64, 0x65, 0x76, 0x69, 0x63, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0x9e, 0x01, 0x0a,
0x0c, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x32, 0x0a,
0x06, 0x64, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e,
0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x49,
0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x52, 0x06, 0x64, 0x65, 0x76, 0x69, 0x63,
0x65, 0x12, 0x2e, 0x0a, 0x06, 0x68, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28,
0x0e, 0x32, 0x16, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x48, 0x65, 0x61,
0x6c, 0x74, 0x68, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x68, 0x65, 0x61, 0x6c, 0x74,
0x68, 0x12, 0x2a, 0x0a, 0x11, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65,
0x64, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0f, 0x6c, 0x61,
0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x54, 0x69, 0x6d, 0x65, 0x22, 0x4e, 0x0a,
0x1a, 0x4e, 0x6f, 0x64, 0x65, 0x57, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72,
0x63, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x07, 0x64,
0x65, 0x76, 0x69, 0x63, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x76,
0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x48, 0x65,
0x61, 0x6c, 0x74, 0x68, 0x52, 0x07, 0x64, 0x65, 0x76, 0x69, 0x63, 0x65, 0x73, 0x2a, 0x37, 0x0a,
0x0c, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0b, 0x0a,
0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, 0x48, 0x45,
0x41, 0x4c, 0x54, 0x48, 0x59, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x48, 0x45, 0x41,
0x4c, 0x54, 0x48, 0x59, 0x10, 0x02, 0x32, 0x78, 0x0a, 0x11, 0x44, 0x52, 0x41, 0x52, 0x65, 0x73,
0x6f, 0x75, 0x72, 0x63, 0x65, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x63, 0x0a, 0x12, 0x4e,
0x6f, 0x64, 0x65, 0x57, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65,
0x73, 0x12, 0x23, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x4e, 0x6f, 0x64,
0x65, 0x57, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x52,
0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61,
0x31, 0x2e, 0x4e, 0x6f, 0x64, 0x65, 0x57, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x73, 0x6f, 0x75,
0x72, 0x63, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x30, 0x01,
0x42, 0x2d, 0x5a, 0x2b, 0x6b, 0x38, 0x73, 0x2e, 0x69, 0x6f, 0x2f, 0x6b, 0x75, 0x62, 0x65, 0x6c,
0x65, 0x74, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x64, 0x72, 0x61, 0x2d,
0x68, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x62,
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
})
var (
file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescOnce sync.Once
file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescData []byte
)
func file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescGZIP() []byte {
file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescOnce.Do(func() {
file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDesc), len(file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDesc)))
})
return file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDescData
}
var file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
var file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes = make([]protoimpl.MessageInfo, 4)
var file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_goTypes = []any{
(HealthStatus)(0), // 0: v1alpha1.HealthStatus
(*NodeWatchResourcesRequest)(nil), // 1: v1alpha1.NodeWatchResourcesRequest
(*DeviceIdentifier)(nil), // 2: v1alpha1.DeviceIdentifier
(*DeviceHealth)(nil), // 3: v1alpha1.DeviceHealth
(*NodeWatchResourcesResponse)(nil), // 4: v1alpha1.NodeWatchResourcesResponse
}
var file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_depIdxs = []int32{
2, // 0: v1alpha1.DeviceHealth.device:type_name -> v1alpha1.DeviceIdentifier
0, // 1: v1alpha1.DeviceHealth.health:type_name -> v1alpha1.HealthStatus
3, // 2: v1alpha1.NodeWatchResourcesResponse.devices:type_name -> v1alpha1.DeviceHealth
1, // 3: v1alpha1.DRAResourceHealth.NodeWatchResources:input_type -> v1alpha1.NodeWatchResourcesRequest
4, // 4: v1alpha1.DRAResourceHealth.NodeWatchResources:output_type -> v1alpha1.NodeWatchResourcesResponse
4, // [4:5] is the sub-list for method output_type
3, // [3:4] is the sub-list for method input_type
3, // [3:3] is the sub-list for extension type_name
3, // [3:3] is the sub-list for extension extendee
0, // [0:3] is the sub-list for field type_name
}
func init() { file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_init() }
func file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_init() {
if File_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto != nil {
return
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDesc), len(file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_rawDesc)),
NumEnums: 1,
NumMessages: 4,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_goTypes,
DependencyIndexes: file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_depIdxs,
EnumInfos: file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_enumTypes,
MessageInfos: file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_msgTypes,
}.Build()
File_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto = out.File
file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_goTypes = nil
file_staging_src_k8s_io_kubelet_pkg_apis_dra_health_v1alpha1_api_proto_depIdxs = nil
}

View File

@ -0,0 +1,67 @@
// Copyright 2025 The Kubernetes Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package v1alpha1;
option go_package = "k8s.io/kubelet/pkg/apis/dra-health/v1alpha1";
// DRAResourceHealth service is implemented by DRA plugins and called by Kubelet.
service DRAResourceHealth {
// NodeWatchResources allows a DRA plugin to stream health updates for its devices to Kubelet.
rpc NodeWatchResources(NodeWatchResourcesRequest) returns (stream NodeWatchResourcesResponse) {}
}
message NodeWatchResourcesRequest {
// Reserved for future use.
}
// HealthStatus defines the possible health states of a device.
enum HealthStatus {
// UNKNOWN indicates that the health of the device cannot be determined.
UNKNOWN = 0;
// HEALTHY indicates that the device is operating normally.
HEALTHY = 1;
// UNHEALTHY indicates that the device has reported a problem.
UNHEALTHY = 2;
}
// DeviceIdentifier uniquely identifies a device within the scope of a driver.
message DeviceIdentifier {
// The pool which contains the device.
string pool_name = 1;
// The unique name of the device within the pool.
string device_name = 2;
}
// DeviceHealth represents the health of a single device.
message DeviceHealth {
// The identifier for the device.
DeviceIdentifier device = 1;
// The health status of the device.
HealthStatus health = 2;
// The Unix time (in seconds) of when this health status was last determined by the plugin.
int64 last_updated_time = 3;
}
// NodeWatchResourcesResponse contains a list of devices and their current health.
// This should be a complete list for the driver; Kubelet will reconcile this
// state with its internal cache. Any devices managed by the driver that are
// not in this list will be considered to have an "Unknown" health status after a timeout.
message NodeWatchResourcesResponse {
repeated DeviceHealth devices = 1;
}

View File

@ -0,0 +1,160 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Copyright 2025 The Kubernetes Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.5.1
// - protoc v4.23.4
// source: staging/src/k8s.io/kubelet/pkg/apis/dra-health/v1alpha1/api.proto
package v1alpha1
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
const (
DRAResourceHealth_NodeWatchResources_FullMethodName = "/v1alpha1.DRAResourceHealth/NodeWatchResources"
)
// DRAResourceHealthClient is the client API for DRAResourceHealth service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// DRAResourceHealth service is implemented by DRA plugins and called by Kubelet.
type DRAResourceHealthClient interface {
// NodeWatchResources allows a DRA plugin to stream health updates for its devices to Kubelet.
NodeWatchResources(ctx context.Context, in *NodeWatchResourcesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[NodeWatchResourcesResponse], error)
}
type dRAResourceHealthClient struct {
cc grpc.ClientConnInterface
}
func NewDRAResourceHealthClient(cc grpc.ClientConnInterface) DRAResourceHealthClient {
return &dRAResourceHealthClient{cc}
}
func (c *dRAResourceHealthClient) NodeWatchResources(ctx context.Context, in *NodeWatchResourcesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[NodeWatchResourcesResponse], error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
stream, err := c.cc.NewStream(ctx, &DRAResourceHealth_ServiceDesc.Streams[0], DRAResourceHealth_NodeWatchResources_FullMethodName, cOpts...)
if err != nil {
return nil, err
}
x := &grpc.GenericClientStream[NodeWatchResourcesRequest, NodeWatchResourcesResponse]{ClientStream: stream}
if err := x.ClientStream.SendMsg(in); err != nil {
return nil, err
}
if err := x.ClientStream.CloseSend(); err != nil {
return nil, err
}
return x, nil
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type DRAResourceHealth_NodeWatchResourcesClient = grpc.ServerStreamingClient[NodeWatchResourcesResponse]
// DRAResourceHealthServer is the server API for DRAResourceHealth service.
// All implementations must embed UnimplementedDRAResourceHealthServer
// for forward compatibility.
//
// DRAResourceHealth service is implemented by DRA plugins and called by Kubelet.
type DRAResourceHealthServer interface {
// NodeWatchResources allows a DRA plugin to stream health updates for its devices to Kubelet.
NodeWatchResources(*NodeWatchResourcesRequest, grpc.ServerStreamingServer[NodeWatchResourcesResponse]) error
mustEmbedUnimplementedDRAResourceHealthServer()
}
// UnimplementedDRAResourceHealthServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedDRAResourceHealthServer struct{}
func (UnimplementedDRAResourceHealthServer) NodeWatchResources(*NodeWatchResourcesRequest, grpc.ServerStreamingServer[NodeWatchResourcesResponse]) error {
return status.Errorf(codes.Unimplemented, "method NodeWatchResources not implemented")
}
func (UnimplementedDRAResourceHealthServer) mustEmbedUnimplementedDRAResourceHealthServer() {}
func (UnimplementedDRAResourceHealthServer) testEmbeddedByValue() {}
// UnsafeDRAResourceHealthServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to DRAResourceHealthServer will
// result in compilation errors.
type UnsafeDRAResourceHealthServer interface {
mustEmbedUnimplementedDRAResourceHealthServer()
}
func RegisterDRAResourceHealthServer(s grpc.ServiceRegistrar, srv DRAResourceHealthServer) {
// If the following call pancis, it indicates UnimplementedDRAResourceHealthServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&DRAResourceHealth_ServiceDesc, srv)
}
func _DRAResourceHealth_NodeWatchResources_Handler(srv interface{}, stream grpc.ServerStream) error {
m := new(NodeWatchResourcesRequest)
if err := stream.RecvMsg(m); err != nil {
return err
}
return srv.(DRAResourceHealthServer).NodeWatchResources(m, &grpc.GenericServerStream[NodeWatchResourcesRequest, NodeWatchResourcesResponse]{ServerStream: stream})
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type DRAResourceHealth_NodeWatchResourcesServer = grpc.ServerStreamingServer[NodeWatchResourcesResponse]
// DRAResourceHealth_ServiceDesc is the grpc.ServiceDesc for DRAResourceHealth service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var DRAResourceHealth_ServiceDesc = grpc.ServiceDesc{
ServiceName: "v1alpha1.DRAResourceHealth",
HandlerType: (*DRAResourceHealthServer)(nil),
Methods: []grpc.MethodDesc{},
Streams: []grpc.StreamDesc{
{
StreamName: "NodeWatchResources",
Handler: _DRAResourceHealth_NodeWatchResources_Handler,
ServerStreams: true,
},
},
Metadata: "staging/src/k8s.io/kubelet/pkg/apis/dra-health/v1alpha1/api.proto",
}