[addons] Introduce NodeProblemDetector

Node Problem Detector aims to make various node problems visible to
the upstream layers in the cluster management stack. It is a daemon
that runs on each node, detects node problems and reports them to the
apiserver, so as to avoid scheduling new pods on bad nodes and to make it
easy to identify the problems on the underlying nodes.

Project Home: https://github.com/kubernetes/node-problem-detector

Signed-off-by: dntosas <ntosas@gmail.com>
This commit is contained in:
dntosas 2021-05-04 16:33:40 +03:00
parent 466877c8d5
commit 20124d3ba9
No known key found for this signature in database
GPG Key ID: FC873FCAA5A65CC8
15 changed files with 547 additions and 0 deletions

View File

@ -190,6 +190,19 @@ The kOps CLI requires additional IAM permissions to manage the requisite EventBr
**Warning: If you switch between the two operating modes on an existing cluster, the old resources have to be manually deleted. For IMDS to Queue Processor, this means deleting the k8s nth daemonset. For Queue Processor to IMDS, this means deleting the k8s nth deployment and the AWS resources: the SQS queue, EventBridge rules, and ASG Lifecycle hooks.**
#### Node Problem Detector
{{ kops_feature_table(kops_added_default='1.22') }}
[Node Problem Detector](https://github.com/kubernetes/node-problem-detector) aims to make various node problems visible to the upstream layers in the cluster management stack. It is a daemon that runs on each node, detects node problems and reports them to the apiserver.
```yaml
spec:
nodeProblemDetector:
enabled: true
memoryRequest: 32Mi
cpuRequest: 10m
```
#### Snapshot controller
{{ kops_feature_table(kops_added_default='1.21', k8s_min='1.20') }}

View File

@ -3965,6 +3965,51 @@ spec:
items:
type: string
type: array
nodeProblemDetector:
description: NodeProblemDetector determines the node problem detector
configuration.
properties:
cpuLimit:
anyOf:
- type: integer
- type: string
description: 'CPULimit of NodeProblemDetector container. Default:
10m'
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
cpuRequest:
anyOf:
- type: integer
- type: string
description: 'CPURequest of NodeProblemDetector container. Default:
10m'
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
enabled:
description: 'Enabled enables the NodeProblemDetector. Default:
false'
type: boolean
image:
description: Image is the NodeProblemDetector docker container
used.
type: string
memoryLimit:
anyOf:
- type: integer
- type: string
description: 'MemoryLimit of NodeProblemDetector container. Default:
80Mi'
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
memoryRequest:
anyOf:
- type: integer
- type: string
description: 'MemoryRequest of NodeProblemDetector container.
Default: 80Mi'
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
nodeTerminationHandler:
description: NodeTerminationHandler determines the cluster autoscaler
configuration.

View File

@ -161,6 +161,8 @@ type ClusterSpec struct {
// NodeTerminationHandler determines the node termination handler configuration.
NodeTerminationHandler *NodeTerminationHandlerConfig `json:"nodeTerminationHandler,omitempty"`
// NodeProblemDetector determines the node problem detector configuration.
NodeProblemDetector *NodeProblemDetectorConfig `json:"nodeProblemDetector,omitempty"`
// MetricsServer determines the metrics server configuration.
MetricsServer *MetricsServerConfig `json:"metricsServer,omitempty"`
// CertManager determines the metrics server configuration.

View File

@ -888,6 +888,28 @@ type NodeTerminationHandlerConfig struct {
CPURequest *resource.Quantity `json:"cpuRequest,omitempty"`
}
// NodeProblemDetectorConfig determines the node problem detector configuration.
// Every field is an optional pointer and is omitted from JSON when nil.
type NodeProblemDetectorConfig struct {
// Enabled enables the NodeProblemDetector.
// Default: false
Enabled *bool `json:"enabled,omitempty"`
// Image is the NodeProblemDetector docker container used.
Image *string `json:"image,omitempty"`
// MemoryRequest of NodeProblemDetector container.
// Default: 80Mi
MemoryRequest *resource.Quantity `json:"memoryRequest,omitempty"`
// CPURequest of NodeProblemDetector container.
// Default: 10m
CPURequest *resource.Quantity `json:"cpuRequest,omitempty"`
// MemoryLimit of NodeProblemDetector container.
// Default: 80Mi
MemoryLimit *resource.Quantity `json:"memoryLimit,omitempty"`
// CPULimit of NodeProblemDetector container.
// Default: 10m
CPULimit *resource.Quantity `json:"cpuLimit,omitempty"`
}
// ClusterAutoscalerConfig determines the cluster autoscaler configuration.
type ClusterAutoscalerConfig struct {
// Enabled enables the cluster autoscaler.

View File

@ -160,6 +160,8 @@ type ClusterSpec struct {
// NodeTerminationHandler determines the cluster autoscaler configuration.
NodeTerminationHandler *NodeTerminationHandlerConfig `json:"nodeTerminationHandler,omitempty"`
// NodeProblemDetector determines the node problem detector configuration.
NodeProblemDetector *NodeProblemDetectorConfig `json:"nodeProblemDetector,omitempty"`
// MetricsServer determines the metrics server configuration.
MetricsServer *MetricsServerConfig `json:"metricsServer,omitempty"`
// CertManager determines the metrics server configuration.

View File

@ -887,6 +887,28 @@ type NodeTerminationHandlerConfig struct {
CPURequest *resource.Quantity `json:"cpuRequest,omitempty"`
}
// NodeProblemDetectorConfig determines the node problem detector configuration.
// Every field is an optional pointer and is omitted from JSON when nil.
type NodeProblemDetectorConfig struct {
// Enabled enables the NodeProblemDetector.
// Default: false
Enabled *bool `json:"enabled,omitempty"`
// Image is the NodeProblemDetector docker container used.
Image *string `json:"image,omitempty"`
// MemoryRequest of NodeProblemDetector container.
// Default: 80Mi
MemoryRequest *resource.Quantity `json:"memoryRequest,omitempty"`
// CPURequest of NodeProblemDetector container.
// Default: 10m
CPURequest *resource.Quantity `json:"cpuRequest,omitempty"`
// MemoryLimit of NodeProblemDetector container.
// Default: 80Mi
MemoryLimit *resource.Quantity `json:"memoryLimit,omitempty"`
// CPULimit of NodeProblemDetector container.
// Default: 10m
CPULimit *resource.Quantity `json:"cpuLimit,omitempty"`
}
// ClusterAutoscalerConfig determines the cluster autoscaler configuration.
type ClusterAutoscalerConfig struct {
// Enabled enables the cluster autoscaler.

View File

@ -853,6 +853,16 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*NodeProblemDetectorConfig)(nil), (*kops.NodeProblemDetectorConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha2_NodeProblemDetectorConfig_To_kops_NodeProblemDetectorConfig(a.(*NodeProblemDetectorConfig), b.(*kops.NodeProblemDetectorConfig), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*kops.NodeProblemDetectorConfig)(nil), (*NodeProblemDetectorConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_kops_NodeProblemDetectorConfig_To_v1alpha2_NodeProblemDetectorConfig(a.(*kops.NodeProblemDetectorConfig), b.(*NodeProblemDetectorConfig), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*NodeTerminationHandlerConfig)(nil), (*kops.NodeTerminationHandlerConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha2_NodeTerminationHandlerConfig_To_kops_NodeTerminationHandlerConfig(a.(*NodeTerminationHandlerConfig), b.(*kops.NodeTerminationHandlerConfig), scope)
}); err != nil {
@ -2437,6 +2447,15 @@ func autoConvert_v1alpha2_ClusterSpec_To_kops_ClusterSpec(in *ClusterSpec, out *
} else {
out.NodeTerminationHandler = nil
}
if in.NodeProblemDetector != nil {
in, out := &in.NodeProblemDetector, &out.NodeProblemDetector
*out = new(kops.NodeProblemDetectorConfig)
if err := Convert_v1alpha2_NodeProblemDetectorConfig_To_kops_NodeProblemDetectorConfig(*in, *out, s); err != nil {
return err
}
} else {
out.NodeProblemDetector = nil
}
if in.MetricsServer != nil {
in, out := &in.MetricsServer, &out.MetricsServer
*out = new(kops.MetricsServerConfig)
@ -2840,6 +2859,15 @@ func autoConvert_kops_ClusterSpec_To_v1alpha2_ClusterSpec(in *kops.ClusterSpec,
} else {
out.NodeTerminationHandler = nil
}
if in.NodeProblemDetector != nil {
in, out := &in.NodeProblemDetector, &out.NodeProblemDetector
*out = new(NodeProblemDetectorConfig)
if err := Convert_kops_NodeProblemDetectorConfig_To_v1alpha2_NodeProblemDetectorConfig(*in, *out, s); err != nil {
return err
}
} else {
out.NodeProblemDetector = nil
}
if in.MetricsServer != nil {
in, out := &in.MetricsServer, &out.MetricsServer
*out = new(MetricsServerConfig)
@ -5906,6 +5934,36 @@ func Convert_kops_NodeLocalDNSConfig_To_v1alpha2_NodeLocalDNSConfig(in *kops.Nod
return autoConvert_kops_NodeLocalDNSConfig_To_v1alpha2_NodeLocalDNSConfig(in, out, s)
}
// autoConvert_v1alpha2_NodeProblemDetectorConfig_To_kops_NodeProblemDetectorConfig
// populates out with every field of in. The pointer fields are assigned as-is,
// so in and out share the pointed-to values afterwards. The scope argument is
// not consulted and the function always returns nil.
func autoConvert_v1alpha2_NodeProblemDetectorConfig_To_kops_NodeProblemDetectorConfig(in *NodeProblemDetectorConfig, out *kops.NodeProblemDetectorConfig, s conversion.Scope) error {
	*out = kops.NodeProblemDetectorConfig{
		Enabled:       in.Enabled,
		Image:         in.Image,
		MemoryRequest: in.MemoryRequest,
		CPURequest:    in.CPURequest,
		MemoryLimit:   in.MemoryLimit,
		CPULimit:      in.CPULimit,
	}
	return nil
}
// Convert_v1alpha2_NodeProblemDetectorConfig_To_kops_NodeProblemDetectorConfig is an autogenerated conversion function.
// It forwards its arguments unchanged to
// autoConvert_v1alpha2_NodeProblemDetectorConfig_To_kops_NodeProblemDetectorConfig
// and returns that function's error.
func Convert_v1alpha2_NodeProblemDetectorConfig_To_kops_NodeProblemDetectorConfig(in *NodeProblemDetectorConfig, out *kops.NodeProblemDetectorConfig, s conversion.Scope) error {
return autoConvert_v1alpha2_NodeProblemDetectorConfig_To_kops_NodeProblemDetectorConfig(in, out, s)
}
// autoConvert_kops_NodeProblemDetectorConfig_To_v1alpha2_NodeProblemDetectorConfig
// populates out with every field of in. The pointer fields are assigned as-is,
// so in and out share the pointed-to values afterwards. The scope argument is
// not consulted and the function always returns nil.
func autoConvert_kops_NodeProblemDetectorConfig_To_v1alpha2_NodeProblemDetectorConfig(in *kops.NodeProblemDetectorConfig, out *NodeProblemDetectorConfig, s conversion.Scope) error {
	*out = NodeProblemDetectorConfig{
		Enabled:       in.Enabled,
		Image:         in.Image,
		MemoryRequest: in.MemoryRequest,
		CPURequest:    in.CPURequest,
		MemoryLimit:   in.MemoryLimit,
		CPULimit:      in.CPULimit,
	}
	return nil
}
// Convert_kops_NodeProblemDetectorConfig_To_v1alpha2_NodeProblemDetectorConfig is an autogenerated conversion function.
// It forwards its arguments unchanged to
// autoConvert_kops_NodeProblemDetectorConfig_To_v1alpha2_NodeProblemDetectorConfig
// and returns that function's error.
func Convert_kops_NodeProblemDetectorConfig_To_v1alpha2_NodeProblemDetectorConfig(in *kops.NodeProblemDetectorConfig, out *NodeProblemDetectorConfig, s conversion.Scope) error {
return autoConvert_kops_NodeProblemDetectorConfig_To_v1alpha2_NodeProblemDetectorConfig(in, out, s)
}
func autoConvert_v1alpha2_NodeTerminationHandlerConfig_To_kops_NodeTerminationHandlerConfig(in *NodeTerminationHandlerConfig, out *kops.NodeTerminationHandlerConfig, s conversion.Scope) error {
out.Enabled = in.Enabled
out.EnableSpotInterruptionDraining = in.EnableSpotInterruptionDraining

View File

@ -1025,6 +1025,11 @@ func (in *ClusterSpec) DeepCopyInto(out *ClusterSpec) {
*out = new(NodeTerminationHandlerConfig)
(*in).DeepCopyInto(*out)
}
if in.NodeProblemDetector != nil {
in, out := &in.NodeProblemDetector, &out.NodeProblemDetector
*out = new(NodeProblemDetectorConfig)
(*in).DeepCopyInto(*out)
}
if in.MetricsServer != nil {
in, out := &in.MetricsServer, &out.MetricsServer
*out = new(MetricsServerConfig)
@ -3934,6 +3939,52 @@ func (in *NodeLocalDNSConfig) DeepCopy() *NodeLocalDNSConfig {
return out
}
// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
// The struct is first copied by value, then every non-nil pointer field is
// re-pointed at a freshly allocated copy so that out shares no memory with in.
func (in *NodeProblemDetectorConfig) DeepCopyInto(out *NodeProblemDetectorConfig) {
	*out = *in

	// Scalar pointers: allocate a new target, then copy the value across.
	if in.Enabled != nil {
		out.Enabled = new(bool)
		*out.Enabled = *in.Enabled
	}
	if in.Image != nil {
		out.Image = new(string)
		*out.Image = *in.Image
	}

	// Quantity pointers: clone through the quantity's own DeepCopy.
	if in.MemoryRequest != nil {
		memoryRequest := in.MemoryRequest.DeepCopy()
		out.MemoryRequest = &memoryRequest
	}
	if in.CPURequest != nil {
		cpuRequest := in.CPURequest.DeepCopy()
		out.CPURequest = &cpuRequest
	}
	if in.MemoryLimit != nil {
		memoryLimit := in.MemoryLimit.DeepCopy()
		out.MemoryLimit = &memoryLimit
	}
	if in.CPULimit != nil {
		cpuLimit := in.CPULimit.DeepCopy()
		out.CPULimit = &cpuLimit
	}
}
// DeepCopy returns a newly allocated NodeProblemDetectorConfig holding a deep
// copy of the receiver. A nil receiver yields nil.
func (in *NodeProblemDetectorConfig) DeepCopy() *NodeProblemDetectorConfig {
	if in == nil {
		return nil
	}
	var clone NodeProblemDetectorConfig
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeTerminationHandlerConfig) DeepCopyInto(out *NodeTerminationHandlerConfig) {
*out = *in

View File

@ -1109,6 +1109,11 @@ func (in *ClusterSpec) DeepCopyInto(out *ClusterSpec) {
*out = new(NodeTerminationHandlerConfig)
(*in).DeepCopyInto(*out)
}
if in.NodeProblemDetector != nil {
in, out := &in.NodeProblemDetector, &out.NodeProblemDetector
*out = new(NodeProblemDetectorConfig)
(*in).DeepCopyInto(*out)
}
if in.MetricsServer != nil {
in, out := &in.MetricsServer, &out.MetricsServer
*out = new(MetricsServerConfig)
@ -4116,6 +4121,52 @@ func (in *NodeLocalDNSConfig) DeepCopy() *NodeLocalDNSConfig {
return out
}
// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
// The struct is first copied by value, then every non-nil pointer field is
// re-pointed at a freshly allocated copy so that out shares no memory with in.
func (in *NodeProblemDetectorConfig) DeepCopyInto(out *NodeProblemDetectorConfig) {
	*out = *in

	// Scalar pointers: allocate a new target, then copy the value across.
	if in.Enabled != nil {
		out.Enabled = new(bool)
		*out.Enabled = *in.Enabled
	}
	if in.Image != nil {
		out.Image = new(string)
		*out.Image = *in.Image
	}

	// Quantity pointers: clone through the quantity's own DeepCopy.
	if in.MemoryRequest != nil {
		memoryRequest := in.MemoryRequest.DeepCopy()
		out.MemoryRequest = &memoryRequest
	}
	if in.CPURequest != nil {
		cpuRequest := in.CPURequest.DeepCopy()
		out.CPURequest = &cpuRequest
	}
	if in.MemoryLimit != nil {
		memoryLimit := in.MemoryLimit.DeepCopy()
		out.MemoryLimit = &memoryLimit
	}
	if in.CPULimit != nil {
		cpuLimit := in.CPULimit.DeepCopy()
		out.CPULimit = &cpuLimit
	}
}
// DeepCopy returns a newly allocated NodeProblemDetectorConfig holding a deep
// copy of the receiver. A nil receiver yields nil.
func (in *NodeProblemDetectorConfig) DeepCopy() *NodeProblemDetectorConfig {
	if in == nil {
		return nil
	}
	var clone NodeProblemDetectorConfig
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeTerminationHandlerConfig) DeepCopyInto(out *NodeTerminationHandlerConfig) {
*out = *in

View File

@ -22,6 +22,7 @@ go_library(
"kubeproxy.go",
"kubescheduler.go",
"networking.go",
"nodeproblemdetector.go",
"nodeterminationhandler.go",
"openstack.go",
],

View File

@ -0,0 +1,69 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package components
import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/upup/pkg/fi"
"k8s.io/kops/upup/pkg/fi/loader"
)
// NodeProblemDetectorOptionsBuilder adds options for the node problem detector to the model.
// It carries no state of its own beyond the embedded *OptionsContext.
type NodeProblemDetectorOptionsBuilder struct {
*OptionsContext
}
// Compile-time assertion that *NodeProblemDetectorOptionsBuilder satisfies loader.OptionsBuilder.
var _ loader.OptionsBuilder = &NodeProblemDetectorOptionsBuilder{}
// BuildOptions fills in defaults for every node problem detector setting that
// was left unset. o must be a *kops.ClusterSpec; any other type panics on the
// type assertion. A spec without a NodeProblemDetector section is left
// untouched, and values that are already set are never overwritten.
// It always returns nil.
func (b *NodeProblemDetectorOptionsBuilder) BuildOptions(o interface{}) error {
	spec := o.(*kops.ClusterSpec)
	cfg := spec.NodeProblemDetector
	if cfg == nil {
		return nil
	}

	// defaultQuantity stores the parsed value in *field only when the field
	// is still unset; the literal is not parsed otherwise.
	defaultQuantity := func(field **resource.Quantity, value string) {
		if *field != nil {
			return
		}
		parsed := resource.MustParse(value)
		*field = &parsed
	}

	// The addon stays off unless it is explicitly enabled.
	if cfg.Enabled == nil {
		cfg.Enabled = fi.Bool(false)
	}

	defaultQuantity(&cfg.CPURequest, "10m")
	defaultQuantity(&cfg.MemoryRequest, "80Mi")
	defaultQuantity(&cfg.CPULimit, "10m")
	defaultQuantity(&cfg.MemoryLimit, "80Mi")

	if cfg.Image == nil {
		cfg.Image = fi.String("k8s.gcr.io/node-problem-detector/node-problem-detector:v0.8.8")
	}

	return nil
}

View File

@ -50,6 +50,7 @@ go_library(
"cloudup/resources/addons/networking.cilium.io/k8s-1.16-v1.10.yaml.template",
"cloudup/resources/addons/networking.cilium.io/k8s-1.12-v1.9.yaml.template",
"cloudup/resources/addons/snapshot-controller.addons.k8s.io/k8s-1.20.yaml.template",
"cloudup/resources/addons/node-problem-detector.addons.k8s.io/k8s-1.17.yaml.template",
],
importpath = "k8s.io/kops/upup/models",
visibility = ["//visibility:public"],

View File

@ -0,0 +1,188 @@
{{ with .NodeProblemDetector }}
# Sourced from https://github.com/kubernetes/node-problem-detector/tree/v0.8.8
---
# Source: node-problem-detector/deployment/node-problem-detector.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-problem-detector
namespace: kube-system
labels:
app: node-problem-detector
spec:
selector:
matchLabels:
app: node-problem-detector
template:
metadata:
labels:
app: node-problem-detector
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/os
operator: In
values:
- linux
containers:
- name: node-problem-detector
command:
- /node-problem-detector
- --logtostderr
- --config.system-log-monitor=/config/kernel-monitor.json,/config/docker-monitor.json
image: {{ .Image }}
resources:
limits:
cpu: {{ .CPULimit }}
memory: {{ .MemoryLimit }}
requests:
cpu: {{ .CPURequest }}
memory: {{ .MemoryRequest }}
securityContext:
privileged: true
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- name: log
mountPath: /var/log
readOnly: true
- name: kmsg
mountPath: /dev/kmsg
readOnly: true
# Make sure node problem detector is in the same timezone
# as the host.
- name: localtime
mountPath: /etc/localtime
readOnly: true
- name: config
mountPath: /config
readOnly: true
volumes:
- name: log
# Config `log` to your system log directory
hostPath:
path: /var/log/
- name: kmsg
hostPath:
path: /dev/kmsg
- name: localtime
hostPath:
path: /etc/localtime
- name: config
configMap:
name: node-problem-detector-config
items:
- key: kernel-monitor.json
path: kernel-monitor.json
- key: docker-monitor.json
path: docker-monitor.json
priorityClassName: system-node-critical
tolerations:
- effect: NoSchedule
operator: Exists
- effect: NoExecute
operator: Exists
---
# Source: node-problem-detector/deployment/node-problem-detector-config.yaml
apiVersion: v1
data:
kernel-monitor.json: |
{
"plugin": "kmsg",
"logPath": "/dev/kmsg",
"lookback": "5m",
"bufferSize": 10,
"source": "kernel-monitor",
"conditions": [
{
"type": "KernelDeadlock",
"reason": "KernelHasNoDeadlock",
"message": "kernel has no deadlock"
},
{
"type": "ReadonlyFilesystem",
"reason": "FilesystemIsNotReadOnly",
"message": "Filesystem is not read-only"
}
],
"rules": [
{
"type": "temporary",
"reason": "OOMKilling",
"pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB.*"
},
{
"type": "temporary",
"reason": "TaskHung",
"pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
},
{
"type": "temporary",
"reason": "UnregisterNetDevice",
"pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
},
{
"type": "temporary",
"reason": "KernelOops",
"pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
},
{
"type": "temporary",
"reason": "KernelOops",
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
},
{
"type": "temporary",
"reason": "MemoryReadError",
"pattern": "CE memory read error .*"
},
{
"type": "permanent",
"condition": "KernelDeadlock",
"reason": "AUFSUmountHung",
"pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\."
},
{
"type": "permanent",
"condition": "KernelDeadlock",
"reason": "DockerHung",
"pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
},
{
"type": "permanent",
"condition": "ReadonlyFilesystem",
"reason": "FilesystemIsReadOnly",
"pattern": "Remounting filesystem read-only"
}
]
}
docker-monitor.json: |
{
"plugin": "journald",
"pluginConfig": {
"source": "dockerd"
},
"logPath": "/var/log/journal",
"lookback": "5m",
"bufferSize": 10,
"source": "docker-monitor",
"conditions": [],
"rules": [
{
"type": "temporary",
"reason": "CorruptDockerImage",
"pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*"
}
]
}
kind: ConfigMap
metadata:
name: node-problem-detector-config
namespace: kube-system
{{ end }}

View File

@ -594,6 +594,27 @@ func (b *BootstrapChannelBuilder) buildAddons(c *fi.ModelBuilderContext) (*chann
}
}
npd := b.Cluster.Spec.NodeProblemDetector
if npd != nil && fi.BoolValue(npd.Enabled) {
key := "node-problem-detector.addons.k8s.io"
version := "0.8.8"
{
location := key + "/k8s-1.17.yaml"
id := "k8s-1.17"
addons.Spec.Addons = append(addons.Spec.Addons, &channelsapi.AddonSpec{
Name: fi.String(key),
Version: fi.String(version),
Selector: map[string]string{"k8s-addon": key},
Manifest: fi.String(location),
Id: id,
})
}
}
if b.Cluster.Spec.AWSLoadBalancerController != nil && fi.BoolValue(b.Cluster.Spec.AWSLoadBalancerController.Enabled) {
key := "aws-load-balancer-controller.addons.k8s.io"

View File

@ -280,6 +280,7 @@ func (c *populateClusterSpec) run(clientset simple.Clientset) error {
codeModels = append(codeModels, &components.DiscoveryOptionsBuilder{OptionsContext: optionsContext})
codeModels = append(codeModels, &components.ClusterAutoscalerOptionsBuilder{OptionsContext: optionsContext})
codeModels = append(codeModels, &components.NodeTerminationHandlerOptionsBuilder{OptionsContext: optionsContext})
codeModels = append(codeModels, &components.NodeProblemDetectorOptionsBuilder{OptionsContext: optionsContext})
codeModels = append(codeModels, &components.AWSEBSCSIDriverOptionsBuilder{OptionsContext: optionsContext})
codeModels = append(codeModels, &components.AWSCloudControllerManagerOptionsBuilder{OptionsContext: optionsContext})
}