diff --git a/pkg/apis/kops/validation/validation.go b/pkg/apis/kops/validation/validation.go
index c1b1613345..01f77c7dd9 100644
--- a/pkg/apis/kops/validation/validation.go
+++ b/pkg/apis/kops/validation/validation.go
@@ -1244,8 +1244,8 @@ func validateNetworkingCilium(cluster *kops.Cluster, v *kops.CiliumNetworkingSpe
 		allErrs = append(allErrs, field.Invalid(versionFld, v.Version, "Could not parse as semantic version"))
 	}
 
-	if version.Minor != 13 {
-		allErrs = append(allErrs, field.Invalid(versionFld, v.Version, "Only version 1.13 is supported"))
+	if version.Minor != 14 {
+		allErrs = append(allErrs, field.Invalid(versionFld, v.Version, "Only version 1.14 is supported"))
 	}
 
 	if v.Hubble != nil && fi.ValueOf(v.Hubble.Enabled) {
@@ -1293,15 +1293,6 @@ func validateNetworkingCilium(cluster *kops.Cluster, v *kops.CiliumNetworkingSpe
 	}
 
 	allErrs = append(allErrs, IsValidValue(fldPath.Child("encryptionType"), &v.EncryptionType, []kops.CiliumEncryptionType{kops.CiliumEncryptionTypeIPSec, kops.CiliumEncryptionTypeWireguard})...)
-
-		if v.EncryptionType == "wireguard" {
-			// Cilium with Wireguard integration follow-up --> https://github.com/cilium/cilium/issues/15462.
-			// The following rule of validation should be deleted as this combination
-			// will be supported on future releases of Cilium (>= v1.11.0).
-			if fi.ValueOf(v.EnableL7Proxy) {
-				allErrs = append(allErrs, field.Forbidden(fldPath.Child("enableL7Proxy"), "L7 proxy cannot be enabled if wireguard is enabled."))
-			}
-		}
 	}
 
 	if fi.ValueOf(v.EnableL7Proxy) && v.InstallIptablesRules != nil && !*v.InstallIptablesRules {
diff --git a/pkg/apis/kops/validation/validation_test.go b/pkg/apis/kops/validation/validation_test.go
index 279c86f7f9..537994f9e8 100644
--- a/pkg/apis/kops/validation/validation_test.go
+++ b/pkg/apis/kops/validation/validation_test.go
@@ -963,7 +963,7 @@ func Test_Validate_Cilium(t *testing.T) {
 		},
 		{
 			Cilium: kops.CiliumNetworkingSpec{
-				Version: "v1.13.5",
+				Version: "v1.14.2",
 				Ingress: &kops.CiliumIngressSpec{
 					Enabled:                 fi.PtrTo(true),
 					DefaultLoadBalancerMode: "bad-value",
@@ -973,7 +973,7 @@ func Test_Validate_Cilium(t *testing.T) {
 		},
 		{
 			Cilium: kops.CiliumNetworkingSpec{
-				Version: "v1.13.5",
+				Version: "v1.14.2",
 				Ingress: &kops.CiliumIngressSpec{
 					Enabled:                 fi.PtrTo(true),
 					DefaultLoadBalancerMode: "dedicated",
@@ -982,7 +982,7 @@ func Test_Validate_Cilium(t *testing.T) {
 		},
 		{
 			Cilium: kops.CiliumNetworkingSpec{
-				Version: "v1.13.5",
+				Version: "v1.14.2",
 				Hubble: &kops.HubbleSpec{
 					Enabled: fi.PtrTo(true),
 				},
diff --git a/pkg/model/components/cilium.go b/pkg/model/components/cilium.go
index f7b8274e56..851e0b719b 100644
--- a/pkg/model/components/cilium.go
+++ b/pkg/model/components/cilium.go
@@ -40,7 +40,7 @@ func (b *CiliumOptionsBuilder) BuildOptions(o interface{}) error {
 	}
 
 	if c.Version == "" {
-		c.Version = "v1.13.5"
+		c.Version = "v1.14.2"
 	}
 
 	if c.EnableEndpointHealthChecking == nil {
diff --git a/upup/models/cloudup/resources/addons/networking.cilium.io/k8s-1.16-v1.13.yaml.template b/upup/models/cloudup/resources/addons/networking.cilium.io/k8s-1.16-v1.14.yaml.template
similarity index 64%
rename from upup/models/cloudup/resources/addons/networking.cilium.io/k8s-1.16-v1.13.yaml.template
rename to upup/models/cloudup/resources/addons/networking.cilium.io/k8s-1.16-v1.14.yaml.template
index a6df0ca642..0f1a163613 100644
--- a/upup/models/cloudup/resources/addons/networking.cilium.io/k8s-1.16-v1.13.yaml.template
+++ b/upup/models/cloudup/resources/addons/networking.cilium.io/k8s-1.16-v1.14.yaml.template
@@ -1,6 +1,7 @@
 {{ with .Networking.Cilium }}
 {{ $semver := (trimPrefix "v" .Version) }}
 {{ $healthPort := (ternary 9879 9876 (semverCompare ">=1.11.6" $semver)) }}
+{{ $operatorHealthPort := 9234 }}
 {{- if CiliumSecret }}
 apiVersion: v1
 kind: Secret
@@ -190,7 +191,13 @@ data:
   # - disabled
   # - vxlan (default)
   # - geneve
-  tunnel: "{{ .Tunnel }}"
+  {{ if eq .Tunnel "disabled" }}
+  # Native routing mode replaces the now-deprecated tunnel=disabled option.
+  routing-mode: "native"
+  {{ else }}
+  routing-mode: "tunnel"
+  tunnel-protocol: "{{ .Tunnel }}"
+  {{ end }}
 
   # Name of the cluster. Only relevant when building a mesh of clusters.
   cluster-name: "{{ .ClusterName }}"
@@ -200,6 +207,10 @@ data:
   cluster-id: "{{ .ClusterID }}"
 {{ end }}
 
+  remove-cilium-node-taints: "true"
+  set-cilium-node-taints: "true"
+  set-cilium-is-up-condition: "true"
+
   # DNS response code for rejecting DNS requests,
   # available options are "nameError" and "refused"
   tofqdns-dns-reject-response-code: "{{ .ToFQDNsDNSRejectResponseCode }}"
@@ -246,7 +257,7 @@ data:
   enable-host-reachable-services: "{{ .EnableHostReachableServices }}"
 {{ end }}
   enable-node-port: "{{ .EnableNodePort }}"
-  kube-proxy-replacement: "{{- if .EnableNodePort -}}strict{{- else -}}partial{{- end -}}"
+  kube-proxy-replacement: "{{- if .EnableNodePort -}}true{{- else -}}false{{- end -}}"
 
 {{ with .IPAM }}
   ipam: {{ . }}
@@ -305,6 +316,11 @@ data:
   ingress-lb-annotation-prefixes: "{{ .Ingress.LoadBalancerAnnotationPrefixes }}"
 {{ end }}
 {{ end }}
+
+  # Tell the agent to generate and write a CNI configuration file
+  write-cni-conf-when-ready: /host/etc/cni/net.d/05-cilium.conflist
+  cni-exclusive: "true"
+  cni-log-file: "/var/run/cilium/cilium-cni.log"
 
 {{ if WithDefaultBool .Hubble.Enabled false }}
   # Enable Hubble gRPC service.
@@ -336,21 +352,45 @@ metadata:
   namespace: kube-system
 data:
   config.yaml: |
-    peer-service: unix:///var/run/cilium/hubble.sock
+    cluster-name: "{{ .ClusterName }}"
+    peer-service: "hubble-peer.kube-system.svc.cluster.local:443"
     listen-address: :4245
+    gops: true
+    gops-port: "9893"
     disable-server-tls: true
     tls-client-cert-file: /var/lib/hubble-relay/tls/client.crt
     tls-client-key-file: /var/lib/hubble-relay/tls/client.key
     tls-hubble-server-ca-files: /var/lib/hubble-relay/tls/hubble-server-ca.crt
-
+---
+# Source: cilium/templates/hubble/peer-service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: hubble-peer
+  namespace: kube-system
+  labels:
+    k8s-app: cilium
+    app.kubernetes.io/part-of: cilium
+    app.kubernetes.io/name: hubble-peer
+spec:
+  selector:
+    k8s-app: cilium
+  ports:
+  - name: peer-service
+    port: 443
+    protocol: TCP
+    targetPort: 4244
+  internalTrafficPolicy: Local
 {{ end }}
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   name: cilium
+  labels:
+    app.kubernetes.io/part-of: cilium
 rules:
 - apiGroups:
   - networking.k8s.io
@@ -387,6 +427,9 @@ rules:
   verbs:
   - list
   - watch
+  # This is used when validating policies in preflight. It will need to stay
+  # until we figure out how to avoid the "get" inside the preflight check,
+  # and should ideally be removed then.
   - get
 - apiGroups:
   - cilium.io
@@ -396,7 +439,6 @@ rules:
   - ciliumclusterwideenvoyconfigs
   - ciliumclusterwidenetworkpolicies
   - ciliumegressgatewaypolicies
-  - ciliumegressnatpolicies
   - ciliumendpoints
   - ciliumendpointslices
   - ciliumenvoyconfigs
@@ -404,6 +446,10 @@ rules:
   - ciliumlocalredirectpolicies
   - ciliumnetworkpolicies
   - ciliumnodes
+  - ciliumnodeconfigs
+  - ciliumcidrgroups
+  - ciliuml2announcementpolicies
+  - ciliumpodippools
   verbs:
   - list
   - watch
@@ -444,6 +490,7 @@ rules:
   - ciliumclusterwidenetworkpolicies/status
   - ciliumendpoints/status
   - ciliumendpoints
+  - ciliuml2announcementpolicies/status
   verbs:
   - patch
 ---
@@ -451,6 +498,8 @@ apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   name: cilium-operator
+  labels:
+    app.kubernetes.io/part-of: cilium
 rules:
 - apiGroups:
   - ""
@@ -460,6 +509,25 @@ rules:
   - get
   - list
   - watch
+  # to automatically delete [core|kube]dns pods so that they start being
+  # managed by Cilium
+  - delete
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  # To remove node taints
+  - nodes
+  # To set NetworkUnavailable false on startup
+  - nodes/status
+  verbs:
+  - patch
 - apiGroups:
   - discovery.k8s.io
   resources:
@@ -471,8 +539,18 @@ rules:
 - apiGroups:
   - ""
   resources:
-  - nodes
+  # to perform LB IP allocation for BGP
+  - services/status
   verbs:
+  - update
+  - patch
+- apiGroups:
+  - ""
+  resources:
+  # to check apiserver connectivity
+  - namespaces
+  verbs:
+  - get
   - list
   - watch
 - apiGroups:
@@ -481,8 +559,6 @@ rules:
   # to perform the translation of a CNP that contains `ToGroup` to its endpoints
   - services
   - endpoints
-  # to check apiserver connectivity
-  - namespaces
   verbs:
   - get
   - list
@@ -580,7 +656,6 @@ rules:
   - ciliumclusterwideenvoyconfigs.cilium.io
   - ciliumclusterwidenetworkpolicies.cilium.io
   - ciliumegressgatewaypolicies.cilium.io
-  - ciliumegressnatpolicies.cilium.io
   - ciliumendpoints.cilium.io
   - ciliumendpointslices.cilium.io
   - ciliumenvoyconfigs.cilium.io
@@ -589,20 +664,37 @@ rules:
   - ciliumlocalredirectpolicies.cilium.io
   - ciliumnetworkpolicies.cilium.io
   - ciliumnodes.cilium.io
+  - ciliumnodeconfigs.cilium.io
+  - ciliumcidrgroups.cilium.io
+  - ciliuml2announcementpolicies.cilium.io
+  - ciliumpodippools.cilium.io
 - apiGroups:
   - cilium.io
   resources:
   - ciliumloadbalancerippools
+  - ciliumpodippools
   verbs:
   - get
   - list
   - watch
+- apiGroups:
+  - cilium.io
+  resources:
+  - ciliumpodippools
+  verbs:
+  - create
 - apiGroups:
   - cilium.io
   resources:
   - ciliumloadbalancerippools/status
   verbs:
   - patch
+# For cilium-operator running in HA mode.
+#
+# Cilium operator running in HA mode requires the use of ResourceLock for Leader Election
+# between multiple running instances.
+# The preferred way of doing this is to use LeasesResourceLock, as edits to Leases are less
+# common and fewer objects in the cluster watch "all Leases".
 - apiGroups:
   - coordination.k8s.io
   resources:
@@ -633,27 +725,65 @@ apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
   name: cilium
+  labels:
+    app.kubernetes.io/part-of: cilium
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
   name: cilium
 subjects:
 - kind: ServiceAccount
-  name: cilium
+  name: "cilium"
   namespace: kube-system
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
   name: cilium-operator
+  labels:
+    app.kubernetes.io/part-of: cilium
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
   name: cilium-operator
 subjects:
 - kind: ServiceAccount
-  name: cilium-operator
+  name: "cilium-operator"
   namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: cilium-config-agent
+  namespace: kube-system
+  labels:
+    app.kubernetes.io/part-of: cilium
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - get
+  - list
+  - watch
+---
+# Source: cilium/templates/cilium-agent/rolebinding.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: cilium-config-agent
+  namespace: kube-system
+  labels:
+    app.kubernetes.io/part-of: cilium
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: cilium-config-agent
+subjects:
+  - kind: ServiceAccount
+    name: "cilium"
+    namespace: kube-system
 {{ if WithDefaultBool .Ingress.Enabled false }}
 ---
 # Source: cilium/templates/cilium-agent/role.yaml
@@ -674,7 +804,6 @@ rules:
   - list
   - watch
 ---
-# Source: cilium/templates/cilium-agent/rolebinding.yaml
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
 metadata:
@@ -779,6 +908,7 @@ metadata:
   namespace: kube-system
   labels:
     k8s-app: hubble-relay
+    app.kubernetes.io/part-of: cilium
 spec:
   type: ClusterIP
   selector:
@@ -792,21 +922,32 @@ spec:
 apiVersion: apps/v1
 kind: DaemonSet
 metadata:
+  name: cilium
+  namespace: kube-system
   labels:
     k8s-app: cilium
     kubernetes.io/cluster-service: "true"
-  name: cilium
-  namespace: kube-system
+    app.kubernetes.io/name: cilium-agent
+    app.kubernetes.io/part-of: cilium
 spec:
   selector:
     matchLabels:
       k8s-app: cilium
       kubernetes.io/cluster-service: "true"
   updateStrategy:
-    type: OnDelete
+    rollingUpdate:
+      maxUnavailable: 2
+    type: RollingUpdate
   template:
     metadata:
       annotations:
+        # Set the app's AppArmor profile to "unconfined". The value of this
+        # annotation can be modified as long as users know which profiles they
+        # have available in AppArmor.
+        container.apparmor.security.beta.kubernetes.io/cilium-agent: "unconfined"
+        container.apparmor.security.beta.kubernetes.io/clean-cilium-state: "unconfined"
+        container.apparmor.security.beta.kubernetes.io/mount-cgroup: "unconfined"
+        container.apparmor.security.beta.kubernetes.io/apply-sysctl-overwrites: "unconfined"
 {{ if .EnablePrometheusMetrics }}
         # Annotation required for prometheus auto-discovery scraping
         # https://docs.cilium.io/en/v1.9/operations/metrics/#installation
@@ -819,21 +960,17 @@ spec:
       labels:
         k8s-app: cilium
         kubernetes.io/cluster-service: "true"
+        app.kubernetes.io/name: cilium-agent
+        app.kubernetes.io/part-of: cilium
     spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - key: kubernetes.io/os
-                operator: In
-                values:
-                - linux
       containers:
-      - args:
-        - --config-dir=/tmp/cilium/config-map
+      - name: cilium-agent
+        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
+        imagePullPolicy: IfNotPresent
         command:
         - cilium-agent
+        args:
+        - --config-dir=/tmp/cilium/config-map
         startupProbe:
           httpGet:
             host: '{{- if IsIPv6Only -}}::1{{- else -}}127.0.0.1{{- end -}}'
@@ -845,7 +982,7 @@ spec:
             value: "true"
           failureThreshold: 105
           periodSeconds: 2
-          successThreshold:
+          successThreshold: 1
         livenessProbe:
           httpGet:
             host: '{{- if IsIPv6Only -}}::1{{- else -}}127.0.0.1{{- end -}}'
@@ -855,14 +992,10 @@ spec:
             httpHeaders:
             - name: "brief"
               value: "true"
-          failureThreshold: 10
           periodSeconds: 30
           successThreshold: 1
+          failureThreshold: 10
           timeoutSeconds: 5
-        resources:
-          requests:
-            cpu: {{ or .CPURequest "25m" }}
-            memory: {{ or .MemoryRequest "128Mi" }}
         readinessProbe:
           httpGet:
             host: '{{- if IsIPv6Only -}}::1{{- else -}}127.0.0.1{{- end -}}'
@@ -872,10 +1005,9 @@ spec:
             httpHeaders:
            - name: "brief"
              value: "true"
-          failureThreshold: 3
-          initialDelaySeconds: 5
           periodSeconds: 30
           successThreshold: 1
+          failureThreshold: 3
           timeoutSeconds: 5
         env:
         - name: K8S_NODE_NAME
           valueFrom:
@@ -910,21 +1042,47 @@ spec:
         - name: CILIUM_ENABLE_POLICY
           value: {{ . }}
 {{ end }}
-        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
-        imagePullPolicy: IfNotPresent
         lifecycle:
+        {{ if eq .IPAM "eni" }}
           postStart:
            exec:
              command:
-              - /cni-install.sh
-              - --cni-exclusive=true
+              - "bash"
+              - "-c"
+              - |
+                set -o errexit
+                set -o pipefail
+                set -o nounset
+
+                # When running in AWS ENI mode, it's likely that 'aws-node' has
+                # had a chance to install SNAT iptables rules. These can result
+                # in dropped traffic, so we should attempt to remove them.
+                # We do it using a 'postStart' hook since this may need to run
+                # for nodes which might have already been init'ed but may still
+                # have dangling rules. This is safe because there are no
+                # dependencies on anything that is part of the startup script
+                # itself, and can be safely run multiple times per node (e.g. in
+                # case of a restart).
+                if [[ "$(iptables-save | grep -c AWS-SNAT-CHAIN)" != "0" ]];
+                then
+                    echo 'Deleting iptables rules created by the AWS CNI VPC plugin'
+                    iptables-save | grep -v AWS-SNAT-CHAIN | iptables-restore
+                fi
+                echo 'Done!'
+        {{- end }}
         preStop:
           exec:
             command:
             - /cni-uninstall.sh
-        name: cilium-agent
-        {{ if or .EnablePrometheusMetrics .Hubble.Metrics }}
+        resources:
+          requests:
+            cpu: {{ or .CPURequest "25m" }}
+            memory: {{ or .MemoryRequest "128Mi" }}
         ports:
+        - name: peer-service
+          containerPort: 4244
+          hostPort: 4244
+          protocol: TCP
 {{ if .EnablePrometheusMetrics }}
         - containerPort: {{ .AgentPrometheusPort }}
           name: prometheus
@@ -936,90 +1094,245 @@ spec:
           name: hubble-metrics
           protocol: TCP
 {{- end }}
-        {{ end }}
         terminationMessagePolicy: FallbackToLogsOnError
         securityContext:
+          {{- if ContainerdSELinuxEnabled }}
+          seLinuxOptions:
+            type: spc_t
+            level: s0
+          {{- end }}
+          # Writing to /host/proc/sys/net does not work without a privileged container
           privileged: true
+        terminationMessagePolicy: FallbackToLogsOnError
         volumeMounts:
-        - mountPath: /sys/fs/bpf
-          name: bpf-maps
-          {{- if semverCompare ">=1.10.4 || ~1.9.10" $semver }}
-          mountPropagation: Bidirectional
-          {{- end }}
-        - mountPath: /var/run/cilium
-          name: cilium-run
+        # Unprivileged containers need to mount /proc/sys/net from the host
+        # to have write access
+        - mountPath: /host/proc/sys/net
+          name: host-proc-sys-net
+        # Unprivileged containers need to mount /proc/sys/kernel from the host
+        # to have write access
+        - mountPath: /host/proc/sys/kernel
+          name: host-proc-sys-kernel
+        - name: bpf-maps
+          mountPath: /sys/fs/bpf
+          # Unprivileged containers can't set mount propagation to bidirectional
+          # in this case we will mount the bpf fs from an init container that
+          # is privileged and set the mount propagation from host to container
+          # in Cilium.
+          mountPropagation: HostToContainer
+        - name: cilium-cgroup
+          mountPath: /run/cilium/cgroupv2
+        - name: cilium-run
+          mountPath: /var/run/cilium
 {{- if not (semverCompare "~1.11.15 || ~1.12.8 || >=1.13.1" $semver) }}
         - mountPath: /host/opt/cni/bin
           name: cni-path
 {{- end }}
-        - mountPath: /host/etc/cni/net.d
-          name: etc-cni-netd
+        - name: etc-cni-netd
+          mountPath: /host/etc/cni/net.d
 {{ if .EtcdManaged }}
-        - mountPath: /var/lib/etcd-config
-          name: etcd-config-path
+        - name: etcd-config-path
+          mountPath: /var/lib/etcd-config
           readOnly: true
-        - mountPath: /var/lib/etcd-secrets
-          name: etcd-secrets
+        - name: etcd-secrets
+          mountPath: /var/lib/etcd-secrets
           readOnly: true
 {{ end }}
-        - mountPath: /var/lib/cilium/clustermesh
-          name: clustermesh-secrets
+        - name: clustermesh-secrets
+          mountPath: /var/lib/cilium/clustermesh
           readOnly: true
         - mountPath: /tmp/cilium/config-map
           name: cilium-config-path
           readOnly: true
           # Needed to be able to load kernel modules
-        - mountPath: /lib/modules
-          name: lib-modules
+        - name: lib-modules
+          mountPath: /lib/modules
           readOnly: true
-        - mountPath: /run/xtables.lock
-          name: xtables-lock
-{{ if WithDefaultBool .Hubble.Enabled false }}
-        - mountPath: /var/lib/cilium/tls/hubble
-          name: hubble-tls
-          readOnly: true
-{{ end }}
+        - name: xtables-lock
+          mountPath: /run/xtables.lock
 {{ if CiliumSecret }}
-        - mountPath: /etc/ipsec
-          name: cilium-ipsec-secrets
+        - name: cilium-ipsec-secrets
+          mountPath: /etc/ipsec
 {{ end }}
-      hostNetwork: true
-      initContainers:
-      {{- if semverCompare "~1.11.15 || ~1.12.8 || >=1.13.1" $semver }}
-      - command:
-        - /install-plugin.sh
+{{ if WithDefaultBool .Hubble.Enabled false }}
+        - name: hubble-tls
+          mountPath: /var/lib/cilium/tls/hubble
+          readOnly: true
+{{ end }}
+        - name: tmp
+          mountPath: /tmp
+{{ if .Debug }}
+      - name: cilium-monitor
         image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
         imagePullPolicy: IfNotPresent
-        name: install-cni-binaries
+        command:
+        - /bin/bash
+        - -c
+        - --
+        args:
+        - |-
+          for i in {1..5}; do \
+            [ -S /var/run/cilium/monitor1_2.sock ] && break || sleep 10;\
+          done; \
+          cilium monitor --type=agent
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        - name: cilium-run
+          mountPath: /var/run/cilium
+{{ end }}
+      initContainers:
+      - name: config
+        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
+        imagePullPolicy: IfNotPresent
+        command:
+        - cilium
+        - build-config
+        env:
+        - name: K8S_NODE_NAME
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: spec.nodeName
+        - name: CILIUM_K8S_NAMESPACE
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: metadata.namespace
+        - name: KUBERNETES_SERVICE_HOST
+          value: "{{ APIInternalName }}"
+        - name: KUBERNETES_SERVICE_PORT
+          value: "443"
+        volumeMounts:
+        - name: tmp
+          mountPath: /tmp
+        terminationMessagePolicy: FallbackToLogsOnError
+      # Required to mount cgroup2 filesystem on the underlying Kubernetes node.
+      # We use nsenter command with host's cgroup and mount namespaces enabled.
+      - name: mount-cgroup
+        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
+        imagePullPolicy: IfNotPresent
+        env:
+        - name: CGROUP_ROOT
+          value: /run/cilium/cgroupv2
+        - name: BIN_PATH
+          value: /opt/cni/bin
         resources:
           requests:
             cpu: 100m
-            memory: 10Mi
-        securityContext:
-          capabilities:
-            drop:
-            - ALL
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: FallbackToLogsOnError
+            memory: 128Mi
+        command:
+        - sh
+        - -ec
+        # The statically linked Go program binary is invoked to avoid any
+        # dependency on utilities like sh and mount that can be missing on certain
+        # distros installed on the underlying host. Copy the binary to the
+        # same directory where we install cilium cni plugin so that exec permissions
+        # are available.
+        - |
+          cp /usr/bin/cilium-mount /hostbin/cilium-mount;
+          nsenter --cgroup=/hostproc/1/ns/cgroup --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-mount" $CGROUP_ROOT;
+          rm /hostbin/cilium-mount
         volumeMounts:
-        - mountPath: /host/opt/cni/bin
-          name: cni-path
-      {{- end }}
-      - command:
+        - name: hostproc
+          mountPath: /hostproc
+        - name: cni-path
+          mountPath: /hostbin
+        terminationMessagePolicy: FallbackToLogsOnError
+        securityContext:
+          {{- if ContainerdSELinuxEnabled }}
+          seLinuxOptions:
+            level: s0
+            type: spc_t
+          {{- end }}
+          capabilities:
+            add:
+            # Only used for 'mount' cgroup
+            - SYS_ADMIN
+            # Used for nsenter
+            - SYS_CHROOT
+            - SYS_PTRACE
+            drop:
+            - ALL
+      - name: apply-sysctl-overwrites
+        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
+        imagePullPolicy: IfNotPresent
+        env:
+        - name: BIN_PATH
+          value: /opt/cni/bin
+        command:
+        - sh
+        - -ec
+        # The statically linked Go program binary is invoked to avoid any
+        # dependency on utilities like sh that can be missing on certain
+        # distros installed on the underlying host. Copy the binary to the
+        # same directory where we install cilium cni plugin so that exec permissions
+        # are available.
+        - |
+          cp /usr/bin/cilium-sysctlfix /hostbin/cilium-sysctlfix;
+          nsenter --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-sysctlfix";
+          rm /hostbin/cilium-sysctlfix
+        volumeMounts:
+        - name: hostproc
+          mountPath: /hostproc
+        - name: cni-path
+          mountPath: /hostbin
+        terminationMessagePolicy: FallbackToLogsOnError
+        securityContext:
+          {{- if ContainerdSELinuxEnabled }}
+          seLinuxOptions:
+            level: s0
+            type: spc_t
+          {{- end }}
+          capabilities:
+            add:
+            # Required in order to access host's /etc/sysctl.d dir
+            - SYS_ADMIN
+            # Used for nsenter
+            - SYS_CHROOT
+            - SYS_PTRACE
+            drop:
+            - ALL
+      # Mount the bpf fs if it is not mounted. We will perform this task
+      # from a privileged container because the mount propagation bidirectional
+      # only works from privileged containers.
+      - name: mount-bpf-fs
+        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
+        imagePullPolicy: IfNotPresent
+        args:
+        - 'mount | grep "/sys/fs/bpf type bpf" || mount -t bpf bpf /sys/fs/bpf'
+        command:
+        - /bin/bash
+        - -c
+        - --
+        terminationMessagePolicy: FallbackToLogsOnError
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: bpf-maps
+          mountPath: /sys/fs/bpf
+          mountPropagation: Bidirectional
+      - name: clean-cilium-state
+        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
+        imagePullPolicy: IfNotPresent
+        command:
         - /init-container.sh
         env:
         - name: CILIUM_ALL_STATE
           valueFrom:
             configMapKeyRef:
-              key: clean-cilium-state
               name: cilium-config
+              key: clean-cilium-state
               optional: true
         - name: CILIUM_BPF_STATE
           valueFrom:
             configMapKeyRef:
-              key: clean-cilium-bpf-state
               name: cilium-config
+              key: clean-cilium-bpf-state
               optional: true
+        - name: KUBERNETES_SERVICE_HOST
+          value: "{{ APIInternalName }}"
+        - name: KUBERNETES_SERVICE_PORT
+          value: "443"
 {{- if not (semverCompare ">=1.10.4 || ~1.9.10" $semver) }}
         - name: CILIUM_WAIT_BPF_MOUNT
           valueFrom:
@@ -1028,88 +1341,142 @@ spec:
             name: cilium-config
             optional: true
 {{- end }}
-        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
-        imagePullPolicy: IfNotPresent
-        name: clean-cilium-state
         terminationMessagePolicy: FallbackToLogsOnError
         securityContext:
-          privileged: true
+          {{- if ContainerdSELinuxEnabled }}
+          seLinuxOptions:
+            level: s0
+            type: spc_t
+          {{- end }}
+          capabilities:
+            add:
+            # Most of the capabilities here are the same ones used in the
+            # cilium-agent's container because this container can be used to
+            # uninstall all Cilium resources, and therefore it is likely that it
+            # will need the same capabilities.
+            # Used since cilium modifies routing tables, etc...
+            - NET_ADMIN
+            # Used in iptables. Consider removing once we are iptables-free
+            - SYS_MODULE
+            # We need it for now but might not need it for >= 5.11, especially
+            # for the 'SYS_RESOURCE'.
+            # In >= 5.8 there are already BPF and PERFMON capabilities
+            - SYS_ADMIN
+            # Could be an alternative for the SYS_ADMIN for the RLIMIT_NPROC
+            - SYS_RESOURCE
+            # Both PERFMON and BPF require kernel 5.8, container runtime
+            # cri-o >= v1.22.0 or containerd >= v1.5.0.
+            # If available, SYS_ADMIN can be removed.
+            #- PERFMON
+            #- BPF
+            drop:
+            - ALL
         volumeMounts:
-        - mountPath: /sys/fs/bpf
-          name: bpf-maps
+        - name: bpf-maps
+          mountPath: /sys/fs/bpf
 {{- if not (semverCompare ">=1.10.4 || ~1.9.10" $semver) }}
           mountPropagation: HostToContainer
 {{- end }}
-        # Required to mount cgroup filesystem from the host to cilium agent pod
-        - mountPath: /run/cilium/cgroupv2
-          name: cilium-cgroup
+        # Required to mount cgroup filesystem from the host to cilium agent pod
+        - name: cilium-cgroup
+          mountPath: /run/cilium/cgroupv2
           mountPropagation: HostToContainer
-        - mountPath: /var/run/cilium
-          name: cilium-run
+        - name: cilium-run
+          mountPath: /var/run/cilium
+      {{- if semverCompare "~1.11.15 || ~1.12.8 || >=1.13.1" $semver }}
+      # Install the CNI binaries in an InitContainer so we don't have a writable host mount in the agent
+      - name: install-cni-binaries
+        image: "{{ or .Registry "quay.io" }}/cilium/cilium:{{ .Version }}"
+        imagePullPolicy: IfNotPresent
+        command:
+        - "/install-plugin.sh"
         resources:
           requests:
             cpu: 100m
-            memory: 100Mi
-          limits:
-            memory: 100Mi
+            memory: 10Mi
+        securityContext:
+          privileged: true
+          {{- if ContainerdSELinuxEnabled }}
+          seLinuxOptions:
+            level: s0
+            type: spc_t
+          {{- end }}
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        - name: cni-path
+          mountPath: /host/opt/cni/bin
+      {{- end }}
       restartPolicy: Always
       priorityClassName: system-node-critical
-{{ if ContainerdSELinuxEnabled }}
-      securityContext:
-        seLinuxOptions:
-          type: spc_t
-          level: s0
-{{ end }}
-      serviceAccount: cilium
-      serviceAccountName: cilium
+      serviceAccount: "cilium"
+      serviceAccountName: "cilium"
+      automountServiceAccountToken: true
       terminationGracePeriodSeconds: 1
+      hostNetwork: true
+      affinity:
+        podAntiAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+          - labelSelector:
+              matchLabels:
+                k8s-app: cilium
+            topologyKey: kubernetes.io/hostname
+      nodeSelector:
+        kubernetes.io/os: linux
       tolerations:
-      - operator: Exists
+        - operator: Exists
       volumes:
+      # For sharing configuration between the "config" initContainer and the agent
+      - name: tmp
+        emptyDir: {}
       # To keep state between restarts / upgrades
-      - hostPath:
+      - name: cilium-run
+        hostPath:
           path: /var/run/cilium
           type: DirectoryOrCreate
-        name: cilium-run
       # To keep state between restarts / upgrades for bpf maps
-      - hostPath:
+      - name: bpf-maps
+        hostPath:
           path: /sys/fs/bpf
           type: DirectoryOrCreate
-        name: bpf-maps
+      # To mount cgroup2 filesystem on the host
+      - name: hostproc
+        hostPath:
+          path: /proc
+          type: Directory
+      # To keep state between restarts / upgrades for cgroup2 filesystem
+      - name: cilium-cgroup
+        hostPath:
+          path: /run/cilium/cgroupv2
+          type: DirectoryOrCreate
       # To install cilium cni plugin in the host
-      - hostPath:
+      - name: cni-path
+        hostPath:
           path: /opt/cni/bin
           type: DirectoryOrCreate
-        name: cni-path
-      # To keep state between restarts / upgrades for cgroup2 filesystem
-      - hostPath:
-          path: /run/cilium/cgroupv2
-          type: Directory
-        name: cilium-cgroup
       # To install cilium cni configuration in the host
-      - hostPath:
+      - name: etc-cni-netd
+        hostPath:
           path: /etc/cni/net.d
           type: DirectoryOrCreate
-        name: etc-cni-netd
       # To be able to load kernel modules
-      - hostPath:
+      - name: lib-modules
+        hostPath:
           path: /lib/modules
-        name: lib-modules
       # To access iptables concurrently with other processes (e.g. kube-proxy)
-      - hostPath:
+      - name: xtables-lock
+        hostPath:
           path: /run/xtables.lock
           type: FileOrCreate
-        name: xtables-lock
-      # To read the clustermesh configuration
 {{- if .EtcdManaged }}
       # To read the etcd config stored in config maps
-      - configMap:
-          defaultMode: 420
+      - name: etcd-config-path
+        configMap:
+          name: cilium-config
+          # note: the leading zero means this number is in octal representation: do not remove it
+          defaultMode: 0400
          items:
           - key: etcd-config
             path: etcd.config
-          name: cilium-config
-        name: etcd-config-path
       # To read the Cilium etcd secrets in case the user might want to use TLS
       - name: etcd-secrets
         hostPath:
@@ -1117,24 +1484,52 @@ spec:
           type: Directory
 {{- end }}
       - name: clustermesh-secrets
-        secret:
-          defaultMode: 420
-          optional: true
-          secretName: cilium-clustermesh
-      # To read the configuration from the config map
+        projected:
+          # note: the leading zero means this number is in octal representation: do not remove it
+          defaultMode: 0400
+          sources:
+          - secret:
+              name: cilium-clustermesh
+              optional: true
+              # note: items are not explicitly listed here, since the entries of this secret
+              # depend on the peers configured, and that would cause a restart of all agents
+              # at every addition/removal. Leaving the field empty causes each secret entry
+              # to be automatically projected into the volume as a file whose name is the key.
+          - secret:
+              name: clustermesh-apiserver-remote-cert
+              optional: true
+              items:
+              - key: tls.key
+                path: common-etcd-client.key
+              - key: tls.crt
+                path: common-etcd-client.crt
+              - key: ca.crt
+                path: common-etcd-client-ca.crt
       - configMap:
           name: cilium-config
         name: cilium-config-path
-{{ if CiliumSecret }}
+      {{- if CiliumSecret }}
       - name: cilium-ipsec-secrets
         secret:
           secretName: cilium-ipsec-keys
-{{ end }}
+      {{- end }}
+      - name: host-proc-sys-net
+        hostPath:
+          path: /proc/sys/net
+          type: Directory
+      - name: host-proc-sys-kernel
+        hostPath:
+          path: /proc/sys/kernel
+          type: Directory
 {{ if WithDefaultBool .Hubble.Enabled false }}
       - name: hubble-tls
-        secret:
-          secretName: hubble-server-certs
-          optional: true
+        projected:
+          # note: the leading zero means this number is in octal representation: do not remove it
+          defaultMode: 0400
+          sources:
+          - secret:
+              name: hubble-server-certs
+              optional: true
 {{ end }}
 ---
 apiVersion: apps/v1
@@ -1143,6 +1538,8 @@ metadata:
   labels:
     io.cilium/app: operator
     name: cilium-operator
+    app.kubernetes.io/name: cilium-operator
+    app.kubernetes.io/part-of: cilium
   name: cilium-operator
   namespace: kube-system
 spec:
@@ -1165,6 +1562,8 @@ spec:
       labels:
         io.cilium/app: operator
         name: cilium-operator
+        app.kubernetes.io/part-of: cilium
+        app.kubernetes.io/name: cilium-operator
     spec:
       nodeSelector: null
       affinity:
@@ -1177,13 +1576,26 @@ spec:
             - matchExpressions:
               - key: node-role.kubernetes.io/master
                 operator: Exists
+        podAntiAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+          - labelSelector:
+              matchLabels:
+                io.cilium/app: operator
+            topologyKey: kubernetes.io/hostname
+      nodeSelector:
+        kubernetes.io/os: linux
+      tolerations:
+      - operator: Exists
       containers:
-      - args:
+      - name: cilium-operator
+        image: "{{ or .Registry "quay.io" }}/cilium/operator:{{ .Version }}"
+        imagePullPolicy: IfNotPresent
+        command:
+        - cilium-operator
+        args:
         - "--config-dir=/tmp/cilium/config-map"
         - "--debug=$(CILIUM_DEBUG)"
         - "--eni-tags={{ CloudLabels }}"
-        command:
-        - cilium-operator
         env:
         - name: K8S_NODE_NAME
           valueFrom:
@@ -1205,9 +1617,6 @@ spec:
           value: "{{ APIInternalName }}"
         - name: KUBERNETES_SERVICE_PORT
           value: "443"
-        image: "{{ or .Registry "quay.io" }}/cilium/operator:{{ .Version }}"
-        imagePullPolicy: IfNotPresent
-        name: cilium-operator
 {{ if .EnablePrometheusMetrics }}
         ports:
         - containerPort: 6942
@@ -1221,13 +1630,23 @@ spec:
             memory: {{ or .MemoryRequest "128Mi" }}
         livenessProbe:
           httpGet:
-            host: '127.0.0.1'
+            host: '{{- if IsIPv6Only -}}::1{{- else -}}127.0.0.1{{- end -}}'
             path: /healthz
-            port: 9234
+            port: {{ $operatorHealthPort }}
             scheme: HTTP
           initialDelaySeconds: 60
           periodSeconds: 10
           timeoutSeconds: 3
+        readinessProbe:
+          httpGet:
+            host: '{{- if IsIPv6Only -}}::1{{- else -}}127.0.0.1{{- end -}}'
+            path: /healthz
+            port: {{ $operatorHealthPort }}
+            scheme: HTTP
+          initialDelaySeconds: 0
+          periodSeconds: 5
+          timeoutSeconds: 3
+          failureThreshold: 5
         terminationMessagePolicy: FallbackToLogsOnError
         volumeMounts:
         - mountPath: /tmp/cilium/config-map
@@ -1296,9 +1715,11 @@ apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: hubble-relay
+  namespace: kube-system
   labels:
     k8s-app: hubble-relay
-  namespace: kube-system
+    app.kubernetes.io/name: hubble-relay
+    app.kubernetes.io/part-of: cilium
 spec:
   replicas: 2
   selector:
@@ -1312,21 +1733,29 @@ spec:
     metadata:
       labels:
         k8s-app: hubble-relay
+        app.kubernetes.io/name: hubble-relay
+        app.kubernetes.io/part-of: cilium
     spec:
+      securityContext:
+        fsGroup: 65532
       containers:
       - name: hubble-relay
         image: "{{ or .Registry "quay.io" }}/cilium/hubble-relay:{{ .Version }}"
         imagePullPolicy: IfNotPresent
+        securityContext:
+          capabilities:
+            drop:
+            - ALL
+          runAsGroup: 65532
+          runAsNonRoot: true
+          runAsUser: 65532
         command:
         - hubble-relay
         args:
-        - "serve"
-        - "--peer-service=unix:///var/run/cilium/hubble.sock"
-        - "--listen-address=:4245"
-        env:
-        # unfortunately, the addon CAs use only CN
-        - name: GODEBUG
-          value: x509ignoreCN=0
+        - serve
+        {{- if .Debug }}
+        - '--debug'
+        {{- end }}
         ports:
         - name: grpc
           containerPort: 4245
@@ -1336,46 +1765,51 @@ spec:
         livenessProbe:
           tcpSocket:
             port: grpc
-        terminationMessagePolicy: FallbackToLogsOnError
         volumeMounts:
-        - mountPath: /var/run/cilium
-          name: hubble-sock-dir
+        - name: config
+          mountPath: /etc/hubble-relay
           readOnly: true
-        - mountPath: /etc/hubble-relay
-          name: config
-          readOnly: true
-        - mountPath: /var/lib/hubble-relay/tls
-          name: tls
+        - name: tls
+          mountPath: /var/lib/hubble-relay/tls
           readOnly: true
+        terminationMessagePolicy: FallbackToLogsOnError
       restartPolicy: Always
-      serviceAccount: hubble-relay
-      serviceAccountName: hubble-relay
-      terminationGracePeriodSeconds: 0
+      serviceAccount: "hubble-relay"
+      serviceAccountName: "hubble-relay"
+      terminationGracePeriodSeconds: 1
+      affinity:
+        podAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+          - labelSelector:
+              matchLabels:
+                k8s-app: cilium
+            topologyKey: kubernetes.io/hostname
+      nodeSelector:
+        kubernetes.io/os: linux
       topologySpreadConstraints:
       - maxSkew: 1
-        topologyKey: "topology.kubernetes.io/zone"
+        topologyKey: topology.kubernetes.io/zone
         whenUnsatisfiable: ScheduleAnyway
         labelSelector:
           matchLabels:
             k8s-app: hubble-relay
       - maxSkew: 1
-        topologyKey: "kubernetes.io/hostname"
+        topologyKey: kubernetes.io/hostname
         whenUnsatisfiable: DoNotSchedule
         labelSelector:
          matchLabels:
            k8s-app: hubble-relay
       volumes:
-      - hostPath:
-          path: /var/run/cilium
-          type: Directory
-        name: hubble-sock-dir
-      - configMap:
+      - name: config
+        configMap:
           name: hubble-relay-config
           items:
           - key: config.yaml
             path: config.yaml
-        name: config
-      - projected:
+      - name: tls
+        projected:
+          # note: the leading zero means this number is in octal representation: do not remove it
+          defaultMode: 0400
           sources:
           - secret:
               name: hubble-relay-client-certs
@@ -1386,13 +1820,14 @@ spec:
                 path: client.key
               - key: ca.crt
                 path: hubble-server-ca.crt
-        name: tls
 ---
 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
   labels:
     k8s-app: cilium
+    app.kubernetes.io/name: cilium-agent
+    app.kubernetes.io/part-of: cilium
   name: hubble-server-certs
   namespace: kube-system
 spec:
@@ -1408,6 +1843,8 @@ kind: Certificate
 metadata:
   labels:
     k8s-app: cilium
+    app.kubernetes.io/name: cilium-agent
+    app.kubernetes.io/part-of: cilium
   name: hubble-relay-client-certs
   namespace: kube-system
 spec:
@@ -1430,6 +1867,8 @@ metadata:
   labels:
     io.cilium/app: operator
     name: cilium-operator
+    app.kubernetes.io/name: cilium-operator
+    app.kubernetes.io/part-of: cilium
 spec:
   selector:
     matchLabels:
diff --git a/upup/pkg/fi/cloudup/bootstrapchannelbuilder/cilium.go b/upup/pkg/fi/cloudup/bootstrapchannelbuilder/cilium.go
index 4bc5fdb8a1..7ba66ee6c9 100644
--- a/upup/pkg/fi/cloudup/bootstrapchannelbuilder/cilium.go
+++ b/upup/pkg/fi/cloudup/bootstrapchannelbuilder/cilium.go
@@ -35,7 +35,7 @@ func addCiliumAddon(b *BootstrapChannelBuilder, addons *AddonList) error {
 		klog.Infof("found cilium (%q) in addons; won't use builtin", key)
 	} else {
 		id := "k8s-1.16"
-		location := key + "/" + id + "-v1.13.yaml"
+		location := key + "/" + id + "-v1.14.yaml"
 
 		addon := &api.AddonSpec{
 			Name: fi.PtrTo(key),