From 2e1a024b9f64c2dcee53d9c5d6e78c3daad50060 Mon Sep 17 00:00:00 2001 From: Otto Yiu Date: Thu, 25 May 2017 15:51:06 -0700 Subject: [PATCH] Add ability to set cross-subnet mode in Calico This gives the ability to a user to enable cross-subnet mode in Calico. Also introduces a new addon that, full disclosure, I wrote. [ottoyiu/k8s-ec2-srcdst](https://github.com/ottoyiu/k8s-ec2-srcdst) --- docs/networking.md | 46 ++++++++ pkg/apis/kops/networking.go | 1 + pkg/apis/kops/v1alpha1/networking.go | 1 + .../kops/v1alpha1/zz_generated.conversion.go | 2 + pkg/apis/kops/v1alpha2/networking.go | 1 + .../kops/v1alpha2/zz_generated.conversion.go | 2 + .../k8s-1.6.yaml.template | 105 +++++++++++++++++- .../pre-k8s-1.6.yaml.template | 54 ++++++++- 8 files changed, 210 insertions(+), 2 deletions(-) diff --git a/docs/networking.md b/docs/networking.md index 2c4c4f3bc8..a6bdfb0686 100644 --- a/docs/networking.md +++ b/docs/networking.md @@ -121,6 +121,52 @@ $ kops create cluster \ The above will deploy a daemonset installation which requires K8s 1.4.x or above. +##### Enable Cross-Subnet mode in Calico (AWS only) +Calico [since 2.1] supports a new option for IP-in-IP mode where traffic is only encapsulated +when it’s destined to subnets with intermediate infrastructure lacking Calico route awareness +– for example, across heterogeneous public clouds or on AWS where traffic is crossing availability zones/regions. + +With this mode, IP-in-IP encapsulation is only performed selectively. This provides better performance in AWS +multi-AZ deployments, and in general when deploying on networks where pools of nodes with L2 connectivity +are connected via a router. + +Reference: [Calico 2.1 Release Notes](https://www.projectcalico.org/project-calico-2-1-released/) + +Note that by default, Calico distributes routes between nodes within a subnet using a full node-to-node BGP mesh. +Each node automatically sets up a BGP peering with every other node within the same L2 network. 
+This full node-to-node mesh per L2 network has its scaling challenges for larger-scale deployments. +BGP route reflectors can be used as a replacement for a full mesh, and are useful for scaling up a cluster. +The setup of BGP route reflectors is currently out of the scope of kops. + +Read more here: [BGP route reflectors](http://docs.projectcalico.org/v2.2/usage/routereflector/calico-routereflector) + + +To enable this mode in a cluster, with Calico as the CNI and Network Policy provider, you must edit the cluster after the previous `kops create ...` command. + +`kops edit cluster` will show you a block like this: + +``` + networking: + calico: {} +``` + +You will need to change that block, and add an additional field, to look like this: + +``` + networking: + calico: + crossSubnet: true +``` + +This `crossSubnet` field can also be defined within a cluster specification file, and the entire cluster can be created by running: +`kops create -f k8s-cluster.example.com.yaml` + +In the case of AWS, EC2 instances have source/destination checks enabled by default. +When you enable cross-subnet mode in kops, an addon controller ([k8s-ec2-srcdst](https://github.com/ottoyiu/k8s-ec2-srcdst)) +will be deployed as a Pod (which will be scheduled on one of the masters) to facilitate the disabling of said source/destination address checks. +Only the masters have the IAM policy (`ec2:*`) to allow k8s-ec2-srcdst to execute `ec2:ModifyInstanceAttribute`. + + #### More information about Calico For Calico specific documentation please visit the [Calico Docs](http://docs.projectcalico.org/v2.0/getting-started/kubernetes/). 
diff --git a/pkg/apis/kops/networking.go b/pkg/apis/kops/networking.go index d882d0f495..44cc95586c 100644 --- a/pkg/apis/kops/networking.go +++ b/pkg/apis/kops/networking.go @@ -62,6 +62,7 @@ type FlannelNetworkingSpec struct { // Calico declares that we want Calico networking type CalicoNetworkingSpec struct { + CrossSubnet bool `json:"crossSubnet,omitempty"` // Enables Calico's cross-subnet mode when set to true } // Canal declares that we want Canal networking diff --git a/pkg/apis/kops/v1alpha1/networking.go b/pkg/apis/kops/v1alpha1/networking.go index 2e9cd15a34..e1fa902d24 100644 --- a/pkg/apis/kops/v1alpha1/networking.go +++ b/pkg/apis/kops/v1alpha1/networking.go @@ -62,6 +62,7 @@ type FlannelNetworkingSpec struct { // Calico declares that we want Calico networking type CalicoNetworkingSpec struct { + CrossSubnet bool `json:"crossSubnet,omitempty"` // Enables Calico's cross-subnet mode when set to true } // Canal declares that we want Canal networking diff --git a/pkg/apis/kops/v1alpha1/zz_generated.conversion.go b/pkg/apis/kops/v1alpha1/zz_generated.conversion.go index 36db24cce9..dd7ddd8028 100644 --- a/pkg/apis/kops/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/kops/v1alpha1/zz_generated.conversion.go @@ -253,6 +253,7 @@ func Convert_kops_CNINetworkingSpec_To_v1alpha1_CNINetworkingSpec(in *kops.CNINe } func autoConvert_v1alpha1_CalicoNetworkingSpec_To_kops_CalicoNetworkingSpec(in *CalicoNetworkingSpec, out *kops.CalicoNetworkingSpec, s conversion.Scope) error { + out.CrossSubnet = in.CrossSubnet return nil } @@ -261,6 +262,7 @@ func Convert_v1alpha1_CalicoNetworkingSpec_To_kops_CalicoNetworkingSpec(in *Cali } func autoConvert_kops_CalicoNetworkingSpec_To_v1alpha1_CalicoNetworkingSpec(in *kops.CalicoNetworkingSpec, out *CalicoNetworkingSpec, s conversion.Scope) error { + out.CrossSubnet = in.CrossSubnet return nil } diff --git a/pkg/apis/kops/v1alpha2/networking.go b/pkg/apis/kops/v1alpha2/networking.go index 20725f39d6..c24c917a79 100644 --- 
a/pkg/apis/kops/v1alpha2/networking.go +++ b/pkg/apis/kops/v1alpha2/networking.go @@ -62,6 +62,7 @@ type FlannelNetworkingSpec struct { // Calico declares that we want Calico networking type CalicoNetworkingSpec struct { + CrossSubnet bool `json:"crossSubnet,omitempty"` // Enables Calico's cross-subnet mode when set to true } // Canal declares that we want Canal networking diff --git a/pkg/apis/kops/v1alpha2/zz_generated.conversion.go b/pkg/apis/kops/v1alpha2/zz_generated.conversion.go index 3e6119e331..0dce94c859 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.conversion.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.conversion.go @@ -279,6 +279,7 @@ func Convert_kops_CNINetworkingSpec_To_v1alpha2_CNINetworkingSpec(in *kops.CNINe } func autoConvert_v1alpha2_CalicoNetworkingSpec_To_kops_CalicoNetworkingSpec(in *CalicoNetworkingSpec, out *kops.CalicoNetworkingSpec, s conversion.Scope) error { + out.CrossSubnet = in.CrossSubnet return nil } @@ -287,6 +288,7 @@ func Convert_v1alpha2_CalicoNetworkingSpec_To_kops_CalicoNetworkingSpec(in *Cali } func autoConvert_kops_CalicoNetworkingSpec_To_v1alpha2_CalicoNetworkingSpec(in *kops.CalicoNetworkingSpec, out *CalicoNetworkingSpec, s conversion.Scope) error { + out.CrossSubnet = in.CrossSubnet return nil } diff --git a/upup/models/cloudup/resources/addons/networking.projectcalico.org/k8s-1.6.yaml.template b/upup/models/cloudup/resources/addons/networking.projectcalico.org/k8s-1.6.yaml.template index 95e833413e..630641d708 100644 --- a/upup/models/cloudup/resources/addons/networking.projectcalico.org/k8s-1.6.yaml.template +++ b/upup/models/cloudup/resources/addons/networking.projectcalico.org/k8s-1.6.yaml.template @@ -146,7 +146,7 @@ spec: - name: CALICO_IPV4POOL_CIDR value: "{{ .NonMasqueradeCIDR }}" - name: CALICO_IPV4POOL_IPIP - value: "always" + value: "{{- if and (eq .CloudProvider "aws") (.Networking.Calico.CrossSubnet) -}}cross-subnet{{- else -}}always{{- end -}}" # Disable file logging so `kubectl logs` works. 
- name: CALICO_DISABLE_FILE_LOGGING value: "true" @@ -261,3 +261,106 @@ spec: # kubernetes.default to the correct service clusterIP. - name: CONFIGURE_ETC_HOSTS value: "true" + +{{ if and (eq .CloudProvider "aws") (.Networking.Calico.CrossSubnet) -}} +# This manifest installs the k8s-ec2-srcdst container, which disables +# src/dst ip checks to allow BGP to function for calico for hosts within subnets +# This only applies for AWS environments. +--- + +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1beta1 +metadata: + name: k8s-ec2-srcdst + labels: + role.kubernetes.io/networking: "1" +rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - update + - patch + +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8s-ec2-srcdst + namespace: kube-system + labels: + role.kubernetes.io/networking: "1" +--- + +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1beta1 +metadata: + name: k8s-ec2-srcdst + labels: + role.kubernetes.io/networking: "1" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: k8s-ec2-srcdst +subjects: +- kind: ServiceAccount + name: k8s-ec2-srcdst + namespace: kube-system + +--- + +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: k8s-ec2-srcdst + namespace: kube-system + labels: + k8s-app: k8s-ec2-srcdst + role.kubernetes.io/networking: "1" +spec: + replicas: 1 + selector: + matchLabels: + k8s-app: k8s-ec2-srcdst + template: + metadata: + labels: + k8s-app: k8s-ec2-srcdst + role.kubernetes.io/networking: "1" + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + spec: + hostNetwork: true + tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + - key: CriticalAddonsOnly + operator: Exists + serviceAccountName: k8s-ec2-srcdst + containers: + - image: ottoyiu/k8s-ec2-srcdst:v0.0.3 + name: k8s-ec2-srcdst + resources: + requests: + cpu: 10m + memory: 64Mi + env: + - name: AWS_REGION + value: {{ Region }} + 
volumeMounts: + - name: ssl-certs + mountPath: "/etc/ssl/certs/ca-certificates.crt" + readOnly: true + imagePullPolicy: "Always" + volumes: + - name: ssl-certs + hostPath: + path: "/etc/ssl/certs/ca-certificates.crt" + nodeSelector: + node-role.kubernetes.io/master: "" +{{- end -}} diff --git a/upup/models/cloudup/resources/addons/networking.projectcalico.org/pre-k8s-1.6.yaml.template b/upup/models/cloudup/resources/addons/networking.projectcalico.org/pre-k8s-1.6.yaml.template index a1324fdbc5..bdd99f9745 100644 --- a/upup/models/cloudup/resources/addons/networking.projectcalico.org/pre-k8s-1.6.yaml.template +++ b/upup/models/cloudup/resources/addons/networking.projectcalico.org/pre-k8s-1.6.yaml.template @@ -89,7 +89,7 @@ spec: - name: CALICO_IPV4POOL_CIDR value: "{{ .NonMasqueradeCIDR }}" - name: CALICO_IPV4POOL_IPIP - value: "always" + value: "{{- if and (eq .CloudProvider "aws") (.Networking.Calico.CrossSubnet) -}}cross-subnet{{- else -}}always{{- end -}}" # Auto-detect the BGP IP address. - name: IP value: "" @@ -199,3 +199,55 @@ spec: - name: CONFIGURE_ETC_HOSTS value: "true" +{{ if and (eq .CloudProvider "aws") (.Networking.Calico.CrossSubnet) -}} +--- +# This manifest installs the k8s-ec2-srcdst container, which disables +# src/dst ip checks to allow BGP to function for calico for hosts within subnets +# This only applies for AWS environments. 
+apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: k8s-ec2-srcdst + namespace: kube-system + labels: + k8s-app: k8s-ec2-srcdst + role.kubernetes.io/networking: "1" +spec: + replicas: 1 + selector: + matchLabels: + k8s-app: k8s-ec2-srcdst + template: + metadata: + labels: + k8s-app: k8s-ec2-srcdst + role.kubernetes.io/networking: "1" + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + scheduler.alpha.kubernetes.io/tolerations: | + [{"key": "dedicated", "value": "master", "effect": "NoSchedule" }, + {"key":"CriticalAddonsOnly", "operator":"Exists"}] + spec: + hostNetwork: true + containers: + - image: ottoyiu/k8s-ec2-srcdst:v0.0.3 + name: k8s-ec2-srcdst + resources: + requests: + cpu: 10m + memory: 64Mi + env: + - name: AWS_REGION + value: {{ Region }} + volumeMounts: + - name: ssl-certs + mountPath: "/etc/ssl/certs/ca-certificates.crt" + readOnly: true + imagePullPolicy: "Always" + volumes: + - name: ssl-certs + hostPath: + path: "/etc/ssl/certs/ca-certificates.crt" + nodeSelector: + kubernetes.io/role: master +{{- end -}}