Add support for Linkerd 2.13

In Linkerd 2.13, the Prometheus instance in
the `linkerd-viz` namespace is now locked behind an
[_AuthorizationPolicy_](https://github.com/linkerd/linkerd2/blob/stable-2.13.1/viz/charts/linkerd-viz/templates/prometheus-policy.yaml)
that grants access only to the `metrics-api` _ServiceAccount_.
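
For reference, the upstream policy is shaped roughly like this (a sketch
based on the linked chart template, abridged rather than copied verbatim):

```yaml
# Sketch of the linkerd-viz policy described above: only the metrics-api
# ServiceAccount is authorized to reach the prometheus-admin Server.
apiVersion: policy.linkerd.io/v1alpha1
kind: AuthorizationPolicy
metadata:
  namespace: linkerd-viz
  name: prometheus-admin
spec:
  targetRef:
    group: policy.linkerd.io
    kind: Server
    name: prometheus-admin          # Server fronting Prometheus' admin port
  requiredAuthenticationRefs:
    - kind: ServiceAccount
      name: metrics-api
      namespace: linkerd-viz
```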

This adds an extra _AuthorizationPolicy_ that authorizes the `flagger`
_ServiceAccount_. It is created by default when using Kustomize, but is
opt-in when using Helm via the new `linkerdAuthPolicy.create` value. This
also means the Flagger workload has to be injected with the Linkerd proxy,
which can't happen in the `linkerd` namespace where the control plane
lives, so we're moving Flagger into the new, injected `flagger-system`
namespace.
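
In Helm terms, the opt-in and the proxy injection come down to values along
these lines (a sketch mirroring the chart changes further down; `linkerd-viz`
is the default policy namespace):

```yaml
# Sketch of the relevant Helm values (see the values.yaml hunk below).
podAnnotations:
  linkerd.io/inject: enabled   # inject the Linkerd proxy into the Flagger pod
linkerdAuthPolicy:
  create: true                 # opt in to the extra AuthorizationPolicy
  namespace: linkerd-viz       # namespace holding the prometheus-admin Server
```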

The `namespace` field in `kustomization.yml` was overriding the namespace
of the new _AuthorizationPolicy_ resource, so it gets restored to
`linkerd-viz` with a `patchesJson6902` entry. A better way to do this
would have been the `unsetOnly` field in a _NamespaceTransformer_ (see
kubernetes-sigs/kustomize#4708), but I couldn't get that to work.
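
For the record, the `unsetOnly` alternative would look roughly like the
following, wired in through the `transformers:` field instead of the
top-level `namespace:` one. This is an untested sketch (as noted above, I
couldn't get it to work) and the file and transformer names are arbitrary:

```yaml
# namespace-transformer.yaml (hypothetical, untested sketch)
apiVersion: builtin
kind: NamespaceTransformer
metadata:
  name: flagger-namespace      # arbitrary name for the transformer config
  namespace: flagger-system    # namespace to apply to resources
unsetOnly: true                # only set a namespace where none is declared,
                               # leaving the policy's linkerd-viz untouched
```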

Signed-off-by: Alejandro Pedraza <alejandro@buoyant.io>


@@ -40,10 +40,13 @@ $ helm upgrade -i flagger flagger/flagger \
To install Flagger for **Linkerd** (requires Linkerd Viz extension):
```console
# Note that linkerdAuthPolicy.create=true is only required for Linkerd 2.12 and
# later
$ helm upgrade -i flagger flagger/flagger \
--namespace=linkerd \
--namespace=flagger-system \
--set meshProvider=linkerd \
--set metricsServer=http://prometheus.linkerd-viz:9090
--set metricsServer=http://prometheus.linkerd-viz:9090 \
--set linkerdAuthPolicy.create=true
```
To install Flagger for **AWS App Mesh**:


@@ -0,0 +1,16 @@
{{- if .Values.linkerdAuthPolicy.create }}
apiVersion: policy.linkerd.io/v1alpha1
kind: AuthorizationPolicy
metadata:
  namespace: {{ .Values.linkerdAuthPolicy.namespace }}
  name: prometheus-admin-flagger
spec:
  targetRef:
    group: policy.linkerd.io
    kind: Server
    name: prometheus-admin
  requiredAuthenticationRefs:
    - kind: ServiceAccount
      name: {{ template "flagger.serviceAccountName" . }}
      namespace: {{ .Release.Namespace }}
{{- end }}


@@ -16,6 +16,7 @@ podAnnotations:
  prometheus.io/scrape: "true"
  prometheus.io/port: "8080"
  appmesh.k8s.aws/sidecarInjectorWebhook: disabled
  linkerd.io/inject: enabled
# priority class name for pod priority configuration
podPriorityClassName: ""
@@ -123,6 +124,13 @@ crd:
  # crd.create: `true` if custom resource definitions should be created
  create: false
linkerdAuthPolicy:
  # linkerdAuthPolicy.create: Whether to create an AuthorizationPolicy in
  # linkerd viz' namespace to allow flagger to reach viz' prometheus service
  create: false
  # linkerdAuthPolicy.namespace: linkerd-viz' namespace
  namespace: linkerd-viz
nameOverride: ""
fullnameOverride: ""


@@ -0,0 +1,14 @@
apiVersion: policy.linkerd.io/v1alpha1
kind: AuthorizationPolicy
metadata:
  namespace: linkerd-viz
  name: prometheus-admin-flagger
spec:
  targetRef:
    group: policy.linkerd.io
    kind: Server
    name: prometheus-admin
  requiredAuthenticationRefs:
    - kind: ServiceAccount
      name: flagger
      namespace: flagger-system


@@ -1,5 +1,18 @@
namespace: linkerd
namespace: flagger-system
bases:
  - ../base/flagger/
  - namespace.yaml
  - authorizationpolicy.yaml
patchesStrategicMerge:
  - patch.yaml
# restore overridden namespace field
patchesJson6902:
  - target:
      group: policy.linkerd.io
      version: v1alpha1
      kind: AuthorizationPolicy
      name: prometheus-admin-flagger
    patch: |-
      - op: replace
        path: /metadata/namespace
        value: linkerd-viz


@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
  annotations:
    linkerd.io/inject: enabled
  name: flagger-system


@@ -24,4 +24,4 @@ roleRef:
subjects:
  - kind: ServiceAccount
    name: flagger
    namespace: linkerd
    namespace: flagger-system


@@ -2,24 +2,44 @@
set -o errexit
LINKERD_VER="stable-2.11.2"
LINKERD_VER="stable-2.13.2"
LINKERD_SMI_VER="0.2.0"
REPO_ROOT=$(git rev-parse --show-toplevel)
mkdir -p ${REPO_ROOT}/bin
curl -SsL https://github.com/linkerd/linkerd2/releases/download/${LINKERD_VER}/linkerd2-cli-${LINKERD_VER}-linux-amd64 > ${REPO_ROOT}/bin/linkerd
chmod +x ${REPO_ROOT}/bin/linkerd
curl -SsL https://github.com/linkerd/linkerd-smi/releases/download/v${LINKERD_SMI_VER}/linkerd-smi-${LINKERD_SMI_VER}-linux-amd64 > ${REPO_ROOT}/bin/linkerd-smi
chmod +x ${REPO_ROOT}/bin/linkerd-smi
echo ">>> Installing Linkerd ${LINKERD_VER}"
${REPO_ROOT}/bin/linkerd install --crds | kubectl apply -f -
${REPO_ROOT}/bin/linkerd install | kubectl apply -f -
${REPO_ROOT}/bin/linkerd check
echo ">>> Installing Linkerd SMI"
${REPO_ROOT}/bin/linkerd-smi install | kubectl apply -f -
${REPO_ROOT}/bin/linkerd-smi check
echo ">>> Installing Linkerd Viz"
${REPO_ROOT}/bin/linkerd viz install | kubectl apply -f -
kubectl -n linkerd-viz rollout status deploy/prometheus
${REPO_ROOT}/bin/linkerd viz check
# Scale down Deployments we don't need as they take up CPU and block other
# pods from being scheduled later.
kubectl -n linkerd-viz scale deploy web --replicas=0
kubectl -n linkerd-viz scale deploy tap --replicas=0
kubectl -n linkerd-viz scale deploy tap-injector --replicas=0
kubectl -n linkerd-viz scale deploy metrics-api --replicas=0
# Delete this APIService as it blocks the deletion of the test ns later
# (since we delete the linkerd-viz/tap Deployment which in turns makes the
# APIService unavailable due to missing Endpoints).
kubectl delete apiservices v1alpha1.tap.linkerd.io
echo '>>> Installing Flagger'
kubectl apply -k ${REPO_ROOT}/kustomize/linkerd
kubectl -n linkerd set image deployment/flagger flagger=test/flagger:latest
kubectl -n linkerd rollout status deployment/flagger
kubectl -n flagger-system set image deployment/flagger flagger=test/flagger:latest
kubectl -n flagger-system rollout status deployment/flagger


@@ -8,7 +8,11 @@ DIR="$(cd "$(dirname "$0")" && pwd)"
"$DIR"/install.sh
"$REPO_ROOT"/test/workloads/init.sh
# Delete Daemonset as it eats up precious CPU requests and we don't need it anyway.
kubectl -n test delete ds podinfo-ds
"$DIR"/test-canary.sh
"$REPO_ROOT"/test/workloads/init.sh
# Delete Daemonset as it eats up precious CPU requests and we don't need it anyway.
kubectl -n test delete ds podinfo-ds
"$DIR"/test-steps.sh


@@ -122,7 +122,7 @@ until ${ok}; do
sleep 5
count=$(($count + 1))
if [[ ${count} -eq ${retries} ]]; then
kubectl -n linkerd logs deployment/flagger
kubectl -n flagger-system logs deployment/flagger flagger
echo "No more retries left"
exit 1
fi
@@ -153,10 +153,10 @@ ok=false
until ${ok}; do
kubectl -n test describe deployment/podinfo-primary | grep '6.0.1' && ok=true || ok=false
sleep 10
kubectl -n linkerd logs deployment/flagger --tail 1
kubectl -n flagger-system logs deployment/flagger flagger --tail 1
count=$(($count + 1))
if [[ ${count} -eq ${retries} ]]; then
kubectl -n linkerd logs deployment/flagger
kubectl -n flagger-system logs deployment/flagger flagger
echo "No more retries left"
exit 1
fi
@@ -171,7 +171,7 @@ until ${ok}; do
sleep 5
count=$(($count + 1))
if [[ ${count} -eq ${retries} ]]; then
kubectl -n linkerd logs deployment/flagger
kubectl -n flagger-system logs deployment/flagger flagger
echo "No more retries left"
exit 1
fi
@@ -232,13 +232,13 @@ ok=false
until ${ok}; do
kubectl -n test get canary/podinfo | grep 'Failed' && ok=true || ok=false
sleep 10
kubectl -n linkerd logs deployment/flagger --tail 1
kubectl -n flagger-system logs deployment/flagger flagger --tail 1
count=$(($count + 1))
if [[ ${count} -eq ${retries} ]]; then
kubectl -n linkerd logs deployment/flagger
kubectl -n flagger-system logs deployment/flagger flagger
echo "No more retries left"
exit 1
fi
done
echo '✔ Canary rollback test passed'
echo '✔ Canary rollback test passed'


@@ -50,7 +50,7 @@ until ${ok}; do
sleep 5
count=$(($count + 1))
if [[ ${count} -eq ${retries} ]]; then
kubectl -n linkerd logs deployment/flagger
kubectl -n flagger-system logs deployment/flagger flagger
echo "No more retries left"
exit 1
fi
@@ -68,10 +68,10 @@ ok=false
until ${ok}; do
kubectl -n test describe deployment/podinfo-primary | grep '6.0.1' && ok=true || ok=false
sleep 10
kubectl -n linkerd logs deployment/flagger --tail 1
kubectl -n flagger-system logs deployment/flagger flagger --tail 1
count=$(($count + 1))
if [[ ${count} -eq ${retries} ]]; then
kubectl -n linkerd logs deployment/flagger
kubectl -n flagger-system logs deployment/flagger flagger
echo "No more retries left"
exit 1
fi
@@ -86,7 +86,7 @@ until ${ok}; do
sleep 5
count=$(($count + 1))
if [[ ${count} -eq ${retries} ]]; then
kubectl -n linkerd logs deployment/flagger
kubectl -n flagger-system logs deployment/flagger flagger
echo "No more retries left"
exit 1
fi