diff --git a/cockroachdb/README.md b/cockroachdb/README.md new file mode 100644 index 00000000..d886f55c --- /dev/null +++ b/cockroachdb/README.md @@ -0,0 +1,97 @@ + + + + +WARNING +WARNING +WARNING +WARNING +WARNING + +

PLEASE NOTE: This document applies to the HEAD of the source tree.

+ +If you are using a released version of Kubernetes, you should +refer to the docs that go with that version. + +Documentation for other releases can be found at +[releases.k8s.io](http://releases.k8s.io). + +-- + + + + + +# CockroachDB on Kubernetes as a PetSet + +This example deploys [CockroachDB](https://cockroachlabs.com) on Kubernetes as +a PetSet. CockroachDB is a distributed, scalable NewSQL database. Please see +[the homepage](https://cockroachlabs.com) and the +[documentation](https://www.cockroachlabs.com/docs/) for details. + +## Limitations + +### PetSet limitations + +Standard PetSet limitations apply: There is currently no possibility to use +node-local storage (outside of single-node tests), and so there is likely +a performance hit associated with running CockroachDB on some external storage. +Note that CockroachDB already does replication and thus should not be deployed on +a persistent volume which already replicates internally. +High-performance use cases on a private Kubernetes cluster should consider +a DaemonSet deployment. + +### Recovery after persistent storage failure + +A persistent storage failure (e.g. losing the hard drive) is gracefully handled +by CockroachDB as long as enough replicas survive (two out of three by +default). Due to the bootstrapping in this deployment, a storage failure of the +first node is special in that the administrator must manually prepopulate the +"new" storage medium by running an instance of CockroachDB with the `--join` +parameter. If this is not done, the first node will bootstrap a new cluster, +which will lead to a lot of trouble. + +### Dynamic provisioning + +The deployment is written for a use case in which dynamic provisioning is +available. When that is not the case, the persistent volume claims need +to be created manually. See [minikube.sh](minikube.sh) for the necessary +steps. + +## Testing locally on minikube + +Follow the steps in [minikube.sh](minikube.sh) (or simply run that file). 
+ +## Simulating failures + +When all (or enough) nodes are up, simulate a failure like this: + +```shell +kubectl exec cockroachdb-0 -- /bin/bash -c "while true; do kill 1; done" +``` + +On one of the other pods, run `./cockroach sql --host $(hostname)` and use +(mostly) Postgres-flavor SQL. The example runs with three-fold replication, +so it can tolerate one failure of any given node at a time. +Note also that there is a brief period of time immediately after the creation +of the cluster during which the three-fold replication is established, and +during which killing a node may lead to unavailability. + +There is also a [demo script](demo.sh). + +## Scaling up or down + +Simply edit the PetSet (but note that you may need to create a new persistent +volume claim first). If you ran `minikube.sh`, there's a spare volume so you +can immediately scale up by one. Convince yourself that the new node +immediately serves reads and writes. + + + +[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/cockroachdb/README.md?pixel)]() + diff --git a/cockroachdb/cockroachdb-petset.yaml b/cockroachdb/cockroachdb-petset.yaml new file mode 100644 index 00000000..3f333d63 --- /dev/null +++ b/cockroachdb/cockroachdb-petset.yaml @@ -0,0 +1,96 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + name: cockroachdb + labels: + app: cockroachdb +spec: + ports: + # The main port, served by gRPC, serves Postgres-flavor SQL, internode + # traffic and the cli. + - port: 26257 + targetPort: 26257 + name: grpc + # The secondary port serves the UI as well as health and debug endpoints. 
+ - port: 8080 + targetPort: 8080 + name: http + clusterIP: None + selector: + app: cockroachdb +--- +apiVersion: apps/v1alpha1 +kind: PetSet +metadata: + name: cockroachdb +spec: + serviceName: "cockroachdb" + replicas: 5 + template: + metadata: + labels: + app: cockroachdb + annotations: + pod.alpha.kubernetes.io/initialized: "true" + spec: + containers: + - name: cockroachdb + # Runs the master branch. Not recommended for production, but since + # CockroachDB is in Beta, you don't want to run it in production + # anyway. See + # https://hub.docker.com/r/cockroachdb/cockroach/tags/ + # if you prefer to run a beta release. + image: cockroachdb/cockroach + imagePullPolicy: IfNotPresent + ports: + - containerPort: 26257 + name: grpc + - containerPort: 8080 + name: http + volumeMounts: + - name: datadir + mountPath: /cockroach/cockroach-data + command: + - "/bin/bash" + - "-ecx" + - | + # The use of qualified `hostname -f` is crucial: + # Other nodes aren't able to look up the unqualified hostname. + CRARGS=("start" "--logtostderr" "--insecure" "--host" "$(hostname -f)") + # TODO(tschottdorf): really want to use an init container to do + # the bootstrapping. The idea is that the container would know + # whether it's on the first node and could check whether there's + # already a data directory. If not, it would bootstrap the cluster. + # We will need some version of `cockroach init` back for this to + # work. For now, just do the same in a shell snippet. + # Of course this isn't without danger - if node0 loses its data, + # upon restarting it will simply bootstrap a new cluster and smack + # it into our existing cluster. + # There are likely ways out. For example, the init container could + # query the kubernetes API and see whether any other nodes are + # around, etc. Or, of course, the admin can pre-seed the lost + # volume somehow (and in that case we should provide a better way, + # for example a marker file). + if [ ! 
"$(hostname)" == "cockroachdb-0" ] || \ + [ -e "/cockroach/cockroach-data/COCKROACHDB_VERSION" ] + then + CRARGS+=("--join" "cockroachdb") + fi + /cockroach/cockroach ${CRARGS[*]} + volumes: + - name: datadir + persistentVolumeClaim: + claimName: datadir + volumeClaimTemplates: + - metadata: + name: datadir + annotations: + volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi diff --git a/cockroachdb/demo.sh b/cockroachdb/demo.sh new file mode 100755 index 00000000..8b3031fb --- /dev/null +++ b/cockroachdb/demo.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# Copyright 2016 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +function sql() { + # TODO(knz): Why does the more idiomatic read from stdin not produce any + # output? + kubectl exec "cockroachdb-${1}" -- /cockroach/cockroach sql \ + --host "cockroachdb-${1}.cockroachdb" \ + -e "$(cat /dev/stdin)" +} + +function kill() { + ! kubectl exec -t "cockroachdb-${1}" -- /bin/bash -c "while true; do kill 1; done" &> /dev/null +} + +# Create database on second node (idempotently for convenience). +cat <