diff --git a/examples_test.go b/examples_test.go
index bdad9c9b..d5e42f52 100644
--- a/examples_test.go
+++ b/examples_test.go
@@ -317,6 +317,7 @@ func TestExampleObjectSchemas(t *testing.T) {
 			"spark-master-service":    &api.Service{},
 			"spark-master":            &api.Pod{},
 			"spark-worker-controller": &api.ReplicationController{},
+			"spark-driver":            &api.Pod{},
 		},
 		"../examples/storm": {
 			"storm-nimbus-service": &api.Service{},
diff --git a/spark/README.md b/spark/README.md
index 42944e71..cefc2762 100644
--- a/spark/README.md
+++ b/spark/README.md
@@ -144,45 +144,36 @@ $ kubectl logs spark-master
 15/06/26 14:15:55 INFO Master: Registering worker 10.244.0.19:60970 with 1 cores, 2.6 GB RAM
 ```
 
-## Step Three: Do something with the cluster
+## Step Three: Start your Spark driver to launch jobs on your Spark cluster
 
-Get the address and port of the Master service.
+The Spark driver is used to launch jobs into the Spark cluster. You can read more about it in the
+[Spark architecture](http://spark.apache.org/docs/latest/cluster-overview.html) overview.
 
-```sh
-$ kubectl get service spark-master
-NAME          LABELS             SELECTOR           IP(S)          PORT(S)
-spark-master  name=spark-master  name=spark-master  10.0.204.187   7077/TCP
+```shell
+$ kubectl create -f examples/spark/spark-driver.json
 ```
 
-SSH to one of your cluster nodes. On GCE/GKE you can either use [Developers Console](https://console.developers.google.com)
-(more details [here](https://cloud.google.com/compute/docs/ssh-in-browser))
-or run `gcloud compute ssh <name>` where the name can be taken from `kubectl get nodes`
-(more details [here](https://cloud.google.com/compute/docs/gcloud-compute/#connecting)).
+The Spark driver needs the Master service to be running.
 
-```
-$ kubectl get nodes
-NAME                     LABELS                                          STATUS
-kubernetes-minion-5jvu   kubernetes.io/hostname=kubernetes-minion-5jvu   Ready
-kubernetes-minion-6fbi   kubernetes.io/hostname=kubernetes-minion-6fbi   Ready
-kubernetes-minion-8y2v   kubernetes.io/hostname=kubernetes-minion-8y2v   Ready
-kubernetes-minion-h0tr   kubernetes.io/hostname=kubernetes-minion-h0tr   Ready
+### Check to see if the driver is running
 
-$ gcloud compute ssh kubernetes-minion-5jvu --zone=us-central1-b
-Linux kubernetes-minion-5jvu 3.16.0-0.bpo.4-amd64 #1 SMP Debian 3.16.7-ckt9-3~deb8u1~bpo70+1 (2015-04-27) x86_64
-
-=== GCE Kubernetes node setup complete ===
-
-me@kubernetes-minion-5jvu:~$
+```shell
+$ kubectl get pods
+NAME                            READY     REASON    RESTARTS   AGE
+[...]
+spark-master                    1/1       Running   0          14m
+spark-driver                    1/1       Running   0          10m
 ```
 
-Once logged in run spark-base image. Inside of the image there is a script
-that sets up the environment based on the provided IP and port of the Master.
+## Step Four: Do something with the cluster
+
+Use `kubectl exec` to connect to the Spark driver:
 
 ```
-cluster-node $ sudo docker run -it gcr.io/google_containers/spark-base
-root@f12a6fec45ce:/# . /setup_client.sh 10.0.204.187 7077
-root@f12a6fec45ce:/# pyspark
-Python 2.7.9 (default, Mar  1 2015, 12:57:24)
+$ kubectl exec spark-driver -it bash
+root@spark-driver:/#
+root@spark-driver:/# pyspark
+Python 2.7.9 (default, Mar  1 2015, 12:57:24)
 [GCC 4.9.2] on linux2
 Type "help", "copyright", "credits" or "license" for more information.
 15/06/26 14:25:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
@@ -201,9 +192,9 @@ SparkContext available as sc, HiveContext available as sqlContext.
 
 ## Result
 
-You now have services, replication controllers, and pods for the Spark master and Spark workers.
-You can take this example to the next step and start using the Apache Spark cluster
-you just created, see [Spark documentation](https://spark.apache.org/documentation.html)
+You now have services, replication controllers, and pods for the Spark master, Spark driver, and Spark workers.
+You can take this example to the next step and start using the Apache Spark cluster
+you just created; see the [Spark documentation](https://spark.apache.org/documentation.html)
 for more information.
 
 ## tl;dr
 
@@ -216,6 +207,8 @@ Make sure the Master Pod is running (use: ```kubectl get pods```).
 
 ```kubectl create -f spark-worker-controller.json```
 
+```kubectl create -f spark-driver.json```
+
 
 [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()
 
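The new "Step Four" leaves you inside a `pyspark` shell on the driver, and a trivial job is a quick way to confirm that work actually reaches the workers. A minimal sketch of such a session, assuming only the `sc` SparkContext that `pyspark` itself creates (see the startup banner above):

```shell
>>> # count 1000 elements spread across the cluster; expect 1000 back
>>> sc.parallelize(range(1000)).count()
1000
```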
diff --git a/spark/images/driver/Dockerfile b/spark/images/driver/Dockerfile
new file mode 100644
index 00000000..cfb1dad7
--- /dev/null
+++ b/spark/images/driver/Dockerfile
@@ -0,0 +1,4 @@
+FROM gcr.io/google_containers/spark-base
+ADD start.sh /start.sh
+ADD log4j.properties /opt/spark/conf/log4j.properties
+CMD ["/start.sh"]
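The driver image is just `spark-base` plus a startup script and a log4j config. If you want to build and publish your own image instead of using the prebuilt one referenced in `spark-driver.json` below, something along these lines should work (a sketch: `myrepo` is a placeholder registry/user, and a `log4j.properties` file must be present in the build context because the Dockerfile ADDs it):

```shell
$ cd spark/images/driver
$ docker build -t myrepo/spark-driver .
$ docker push myrepo/spark-driver
```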

diff --git a/spark/images/driver/README.md b/spark/images/driver/README.md
new file mode 100644
index 00000000..2a36c4ee
--- /dev/null
+++ b/spark/images/driver/README.md
@@ -0,0 +1,37 @@
+PLEASE NOTE: This document applies to the HEAD of the source tree.
+
+If you are using a released version of Kubernetes, you should
+refer to the docs that go with that version.
+
+The latest 1.0.x release of this document can be found
+[here](http://releases.k8s.io/release-1.0/examples/spark/images/driver/README.md).
+
+Documentation for other releases can be found at
+[releases.k8s.io](http://releases.k8s.io).
+
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/images/driver/README.md?pixel)]()
diff --git a/spark/images/driver/start.sh b/spark/images/driver/start.sh
new file mode 100755
index 00000000..13be0699
--- /dev/null
+++ b/spark/images/driver/start.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make the master reachable by name and point Spark at it, using the
+# service environment variables Kubernetes injects for spark-master.
+echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts
+echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
+echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
+
+# Keep the container alive so users can `kubectl exec` into it.
+while true; do
+  sleep 100
+done
diff --git a/spark/spark-driver.json b/spark/spark-driver.json
new file mode 100644
index 00000000..ee695eea
--- /dev/null
+++ b/spark/spark-driver.json
@@ -0,0 +1,23 @@
+{
+  "kind": "Pod",
+  "apiVersion": "v1",
+  "metadata": {
+    "name": "spark-driver",
+    "labels": {
+      "name": "spark-driver"
+    }
+  },
+  "spec": {
+    "containers": [
+      {
+        "name": "spark-driver",
+        "image": "gurvin/spark-driver",
+        "resources": {
+          "limits": {
+            "cpu": "100m"
+          }
+        }
+      }
+    ]
+  }
+}
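Note that `start.sh` reads `SPARK_MASTER_SERVICE_HOST` and `SPARK_MASTER_SERVICE_PORT`, the environment variables Kubernetes injects into pods for services that already exist; this is why the driver pod must be created after the Master service. A hypothetical check of that wiring from outside the pod (the addresses shown are illustrative, and `spark-env.sh` is assumed to start out empty in the base image):

```shell
$ kubectl exec spark-driver -- env | grep SPARK_MASTER_SERVICE
SPARK_MASTER_SERVICE_HOST=10.0.204.187
SPARK_MASTER_SERVICE_PORT=7077
$ kubectl exec spark-driver -- cat /opt/spark/conf/spark-env.sh
SPARK_LOCAL_HOSTNAME=10.244.0.23
MASTER=spark://spark-master:7077
```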