From 5102d9115e6c7eb8b32bbf8ded382a515c93cb72 Mon Sep 17 00:00:00 2001
From: Zach Loafman <zml@google.com>
Date: Sat, 14 Nov 2015 16:44:27 -0800
Subject: [PATCH] Zeppelin: Compress the image by doing the build on a separate
 image

Build Zeppelin in a separate zeppelin-build image, then pull the
resulting tarball through to the runtime image that needs it.

Addresses #17231
---
 spark/images/Makefile                  | 14 +++++--
 spark/images/zeppelin-build/Dockerfile | 54 ++++++++++++++++++++++++++
 spark/images/zeppelin/.gitignore       |  1 +
 spark/images/zeppelin/Dockerfile       | 48 ++---------------------
 spark/zeppelin-controller.yaml         |  2 +-
 5 files changed, 71 insertions(+), 48 deletions(-)
 create mode 100644 spark/images/zeppelin-build/Dockerfile
 create mode 100644 spark/images/zeppelin/.gitignore

diff --git a/spark/images/Makefile b/spark/images/Makefile
index 66364924..a115ff93 100644
--- a/spark/images/Makefile
+++ b/spark/images/Makefile
@@ -1,6 +1,6 @@
 all: push
 push: push-spark push-zeppelin
-.PHONY: push push-spark push-zeppelin spark zeppelin
+.PHONY: push push-spark push-zeppelin spark zeppelin zeppelin-build
 
 # To bump the Spark version, bump the version in base/Dockerfile, bump
 # the version in zeppelin/Dockerfile, bump this tag and reset to
@@ -12,7 +12,7 @@ TAG = 1.5.1_v2
 
 # To bump the Zeppelin version, bump the version in
 # zeppelin/Dockerfile and bump this tag and reset to v1.
-ZEPPELIN_TAG = v0.5.5_v1
+ZEPPELIN_TAG = v0.5.5_v2
 
 spark:
 	docker build -t gcr.io/google_containers/spark-base base
@@ -24,7 +24,15 @@ spark:
 	docker build -t gcr.io/google_containers/spark-driver driver
 	docker tag gcr.io/google_containers/spark-driver gcr.io/google_containers/spark-driver:$(TAG)
 
-zeppelin:
+zeppelin-build:
+	docker build -t gcr.io/google_containers/zeppelin-build zeppelin-build
+	docker tag -f gcr.io/google_containers/zeppelin-build gcr.io/google_containers/zeppelin-build:$(ZEPPELIN_TAG)
+
+zeppelin: zeppelin-build
+	docker rm -f zeppelin-build-tmp 2>/dev/null || true  # drop container left by a failed earlier run
+	docker create --name=zeppelin-build-tmp gcr.io/google_containers/zeppelin-build:$(ZEPPELIN_TAG)
+	docker cp zeppelin-build-tmp:/zeppelin.tgz zeppelin
+	docker rm -f zeppelin-build-tmp
 	docker build -t gcr.io/google_containers/zeppelin zeppelin
 	docker tag -f gcr.io/google_containers/zeppelin gcr.io/google_containers/zeppelin:$(ZEPPELIN_TAG)
 
diff --git a/spark/images/zeppelin-build/Dockerfile b/spark/images/zeppelin-build/Dockerfile
new file mode 100644
index 00000000..01f9c909
--- /dev/null
+++ b/spark/images/zeppelin-build/Dockerfile
@@ -0,0 +1,54 @@
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is the Zeppelin *build* image. Its only output is /zeppelin.tgz,
+# which the Makefile copies out of a container and adds to the actual
+# Zeppelin image.
+#
+# Based heavily on
+# https://github.com/dylanmei/docker-zeppelin/blob/master/Dockerfile
+# (which is similar to many others out there), but rebased onto the
+# maven image.
+
+FROM maven:3.3.3-jdk-8
+
+ENV ZEPPELIN_TAG  v0.5.5
+ENV SPARK_MINOR   1.5
+ENV SPARK_PATCH   1
+ENV SPARK_VER     ${SPARK_MINOR}.${SPARK_PATCH}
+ENV HADOOP_MINOR  2.6
+ENV HADOOP_PATCH  1
+ENV HADOOP_VER    ${HADOOP_MINOR}.${HADOOP_PATCH}
+
+# libfontconfig is a workaround for
+# https://github.com/karma-runner/karma/issues/1270, which caused a
+# build break similar to
+# https://www.mail-archive.com/users@zeppelin.incubator.apache.org/msg01586.html
+
+RUN apt-get update \
+  && apt-get install -y net-tools build-essential git wget unzip python python-setuptools python-dev python-numpy libfontconfig
+
+RUN git clone https://github.com/apache/incubator-zeppelin.git --branch ${ZEPPELIN_TAG} /opt/zeppelin
+RUN cd /opt/zeppelin && \
+  mvn clean package \
+    -Pbuild-distr \
+    -Pspark-${SPARK_MINOR} -Dspark.version=${SPARK_VER} \
+    -Phadoop-${HADOOP_MINOR} -Dhadoop.version=${HADOOP_VER} \
+    -Ppyspark \
+    -DskipTests && \
+  echo "Successfully built Zeppelin"
+
+RUN cd /opt/zeppelin/zeppelin-distribution/target/zeppelin-* && \
+  mv zeppelin-* zeppelin && \
+  tar cvzf /zeppelin.tgz zeppelin
diff --git a/spark/images/zeppelin/.gitignore b/spark/images/zeppelin/.gitignore
new file mode 100644
index 00000000..9fae9b4c
--- /dev/null
+++ b/spark/images/zeppelin/.gitignore
@@ -0,0 +1 @@
+zeppelin.tgz
diff --git a/spark/images/zeppelin/Dockerfile b/spark/images/zeppelin/Dockerfile
index 57c8d6d2..8b08d872 100644
--- a/spark/images/zeppelin/Dockerfile
+++ b/spark/images/zeppelin/Dockerfile
@@ -12,53 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Based heavily on
-# https://github.com/dylanmei/docker-zeppelin/blob/master/Dockerfile
-# (which is similar to many others out there), but rebased onto maven
-# image.
-#
-# This image is a composition of the official docker-maven
-# Docker image from https://github.com/carlossg/docker-maven/ and
-# spark-base.
+# This image relies on the zeppelin-build image to build the Zeppelin
+# binaries, and on the Makefile to copy zeppelin.tgz into this directory.
 
 FROM gcr.io/google_containers/spark-base:latest
 
-ENV ZEPPELIN_TAG  v0.5.5
-ENV MAVEN_VERSION 3.3.3
-ENV SPARK_MINOR   1.5
-ENV SPARK_PATCH   1
-ENV SPARK_VER     ${SPARK_MINOR}.${SPARK_PATCH}
-ENV HADOOP_MINOR  2.6
-ENV HADOOP_PATCH  1
-ENV HADOOP_VER    ${HADOOP_MINOR}.${HADOOP_PATCH}
-
-RUN curl -fsSL http://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz | tar xzf - -C /usr/share \
-  && mv /usr/share/apache-maven-${MAVEN_VERSION} /usr/share/maven \
-  && ln -s /usr/share/maven/bin/mvn /usr/bin/mvn
-
-ENV MAVEN_HOME /usr/share/maven
-
-# libfontconfig is a workaround for
-# https://github.com/karma-runner/karma/issues/1270, which caused a
-# build break similar to
-# https://www.mail-archive.com/users@zeppelin.incubator.apache.org/msg01586.html
-
-RUN apt-get update \
-  && apt-get install -y net-tools build-essential git wget unzip python python-setuptools python-dev python-numpy libfontconfig \
-  && apt-get clean \
-  && rm -rf /var/lib/apt/lists/*
-
-RUN git clone https://github.com/apache/incubator-zeppelin.git --branch ${ZEPPELIN_TAG} /opt/zeppelin
-RUN cd /opt/zeppelin && \
-  mvn clean package \
-    -Pspark-${SPARK_MINOR} -Dspark.version=${SPARK_VER} \
-    -Phadoop-${HADOOP_MINOR} -Dhadoop.version=${HADOOP_VER} \
-    -Ppyspark \
-    -DskipTests && \
-  rm -rf /root/.m2 && \
-  rm -rf /root/.npm && \
-  echo "Successfully built Zeppelin"
-
+# ADD auto-extracts the local tarball, producing /opt/zeppelin/
+ADD zeppelin.tgz /opt/
 ADD zeppelin-log4j.properties /opt/zeppelin/conf/log4j.properties
 ADD zeppelin-env.sh /opt/zeppelin/conf/zeppelin-env.sh
 ADD docker-zeppelin.sh /opt/zeppelin/bin/docker-zeppelin.sh
diff --git a/spark/zeppelin-controller.yaml b/spark/zeppelin-controller.yaml
index 9ef4a367..aac24a3e 100644
--- a/spark/zeppelin-controller.yaml
+++ b/spark/zeppelin-controller.yaml
@@ -13,7 +13,7 @@ spec:
     spec:
       containers:
         - name: zeppelin
-          image: gcr.io/google_containers/zeppelin:v0.5.5_v1
+          image: gcr.io/google_containers/zeppelin:v0.5.5_v2
           ports:
             - containerPort: 8080
           resources: