Add a job to download the data to PVC. (#97)

* This is the first step to doing training and serving using a PV as opposed
  to GCS.

* This will make the sample easier to run anywhere and in particular on Katacoda.

* This currently would work as follows

User creates a PVC

ks apply ${ENV} -c data-pvc

User runs a K8s job to download the data to PVC

ks apply ${ENV} -c data-downloader

In subsequent PRs we will update the train and serve steps to load the
model from the PVC as opposed to GCS.

Related to #91
This commit is contained in:
Jeremy Lewi 2018-04-25 10:36:02 -07:00 committed by k8s-ci-robot
parent 1a4f4dc1ea
commit 34d6f8809d
4 changed files with 118 additions and 0 deletions

View File

@ -0,0 +1,74 @@
// Run a job to download the data to a persistent volume.
//
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["data-pvc"];
local k = import "k.libsonnet";
local script = importstr "download_data.sh";
// ConfigMap carrying the download script so the Job can mount it as a file.
local scriptConfigMap = {
  // Name of the data key; the Job invokes the script under this file name.
  local scriptName = "download_data.sh",

  kind: "ConfigMap",
  apiVersion: "v1",
  metadata: {
    namespace: env.namespace,
    name: "downloader",
  },
  data: {
    [scriptName]: script,
  },
};
// Batch Job that runs the mounted download script with the data PVC attached.
local downLoader = {
  // The single container: busybox running the script through ash.
  local container = {
    name: "downloader",
    image: "busybox",
    command: ["/bin/ash", "/scripts/download_data.sh"],
    volumeMounts: [
      { mountPath: "/scripts", name: "script" },
      { mountPath: "/data", name: "data" },
    ],
  },

  // Volumes backing the two mounts above: the script ConfigMap and the data PVC.
  local podVolumes = [
    { name: "script", configMap: { name: "downloader" } },
    { name: "data", persistentVolumeClaim: { claimName: "data-pvc" } },
  ],

  kind: "Job",
  apiVersion: "batch/v1",
  metadata: {
    namespace: env.namespace,
    name: "download-data",
  },
  spec: {
    backoffLimit: 4,
    template: {
      spec: {
        restartPolicy: "Never",
        containers: [container],
        volumes: podVolumes,
      },
    },
  },
};
// Component output: the Job and its script ConfigMap wrapped in a single list,
// with empty members pruned.
local resources = [downLoader, scriptConfigMap];
std.prune(k.core.v1.list.new(resources))

View File

@ -0,0 +1,28 @@
// Create a PVC to store the data.
// This PVC can be used if you don't have access to an object store
// but your cluster has a default storage class
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["data-pvc"];
local k = import "k.libsonnet";
// PersistentVolumeClaim that stores the downloaded data.
local pvc = {
  // Amount of storage requested for the claim.
  local requestedSize = "10Gi",

  kind: "PersistentVolumeClaim",
  apiVersion: "v1",
  metadata: {
    namespace: env.namespace,
    name: "data-pvc",
  },
  spec: {
    resources: {
      requests: { storage: requestedSize },
    },
    accessModes: ["ReadWriteOnce"],
  },
};
// Component output: the claim wrapped in a single list, with empty members pruned.
local resources = [pvc];
std.prune(k.core.v1.list.new(resources))

View File

@ -0,0 +1,13 @@
#!/bin/sh
#
# Script to download the data.
#
# Downloads the GitHub issues archive into DATA_DIR and unpacks it there.
# Uses only plain POSIX shell features: the downloader job runs this script
# with busybox's /bin/ash, where bash is not available.
#
# The script can be re-run against a volume that already holds the archive
# (e.g. when the job is retried): the download replaces the previous zip and
# unzip overwrites previously extracted files.
set -ex

DATA_DIR=/data
ARCHIVE="${DATA_DIR}/github-issues.zip"

mkdir -p "${DATA_DIR}"

# -O writes to an explicit path and truncates any existing file; without it
# busybox wget refuses to overwrite, so a retry would fail with "File exists".
wget -O "${ARCHIVE}" \
  https://storage.googleapis.com/kubeflow-examples/github-issue-summarization-data/github-issues.zip

# -o overwrites existing files instead of prompting, which cannot be answered
# in a non-interactive job.
unzip -o -d "${DATA_DIR}" "${ARCHIVE}"

View File

@ -6,6 +6,9 @@
components: {
// Component-level parameters, defined initially from 'ks prototype use ...'
// Each object below should correspond to a component in the components/ directory
"data-pvc": {
},
"kubeflow-core": {
cloud: "null",
disks: "null",