mirror of https://github.com/kubeflow/examples.git
Add a job to download the data to PVC. (#97)
* This is the first step to doing training and serving using a PV as opposed to GCS. * This will make the sample easier to run anywhere, and in particular on Katacoda. * This currently works as follows: the user creates a PVC (ks apply ${ENV} -c data-pvc), then runs a K8s job to download the data to the PVC (ks apply ${ENV} -c data-downloader). In subsequent PRs we will update the train and serve steps to load the model from the PVC as opposed to GCS. Related to #91
This commit is contained in:
parent
1a4f4dc1ea
commit
34d6f8809d
|
@ -0,0 +1,74 @@
|
||||||
|
// Run a job to download the data to a persistent volume.
|
||||||
|
//
|
||||||
|
local env = std.extVar("__ksonnet/environments");
|
||||||
|
local params = std.extVar("__ksonnet/params").components["data-pvc"];
|
||||||
|
local k = import "k.libsonnet";
|
||||||
|
|
||||||
|
|
||||||
|
local script = importstr "download_data.sh";
|
||||||
|
|
||||||
|
// ConfigMap that carries the download script as a single file entry, so the
// Job can mount it as a volume instead of baking it into an image.
local scriptConfigMap = {
  kind: "ConfigMap",
  apiVersion: "v1",
  metadata: { namespace: env.namespace, name: "downloader" },
  // Key is the file name the script gets when the ConfigMap is mounted.
  data: { "download_data.sh": script },
};
|
||||||
|
|
||||||
|
// Kubernetes Job that runs the download script and stores the result on the
// "data-pvc" PersistentVolumeClaim.
//
// The script comes from the scriptConfigMap ConfigMap mounted at scriptDir;
// the claim is mounted at /data.
local downLoader = {
  // Single definitions for values that were previously repeated as string
  // literals, so the command and the volume mount cannot drift apart.
  local scriptDir = "/scripts",
  local scriptName = "download_data.sh",
  local scriptVolume = "script",
  local dataVolume = "data",

  apiVersion: "batch/v1",
  kind: "Job",
  metadata: {
    name: "download-data",
    namespace: env.namespace,
  },
  spec: {
    backoffLimit: 4,
    template: {
      spec: {
        containers: [
          {
            // Run the script through ash explicitly: the file mounted from
            // the ConfigMap is passed to the interpreter as an argument.
            command: [
              "/bin/ash",
              scriptDir + "/" + scriptName,
            ],
            image: "busybox",
            name: "downloader",
            volumeMounts: [
              {
                name: scriptVolume,
                mountPath: scriptDir,
              },
              {
                name: dataVolume,
                mountPath: "/data",
              },
            ],
          },
        ],
        restartPolicy: "Never",
        volumes: [
          {
            name: scriptVolume,
            configMap: {
              // Follow the ConfigMap's own name rather than repeating it.
              name: scriptConfigMap.metadata.name,
            },
          },
          {
            name: dataVolume,
            persistentVolumeClaim: {
              claimName: "data-pvc",
            },
          },
        ],
      },
    },
  },
};
|
||||||
|
|
||||||
|
// Component output: the Job and the ConfigMap holding its script, wrapped by
// k.core.v1.list.new and pruned of empty members.
local downloaderResources = [downLoader, scriptConfigMap];
std.prune(k.core.v1.list.new(downloaderResources))
|
|
@ -0,0 +1,28 @@
|
||||||
|
// Create a PVC to store the data.
|
||||||
|
// This PVC can be used if you don't have access to an object store
|
||||||
|
// but your cluster has a default storage class
|
||||||
|
local env = std.extVar("__ksonnet/environments");
|
||||||
|
local params = std.extVar("__ksonnet/params").components["data-pvc"];
|
||||||
|
local k = import "k.libsonnet";
|
||||||
|
|
||||||
|
|
||||||
|
// PersistentVolumeClaim used to store the data.
//
// The requested size defaults to 10Gi and can be overridden by adding a
// "storage" entry to this component's params.
local pvc = {
  apiVersion: "v1",
  kind: "PersistentVolumeClaim",
  metadata: {
    name: "data-pvc",
    namespace: env.namespace,
  },
  spec: {
    accessModes: [
      "ReadWriteOnce",
    ],
    resources: {
      requests: {
        // Fall back to the original fixed size when no override is set.
        storage: if std.objectHas(params, "storage") then params.storage else "10Gi",
      },
    },
  },
};
|
||||||
|
|
||||||
|
// Component output: the claim wrapped by k.core.v1.list.new and pruned of
// empty members.
local pvcResources = [pvc];
std.prune(k.core.v1.list.new(pvcResources))
|
|
@ -0,0 +1,13 @@
|
||||||
|
#!/bin/sh
#
# Script to download the data.
#
# Downloads the GitHub issues archive into DATA_DIR and unpacks it there.
# The Job runs this script with busybox ash, so it uses only POSIX sh syntax
# and options that busybox wget/unzip accept. It can be re-run against a
# directory that already holds a partial or complete download.
set -ex

DATA_DIR=/data
ARCHIVE="${DATA_DIR}/github-issues.zip"

mkdir -p "${DATA_DIR}"

# -O overwrites an existing file. Without it busybox wget refuses to write to
# a file left behind by an earlier attempt, so a retried Job would keep failing.
wget -O "${ARCHIVE}" \
  https://storage.googleapis.com/kubeflow-examples/github-issue-summarization-data/github-issues.zip

# -o overwrites already-extracted files instead of prompting on stdin.
unzip -o -d "${DATA_DIR}" "${ARCHIVE}"
|
|
@ -6,6 +6,9 @@
|
||||||
components: {
|
components: {
|
||||||
// Component-level parameters, defined initially from 'ks prototype use ...'
|
// Component-level parameters, defined initially from 'ks prototype use ...'
|
||||||
// Each object below should correspond to a component in the components/ directory
|
// Each object below should correspond to a component in the components/ directory
|
||||||
|
"data-pvc": {
|
||||||
|
},
|
||||||
|
|
||||||
"kubeflow-core": {
|
"kubeflow-core": {
|
||||||
cloud: "null",
|
cloud: "null",
|
||||||
disks: "null",
|
disks: "null",
|
||||||
|
|
Loading…
Reference in New Issue