mirror of https://github.com/kubeflow/examples.git
Add a job to download the data to PVC. (#97)
* This is the first step to doing training and serving using a PV as opposed to GCS. * This will make the sample easier to run anywhere, and in particular on Katacoda. * This currently works as follows: the user creates a PVC (ks apply ${ENV} -c data-pvc), then the user runs a K8s job to download the data to the PVC (ks apply ${ENV} -c data-downloader). In subsequent PRs we will update the train and serve steps to load the model from the PVC as opposed to GCS. Related to #91
This commit is contained in:
parent
1a4f4dc1ea
commit
34d6f8809d
|
@ -0,0 +1,74 @@
|
|||
// Run a job to download the data to a persistent volume.
|
||||
//
|
||||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["data-pvc"];
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
|
||||
local script = importstr "download_data.sh";
|
||||
|
||||
// ConfigMap that carries the text of download_data.sh (loaded via importstr)
// so that it can be mounted into a pod as a file.
local scriptConfigMap = {
  // Key under which the script text is stored in the ConfigMap.
  local scriptFile = "download_data.sh",

  kind: "ConfigMap",
  apiVersion: "v1",
  metadata: {
    namespace: env.namespace,
    name: "downloader",
  },
  data: {
    [scriptFile]: script,
  },
};
|
||||
|
||||
// Batch Job that runs /scripts/download_data.sh with the "downloader"
// ConfigMap mounted at /scripts and the "data-pvc" claim mounted at /data.
local downLoader = {
  // Volume names shared between the pod's volume list and the container's mounts.
  local scriptVolume = "script",
  local dataVolume = "data",

  // The single container of the Job; the script is run explicitly with ash.
  local downloaderContainer = {
    name: "downloader",
    image: "busybox",
    command: ["/bin/ash", "/scripts/download_data.sh"],
    volumeMounts: [
      { name: scriptVolume, mountPath: "/scripts" },
      { name: dataVolume, mountPath: "/data" },
    ],
  },

  apiVersion: "batch/v1",
  kind: "Job",
  metadata: {
    name: "download-data",
    namespace: env.namespace,
  },
  spec: {
    backoffLimit: 4,
    template: {
      spec: {
        restartPolicy: "Never",
        containers: [downloaderContainer],
        volumes: [
          { name: scriptVolume, configMap: { name: "downloader" } },
          { name: dataVolume, persistentVolumeClaim: { claimName: "data-pvc" } },
        ],
      },
    },
  },
};
|
||||
|
||||
// Component output: the Job and the ConfigMap holding its script, wrapped
// together via k.core.v1.list.new and passed through std.prune.
std.prune(k.core.v1.list.new([downLoader, scriptConfigMap]))
|
|
@ -0,0 +1,28 @@
|
|||
// Create a PVC to store the data.
|
||||
// This PVC can be used if you don't have access to an object store
|
||||
// but your cluster has a default storage class
|
||||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["data-pvc"];
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
|
||||
// PersistentVolumeClaim used to store the data.
//
// The requested size defaults to "10Gi" and can be overridden by setting a
// `storage` parameter on the data-pvc component; with no such parameter the
// generated manifest is the same as before.
local pvc = {
  // Use the component parameter when present, otherwise the original size.
  local storageSize =
    if std.objectHas(params, "storage") then params.storage else "10Gi",

  apiVersion: "v1",
  kind: "PersistentVolumeClaim",
  metadata: {
    // Kept fixed: anything that mounts this claim must refer to it by this name.
    name: "data-pvc",
    namespace: env.namespace,
  },
  spec: {
    accessModes: [
      "ReadWriteOnce",
    ],
    resources: {
      requests: {
        storage: storageSize,
      },
    },
  },
};
|
||||
|
||||
// Component output: the claim wrapped via k.core.v1.list.new and passed
// through std.prune.
std.prune(k.core.v1.list.new([pvc]))
|
|
@ -0,0 +1,13 @@
|
|||
#!/bin/sh
#
# Script to download the data
#
# Downloads the github-issues archive into DATA_DIR and unpacks it there.
# Uses only POSIX shell features so it does not depend on bash being
# installed in the image that runs it.
set -ex

DATA_DIR=/data
ARCHIVE="${DATA_DIR}/github-issues.zip"

mkdir -p "${DATA_DIR}"

# Write to an explicit path so that a re-run replaces any archive left behind
# by an earlier (possibly failed) attempt instead of erroring out or saving
# the download under a different name.
wget -O "${ARCHIVE}" \
  https://storage.googleapis.com/kubeflow-examples/github-issue-summarization-data/github-issues.zip

# -o overwrites files extracted by a previous attempt without prompting.
unzip -o -d "${DATA_DIR}" "${ARCHIVE}"
|
|
@ -6,6 +6,9 @@
|
|||
components: {
|
||||
// Component-level parameters, defined initially from 'ks prototype use ...'
|
||||
// Each object below should correspond to a component in the components/ directory
|
||||
"data-pvc": {
|
||||
},
|
||||
|
||||
"kubeflow-core": {
|
||||
cloud: "null",
|
||||
disks: "null",
|
||||
|
|
Loading…
Reference in New Issue