mirror of https://github.com/kubeflow/examples.git
GIS E2E test verify the TFJob runs successfully (#456)
* Create a test for submitting the TFJob for the GitHub issue summarization example. * This test needs to be run manually right now. In a follow-on PR we will integrate it into CI. * We use the image built from Dockerfile.estimator because that is the image we are running train_test.py in. * Note: The current version of the code now requires Python 3 (I think this is due to an earlier PR which refactored the code into a shared implementation used both with and without TF Estimator). * Create a TFJob component for TFJob v1beta1; this is the version in KF 0.4. TFJob component changes: * Upgrade to v1beta1 to work with 0.4 * Update command line arguments to match the versions in the current code * input & output are now single parameters rather than separate parameters for bucket and name * Change the default input to a CSV file because the current version of the code doesn't handle unzipping it. * Use ks_util from kubeflow/testing * Address comments.
This commit is contained in:
parent
959d072e68
commit
1cc4550b7d
|
@ -1,2 +0,0 @@
|
|||
{
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
// uncomment if you reference ksonnet-lib
|
||||
// local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n")
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params + {
|
||||
components+: {
|
||||
"tfjob-v1alpha2"+: {
|
||||
output_model_gcs_bucket: 'kubecon-gh-demo',
|
||||
output_model_gcs_path: 'gh-demo/20181008/output',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
components: {
|
||||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
{
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
// uncomment if you reference ksonnet-lib
|
||||
// local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n")
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params + {
|
||||
components+: {
|
||||
"tfjob-v1beta1"+: {
|
||||
name: 'jlewi-gis-test',
|
||||
namespace: 'kubeflow',
|
||||
num_epochs: 1,
|
||||
sample_size: 10,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
components: {
|
||||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
{
|
||||
outputGCSPath: 'gs://cloud-ml-dev_jlewi/gh-t2t-out/temp',
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
// uncomment if you reference ksonnet-lib
|
||||
// local k = import "k.libsonnet";
|
||||
|
||||
base {
|
||||
// Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n")
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params {
|
||||
components+: {},
|
||||
};
|
||||
|
||||
{
|
||||
components: {
|
||||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
{
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
// uncomment if you reference ksonnet-lib
|
||||
// local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n")
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params + {
|
||||
components+: {
|
||||
"tfjob-v1alpha2"+: {
|
||||
name: 'jlewi-gis-test',
|
||||
namespace: 'jlewi',
|
||||
},
|
||||
"tfjob-v1beta1"+: {
|
||||
name: 'jlewi-gis-test',
|
||||
namespace: 'kubeflow',
|
||||
num_epochs: 1,
|
||||
sample_size: 10,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
components: {
|
||||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
{
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
// uncomment if you reference ksonnet-lib
|
||||
// local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n")
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params + {
|
||||
components+: {
|
||||
"tfjob-v1beta1"+: {
|
||||
name: 'jlewi-gis-test',
|
||||
namespace: 'kubeflow',
|
||||
num_epochs: 1,
|
||||
sample_size: 10,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
components: {
|
||||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
{
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
// uncomment if you reference ksonnet-lib
|
||||
// local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n")
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params + {
|
||||
components+: {
|
||||
"tfjob-v1beta1"+: {
|
||||
name: 'jlewi-gis-test',
|
||||
namespace: 'jlewi',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
components: {
|
||||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
{
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
// uncomment if you reference ksonnet-lib
|
||||
// local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n")
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params + {
|
||||
components+: {
|
||||
"tfjob-v1alpha2"+: {
|
||||
name: 'jlewi-gis-test',
|
||||
namespace: 'jlewi',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
components: {
|
||||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
{
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
// uncomment if you reference ksonnet-lib
|
||||
// local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named \"nginx-deployment\", you might have something like:\n")
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params + {
|
||||
components+: {
|
||||
"tfjob-v1beta1"+: {
|
||||
name: 'jlewi-gis-test',
|
||||
namespace: 'kubeflow',
|
||||
num_epochs: 1,
|
||||
sample_size: 10,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
components: {
|
||||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
|
@ -1,129 +0,0 @@
|
|||
local k8s = import 'k8s.libsonnet';
|
||||
local fn = {
|
||||
mapContainers(f):: {
|
||||
local podContainers = super.spec.template.spec.containers,
|
||||
spec+: {
|
||||
template+: {
|
||||
spec+: {
|
||||
containers: std.map(f, podContainers),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
mapContainersWithName(names, f)::
|
||||
local nameSet = if std.type(names) == 'array' then std.set(names) else std.set([names]);
|
||||
local inNameSet(name) = std.length(std.setInter(nameSet, std.set([name]))) > 0;
|
||||
|
||||
self.mapContainers(function(c) if std.objectHas(c, 'name') && inNameSet(c.name) then f(c) else c),
|
||||
};
|
||||
|
||||
k8s + {
|
||||
apps:: k8s.apps + {
|
||||
v1:: k8s.apps.v1 + {
|
||||
daemonSet:: k8s.apps.v1.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.apps.v1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.apps.v1.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta1:: k8s.apps.v1beta1 + {
|
||||
deployment:: k8s.apps.v1beta1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1beta1.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta2:: k8s.apps.v1beta2 + {
|
||||
daemonSet:: k8s.apps.v1beta2.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.apps.v1beta2.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.apps.v1beta2.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1beta2.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
batch:: k8s.batch + {
|
||||
v1:: k8s.batch.v1 + {
|
||||
job:: k8s.batch.v1.job + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta1:: k8s.batch.v1beta1 + {
|
||||
cronJob:: k8s.batch.v1beta1.cronJob + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v2alpha1:: k8s.batch.v2alpha1 + {
|
||||
cronJob:: k8s.batch.v2alpha1.cronJob + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
core:: k8s.core + {
|
||||
v1:: k8s.core.v1 + {
|
||||
list:: {
|
||||
new(items):: {
|
||||
apiVersion: 'v1',
|
||||
} + {
|
||||
kind: 'List',
|
||||
} + self.items(items),
|
||||
items(items):: if std.type(items) == 'array' then { items+: items } else { items+: [items] },
|
||||
},
|
||||
pod:: k8s.core.v1.pod + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
podTemplate:: k8s.core.v1.podTemplate + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicationController:: k8s.core.v1.replicationController + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
extensions:: k8s.extensions + {
|
||||
v1beta1:: k8s.extensions.v1beta1 + {
|
||||
daemonSet:: k8s.extensions.v1beta1.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.extensions.v1beta1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.extensions.v1beta1.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,129 +0,0 @@
|
|||
local k8s = import 'k8s.libsonnet';
|
||||
local fn = {
|
||||
mapContainers(f):: {
|
||||
local podContainers = super.spec.template.spec.containers,
|
||||
spec+: {
|
||||
template+: {
|
||||
spec+: {
|
||||
containers: std.map(f, podContainers),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
mapContainersWithName(names, f)::
|
||||
local nameSet = if std.type(names) == 'array' then std.set(names) else std.set([names]);
|
||||
local inNameSet(name) = std.length(std.setInter(nameSet, std.set([name]))) > 0;
|
||||
|
||||
self.mapContainers(function(c) if std.objectHas(c, 'name') && inNameSet(c.name) then f(c) else c),
|
||||
};
|
||||
|
||||
k8s + {
|
||||
apps:: k8s.apps + {
|
||||
v1:: k8s.apps.v1 + {
|
||||
daemonSet:: k8s.apps.v1.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.apps.v1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.apps.v1.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta1:: k8s.apps.v1beta1 + {
|
||||
deployment:: k8s.apps.v1beta1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1beta1.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta2:: k8s.apps.v1beta2 + {
|
||||
daemonSet:: k8s.apps.v1beta2.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.apps.v1beta2.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.apps.v1beta2.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1beta2.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
batch:: k8s.batch + {
|
||||
v1:: k8s.batch.v1 + {
|
||||
job:: k8s.batch.v1.job + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta1:: k8s.batch.v1beta1 + {
|
||||
cronJob:: k8s.batch.v1beta1.cronJob + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v2alpha1:: k8s.batch.v2alpha1 + {
|
||||
cronJob:: k8s.batch.v2alpha1.cronJob + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
core:: k8s.core + {
|
||||
v1:: k8s.core.v1 + {
|
||||
list:: {
|
||||
new(items):: {
|
||||
apiVersion: 'v1',
|
||||
} + {
|
||||
kind: 'List',
|
||||
} + self.items(items),
|
||||
items(items):: if std.type(items) == 'array' then { items+: items } else { items+: [items] },
|
||||
},
|
||||
pod:: k8s.core.v1.pod + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
podTemplate:: k8s.core.v1.podTemplate + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicationController:: k8s.core.v1.replicationController + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
extensions:: k8s.extensions + {
|
||||
v1beta1:: k8s.extensions.v1beta1 + {
|
||||
daemonSet:: k8s.extensions.v1beta1.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.extensions.v1beta1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.extensions.v1beta1.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,129 +0,0 @@
|
|||
local k8s = import 'k8s.libsonnet';
|
||||
local fn = {
|
||||
mapContainers(f):: {
|
||||
local podContainers = super.spec.template.spec.containers,
|
||||
spec+: {
|
||||
template+: {
|
||||
spec+: {
|
||||
containers: std.map(f, podContainers),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
mapContainersWithName(names, f)::
|
||||
local nameSet = if std.type(names) == 'array' then std.set(names) else std.set([names]);
|
||||
local inNameSet(name) = std.length(std.setInter(nameSet, std.set([name]))) > 0;
|
||||
|
||||
self.mapContainers(function(c) if std.objectHas(c, 'name') && inNameSet(c.name) then f(c) else c),
|
||||
};
|
||||
|
||||
k8s + {
|
||||
apps:: k8s.apps + {
|
||||
v1:: k8s.apps.v1 + {
|
||||
daemonSet:: k8s.apps.v1.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.apps.v1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.apps.v1.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta1:: k8s.apps.v1beta1 + {
|
||||
deployment:: k8s.apps.v1beta1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1beta1.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta2:: k8s.apps.v1beta2 + {
|
||||
daemonSet:: k8s.apps.v1beta2.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.apps.v1beta2.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.apps.v1beta2.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
statefulSet:: k8s.apps.v1beta2.statefulSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
batch:: k8s.batch + {
|
||||
v1:: k8s.batch.v1 + {
|
||||
job:: k8s.batch.v1.job + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v1beta1:: k8s.batch.v1beta1 + {
|
||||
cronJob:: k8s.batch.v1beta1.cronJob + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
v2alpha1:: k8s.batch.v2alpha1 + {
|
||||
cronJob:: k8s.batch.v2alpha1.cronJob + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
core:: k8s.core + {
|
||||
v1:: k8s.core.v1 + {
|
||||
list:: {
|
||||
new(items):: {
|
||||
apiVersion: 'v1',
|
||||
} + {
|
||||
kind: 'List',
|
||||
} + self.items(items),
|
||||
items(items):: if std.type(items) == 'array' then { items+: items } else { items+: [items] },
|
||||
},
|
||||
pod:: k8s.core.v1.pod + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
podTemplate:: k8s.core.v1.podTemplate + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicationController:: k8s.core.v1.replicationController + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
extensions:: k8s.extensions + {
|
||||
v1beta1:: k8s.extensions.v1beta1 + {
|
||||
daemonSet:: k8s.extensions.v1beta1.daemonSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
deployment:: k8s.extensions.v1beta1.deployment + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
replicaSet:: k8s.extensions.v1beta1.replicaSet + {
|
||||
mapContainers(f):: fn.mapContainers(f),
|
||||
mapContainersWithName(names, f):: fn.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -25,12 +25,11 @@
|
|||
},
|
||||
"tfjob": {
|
||||
name: "tfjob-issue-summarization",
|
||||
image: "gcr.io/kubeflow-examples/tf-job-issue-summarization:v20180629-v0.1-2-g98ed4b4-dirty-182929",
|
||||
input_data_gcs_bucket: "kubeflow-examples",
|
||||
input_data_gcs_path: "github-issue-summarization-data/github-issues.zip",
|
||||
output_model_gcs_bucket: "kubeflow-examples",
|
||||
output_model_gcs_path: "github-issue-summarization-data",
|
||||
image: "gcr.io/kubeflow-examples/github-issue-summarization/trainer-estimator:v20181229-v0.2-131-g662c666-dirty-312900",
|
||||
input_data: "gs://kubeflow-examples/github-issue-summarization-data/github_issues_sample.csv",
|
||||
output_model: "/tmp/model.h5",
|
||||
sample_size: "100000",
|
||||
num_epochs: "7",
|
||||
gcpSecretName: "user-gcp-sa",
|
||||
gcpSecretFile: "user-gcp-sa.json",
|
||||
},
|
||||
|
|
|
@ -14,6 +14,7 @@ local tfjob = {
|
|||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
tTLSecondsAfterFinished: 60 * 60 * 24 * 7,
|
||||
tfReplicaSpecs: {
|
||||
Master: {
|
||||
replicas: 1,
|
||||
|
@ -32,15 +33,13 @@ local tfjob = {
|
|||
],
|
||||
command: [
|
||||
"python",
|
||||
],
|
||||
args: [
|
||||
"/workdir/train.py",
|
||||
"train.py",
|
||||
"--num_epochs=" + std.toString(params.num_epochs),
|
||||
"--sample_size=" + std.toString(params.sample_size),
|
||||
"--input_data_gcs_bucket=" + params.input_data_gcs_bucket,
|
||||
"--input_data_gcs_path=" + params.input_data_gcs_path,
|
||||
"--output_model_gcs_bucket=" + params.output_model_gcs_bucket,
|
||||
"--output_model_gcs_path=" + params.output_model_gcs_path,
|
||||
"--input_data=" + params.input_data,
|
||||
"--output_model=" + params.output_model,
|
||||
],
|
||||
workingDir: "/issues",
|
||||
env: [
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
|
@ -68,3 +67,4 @@ local tfjob = {
|
|||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
||||
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
# TODO(jlewi): Can we merge with Dockerfile?
|
||||
# This Dockerfile is used for training with TF.Estimator.
|
||||
# This Dockerfile is used for training.
|
||||
# We can probably use the same notebook Docker image if
|
||||
# we just upgrade the notebook version.
|
||||
# we just upgrade the notebook version. The conda environments
|
||||
# however complicate things so it might be simpler just to
|
||||
# have a separate image.
|
||||
FROM python:3.6
|
||||
|
||||
# TODO(jlewi): We should probably pin version of TF and other libraries.
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
"""Test training using TFJob.
|
||||
|
||||
This file tests that we can submit the job from ksonnet
|
||||
and that the job runs to completion.
|
||||
|
||||
It is an integration test as it depends on having access to
|
||||
a Kubeflow deployment to submit the TFJob to.
|
||||
|
||||
Python Path Requirements:
|
||||
kubeflow/tf-operator/py - https://github.com/kubeflow/tf-operator
|
||||
* Provides utilities for testing TFJobs
|
||||
kubeflow/testing/py - https://github.com/kubeflow/testing/tree/master/py
|
||||
* Provides utilities for testing
|
||||
|
||||
Manually running the test
|
||||
1. Configure your KUBECONFIG file to point to the desired cluster
|
||||
2. Set --params=name=${NAME},namespace=${NAMESPACE}
|
||||
* name should be the name for your job
|
||||
* namespace should be the namespace to use
|
||||
3. To test a new image set the parameter image e.g
|
||||
--params=name=${NAME},namespace=${NAMESPACE},image=${IMAGE}
|
||||
4. To control how long it trains set sample_size and num_epochs
|
||||
--params=num_epochs=1,sample_size=10,...
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
from kubernetes import client as k8s_client
|
||||
from py import tf_job_client
|
||||
from py import test_runner
|
||||
|
||||
from kubeflow.testing import ks_util
|
||||
from kubeflow.testing import test_util
|
||||
from kubeflow.testing import util
|
||||
|
||||
class TFJobTest(test_util.TestCase):
  """Integration test that submits the "tfjob" ksonnet component and waits for it.

  The test applies the component from a ksonnet app directory to the cluster
  selected by the current KUBECONFIG and then polls the resulting TFJob until
  it finishes. A job that does not succeed is reported via ``self.failure``.
  """

  def __init__(self, args):
    """Capture the runtime parameters needed to deploy the TFJob.

    Args:
      args: Parsed command line arguments. This method reads ``args.app_dir``
        and ``args.params`` and passes ``args`` to
        ``test_runner.parse_runtime_params`` to obtain the namespace, job name
        and ksonnet environment.
    """
    namespace, name, env = test_runner.parse_runtime_params(args)
    self.app_dir = args.app_dir

    if not self.app_dir:
      # Fall back to the ks_app directory that sits next to this test's
      # parent directory.
      self.app_dir = os.path.join(os.path.dirname(__file__), "..",
                                  "ks_app")
      self.app_dir = os.path.abspath(self.app_dir)
      logging.info("--app_dir not set defaulting to: %s", self.app_dir)

    self.env = env
    self.namespace = namespace
    self.params = args.params
    self.ks_cmd = ks_util.get_ksonnet_cmd(self.app_dir)
    # NOTE(review): self.name, used in test_train, is presumably set by the
    # base class from the name= argument -- confirm against test_util.TestCase.
    super(TFJobTest, self).__init__(class_name="TFJobTest", name=name)

  def test_train(self):
    """Deploy the TFJob once and verify that it runs to completion.

    On failure the reason is stored in ``self.failure`` and logged; the method
    then returns without raising.
    """
    # The job is submitted exactly once. Finished jobs are not deleted here
    # (see the note at the end of this method).
    api_client = k8s_client.ApiClient()

    component = "tfjob"
    # Setup the ksonnet app
    ks_util.setup_ks_app(self.app_dir, self.env, self.namespace, component,
                         self.params)

    # Create the TF job
    util.run([self.ks_cmd, "apply", self.env, "-c", component],
             cwd=self.app_dir)
    logging.info("Created job %s in namespace %s", self.name, self.namespace)

    # Wait for the job to complete.
    logging.info("Waiting for job to finish.")
    results = tf_job_client.wait_for_job(
      api_client,
      self.namespace,
      self.name,
      status_callback=tf_job_client.log_status)
    logging.info("Final TFJob:\n %s", json.dumps(results, indent=2))

    # Check for errors creating pods and services. Can potentially
    # help debug failed test runs.
    creation_failures = tf_job_client.get_creation_failures_from_tfjob(
      api_client, self.namespace, results)
    if creation_failures:
      logging.warning(creation_failures)

    if not tf_job_client.job_succeeded(results):
      self.failure = "Job {0} in namespace {1} in status {2}".format(  # pylint: disable=attribute-defined-outside-init
        self.name, self.namespace, results.get("status", {}))
      logging.error(self.failure)
      return

    # We don't delete the jobs. We rely on TTLSecondsAfterFinished
    # to delete old jobs. Leaving jobs around should make it
    # easier to debug.
|
||||
|
||||
# Script entry point: hand control to the shared test runner, which is given
# this module's name.
if __name__ == "__main__":
  test_runner.main(module=__name__)
|
|
@ -15,13 +15,23 @@ local defaultParams = {
|
|||
dataVolume: "kubeflow-test-volume",
|
||||
|
||||
// Default step image:
|
||||
stepImage: "gcr.io/kubeflow-ci/test-worker:v20181017-bfeaaf5-dirty-4adcd0",
|
||||
stepImage: "gcr.io/kubeflow-ci/test-worker:v20190104-f2a1cdf-e3b0c4",
|
||||
|
||||
// Which Kubeflow cluster to use for running TFJobs on.
|
||||
kfProject: "kubeflow-ci",
|
||||
kfZone: "us-east1-d",
|
||||
kfCluster: "kf-v0-4-n00",
|
||||
};
|
||||
|
||||
local params = defaultParams + overrides;
|
||||
|
||||
local prowEnv = util.parseEnv(params.prow_env);
|
||||
|
||||
// Workflow template is the name of the workflow template; typically the name of the ks component.
|
||||
// This is used as a label to make it easy to identify all Argo workflows created from a given
|
||||
// template.
|
||||
local workflow_template = "gis";
|
||||
|
||||
// Create a dictionary of the different prow variables so we can refer to them in the workflow.
|
||||
//
|
||||
// Important: We want to initialize all variables we reference to some value. If we don't
|
||||
|
@ -56,11 +66,14 @@ local srcRootDir = testDir + "/src";
|
|||
// The directory containing the kubeflow/kubeflow repo
|
||||
local srcDir = srcRootDir + "/" + prowDict.REPO_OWNER + "/" + prowDict.REPO_NAME;
|
||||
|
||||
// value of KUBECONFIG environment variable. This should be a full path.
|
||||
local kubeConfig = testDir + "/.kube/kubeconfig";
|
||||
|
||||
// These variables control where the docker images get pushed and what
|
||||
// tag to use
|
||||
local imageBase = "gcr.io/kubeflow-ci/github-issue-summarization";
|
||||
local imageTag = "build-" + prowDict["BUILD_ID"];
|
||||
local trainerImage = imageBase + "/trainer-estimator:" + imageTag;
|
||||
|
||||
// Build template is a template for constructing Argo step templates.
|
||||
//
|
||||
|
@ -89,9 +102,18 @@ local buildTemplate = {
|
|||
// py scripts to use.
|
||||
local kubeflowTestingPy = srcRootDir + "/kubeflow/testing/py",
|
||||
|
||||
local tfOperatorPy = srcRootDir + "/kubeflow/tf-operator",
|
||||
|
||||
// Actual template for Argo
|
||||
argoTemplate: {
|
||||
name: template.name,
|
||||
metadata: {
|
||||
labels: prowDict + {
|
||||
workflow: params.name,
|
||||
workflow_template: workflow_template,
|
||||
step_name: + template.name,
|
||||
},
|
||||
},
|
||||
container: {
|
||||
command: template.command,
|
||||
name: template.name,
|
||||
|
@ -101,7 +123,7 @@ local buildTemplate = {
|
|||
{
|
||||
// Add the source directories to the python path.
|
||||
name: "PYTHONPATH",
|
||||
value: kubeflowTestingPy,
|
||||
value: kubeflowTestingPy + ":" + tfOperatorPy,
|
||||
},
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
|
@ -116,6 +138,12 @@ local buildTemplate = {
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
// We use a directory in our NFS share to store our kube config.
|
||||
// This way we can configure it on a single step and reuse it on subsequent steps.
|
||||
name: "KUBECONFIG",
|
||||
value: kubeConfig,
|
||||
},
|
||||
] + prowEnv + template.env_vars,
|
||||
volumeMounts: [
|
||||
{
|
||||
|
@ -135,7 +163,6 @@ local buildTemplate = {
|
|||
},
|
||||
}; // buildTemplate
|
||||
|
||||
|
||||
// Create a list of dictionaries.
|
||||
// Each item is a dictionary describing one step in the graph.
|
||||
local dagTemplates = [
|
||||
|
@ -147,7 +174,9 @@ local dagTemplates = [
|
|||
|
||||
env_vars: [{
|
||||
name: "EXTRA_REPOS",
|
||||
value: "kubeflow/testing@HEAD",
|
||||
// tf-operator has utilities needed for testing TFJobs.
|
||||
// TODO(jlewi): Update extra repos once kubeflow/testing#271 is merged.
|
||||
value: "kubeflow/testing@HEAD:274;kubeflow/tf-operator@HEAD",
|
||||
}],
|
||||
},
|
||||
dependencies: null,
|
||||
|
@ -199,11 +228,50 @@ local dagTemplates = [
|
|||
"train_test.py",
|
||||
],
|
||||
// Use the newly built image.
|
||||
image: imageBase + "/trainer-estimator:" + imageTag,
|
||||
image: trainerImage,
|
||||
workingDir: "/issues",
|
||||
},
|
||||
dependencies: ["build-images"],
|
||||
}, // train-test
|
||||
{
|
||||
// Configure KUBECONFIG
|
||||
template: buildTemplate {
|
||||
name: "get-kubeconfig",
|
||||
command: util.buildCommand([
|
||||
[
|
||||
"gcloud",
|
||||
"auth",
|
||||
"activate-service-account",
|
||||
"--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
|
||||
],
|
||||
[
|
||||
"gcloud",
|
||||
"--project=" + params.kfProject,
|
||||
"container",
|
||||
"clusters",
|
||||
"get-credentials",
|
||||
"--zone=" + params.kfZone,
|
||||
params.kfCluster,
|
||||
]]
|
||||
),
|
||||
workingDir: srcDir + "/github_issue_summarization",
|
||||
},
|
||||
dependencies: ["checkout"],
|
||||
}, // get-kubeconfig
|
||||
{
|
||||
// Run the python test for TFJob
|
||||
template: buildTemplate {
|
||||
name: "tfjob-test",
|
||||
command: [
|
||||
"python",
|
||||
"tfjob_test.py",
|
||||
"--params=name=gis-test-" + prowDict["BUILD_ID"] + ",namespace=kubeflow,num_epochs=1,sample_size=10,image=" + trainerImage,
|
||||
"--artifacts_path=" + artifactsDir,
|
||||
],
|
||||
workingDir: srcDir + "/github_issue_summarization/testing",
|
||||
},
|
||||
dependencies: ["build-images", "get-kubeconfig"],
|
||||
}, // tfjob-test
|
||||
];
|
||||
|
||||
// Dag defines the tasks in the graph
|
||||
|
@ -282,11 +350,8 @@ local workflow = {
|
|||
metadata: {
|
||||
name: params.name,
|
||||
namespace: env.namespace,
|
||||
labels: {
|
||||
org: prowDict.REPO_OWNER,
|
||||
repo: prowDict.REPO_NAME,
|
||||
workflow: "gis",
|
||||
[if std.objectHas(prowDict, "PULL_NUMBER") then "pr"]: prowDict.PULL_NUMBER,
|
||||
labels: prowDict + {
|
||||
workflow_template: workflow_template,
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
|
|
|
@ -9,8 +9,8 @@ local envParams = params + {
|
|||
},
|
||||
gis+: {
|
||||
namespace: 'kubeflow-test-infra',
|
||||
name: 'jlewi-gis-search-test-449-1228-184223',
|
||||
prow_env: 'JOB_NAME=gis-search-test,JOB_TYPE=presubmit,REPO_NAME=examples,REPO_OWNER=kubeflow,BUILD_NUMBER=1228-184223,BUILD_ID=1228-184223,PULL_NUMBER=449',
|
||||
name: 'jlewi-gis-search-test-456-0105-104058',
|
||||
prow_env: 'JOB_NAME=gis-search-test,JOB_TYPE=presubmit,REPO_NAME=examples,REPO_OWNER=kubeflow,BUILD_NUMBER=0105-104058,BUILD_ID=0105-104058,PULL_NUMBER=456',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue