mirror of https://github.com/kubeflow/examples.git
196 lines
7.4 KiB
Django/Jinja
196 lines
7.4 KiB
Django/Jinja
{#
|
|
Copyright 2016 Google Inc. All rights reserved.
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
#}
|
|
|
|
|
|
{# Base names derived from the deployment name.

   CLUSTER_NAME reuses the deployment name unchanged. Each pool name is the
   deployment name, a pool-specific infix, and properties['pool-version'].

   The pieces are joined with `~` rather than `+`: `~` converts both operands
   to strings first, so a numeric pool-version (for example 1) renders
   instead of failing with a type error. For string values the result is
   identical to `+`.
#}
{% set NAME_PREFIX = env['deployment'] %}
{% set CLUSTER_NAME = NAME_PREFIX %}
{% set CPU_POOL = NAME_PREFIX ~ '-cpu-pool-' ~ properties['pool-version'] %}
{% set GPU_POOL = NAME_PREFIX ~ '-gpu-pool-' ~ properties['pool-version'] %}
{% set LARGE_POOL = NAME_PREFIX ~ '-large-pool-' ~ properties['pool-version'] %}
|
|
|
|
{# Type names are the names to give to deployment manager type providers
|
|
that will be created to represent Kubernetes objects.
|
|
There is a type corresponding to each API endpoint.
|
|
#}
|
|
|
|
{# OAuth scopes passed as `oauthScopes` in every node pool config below:
   log writing, monitoring, and read-only devstorage access. #}
{% set VM_OAUTH_SCOPES = [
  'https://www.googleapis.com/auth/logging.write',
  'https://www.googleapis.com/auth/monitoring',
  'https://www.googleapis.com/auth/devstorage.read_only'
] %}
|
|
|
|
{# Service account names, each built from the deployment name plus a suffix:
     -admin  used for admin tasks.
     -user   used by users for actual jobs.
     -vm     the VM service account attached to the GKE VMs.
#}
{% set KF_ADMIN_NAME = NAME_PREFIX + '-admin' %}
{% set KF_USER_NAME = NAME_PREFIX + '-user' %}
{% set KF_VM_SA_NAME = NAME_PREFIX + '-vm' %}
|
|
|
|
resources:
|
|
# Service account for Kubeflow admin actions.
- name: {{ KF_ADMIN_NAME }}
  type: iam.v1.serviceAccount
  properties:
    accountId: {{ KF_ADMIN_NAME }}
    displayName: Service Account used for Kubeflow admin actions.

# Service account for Kubeflow user actions.
- name: {{ KF_USER_NAME }}
  type: iam.v1.serviceAccount
  properties:
    accountId: {{ KF_USER_NAME }}
    displayName: Service Account used for Kubeflow user actions.

# Service account that the node pools below set as their VM service account.
- name: {{ KF_VM_SA_NAME }}
  type: iam.v1.serviceAccount
  properties:
    accountId: {{ KF_VM_SA_NAME }}
    displayName: GCP Service Account to use as VM Service Account for Kubeflow Cluster VMs
|
|
|
|
# The GKE cluster, declared together with its default CPU node pool.
- name: {{ CLUSTER_NAME }}
  # The resource type follows the requested GKE API version.
  {% if properties['gkeApiVersion'] == 'v1beta1' %}
  type: gcp-types/container-v1beta1:projects.locations.clusters
  {% else %}
  type: container.v1.cluster
  {% endif %}
  properties:
    parent: projects/{{ env['project'] }}/locations/{{ properties['zone'] }}
    zone: {{ properties['zone'] }}
    cluster:
      name: {{ CLUSTER_NAME }}
      # Quoted so that a version such as 1.10 stays the string "1.10" rather
      # than being parsed by YAML as the float 1.1.
      initialClusterVersion: "{{ properties['cluster-version'] }}"
      # We need 1.10.2 to support Stackdriver GKE.
      # loggingService: none
      # monitoringService: none
      # Pod security policy is only emitted for the v1beta1 API.
      {% if properties['gkeApiVersion'] == 'v1beta1' %}
      podSecurityPolicyConfig:
        enabled: {{ properties['securityConfig']['podSecurityPolicy'] }}
      {% endif %}
      # Private-cluster settings, emitted only when securityConfig.privatecluster is set.
      {% if properties['securityConfig']['privatecluster'] %}
      ipAllocationPolicy:
        createSubnetwork: true
        useIpAliases: true
      masterIpv4CidrBlock: {{ properties['securityConfig']['masterIpv4CidrBlock'] }}
      privateCluster: true
      masterAuthorizedNetworksConfig:
        enabled: {{ properties['securityConfig']['masterAuthorizedNetworksConfigEnabled'] }}
        {% if properties['securityConfig']['masterAuthorizedNetworksConfigEnabled'] %}
        # The property is rendered verbatim, so it must already be a list value.
        cidrBlocks:
          {{ properties['securityConfig']['masterAuthorizedNetworksConfigCidr'] }}
        {% endif %}
      {% endif %}
      nodePools:
      - name: default-pool
        initialNodeCount: {{ properties['cpu-pool-initialNodeCount'] }}
        autoscaling:
          enabled: {{ properties['cpu-pool-enable-autoscaling'] }}
          # Node-count bounds are only emitted when autoscaling is enabled.
          {% if properties['cpu-pool-enable-autoscaling'] %}
          minNodeCount: {{ properties['cpu-pool-min-nodes'] }}
          maxNodeCount: {{ properties['cpu-pool-max-nodes'] }}
          {% endif %}
        config:
          {% if properties['securityConfig']['secureNodeMetadata'] %}
          workloadMetadataConfig:
            nodeMetadata: SECURE
          {% endif %}
          machineType: n1-standard-8
          serviceAccount: {{ KF_VM_SA_NAME }}@{{ env['project'] }}.iam.gserviceaccount.com
          oauthScopes: {{ VM_OAUTH_SCOPES }}
          # Set min cpu platform to ensure AVX2 is supported.
          minCpuPlatform: 'Intel Haswell'
  metadata:
    # The nodes run as the VM service account, so it is listed as a dependency.
    dependsOn:
    - {{ KF_VM_SA_NAME }}
|
|
|
|
# We manage the node pools as separate resources.
|
|
# We do this so that if we want to make changes we can delete the existing resource and then recreate it.
|
|
# Updating doesn't work so well because we are limited in what changes GKE's update method supports.
|
|
|
|
# GPU node pool (named gpu-pool in the cluster), declared as its own resource.
- name: {{ GPU_POOL }}
  # The resource type follows the requested GKE API version.
  {% if properties['gkeApiVersion'] == 'v1beta1' %}
  type: gcp-types/container-v1beta1:projects.locations.clusters.nodePools
  {% else %}
  type: container.v1.nodePool
  {% endif %}
  properties:
    parent: projects/{{ env['project'] }}/locations/{{ properties['zone'] }}/clusters/{{ CLUSTER_NAME }}
    # NOTE(review): project is read from securityConfig while `parent` uses
    # env['project'] - confirm securityConfig.project is always supplied.
    project: {{ properties['securityConfig']['project'] }}
    zone: {{ properties['zone'] }}
    clusterId: {{ CLUSTER_NAME }}
    nodePool:
      name: gpu-pool
      initialNodeCount: {{ properties['gpu-pool-initialNodeCount'] }}
      autoscaling:
        enabled: {{ properties['gpu-pool-enable-autoscaling'] }}
        # Node-count bounds are only emitted when autoscaling is enabled.
        {% if properties['gpu-pool-enable-autoscaling'] %}
        minNodeCount: {{ properties['gpu-pool-min-nodes'] }}
        maxNodeCount: {{ properties['gpu-pool-max-nodes'] }}
        {% endif %}
      config:
        {% if properties['securityConfig']['secureNodeMetadata'] %}
        workloadMetadataConfig:
          nodeMetadata: SECURE
        {% endif %}
        machineType: n1-standard-8
        serviceAccount: {{ KF_VM_SA_NAME }}@{{ env['project'] }}.iam.gserviceaccount.com
        oauthScopes: {{ VM_OAUTH_SCOPES }}
        # Set min cpu platform to ensure AVX2 is supported.
        minCpuPlatform: 'Intel Haswell'
        accelerators:
        - acceleratorCount: 1
          acceleratorType: nvidia-tesla-k80

  metadata:
    dependsOn:
    # We can only create 1 node pool at a time.
    - {{ CLUSTER_NAME }}
|
|
|
|
# High-memory node pool (named large-pool in the cluster): creating the
# search index requires a lot of memory.
- name: {{ LARGE_POOL }}
  # The resource type follows the requested GKE API version.
  {% if properties['gkeApiVersion'] == 'v1beta1' %}
  type: gcp-types/container-v1beta1:projects.locations.clusters.nodePools
  {% else %}
  type: container.v1.nodePool
  {% endif %}
  properties:
    parent: projects/{{ env['project'] }}/locations/{{ properties['zone'] }}/clusters/{{ CLUSTER_NAME }}
    # NOTE(review): project is read from securityConfig while `parent` uses
    # env['project'] - confirm securityConfig.project is always supplied.
    project: {{ properties['securityConfig']['project'] }}
    zone: {{ properties['zone'] }}
    clusterId: {{ CLUSTER_NAME }}
    nodePool:
      name: large-pool
      # NOTE(review): the pool starts at 0 nodes, below minNodeCount (1) -
      # confirm this is intentional.
      initialNodeCount: 0
      autoscaling:
        enabled: true
        minNodeCount: 1
        maxNodeCount: 10
      config:
        {% if properties['securityConfig']['secureNodeMetadata'] %}
        workloadMetadataConfig:
          nodeMetadata: SECURE
        {% endif %}
        machineType: n1-standard-32
        serviceAccount: {{ KF_VM_SA_NAME }}@{{ env['project'] }}.iam.gserviceaccount.com
        oauthScopes: {{ VM_OAUTH_SCOPES }}
        # Set min cpu platform to ensure AVX2 is supported.
        minCpuPlatform: 'Intel Haswell'

  metadata:
    dependsOn:
    # We can only create 1 node pool at a time.
    - {{ GPU_POOL }}
|
|
|
|
{# Global static address named by properties['ipName'].
   Project defaults to the project of the deployment. #}
- name: {{ properties['ipName'] }}
  type: compute.v1.globalAddress
  properties:
    description: "Static IP for Kubeflow ingress."
|