# Patch overview: adds kustomize manifests for mpi-operator under
# manifests/mpi-operator/ (a base plus an "application" overlay) and updates
# the README install snippet to build from that directory.
# Lines starting with "#" sit outside every hunk and are annotations only.

# README.md: the install snippet now clones kubeflow/mpi-operator and changes
# into mpi-operator/manifests/mpi-operator instead of cloning
# kubeflow/manifests.
diff --git a/README.md b/README.md
index 47b8e35..12e957c 100644
--- a/README.md
+++ b/README.md
@@ -38,8 +38,8 @@ mpijobs.kubeflow.org 4d
 If it is not included you can add it as follows using [kustomize](https://github.com/kubernetes-sigs/kustomize):
 
 ```bash
-git clone https://github.com/kubeflow/manifests
-cd manifests/mpi-job/mpi-operator
+git clone https://github.com/kubeflow/mpi-operator
+cd mpi-operator/manifests/mpi-operator
 kustomize build base | kubectl apply -f -
 ```
 
# base/cluster-role-binding.yaml (new file): binds ClusterRole "mpi-operator"
# to ServiceAccount "mpi-operator".
# NOTE(review): the ServiceAccount subject carries no namespace; presumably
# kustomize fills it in from `namespace: kubeflow` in base/kustomization.yaml
# - confirm before applying this file without kustomize.
diff --git a/manifests/mpi-operator/base/cluster-role-binding.yaml b/manifests/mpi-operator/base/cluster-role-binding.yaml
new file mode 100644
index 0000000..cc5e461
--- /dev/null
+++ b/manifests/mpi-operator/base/cluster-role-binding.yaml
@@ -0,0 +1,13 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app: mpi-operator
+  name: mpi-operator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: mpi-operator
+subjects:
+- kind: ServiceAccount
+  name: mpi-operator
# base/cluster-role.yaml (new file): four ClusterRole documents.
#   mpi-operator            rules for the role referenced by the binding
#                           above; all verbs ("*") on mpijobs and on the
#                           scheduling queues/podgroups, enumerated verbs
#                           elsewhere.
#   kubeflow-mpijobs-admin  no rules of its own (rules: []); aggregates roles
#                           labelled aggregate-to-kubeflow-mpijobs-admin.
#   kubeflow-mpijobs-edit   read/write verbs on mpijobs and mpijobs/status;
#                           carries the label selected by the admin role.
#   kubeflow-mpijobs-view   get/list/watch on mpijobs and mpijobs/status.
diff --git a/manifests/mpi-operator/base/cluster-role.yaml b/manifests/mpi-operator/base/cluster-role.yaml
new file mode 100644
index 0000000..e0d79d4
--- /dev/null
+++ b/manifests/mpi-operator/base/cluster-role.yaml
@@ -0,0 +1,162 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app: mpi-operator
+  name: mpi-operator
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  - serviceaccounts
+  verbs:
+  - create
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - pods/exec
+  verbs:
+  - create
+- apiGroups:
+  - ""
+  resources:
+  - endpoints
+  verbs:
+  - create
+  - get
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+- apiGroups:
+  - rbac.authorization.k8s.io
+  resources:
+  - roles
+  - rolebindings
+  verbs:
+  - create
+  - list
+  - watch
+- apiGroups:
+  - policy
+  resources:
+  - poddisruptionbudgets
+  verbs:
+  - create
+  - list
+  - update
+  - watch
+- apiGroups:
+  - apps
+  resources:
+  - statefulsets
+  verbs:
+  - create
+  - list
+  - update
+  - watch
+- apiGroups:
+  - batch
+  resources:
+  - jobs
+  verbs:
+  - create
+  - list
+  - update
+  - watch
+- apiGroups:
+  - apiextensions.k8s.io
+  resources:
+  - customresourcedefinitions
+  verbs:
+  - create
+  - get
+- apiGroups:
+  - kubeflow.org
+  resources:
+  - mpijobs
+  - mpijobs/finalizers
+  - mpijobs/status
+  verbs:
+  - "*"
+- apiGroups:
+  - scheduling.incubator.k8s.io
+  - scheduling.sigs.dev
+  resources:
+  - queues
+  - podgroups
+  verbs:
+  - "*"
+
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kubeflow-mpijobs-admin
+  labels:
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true"
+aggregationRule:
+  clusterRoleSelectors:
+  - matchLabels:
+      rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true"
+rules: []
+
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kubeflow-mpijobs-edit
+  labels:
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true"
+rules:
+- apiGroups:
+  - kubeflow.org
+  resources:
+  - mpijobs
+  - mpijobs/status
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - delete
+  - deletecollection
+  - patch
+  - update
+
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kubeflow-mpijobs-view
+  labels:
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
+rules:
+- apiGroups:
+  - kubeflow.org
+  resources:
+  - mpijobs
+  - mpijobs/status
+  verbs:
+  - get
+  - list
+  - watch
# base/crd.yaml (new file): CustomResourceDefinition mpijobs.kubeflow.org
# (kind MPIJob, short names mj/mpij) with three versions:
#   v1alpha1  served: false, storage: false; spec must match exactly one of
#             the gpus / processingUnits / replicas shapes (oneOf).
#   v1alpha2  served: true,  storage: false
#   v1        served: true,  storage: true
# v1alpha2 and v1 share the same schema: slotsPerWorker >= 1, exactly one
# Launcher replica (minimum 1, maximum 1), Worker replicas >= 1.
# NOTE(review): this uses apiextensions.k8s.io/v1beta1, which looks deprecated
# upstream - confirm against the cluster versions this must support.
diff --git a/manifests/mpi-operator/base/crd.yaml b/manifests/mpi-operator/base/crd.yaml
new file mode 100644
index 0000000..a576df3
--- /dev/null
+++ b/manifests/mpi-operator/base/crd.yaml
@@ -0,0 +1,150 @@
+apiVersion: apiextensions.k8s.io/v1beta1
+kind: CustomResourceDefinition
+metadata:
+  name: mpijobs.kubeflow.org
+spec:
+  group: kubeflow.org
+  scope: Namespaced
+  names:
+    plural: mpijobs
+    singular: mpijob
+    kind: MPIJob
+    shortNames:
+    - mj
+    - mpij
+  versions:
+  - name: v1alpha1
+    served: false
+    storage: false
+    schema:
+      openAPIV3Schema:
+        properties:
+          spec:
+            title: The MPIJob spec
+            description: Only one of gpus, processingUnits, or replicas should be specified
+            oneOf:
+            - properties:
+                gpus:
+                  title: Total number of GPUs
+                  description: Valid values are 1, 2, 4, or any multiple of 8
+                  oneOf:
+                  - type: integer
+                    enum:
+                    - 1
+                    - 2
+                    - 4
+                  - type: integer
+                    multipleOf: 8
+                    minimum: 8
+                slotsPerWorker:
+                  title: The number of slots per worker used in hostfile
+                  description: Defaults to the number of processing units per worker
+                  type: integer
+                  minimum: 1
+                gpusPerNode:
+                  title: The maximum number of GPUs available per node
+                  description: Defaults to the number of GPUs per worker
+                  type: integer
+                  minimum: 1
+              required:
+              - gpus
+            - properties:
+                processingUnits:
+                  title: Total number of processing units
+                  description: Valid values are 1, 2, 4, or any multiple of 8
+                  oneOf:
+                  - type: integer
+                    enum:
+                    - 1
+                    - 2
+                    - 4
+                  - type: integer
+                    multipleOf: 8
+                    minimum: 8
+                slotsPerWorker:
+                  title: The number of slots per worker used in hostfile
+                  description: Defaults to the number of processing units per worker
+                  type: integer
+                  minimum: 1
+                processingUnitsPerNode:
+                  title: The maximum number of processing units available per node
+                  description: Defaults to the number of processing units per worker
+                  type: integer
+                  minimum: 1
+                processingResourceType:
+                  title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'
+                  description: Defaults to 'nvidia.com/gpu'
+                  type: string
+                  enum:
+                  - nvidia.com/gpu
+                  - cpu
+              required:
+              - processingUnits
+            - properties:
+                replicas:
+                  title: Total number of replicas
+                  description: The processing resource limit should be specified for each replica
+                  type: integer
+                  minimum: 1
+                slotsPerWorker:
+                  title: The number of slots per worker used in hostfile
+                  description: Defaults to the number of processing units per worker
+                  type: integer
+                  minimum: 1
+                processingResourceType:
+                  title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'
+                  description: Defaults to 'nvidia.com/gpu'
+                  type: string
+                  enum:
+                  - nvidia.com/gpu
+                  - cpu
+              required:
+              - replicas
+  - name: v1alpha2
+    served: true
+    storage: false
+    schema:
+      openAPIV3Schema:
+        properties:
+          spec:
+            properties:
+              slotsPerWorker:
+                type: integer
+                minimum: 1
+              mpiReplicaSpecs:
+                properties:
+                  Launcher:
+                    properties:
+                      replicas:
+                        type: integer
+                        minimum: 1
+                        maximum: 1
+                  Worker:
+                    properties:
+                      replicas:
+                        type: integer
+                        minimum: 1
+  - name: v1
+    served: true
+    storage: true
+    schema:
+      openAPIV3Schema:
+        properties:
+          spec:
+            properties:
+              slotsPerWorker:
+                type: integer
+                minimum: 1
+              mpiReplicaSpecs:
+                properties:
+                  Launcher:
+                    properties:
+                      replicas:
+                        type: integer
+                        minimum: 1
+                        maximum: 1
+                  Worker:
+                    properties:
+                      replicas:
+                        type: integer
+                        minimum: 1
# base/deployment.yaml (new file): single-replica Deployment "mpi-operator"
# running image mpioperator/mpi-operator:latest under ServiceAccount
# "mpi-operator", with Istio sidecar injection annotated off.
# The $(lock-namespace) and $(kubectl-delivery-image) args use the names of
# the `vars` declared in base/kustomization.yaml, so they are presumably
# substituted by kustomize at build time - confirm when applying this file
# without kustomize.
diff --git a/manifests/mpi-operator/base/deployment.yaml b/manifests/mpi-operator/base/deployment.yaml
new file mode 100644
index 0000000..bf40e6c
--- /dev/null
+++ b/manifests/mpi-operator/base/deployment.yaml
@@ -0,0 +1,27 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mpi-operator
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: mpi-operator
+  template:
+    metadata:
+      labels:
+        app: mpi-operator
+      annotations:
+        sidecar.istio.io/inject: "false"
+    spec:
+      containers:
+      - args:
+        - -alsologtostderr
+        - --lock-namespace
+        - $(lock-namespace)
+        - --kubectl-delivery-image
+        - $(kubectl-delivery-image)
+        image: mpioperator/mpi-operator:latest
+        imagePullPolicy: Always
+        name: mpi-operator
+      serviceAccountName: mpi-operator
# base/kustomization.yaml (new file): lists the five base resources, sets
# namespace "kubeflow", adds the common label kustomize.component, and
# generates ConfigMap "mpi-operator-config" from params.env with name-suffix
# hashing disabled. The two `vars` read that ConfigMap's
# data.kubectl-delivery-image and data.lock-namespace fields.
diff --git a/manifests/mpi-operator/base/kustomization.yaml b/manifests/mpi-operator/base/kustomization.yaml
new file mode 100644
index 0000000..dd325fe
--- /dev/null
+++ b/manifests/mpi-operator/base/kustomization.yaml
@@ -0,0 +1,36 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: kubeflow
+resources:
+- cluster-role-binding.yaml
+- cluster-role.yaml
+- crd.yaml
+- deployment.yaml
+- service-account.yaml
+commonLabels:
+  kustomize.component: mpi-operator
+images:
+- name: mpioperator/mpi-operator
+  newName: mpioperator/mpi-operator
+  newTag: latest
+configMapGenerator:
+- name: mpi-operator-config
+  envs:
+  - params.env
+generatorOptions:
+  disableNameSuffixHash: true
+vars:
+- name: kubectl-delivery-image
+  objref:
+    kind: ConfigMap
+    name: mpi-operator-config
+    apiVersion: v1
+  fieldref:
+    fieldpath: data.kubectl-delivery-image
+- name: lock-namespace
+  objref:
+    kind: ConfigMap
+    name: mpi-operator-config
+    apiVersion: v1
+  fieldref:
+    fieldpath: data.lock-namespace
# base/params.env (new file): the two key=value pairs read by the
# configMapGenerator in base/kustomization.yaml.
# NOTE(review): the file is added without a trailing newline, as recorded by
# the marker that closes this hunk.
diff --git a/manifests/mpi-operator/base/params.env b/manifests/mpi-operator/base/params.env
new file mode 100644
index 0000000..2c20d58
--- /dev/null
+++ b/manifests/mpi-operator/base/params.env
@@ -0,0 +1,2 @@
+kubectl-delivery-image=mpioperator/kubectl-delivery:latest
+lock-namespace=kubeflow
\ No newline at end of file
# base/service-account.yaml (new file): ServiceAccount "mpi-operator", the
# account named by the Deployment's serviceAccountName and by the
# ClusterRoleBinding subject.
diff --git a/manifests/mpi-operator/base/service-account.yaml b/manifests/mpi-operator/base/service-account.yaml
new file mode 100644
index 0000000..15cf4a0
--- /dev/null
+++ b/manifests/mpi-operator/base/service-account.yaml
@@ -0,0 +1,6 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app: mpi-operator
+  name: mpi-operator
# overlays/application/application.yaml (new file): app.k8s.io/v1beta1
# Application "mpi-operator" whose componentKinds are Deployment,
# ServiceAccount and MPIJob, plus descriptor metadata (maintainers, owners,
# keywords, link).
# NOTE(review): the selector requires six app.kubernetes.io/* labels, while
# the overlay kustomization below adds only "component" and "name";
# presumably the remaining labels come from the deployment tooling - confirm.
diff --git a/manifests/mpi-operator/overlays/application/application.yaml b/manifests/mpi-operator/overlays/application/application.yaml
new file mode 100644
index 0000000..2481ac0
--- /dev/null
+++ b/manifests/mpi-operator/overlays/application/application.yaml
@@ -0,0 +1,43 @@
+apiVersion: app.k8s.io/v1beta1
+kind: Application
+metadata:
+  name: mpi-operator
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: mpi-operator
+      app.kubernetes.io/instance: mpi-operator
+      app.kubernetes.io/managed-by: kfctl
+      app.kubernetes.io/component: mpijob
+      app.kubernetes.io/part-of: kubeflow
+      app.kubernetes.io/version: v1.0
+  componentKinds:
+  - group: apps
+    kind: Deployment
+  - group: core
+    kind: ServiceAccount
+  - group: kubeflow.org
+    kind: MPIJob
+  descriptor:
+    type: "mpi-operator"
+    version: "v1"
+    description: "Mpi-operator allows users to create and manage the \"MPIJob\" custom resource."
+    maintainers:
+    - name: Rong Ou
+      email: rong.ou@gmail.com
+    - name: Yuan Tang
+      email: terrytangyuan@gmail.com
+    - name: Abhilash Pallerlamudi
+      email: stp.abhi@gmail.com
+    owners:
+    - name: Rong Ou
+      email: rong.ou@gmail.com
+    - name: Yuan Tang
+      email: terrytangyuan@gmail.com
+    keywords:
+    - "mpijob"
+    - "mpi-operator"
+    links:
+    - description: About
+      url: "https://github.com/kubeflow/mpi-operator"
+  addOwnerRef: true
# overlays/application/kustomization.yaml (new file): builds on ../../base,
# adds application.yaml, and applies the app.kubernetes.io/component and
# app.kubernetes.io/name common labels.
diff --git a/manifests/mpi-operator/overlays/application/kustomization.yaml b/manifests/mpi-operator/overlays/application/kustomization.yaml
new file mode 100644
index 0000000..0da42d7
--- /dev/null
+++ b/manifests/mpi-operator/overlays/application/kustomization.yaml
@@ -0,0 +1,9 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+bases:
+- ../../base
+commonLabels:
+  app.kubernetes.io/component: mpijob
+  app.kubernetes.io/name: mpi-operator
+kind: Kustomization
+resources:
+- application.yaml