synchronization
This commit is contained in:
@@ -0,0 +1,32 @@
|
||||
{{/* vim: set filetype=mustache: */}}
|
||||
{{/*
Expand the name of the chart.
Yields .Values.nameOverride when it is set, otherwise .Chart.Name. The value is
cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "gpushare-installer.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||
|
||||
{{/*
Create a default fully qualified app name.
Every result is truncated to 63 characters, because some Kubernetes name fields
are limited to this by the DNS naming spec, and a trailing "-" is removed.
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name.
  3. "<release name>-<chart name>" otherwise.
The chart name is .Values.nameOverride when set, else .Chart.Name.
*/}}
{{- define "gpushare-installer.fullname" -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else if contains $chartName .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $chartName | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
|
||||
|
||||
{{/*
Create chart name and version as used by the chart label.
Builds "<chart name>-<chart version>", replaces every "+" with "_", truncates
to 63 characters and removes a trailing "-".
*/}}
{{- define "gpushare-installer.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
@@ -0,0 +1,43 @@
|
||||
# DaemonSet that runs /dp-evict/dp-evict-on-host.sh on every node labelled
# gpushare=true, with the host's /etc/kubernetes directory mounted.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: device-plugin-evict-ds
  namespace: kube-system
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      component: gpushare-device-plugin
      app: gpushare
      name: device-plugin-evict-ds
  template:
    metadata:
      annotations:
        # NOTE(review): this is the alpha critical-pod annotation; newer
        # clusters may expect priorityClassName instead - confirm against the
        # target Kubernetes version.
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        component: gpushare-device-plugin
        app: gpushare
        name: device-plugin-evict-ds
    spec:
      hostNetwork: true
      nodeSelector:
        gpushare: "true"
      containers:
        - name: gpushare
          image: "{{ .Values.images.evictor.image }}:{{ .Values.images.evictor.tag }}"
          # Quoted so the rendered value is always parsed as a YAML string.
          imagePullPolicy: "{{ .Values.images.evictor.pullPolicy }}"
          command:
            - bash
            - /dp-evict/dp-evict-on-host.sh
          # NOTE(review): an earlier comment here described this pod as a
          # "Guaranteed" pod, but no resources requests/limits are declared on
          # this container - add them if that QoS class is actually required.
          securityContext:
            privileged: true
          volumeMounts:
            - name: kube-dir
              mountPath: /etc/kubernetes
      volumes:
        # Host directory exposed to the script; must already exist on the node
        # (hostPath type Directory).
        - name: kube-dir
          hostPath:
            path: /etc/kubernetes
            type: Directory
|
@@ -0,0 +1,52 @@
|
||||
# DaemonSet that runs /dp-evict/dp-recover-on-host.sh on nodes labelled
# gpushare=false that also carry the aliyun.accelerator/nvidia_count label,
# with the host's /etc/kubernetes directory mounted.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: device-plugin-recover-ds
  namespace: kube-system
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      component: gpushare-device-plugin
      app: gpushare
      name: device-plugin-recover-ds
  template:
    metadata:
      annotations:
        # NOTE(review): this is the alpha critical-pod annotation; newer
        # clusters may expect priorityClassName instead - confirm against the
        # target Kubernetes version.
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        component: gpushare-device-plugin
        app: gpushare
        name: device-plugin-recover-ds
    spec:
      nodeSelector:
        gpushare: "false"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: aliyun.accelerator/nvidia_count
                    operator: Exists
      # nodeSelector:
      #   gpu-instance: "true"
      hostNetwork: true
      containers:
        - name: gpushare
          image: "{{ .Values.images.recover.image }}:{{ .Values.images.recover.tag }}"
          # Quoted so the rendered value is always parsed as a YAML string.
          imagePullPolicy: "{{ .Values.images.recover.pullPolicy }}"
          command:
            - bash
            - /dp-evict/dp-recover-on-host.sh
          # NOTE(review): an earlier comment here described this pod as a
          # "Guaranteed" pod, but no resources requests/limits are declared on
          # this container - add them if that QoS class is actually required.
          securityContext:
            privileged: true
          volumeMounts:
            - name: kube-dir
              mountPath: /etc/kubernetes
      volumes:
        # Host directory exposed to the script; must already exist on the node
        # (hostPath type Directory).
        - name: kube-dir
          hostPath:
            path: /etc/kubernetes
            type: Directory
|
@@ -0,0 +1,61 @@
|
||||
# DaemonSet that runs the gpushare device plugin on every node labelled
# gpushare=true and mounts the host's kubelet device-plugins directory.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: gpushare-device-plugin-ds
  namespace: kube-system
spec:
  selector:
    matchLabels:
      component: gpushare-device-plugin
      app: gpushare
      name: gpushare-device-plugin-ds
      type: runtime
  template:
    metadata:
      annotations:
        # NOTE(review): this is the alpha critical-pod annotation; newer
        # clusters may expect priorityClassName instead - confirm against the
        # target Kubernetes version.
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        component: gpushare-device-plugin
        app: gpushare
        name: gpushare-device-plugin-ds
        type: runtime
    spec:
      # serviceAccountName replaces the deprecated serviceAccount alias.
      serviceAccountName: gpushare-device-plugin
      hostNetwork: true
      nodeSelector:
        gpushare: "true"
      containers:
        - name: gpushare
          image: "{{ .Values.images.devicePlugin.image }}:{{ .Values.images.devicePlugin.tag }}"
          # Quoted so the rendered value is always parsed as a YAML string.
          imagePullPolicy: "{{ .Values.images.devicePlugin.pullPolicy }}"
          command:
            - gpushare-device-plugin-v2
            - -logtostderr
            - --v=5
            - --memory-unit=GiB
          # Requests equal limits so the pod is a Guaranteed pod, which will
          # not be evicted because of the node's resource consumption.
          resources:
            limits:
              memory: "300Mi"
              cpu: "1"
            requests:
              memory: "300Mi"
              cpu: "1"
          env:
            # NOTE(review): this path is not covered by any volumeMount in this
            # manifest - confirm the image provides it or that the plugin has a
            # fallback.
            - name: KUBECONFIG
              value: /etc/kubernetes/kubelet.conf
            # Name of the node this pod is scheduled on (downward API).
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
|
@@ -0,0 +1,59 @@
|
||||
# rbac.yaml
---
# Cluster-wide permissions for the gpushare device plugin. All rules target
# the core ("") API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: gpushare-device-plugin
rules:
  # Read access to nodes.
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch"]
  # Emit events.
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "patch"]
  # Read and modify pods.
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["update", "patch", "get", "list", "watch"]
  # Modify the status subresource of nodes.
  - apiGroups: [""]
    resources: ["nodes/status"]
    verbs: ["patch", "update"]
|
||||
---
# ServiceAccount "gpushare-device-plugin" in the kube-system namespace.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: kube-system
  name: gpushare-device-plugin
|
||||
---
# Grants the gpushare-device-plugin ClusterRole to the ServiceAccount of the
# same name in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBinding is cluster-scoped, so no metadata.namespace is set.
  name: gpushare-device-plugin
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: gpushare-device-plugin
subjects:
  - kind: ServiceAccount
    name: gpushare-device-plugin
    namespace: kube-system
|
@@ -0,0 +1,45 @@
|
||||
# deployment yaml
---
# Single-replica Deployment of the gpushare scheduler extender. It is pinned to
# nodes labelled node-role.kubernetes.io/master and uses the host network.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: gpushare-schd-extender
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: gpushare
      component: gpushare-schd-extender
      type: runtime
  replicas: 1
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: gpushare
        component: gpushare-schd-extender
        type: runtime
      annotations:
        # NOTE(review): this is the alpha critical-pod annotation; newer
        # clusters may expect priorityClassName instead - confirm against the
        # target Kubernetes version.
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      hostNetwork: true
      # Tolerate the NoSchedule taints named below so the pod can be placed on
      # nodes that carry them.
      tolerations:
        - effect: NoSchedule
          operator: Exists
          key: node-role.kubernetes.io/master
        - effect: NoSchedule
          operator: Exists
          key: node.cloudprovider.kubernetes.io/uninitialized
      nodeSelector:
        node-role.kubernetes.io/master: ""
      # serviceAccountName replaces the deprecated serviceAccount alias.
      serviceAccountName: gpushare-schd-extender
      containers:
        - name: gpushare-schd-extender
          image: "{{ .Values.images.extender.image }}:{{ .Values.images.extender.tag }}"
          # Quoted so the rendered value is always parsed as a YAML string.
          imagePullPolicy: "{{ .Values.images.extender.pullPolicy }}"
          env:
            - name: LOG_LEVEL
              value: debug
            # Quoted: environment variable values must be strings.
            - name: PORT
              value: "12345"
|
@@ -0,0 +1,67 @@
|
||||
# rbac.yaml
---
# Cluster-wide permissions for the gpushare scheduler extender. All rules
# target the core ("") API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: gpushare-schd-extender
rules:
  # Read access to nodes.
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch"]
  # Emit events.
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "patch"]
  # Read and modify pods.
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["update", "patch", "get", "list", "watch"]
  # Create bindings (both the bindings resource and the pods/binding
  # subresource).
  - apiGroups: [""]
    resources: ["bindings", "pods/binding"]
    verbs: ["create"]
  # Read access to configmaps.
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["get", "list", "watch"]
|
||||
---
# ServiceAccount "gpushare-schd-extender" in the kube-system namespace.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: kube-system
  name: gpushare-schd-extender
|
||||
---
# Grants the gpushare-schd-extender ClusterRole to the ServiceAccount of the
# same name in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBinding is cluster-scoped, so no metadata.namespace is set.
  name: gpushare-schd-extender
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: gpushare-schd-extender
subjects:
  - kind: ServiceAccount
    name: gpushare-schd-extender
    namespace: kube-system
|
@@ -0,0 +1,19 @@
|
||||
# Service in front of pods labelled app=gpushare,
# component=gpushare-schd-extender. It is exposed as a NodePort: port 12345 is
# forwarded to targetPort 12345 and published on node port 32766.
kind: Service
apiVersion: v1
metadata:
  namespace: kube-system
  name: gpushare-schd-extender
  labels:
    component: gpushare-schd-extender
    app: gpushare
spec:
  selector:
    component: gpushare-schd-extender
    app: gpushare
  # type: ClusterIP
  type: NodePort
  ports:
    - name: http
      port: 12345
      targetPort: 12345
      nodePort: 32766
|
@@ -0,0 +1,66 @@
|
||||
# Job that runs /schd-extender/install-sched-extender-on-host.sh in privileged
# pods on nodes labelled node-role.kubernetes.io/master, with the host's
# /etc/kubernetes directory mounted.
apiVersion: batch/v1
kind: Job
metadata:
  name: gpushare-installer
  namespace: kube-system
  labels:
    app: gpushare
    name: gpushare-installer
    # Templated label values are quoted so they always render as YAML strings,
    # even when a value looks like a number or a boolean.
    chart: {{ include "gpushare-installer.chart" . | quote }}
    release: {{ .Release.Name | quote }}
    heritage: {{ .Release.Service | quote }}
spec:
  # Number of installer pods run in parallel, taken from .Values.masterCount.
  # Left unquoted because this field must be an integer.
  parallelism: {{ .Values.masterCount }}
  template:
    metadata:
      labels:
        chart: {{ include "gpushare-installer.chart" . | quote }}
        release: {{ .Release.Name | quote }}
        heritage: {{ .Release.Service | quote }}
        app: gpushare
        name: gpushare-installer
    spec:
      hostNetwork: true
      # Tolerate the NoSchedule taints named below so the pods can be placed
      # on nodes that carry them.
      tolerations:
        - effect: NoSchedule
          operator: Exists
          key: node-role.kubernetes.io/master
        - effect: NoSchedule
          operator: Exists
          key: node.cloudprovider.kubernetes.io/uninitialized
      nodeSelector:
        node-role.kubernetes.io/master: ""
      restartPolicy: OnFailure
      containers:
        - name: deploy-schd
          image: "{{ .Values.images.installer.image }}:{{ .Values.images.installer.tag }}"
          imagePullPolicy: "{{ .Values.images.installer.pullPolicy }}"
          securityContext:
            privileged: true
          command:
            - bash
            - /schd-extender/install-sched-extender-on-host.sh
          env:
            # IP of the node the pod runs on (downward API).
            - name: NODE_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
          volumeMounts:
            - name: kube-dir
              mountPath: /etc/kubernetes
      volumes:
        # Host directory exposed to the script; must already exist on the node
        # (hostPath type Directory).
        - name: kube-dir
          hostPath:
            path: /etc/kubernetes
            type: Directory
      affinity:
        # Never co-locate two pods labelled name=gpushare-installer on the same
        # hostname, so each parallel pod lands on a different node.
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: name
                    operator: In
                    values:
                      - gpushare-installer
              topologyKey: "kubernetes.io/hostname"
|
Reference in New Issue
Block a user