synchronization
This commit is contained in:
		@@ -0,0 +1,32 @@
 | 
			
		||||
{{/* vim: set filetype=mustache: */}}
 | 
			
		||||
{{/*
Short chart name: .Values.nameOverride when it is set, otherwise .Chart.Name.
The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "gpushare-installer.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
 | 
			
		||||
 | 
			
		||||
{{/*
Default fully qualified app name, cut to 63 characters because some Kubernetes
name fields are limited to this (by the DNS naming spec).

Resolution order:
  1. .Values.fullnameOverride, when set.
  2. .Release.Name on its own, when it already contains the chart name.
  3. "<release name>-<chart name>" otherwise.

"Chart name" here means .Values.nameOverride when set, else .Chart.Name.
*/}}
{{- define "gpushare-installer.fullname" -}}
{{- $chartName := default .Chart.Name .Values.nameOverride -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else if contains $chartName .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $chartName | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
 | 
			
		||||
 | 
			
		||||
{{/*
Value for the "chart" label: "<chart name>-<chart version>" with every "+"
replaced by "_", cut to 63 characters, and a trailing "-" removed.
*/}}
{{- define "gpushare-installer.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
 | 
			
		||||
@@ -0,0 +1,43 @@
 | 
			
		||||
# DaemonSet that runs /dp-evict/dp-evict-on-host.sh on every node labelled
# gpushare=true.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: kube-system
  name: device-plugin-evict-ds
spec:
  selector:
    matchLabels:
      app: gpushare
      component: gpushare-device-plugin
      name: device-plugin-evict-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: gpushare
        component: gpushare-device-plugin
        name: device-plugin-evict-ds
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      # Only nodes that have been switched to GPU sharing are targeted.
      nodeSelector:
        gpushare: "true"
      hostNetwork: true
      volumes:
        - name: kube-dir
          hostPath:
            path: /etc/kubernetes
            type: Directory
      containers:
        - name: gpushare
          image: "{{ .Values.images.evictor.image }}:{{ .Values.images.evictor.tag }}"
          imagePullPolicy: {{ .Values.images.evictor.pullPolicy }}
          command:
            - bash
            - /dp-evict/dp-evict-on-host.sh
          # Runs privileged with the host's /etc/kubernetes mounted writable.
          # No resource requests/limits are declared for this container.
          securityContext:
            privileged: true
          volumeMounts:
            - name: kube-dir
              mountPath: /etc/kubernetes
 | 
			
		||||
@@ -0,0 +1,52 @@
 | 
			
		||||
# DaemonSet that runs /dp-evict/dp-recover-on-host.sh on nodes labelled
# gpushare=false that also carry the aliyun.accelerator/nvidia_count label.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: kube-system
  name: device-plugin-recover-ds
spec:
  selector:
    matchLabels:
      app: gpushare
      component: gpushare-device-plugin
      name: device-plugin-recover-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: gpushare
        component: gpushare-device-plugin
        name: device-plugin-recover-ds
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      hostNetwork: true
      # Both conditions must hold: the node is explicitly opted out of GPU
      # sharing AND it has the aliyun.accelerator/nvidia_count label.
      # (A previous alternative selector, gpu-instance: "true", is not used.)
      nodeSelector:
        gpushare: "false"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: aliyun.accelerator/nvidia_count
                    operator: Exists
      volumes:
        - name: kube-dir
          hostPath:
            path: /etc/kubernetes
            type: Directory
      containers:
        - name: gpushare
          image: "{{ .Values.images.recover.image }}:{{ .Values.images.recover.tag }}"
          imagePullPolicy: {{ .Values.images.recover.pullPolicy }}
          command:
            - bash
            - /dp-evict/dp-recover-on-host.sh
          # Runs privileged with the host's /etc/kubernetes mounted writable.
          # No resource requests/limits are declared for this container.
          securityContext:
            privileged: true
          volumeMounts:
            - name: kube-dir
              mountPath: /etc/kubernetes
 | 
			
		||||
@@ -0,0 +1,61 @@
 | 
			
		||||
# DaemonSet running the gpushare device plugin on every node labelled
# gpushare=true.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: gpushare-device-plugin-ds
  namespace: kube-system
spec:
  selector:
    matchLabels:
      component: gpushare-device-plugin
      app: gpushare
      name: gpushare-device-plugin-ds
      type: runtime
  template:
    metadata:
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        component: gpushare-device-plugin
        app: gpushare
        name: gpushare-device-plugin-ds
        type: runtime
    spec:
      # serviceAccountName replaces the deprecated `serviceAccount` alias.
      serviceAccountName: gpushare-device-plugin
      hostNetwork: true
      nodeSelector:
        gpushare: "true"
      containers:
        - name: gpushare
          image: "{{ .Values.images.devicePlugin.image }}:{{ .Values.images.devicePlugin.tag }}"
          imagePullPolicy: {{ .Values.images.devicePlugin.pullPolicy }}
          command:
            - gpushare-device-plugin-v2
            - -logtostderr
            - --v=5
            - --memory-unit=GiB
          # Requests equal limits for both CPU and memory, which makes this a
          # Guaranteed-QoS pod so it is the last candidate for eviction under
          # node resource pressure.
          resources:
            limits:
              memory: "300Mi"
              cpu: "1"
            requests:
              memory: "300Mi"
              cpu: "1"
          env:
            # NOTE(review): /etc/kubernetes is not mounted into this container;
            # confirm the plugin copes when this kubeconfig path is absent.
            - name: KUBECONFIG
              value: /etc/kubernetes/kubelet.conf
            # Name of the node the pod is scheduled on (downward API).
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          volumeMounts:
            # Kubelet device-plugin directory shared with the host.
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
 | 
			
		||||
@@ -0,0 +1,59 @@
 | 
			
		||||
# rbac.yaml
# RBAC for the gpushare device plugin: a ClusterRole, the ServiceAccount the
# plugin pods run as, and the ClusterRoleBinding that joins the two.
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: gpushare-device-plugin
rules:
  # Read-only access to nodes.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
      - watch
  # Emit events.
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - create
      - patch
  # Read and modify pods.
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - update
      - patch
      - get
      - list
      - watch
  # Modify node status.
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
      - update
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: gpushare-device-plugin
  namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  # ClusterRoleBinding is cluster-scoped, so it carries no namespace.
  name: gpushare-device-plugin
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: gpushare-device-plugin
subjects:
  - kind: ServiceAccount
    name: gpushare-device-plugin
    namespace: kube-system
 | 
			
		||||
@@ -0,0 +1,45 @@
 | 
			
		||||
# deployment yaml
# Single-replica Deployment of the gpushare scheduler extender, pinned to
# nodes labelled node-role.kubernetes.io/master.
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: gpushare-schd-extender
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: gpushare
      component: gpushare-schd-extender
      type: runtime
  replicas: 1
  # Recreate: the old pod is terminated before the replacement is created.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: gpushare
        component: gpushare-schd-extender
        type: runtime
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      hostNetwork: true
      # Tolerate the NoSchedule taints for the master role and for nodes the
      # cloud provider has not initialized yet.
      tolerations:
        - effect: NoSchedule
          operator: Exists
          key: node-role.kubernetes.io/master
        - effect: NoSchedule
          operator: Exists
          key: node.cloudprovider.kubernetes.io/uninitialized
      nodeSelector:
        node-role.kubernetes.io/master: ""
      # serviceAccountName replaces the deprecated `serviceAccount` alias.
      serviceAccountName: gpushare-schd-extender
      containers:
        - name: gpushare-schd-extender
          image: "{{ .Values.images.extender.image }}:{{ .Values.images.extender.tag }}"
          imagePullPolicy: {{ .Values.images.extender.pullPolicy }}
          env:
            - name: LOG_LEVEL
              value: debug
            # Presumably the port the extender listens on - confirm against
            # the extender image. Quoted because env values must be strings.
            - name: PORT
              value: "12345"
 | 
			
		||||
@@ -0,0 +1,67 @@
 | 
			
		||||
# rbac.yaml
# RBAC for the gpushare scheduler extender: a ClusterRole, the ServiceAccount
# the extender runs as, and the ClusterRoleBinding that joins the two.
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: gpushare-schd-extender
rules:
  # Read-only access to nodes.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
      - watch
  # Emit events.
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - create
      - patch
  # Read and modify pods.
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - update
      - patch
      - get
      - list
      - watch
  # Create bindings (assign pods to nodes).
  - apiGroups:
      - ""
    resources:
      - bindings
      - pods/binding
    verbs:
      - create
  # Read-only access to configmaps.
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
      - list
      - watch
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: gpushare-schd-extender
  namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  # ClusterRoleBinding is cluster-scoped, so it carries no namespace.
  name: gpushare-schd-extender
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: gpushare-schd-extender
subjects:
  - kind: ServiceAccount
    name: gpushare-schd-extender
    namespace: kube-system
 | 
			
		||||
@@ -0,0 +1,19 @@
 | 
			
		||||
# Service in front of pods labelled app=gpushare,
# component=gpushare-schd-extender.
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: gpushare-schd-extender
  labels:
    component: gpushare-schd-extender
    app: gpushare
spec:
  selector:
    component: gpushare-schd-extender
    app: gpushare
  # Exposed as a NodePort on the fixed port 32766 of every node; ClusterIP is
  # the alternative if node-level access is not needed.
  type: NodePort
  ports:
    - name: http
      port: 12345
      targetPort: 12345
      nodePort: 32766
 | 
			
		||||
@@ -0,0 +1,66 @@
 | 
			
		||||
# Job that runs /schd-extender/install-sched-extender-on-host.sh on nodes
# labelled node-role.kubernetes.io/master.
apiVersion: batch/v1
kind: Job
metadata:
  name: gpushare-installer
  namespace: kube-system
  labels:
    app: gpushare
    name: gpushare-installer
    # Templated label values are quoted so that a value such as an all-digit
    # release name still renders as a YAML string.
    chart: {{ include "gpushare-installer.chart" . | quote }}
    release: {{ .Release.Name | quote }}
    heritage: {{ .Release.Service | quote }}
spec:
  # Number of installer pods allowed to run at the same time.
  parallelism: {{ .Values.masterCount }}
  template:
    metadata:
      labels:
        chart: {{ include "gpushare-installer.chart" . | quote }}
        release: {{ .Release.Name | quote }}
        heritage: {{ .Release.Service | quote }}
        app: gpushare
        name: gpushare-installer
    spec:
      hostNetwork: true
      # Tolerate the NoSchedule taints for the master role and for nodes the
      # cloud provider has not initialized yet.
      tolerations:
        - effect: NoSchedule
          operator: Exists
          key: node-role.kubernetes.io/master
        - effect: NoSchedule
          operator: Exists
          key: node.cloudprovider.kubernetes.io/uninitialized
      nodeSelector:
        node-role.kubernetes.io/master: ""
      # Required anti-affinity on the hostname: two pods labelled
      # name=gpushare-installer are never scheduled onto the same node.
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: name
                    operator: In
                    values:
                      - gpushare-installer
              topologyKey: "kubernetes.io/hostname"
      restartPolicy: OnFailure
      containers:
        - name: deploy-schd
          image: "{{ .Values.images.installer.image }}:{{ .Values.images.installer.tag }}"
          imagePullPolicy: {{ .Values.images.installer.pullPolicy }}
          # Runs privileged with the host's /etc/kubernetes mounted writable.
          securityContext:
            privileged: true
          command:
            - bash
            - /schd-extender/install-sched-extender-on-host.sh
          env:
            # IP address of the node the pod runs on (downward API).
            - name: NODE_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
          volumeMounts:
            - name: kube-dir
              mountPath: /etc/kubernetes
      volumes:
        - name: kube-dir
          hostPath:
            path: /etc/kubernetes
            type: Directory
 | 
			
		||||
		Reference in New Issue
	
	Block a user