# Rainbond/gpushare-scheduler-extender/deployer/chart/gpushare-installer/templates/device-plugin-recover.yaml

# DaemonSet that runs /dp-evict/dp-recover-on-host.sh in a privileged container
# with the host's /etc/kubernetes directory mounted.
# Pods are scheduled only on nodes labelled gpushare="false" that also carry the
# aliyun.accelerator/nvidia_count label.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: device-plugin-recover-ds
  namespace: kube-system
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    # Must stay identical to template.metadata.labels below.
    matchLabels:
      component: gpushare-device-plugin
      app: gpushare
      name: device-plugin-recover-ds
  template:
    metadata:
      annotations:
        # Legacy critical-pod marker, kept only for older clusters. Current
        # Kubernetes releases ignore it; priorityClassName below replaces it.
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        component: gpushare-device-plugin
        app: gpushare
        name: device-plugin-recover-ds
    spec:
      # Supported way to mark a kube-system pod as node-critical.
      priorityClassName: system-node-critical
      # nodeSelector and the nodeAffinity below must BOTH match for a node to
      # be eligible.
      nodeSelector:
        gpushare: "false"
        # Alternative selector label, currently disabled:
        # gpu-instance: "true"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              # Only the presence of the label is checked, not its value.
              - key: aliyun.accelerator/nvidia_count
                operator: Exists
      hostNetwork: true
      containers:
      - image: "{{ .Values.images.recover.image }}:{{ .Values.images.recover.tag }}"
        # Quoted so an empty or unusual value is not re-typed by the YAML parser.
        imagePullPolicy: "{{ .Values.images.recover.pullPolicy }}"
        command:
        - bash
        - /dp-evict/dp-recover-on-host.sh
        name: gpushare
        # NOTE(review): no resource requests/limits are declared on this
        # container, so the pod is NOT in the Guaranteed QoS class. Add equal
        # requests and limits if it must be protected from eviction under node
        # resource pressure.
        securityContext:
          privileged: true
        volumeMounts:
        - name: kube-dir
          mountPath: /etc/kubernetes
      volumes:
      # Exposes the host's /etc/kubernetes to the container; type Directory
      # requires the path to already exist on the node.
      - hostPath:
          path: /etc/kubernetes
          type: Directory
        name: kube-dir