synchronization
gpushare-device-plugin/pkg/gpu/nvidia/allocate.go (new file, 198 lines)
@@ -0,0 +1,198 @@
package nvidia

import (
    "fmt"
    "time"

    log "github.com/golang/glog"
    "golang.org/x/net/context"
    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/types"
    pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

var (
    clientTimeout    = 30 * time.Second
    lastAllocateTime time.Time
)

// init sets up the Kubernetes client used to look up and patch pods.
func init() {
    kubeInit()
}

func buildErrResponse(reqs *pluginapi.AllocateRequest, podReqGPU uint) *pluginapi.AllocateResponse {
    responses := pluginapi.AllocateResponse{}
    for _, req := range reqs.ContainerRequests {
        response := pluginapi.ContainerAllocateResponse{
            Envs: map[string]string{
                envNVGPU:               fmt.Sprintf("no-gpu-has-%d%s-to-run", podReqGPU, metric),
                EnvResourceIndex:       "-1",
                EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
                EnvResourceByContainer: fmt.Sprintf("%d", uint(len(req.DevicesIDs))),
                EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
            },
        }
        responses.ContainerResponses = append(responses.ContainerResponses, &response)
    }
    return &responses
}

// Allocate answers the kubelet's allocation request by matching it to an
// assumed GPU share pod and returning the device env vars for its containers.
func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
    reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
    responses := pluginapi.AllocateResponse{}

    log.Infoln("----Allocating GPU for gpu mem is started----")
    var (
        podReqGPU uint
        found     bool
        assumePod *v1.Pod
    )

    // podReqGPU = uint(0)
    for _, req := range reqs.ContainerRequests {
        podReqGPU += uint(len(req.DevicesIDs))
    }
    log.Infof("RequestPodGPUs: %d", podReqGPU)

    m.Lock()
    defer m.Unlock()
    log.Infoln("checking...")
    pods, err := getCandidatePods(m.queryKubelet, m.kubeletClient)
    if err != nil {
        log.Infof("invalid allocation request: failed to find candidate pods due to %v", err)
        return buildErrResponse(reqs, podReqGPU), nil
    }

    if log.V(4) {
        for _, pod := range pods {
            log.Infof("Pod %s in ns %s requests GPU Memory %d with timestamp %v",
                pod.Name,
                pod.Namespace,
                getGPUMemoryFromPodResource(pod),
                getAssumeTimeFromPodAnnotation(pod))
        }
    }

    for _, pod := range pods {
        if getGPUMemoryFromPodResource(pod) == podReqGPU {
            log.Infof("Found assumed GPU shared Pod %s in ns %s with GPU Memory %d",
                pod.Name,
                pod.Namespace,
                podReqGPU)
            assumePod = pod
            found = true
            break
        }
    }

    if found {
        id := getGPUIDFromPodAnnotation(assumePod)
        if id < 0 {
            log.Warningf("Failed to get the dev for pod %v", assumePod)
        }

        candidateDevID := ""
        if id >= 0 {
            ok := false
            candidateDevID, ok = m.GetDeviceNameByIndex(uint(id))
            if !ok {
                log.Warningf("Failed to find the dev for pod %v because it's not able to find dev with index %d",
                    assumePod,
                    id)
                id = -1
            }
        }

        if id < 0 {
            return buildErrResponse(reqs, podReqGPU), nil
        }
        log.Infof("gpu index %v, uuid: %v", id, candidateDevID)
        // 1. Create container requests
        for _, req := range reqs.ContainerRequests {
            reqGPU := uint(len(req.DevicesIDs))
            response := pluginapi.ContainerAllocateResponse{
                Envs: map[string]string{
                    envNVGPU:               fmt.Sprintf("%v", id),
                    EnvResourceIndex:       fmt.Sprintf("%d", id),
                    EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
                    EnvResourceByContainer: fmt.Sprintf("%d", reqGPU),
                    EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
                },
            }
            if m.disableCGPUIsolation {
                response.Envs["CGPU_DISABLE"] = "true"
            }
            responses.ContainerResponses = append(responses.ContainerResponses, &response)
        }

        // 2. Update Pod spec
        patchedAnnotationBytes, err := patchPodAnnotationSpecAssigned()
        if err != nil {
            return buildErrResponse(reqs, podReqGPU), nil
        }
        _, err = clientset.CoreV1().Pods(assumePod.Namespace).Patch(assumePod.Name, types.StrategicMergePatchType, patchedAnnotationBytes)
        if err != nil {
            // the object has been modified; please apply your changes to the latest version and try again
            if err.Error() == OptimisticLockErrorMsg {
                // retry
                _, err = clientset.CoreV1().Pods(assumePod.Namespace).Patch(assumePod.Name, types.StrategicMergePatchType, patchedAnnotationBytes)
                if err != nil {
                    log.Warningf("Failed due to %v", err)
                    return buildErrResponse(reqs, podReqGPU), nil
                }
            } else {
                log.Warningf("Failed due to %v", err)
                return buildErrResponse(reqs, podReqGPU), nil
            }
        }

    } else if len(m.devNameMap) == 1 {
        var devName string
        var devIndex uint
        for d, index := range m.devNameMap {
            devName = d
            devIndex = index
            break
        }
        log.Infof("this node has only one GPU device, skipping the pod search and directly assigning device %v(%v) to the container", devIndex, devName)
        for _, req := range reqs.ContainerRequests {
            reqGPU := uint(len(req.DevicesIDs))
            response := pluginapi.ContainerAllocateResponse{
                Envs: map[string]string{
                    envNVGPU:               devName,
                    EnvResourceIndex:       fmt.Sprintf("%d", devIndex),
                    EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
                    EnvResourceByContainer: fmt.Sprintf("%d", reqGPU),
                    EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
                },
            }
            if m.disableCGPUIsolation {
                response.Envs["CGPU_DISABLE"] = "true"
            }
            responses.ContainerResponses = append(responses.ContainerResponses, &response)
        }
        log.Infof("get allocated GPUs info %v", responses)
        return &responses, nil

    } else {
        log.Warningf("invalid allocation request: request GPU memory %d can't be satisfied.",
            podReqGPU)
        // return &responses, fmt.Errorf("invalid allocation request: request GPU memory %d can't be satisfied", reqGPU)
        return buildErrResponse(reqs, podReqGPU), nil
    }

    podName := ""
    if assumePod != nil {
        podName = assumePod.Name
    }
    log.Infof("pod %v, new allocated GPUs info %v", podName, &responses)
    log.Infof("----Allocating GPU for gpu mem for %v is ended----", podName)
    // // Add this to make sure the container is created at least
    // currentTime := time.Now()

    // currentTime.Sub(lastAllocateTime)

    return &responses, nil
}
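Not part of the commit: a minimal sketch of the environment a matched container ends up with, assuming a pod that requested 3 GiB of gpu-mem was assumed onto GPU index 1 of a card reported as 15 GiB. The concrete values are illustrative only; they mirror what the Allocate path above injects.

package main

import "fmt"

func main() {
    // Hypothetical result of Allocate for id = 1, podReqGPU = 3, getGPUMemory() = 15.
    envs := map[string]string{
        "NVIDIA_VISIBLE_DEVICES":       "1",  // envNVGPU: the physical GPU the container sees
        "ALIYUN_COM_GPU_MEM_IDX":       "1",  // EnvResourceIndex
        "ALIYUN_COM_GPU_MEM_POD":       "3",  // EnvResourceByPod: GiB requested by the whole pod
        "ALIYUN_COM_GPU_MEM_CONTAINER": "3",  // EnvResourceByContainer: GiB requested by this container
        "ALIYUN_COM_GPU_MEM_DEV":       "15", // EnvResourceByDev: total GiB on the device
    }
    for k, v := range envs {
        fmt.Printf("%s=%s\n", k, v)
    }
}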
gpushare-device-plugin/pkg/gpu/nvidia/const.go (new file, 36 lines)
@@ -0,0 +1,36 @@
package nvidia

import (
    pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

// MemoryUnit describes the unit used to count GPU memory; only GiB and MiB are supported.
type MemoryUnit string

const (
    resourceName  = "rainbond.com/gpu-mem"
    resourceCount = "rainbond.com/gpu-count"
    serverSock    = pluginapi.DevicePluginPath + "aliyungpushare.sock"

    OptimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"

    allHealthChecks             = "xids"
    containerTypeLabelKey       = "io.kubernetes.docker.type"
    containerTypeLabelSandbox   = "podsandbox"
    containerTypeLabelContainer = "container"
    containerLogPathLabelKey    = "io.kubernetes.container.logpath"
    sandboxIDLabelKey           = "io.kubernetes.sandbox.id"

    envNVGPU                   = "NVIDIA_VISIBLE_DEVICES"
    EnvResourceIndex           = "ALIYUN_COM_GPU_MEM_IDX"
    EnvResourceByPod           = "ALIYUN_COM_GPU_MEM_POD"
    EnvResourceByContainer     = "ALIYUN_COM_GPU_MEM_CONTAINER"
    EnvResourceByDev           = "ALIYUN_COM_GPU_MEM_DEV"
    EnvAssignedFlag            = "ALIYUN_COM_GPU_MEM_ASSIGNED"
    EnvResourceAssumeTime      = "ALIYUN_COM_GPU_MEM_ASSUME_TIME"
    EnvResourceAssignTime      = "ALIYUN_COM_GPU_MEM_ASSIGN_TIME"
    EnvNodeLabelForDisableCGPU = "cgpu.disable.isolation"

    GiBPrefix = MemoryUnit("GiB")
    MiBPrefix = MemoryUnit("MiB")
)
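As a usage sketch (not in this commit), a workload consumes the shared resource by putting rainbond.com/gpu-mem in its container limits; the image name and the quantity below are illustrative assumptions.

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
)

func main() {
    // Hypothetical container spec requesting 3 units (GiB) of shared GPU memory.
    c := v1.Container{
        Name:  "cuda-app",
        Image: "nvidia/cuda:10.0-base",
        Resources: v1.ResourceRequirements{
            Limits: v1.ResourceList{
                "rainbond.com/gpu-mem": resource.MustParse("3"),
            },
        },
    }
    fmt.Println(c.Resources.Limits)
}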
gpushare-device-plugin/pkg/gpu/nvidia/coredump.go (new file, 30 lines)
@@ -0,0 +1,30 @@
package nvidia

import (
    "io/ioutil"
    "runtime"

    log "github.com/golang/glog"
)

// StackTrace returns the stack traces of the current goroutine, or of all
// goroutines when all is true.
func StackTrace(all bool) string {
    buf := make([]byte, 10240)
    size := 0

    for {
        size = runtime.Stack(buf, all)

        if size == len(buf) {
            // The buffer filled up completely, so the trace may be truncated:
            // double the buffer and retry.
            buf = make([]byte, len(buf)<<1)
            continue
        }
        break
    }

    return string(buf[:size])
}

func coredump(fileName string) {
    log.Infoln("Dump stacktrace to ", fileName)
    if err := ioutil.WriteFile(fileName, []byte(StackTrace(true)), 0644); err != nil {
        log.Warningf("Failed to write stacktrace to %s: %v", fileName, err)
    }
}
gpushare-device-plugin/pkg/gpu/nvidia/gpumanager.go (new file, 111 lines)
@@ -0,0 +1,111 @@
package nvidia

import (
    "fmt"
    "os"
    "syscall"
    "time"

    "github.com/AliyunContainerService/gpushare-device-plugin/pkg/kubelet/client"
    "github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
    "github.com/fsnotify/fsnotify"
    log "github.com/golang/glog"
    pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

type sharedGPUManager struct {
    enableMPS     bool
    healthCheck   bool
    queryKubelet  bool
    kubeletClient *client.KubeletClient
}

func NewSharedGPUManager(enableMPS, healthCheck, queryKubelet bool, bp MemoryUnit, client *client.KubeletClient) *sharedGPUManager {
    metric = bp
    return &sharedGPUManager{
        enableMPS:     enableMPS,
        healthCheck:   healthCheck,
        queryKubelet:  queryKubelet,
        kubeletClient: client,
    }
}

func (ngm *sharedGPUManager) Run() error {
    log.V(1).Infoln("Loading NVML")

    if err := nvml.Init(); err != nil {
        log.V(1).Infof("Failed to initialize NVML: %s.", err)
        log.V(1).Infof("If this is a GPU node, did you set the docker default runtime to `nvidia`?")
        select {}
    }
    defer func() { log.V(1).Infoln("Shutdown of NVML returned:", nvml.Shutdown()) }()

    log.V(1).Infoln("Fetching devices.")
    if getDeviceCount() == uint(0) {
        log.V(1).Infoln("No devices found. Waiting indefinitely.")
        select {}
    }

    log.V(1).Infoln("Starting FS watcher.")
    watcher, err := newFSWatcher(pluginapi.DevicePluginPath)
    if err != nil {
        log.V(1).Infoln("Failed to create FS watcher.")
        return err
    }
    defer watcher.Close()

    log.V(1).Infoln("Starting OS watcher.")
    sigs := newOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

    restart := true
    var devicePlugin *NvidiaDevicePlugin

L:
    for {
        if restart {
            if devicePlugin != nil {
                devicePlugin.Stop()
            }

            devicePlugin, err = NewNvidiaDevicePlugin(ngm.enableMPS, ngm.healthCheck, ngm.queryKubelet, ngm.kubeletClient)
            if err != nil {
                log.Warningf("Failed to get device plugin due to %v", err)
                os.Exit(1)
            } else if err = devicePlugin.Serve(); err != nil {
                log.Warningf("Failed to start device plugin due to %v", err)
                os.Exit(2)
            } else {
                restart = false
            }
        }

        select {
        case event := <-watcher.Events:
            if event.Name == pluginapi.KubeletSocket && event.Op&fsnotify.Create == fsnotify.Create {
                log.V(1).Infof("inotify: %s created, restarting.", pluginapi.KubeletSocket)
                restart = true
            }

        case err := <-watcher.Errors:
            log.Warningf("inotify: %s", err)

        case s := <-sigs:
            switch s {
            case syscall.SIGHUP:
                log.V(1).Infoln("Received SIGHUP, restarting.")
                restart = true
            case syscall.SIGQUIT:
                t := time.Now()
                timestamp := fmt.Sprint(t.Format("20060102150405"))
                log.Infoln("generate core dump")
                coredump("/etc/kubernetes/go_" + timestamp + ".txt")
            default:
                log.V(1).Infof("Received signal \"%v\", shutting down.", s)
                devicePlugin.Stop()
                break L
            }
        }
    }

    return nil
}
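The command that drives this manager is not part of the diff. A minimal sketch of how a main package might wire it up is shown below; the flag names are assumptions, and the nil kubelet client is a placeholder for whatever pkg/kubelet/client constructor the real cmd uses.

package main

import (
    "flag"

    "github.com/AliyunContainerService/gpushare-device-plugin/pkg/gpu/nvidia"
)

func main() {
    healthCheck := flag.Bool("health-check", false, "enable GPU health checking (hypothetical flag)")
    queryKubelet := flag.Bool("query-kubelet", true, "list pending pods from the kubelet instead of the apiserver (hypothetical flag)")
    flag.Parse()

    // The real cmd builds a *client.KubeletClient here; nil is only a placeholder
    // so the sketch compiles on its own.
    ngm := nvidia.NewSharedGPUManager(false, *healthCheck, *queryKubelet, nvidia.GiBPrefix, nil)
    if err := ngm.Run(); err != nil {
        panic(err)
    }
}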
gpushare-device-plugin/pkg/gpu/nvidia/nvidia.go (new file, 152 lines)
@@ -0,0 +1,152 @@
package nvidia

import (
    "fmt"
    "strings"

    "github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
    log "github.com/golang/glog"
    "golang.org/x/net/context"
    pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

var (
    gpuMemory uint
    metric    MemoryUnit
)

func check(err error) {
    if err != nil {
        log.Fatalln("Fatal:", err)
    }
}

func generateFakeDeviceID(realID string, fakeCounter uint) string {
    return fmt.Sprintf("%s-_-%d", realID, fakeCounter)
}

func extractRealDeviceID(fakeDeviceID string) string {
    return strings.Split(fakeDeviceID, "-_-")[0]
}

func setGPUMemory(raw uint) {
    v := raw
    if metric == GiBPrefix {
        v = raw / 1024
    }
    gpuMemory = v
    log.Infof("set gpu memory: %d", gpuMemory)
}

func getGPUMemory() uint {
    return gpuMemory
}

func getDeviceCount() uint {
    n, err := nvml.GetDeviceCount()
    check(err)
    return n
}

func getDevices() ([]*pluginapi.Device, map[string]uint) {
    n, err := nvml.GetDeviceCount()
    check(err)

    var devs []*pluginapi.Device
    realDevNames := map[string]uint{}
    for i := uint(0); i < n; i++ {
        d, err := nvml.NewDevice(i)
        check(err)
        // realDevNames = append(realDevNames, d.UUID)
        var id uint
        log.Infof("Device %s's Path is %s", d.UUID, d.Path)
        _, err = fmt.Sscanf(d.Path, "/dev/nvidia%d", &id)
        check(err)
        realDevNames[d.UUID] = id
        // var KiB uint64 = 1024
        log.Infof("# device Memory: %d", uint(*d.Memory))
        if getGPUMemory() == uint(0) {
            setGPUMemory(uint(*d.Memory))
        }
        for j := uint(0); j < getGPUMemory(); j++ {
            fakeID := generateFakeDeviceID(d.UUID, j)
            if j == 0 {
                log.Infoln("# Add first device ID: " + fakeID)
            }
            if j == getGPUMemory()-1 {
                log.Infoln("# Add last device ID: " + fakeID)
            }
            devs = append(devs, &pluginapi.Device{
                ID:     fakeID,
                Health: pluginapi.Healthy,
            })
        }
    }

    return devs, realDevNames
}

func deviceExists(devs []*pluginapi.Device, id string) bool {
    for _, d := range devs {
        if d.ID == id {
            return true
        }
    }
    return false
}

func watchXIDs(ctx context.Context, devs []*pluginapi.Device, xids chan<- *pluginapi.Device) {
    eventSet := nvml.NewEventSet()
    defer nvml.DeleteEventSet(eventSet)

    for _, d := range devs {
        realDeviceID := extractRealDeviceID(d.ID)
        err := nvml.RegisterEventForDevice(eventSet, nvml.XidCriticalError, realDeviceID)
        if err != nil && strings.HasSuffix(err.Error(), "Not Supported") {
            log.Infof("Warning: %s (%s) is too old to support healthchecking: %s. Marking it unhealthy.", realDeviceID, d.ID, err)

            xids <- d
            continue
        }

        if err != nil {
            log.Fatalf("Fatal error: %v", err)
        }
    }

    for {
        select {
        case <-ctx.Done():
            return
        default:
        }

        e, err := nvml.WaitForEvent(eventSet, 5000)
        if err != nil && e.Etype != nvml.XidCriticalError {
            continue
        }

        // FIXME: formalize the full list and document it.
        // http://docs.nvidia.com/deploy/xid-errors/index.html#topic_4
        // Application errors: the GPU should still be healthy
        if e.Edata == 31 || e.Edata == 43 || e.Edata == 45 {
            continue
        }

        if e.UUID == nil || len(*e.UUID) == 0 {
            // All devices are unhealthy
            for _, d := range devs {
                xids <- d
            }
            continue
        }

        for _, d := range devs {
            if extractRealDeviceID(d.ID) == *e.UUID {
                xids <- d
            }
        }
    }
}
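For illustration only: the fake-device-ID scheme above advertises one kubelet device per GiB (or MiB) of GPU memory, and the real NVML UUID can always be recovered by splitting on the -_- separator. The UUID below is a made-up value.

package main

import (
    "fmt"
    "strings"
)

// Mirrors generateFakeDeviceID / extractRealDeviceID above.
func main() {
    realID := "GPU-3fa252d1-6ad6-4f84-8c2d-example" // hypothetical NVML UUID
    fake := fmt.Sprintf("%s-_-%d", realID, 7)       // the 8th fake device backed by that card

    fmt.Println(fake)
    fmt.Println(strings.Split(fake, "-_-")[0] == realID) // true: the physical device is recovered
}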
gpushare-device-plugin/pkg/gpu/nvidia/podmanager.go (new file, 262 lines)
@@ -0,0 +1,262 @@
package nvidia

import (
    "encoding/json"
    "fmt"
    "os"
    "sort"
    "time"

    "github.com/AliyunContainerService/gpushare-device-plugin/pkg/kubelet/client"
    log "github.com/golang/glog"
    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/fields"
    "k8s.io/apimachinery/pkg/labels"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/rest"
    "k8s.io/client-go/tools/clientcmd"
    nodeutil "k8s.io/kubernetes/pkg/util/node"
)

var (
    clientset *kubernetes.Clientset
    nodeName  string
    retries   = 8
)

func kubeInit() {
    kubeconfigFile := os.Getenv("KUBECONFIG")
    var err error
    var config *rest.Config

    if _, err = os.Stat(kubeconfigFile); err != nil {
        log.V(5).Infof("kubeconfig %s not found (%v), falling back to the in-cluster config", kubeconfigFile, err)
        config, err = rest.InClusterConfig()
        if err != nil {
            log.Fatalf("Failed due to %v", err)
        }
    } else {
        config, err = clientcmd.BuildConfigFromFlags("", kubeconfigFile)
        if err != nil {
            log.Fatalf("Failed due to %v", err)
        }
    }

    clientset, err = kubernetes.NewForConfig(config)
    if err != nil {
        log.Fatalf("Failed due to %v", err)
    }

    nodeName = os.Getenv("NODE_NAME")
    if nodeName == "" {
        log.Fatalln("Please set env NODE_NAME")
    }

}

func disableCGPUIsolationOrNot() (bool, error) {
    disable := false
    node, err := clientset.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
    if err != nil {
        return disable, err
    }
    labels := node.ObjectMeta.Labels
    value, ok := labels[EnvNodeLabelForDisableCGPU]
    if ok && value == "true" {
        log.Infof("enable gpusharing mode and disable cgpu mode")
        disable = true
    }
    return disable, nil
}

func patchGPUCount(gpuCount int) error {
    node, err := clientset.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
    if err != nil {
        return err
    }

    if val, ok := node.Status.Capacity[resourceCount]; ok {
        if val.Value() == int64(gpuCount) {
            log.Infof("No need to update Capacity %s", resourceCount)
            return nil
        }
    }

    newNode := node.DeepCopy()
    newNode.Status.Capacity[resourceCount] = *resource.NewQuantity(int64(gpuCount), resource.DecimalSI)
    newNode.Status.Allocatable[resourceCount] = *resource.NewQuantity(int64(gpuCount), resource.DecimalSI)
    // content := fmt.Sprintf(`[{"op": "add", "path": "/status/capacity/aliyun.com~gpu-count", "value": "%d"}]`, gpuCount)
    // _, err = clientset.CoreV1().Nodes().PatchStatus(nodeName, []byte(content))
    _, _, err = nodeutil.PatchNodeStatus(clientset.CoreV1(), types.NodeName(nodeName), node, newNode)
    if err != nil {
        log.Infof("Failed to update Capacity %s.", resourceCount)
    } else {
        log.Infof("Updated Capacity %s successfully.", resourceCount)
    }
    return err
}

func getPodList(kubeletClient *client.KubeletClient) (*v1.PodList, error) {
    podList, err := kubeletClient.GetNodeRunningPods()
    if err != nil {
        return nil, err
    }

    list, _ := json.Marshal(podList)
    log.V(8).Infof("get pods list %v", string(list))

    resultPodList := &v1.PodList{}
    for _, metaPod := range podList.Items {
        if metaPod.Status.Phase != v1.PodPending {
            continue
        }
        resultPodList.Items = append(resultPodList.Items, metaPod)
    }

    if len(resultPodList.Items) == 0 {
        return nil, fmt.Errorf("no pending pod found")
    }

    return resultPodList, nil
}

func getPodListsByQueryKubelet(kubeletClient *client.KubeletClient) (*v1.PodList, error) {
    podList, err := getPodList(kubeletClient)
    for i := 0; i < retries && err != nil; i++ {
        podList, err = getPodList(kubeletClient)
        log.Warningf("failed to get pending pod list, retrying")
        time.Sleep(100 * time.Millisecond)
    }
    if err != nil {
        log.Warningf("no pending pods found from the kubelet /pods api, falling back to listing from the apiserver")
        podList, err = getPodListsByListAPIServer()
        if err != nil {
            return nil, err
        }
    }
    return podList, nil
}

func getPodListsByListAPIServer() (*v1.PodList, error) {
    selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName, "status.phase": "Pending"})
    podList, err := clientset.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
        FieldSelector: selector.String(),
        LabelSelector: labels.Everything().String(),
    })
    for i := 0; i < 3 && err != nil; i++ {
        podList, err = clientset.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
            FieldSelector: selector.String(),
            LabelSelector: labels.Everything().String(),
        })
        time.Sleep(1 * time.Second)
    }
    if err != nil {
        return nil, fmt.Errorf("failed to get Pods assigned to node %v", nodeName)
    }

    return podList, nil
}

func getPendingPodsInNode(queryKubelet bool, kubeletClient *client.KubeletClient) ([]v1.Pod, error) {
    // pods, err := m.lister.List(labels.Everything())
    // if err != nil {
    // 	return nil, err
    // }
    pods := []v1.Pod{}

    podIDMap := map[types.UID]bool{}

    var podList *v1.PodList
    var err error
    if queryKubelet {
        podList, err = getPodListsByQueryKubelet(kubeletClient)
        if err != nil {
            return nil, err
        }
    } else {
        podList, err = getPodListsByListAPIServer()
        if err != nil {
            return nil, err
        }
    }

    log.V(5).Infof("all pod list %v", podList.Items)

    // if log.V(5) {
    for _, pod := range podList.Items {
        if pod.Spec.NodeName != nodeName {
            log.Warningf("Pod %s in ns %s is not assigned to node %s as expected, it's placed on node %s",
                pod.Name,
                pod.Namespace,
                nodeName,
                pod.Spec.NodeName)
        } else {
            log.Infof("list pod %s in ns %s in node %s and status is %s",
                pod.Name,
                pod.Namespace,
                nodeName,
                pod.Status.Phase,
            )
            if _, ok := podIDMap[pod.UID]; !ok {
                pods = append(pods, pod)
                podIDMap[pod.UID] = true
            }
        }

    }
    // }

    return pods, nil
}

// getCandidatePods picks up the GPU share pods that have been assumed by the
// scheduler but not yet assigned a device, ordered by assume time.
func getCandidatePods(queryKubelet bool, client *client.KubeletClient) ([]*v1.Pod, error) {
    candidatePods := []*v1.Pod{}
    allPods, err := getPendingPodsInNode(queryKubelet, client)
    if err != nil {
        return candidatePods, err
    }
    for _, pod := range allPods {
        current := pod
        if isGPUMemoryAssumedPod(&current) {
            candidatePods = append(candidatePods, &current)
        }
    }

    if log.V(4) {
        for _, pod := range candidatePods {
            log.Infof("candidate pod %s in ns %s with timestamp %d is found.",
                pod.Name,
                pod.Namespace,
                getAssumeTimeFromPodAnnotation(pod))
        }
    }

    return makePodOrderdByAge(candidatePods), nil
}

// make the pods ordered by GPU assume time
func makePodOrderdByAge(pods []*v1.Pod) []*v1.Pod {
    newPodList := make(orderedPodByAssumeTime, 0, len(pods))
    for _, v := range pods {
        newPodList = append(newPodList, v)
    }
    sort.Sort(newPodList)
    return []*v1.Pod(newPodList)
}

type orderedPodByAssumeTime []*v1.Pod

func (this orderedPodByAssumeTime) Len() int {
    return len(this)
}

func (this orderedPodByAssumeTime) Less(i, j int) bool {
    return getAssumeTimeFromPodAnnotation(this[i]) <= getAssumeTimeFromPodAnnotation(this[j])
}

func (this orderedPodByAssumeTime) Swap(i, j int) {
    this[i], this[j] = this[j], this[i]
}
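A small illustration of the ordering rule used by getCandidatePods: the pod with the earliest ALIYUN_COM_GPU_MEM_ASSUME_TIME annotation is allocated first. Because the package helpers above are unexported, the sketch re-implements the comparison inline with sort.Slice; the pod names and timestamps are made up.

package main

import (
    "fmt"
    "sort"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const assumeTimeKey = "ALIYUN_COM_GPU_MEM_ASSUME_TIME" // EnvResourceAssumeTime above

func assumedPod(name, ts string) *v1.Pod {
    return &v1.Pod{ObjectMeta: metav1.ObjectMeta{
        Name:        name,
        Annotations: map[string]string{assumeTimeKey: ts},
    }}
}

func main() {
    pods := []*v1.Pod{
        assumedPod("pod-b", "1600000000000000002"),
        assumedPod("pod-a", "1600000000000000001"),
    }
    // Same rule as orderedPodByAssumeTime: earliest assume time first. String
    // comparison is enough here because both timestamps have the same number of
    // digits; the plugin itself parses them to uint64.
    sort.Slice(pods, func(i, j int) bool {
        return pods[i].Annotations[assumeTimeKey] < pods[j].Annotations[assumeTimeKey]
    })
    fmt.Println(pods[0].Name) // pod-a, assumed first, is allocated first
}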
gpushare-device-plugin/pkg/gpu/nvidia/podutils.go (new file, 182 lines)
@@ -0,0 +1,182 @@
package nvidia

import (
    "encoding/json"
    "fmt"
    "strconv"
    "time"

    log "github.com/golang/glog"
    v1 "k8s.io/api/core/v1"
)

// update pod annotations with the assigned status
func updatePodAnnotations(oldPod *v1.Pod) (newPod *v1.Pod) {
    newPod = oldPod.DeepCopy()
    if len(newPod.ObjectMeta.Annotations) == 0 {
        newPod.ObjectMeta.Annotations = map[string]string{}
    }

    now := time.Now()
    newPod.ObjectMeta.Annotations[EnvAssignedFlag] = "true"
    newPod.ObjectMeta.Annotations[EnvResourceAssumeTime] = fmt.Sprintf("%d", now.UnixNano())

    return newPod
}

func patchPodAnnotationSpecAssigned() ([]byte, error) {
    now := time.Now()
    patchAnnotations := map[string]interface{}{
        "metadata": map[string]map[string]string{"annotations": {
            EnvAssignedFlag:       "true",
            EnvResourceAssumeTime: fmt.Sprintf("%d", now.UnixNano()),
        }}}
    return json.Marshal(patchAnnotations)
}

func getGPUIDFromPodAnnotation(pod *v1.Pod) (id int) {
    var err error
    id = -1

    if len(pod.ObjectMeta.Annotations) > 0 {
        value, found := pod.ObjectMeta.Annotations[EnvResourceIndex]
        if found {
            id, err = strconv.Atoi(value)
            if err != nil {
                log.Warningf("Failed to parse dev id %s due to %v for pod %s in ns %s",
                    value,
                    err,
                    pod.Name,
                    pod.Namespace)
                id = -1
            }
        } else {
            log.Warningf("Failed to get dev id for pod %s in ns %s",
                pod.Name,
                pod.Namespace)
        }
    }

    return id
}

// get the assumed timestamp
func getAssumeTimeFromPodAnnotation(pod *v1.Pod) (assumeTime uint64) {
    if assumeTimeStr, ok := pod.ObjectMeta.Annotations[EnvResourceAssumeTime]; ok {
        u64, err := strconv.ParseUint(assumeTimeStr, 10, 64)
        if err != nil {
            log.Warningf("Failed to parse assume Timestamp %s due to %v", assumeTimeStr, err)
        } else {
            assumeTime = u64
        }
    }

    return assumeTime
}

// determine if the pod is a GPU share pod that is already assumed but not yet assigned
func isGPUMemoryAssumedPod(pod *v1.Pod) (assumed bool) {
    log.V(6).Infof("Determine if the pod %v is a GPUShared assumed pod", pod)
    var ok bool

    // 1. Check if it's for GPU share
    if getGPUMemoryFromPodResource(pod) <= 0 {
        log.V(6).Infof("Pod %s in namespace %s has no GPU Memory request, so it's not a GPUShared assumed pod.",
            pod.Name,
            pod.Namespace)
        return assumed
    }

    // 2. Check if it already has an assume time
    if _, ok = pod.ObjectMeta.Annotations[EnvResourceAssumeTime]; !ok {
        log.V(4).Infof("No assume timestamp for pod %s in namespace %s, so it's not a GPUShared assumed pod.",
            pod.Name,
            pod.Namespace)
        return assumed
    }

    // 3. Check if it has been assigned already
    if assigned, ok := pod.ObjectMeta.Annotations[EnvAssignedFlag]; ok {

        if assigned == "false" {
            log.V(4).Infof("Found GPUShared assumed pod %s in namespace %s.",
                pod.Name,
                pod.Namespace)
            assumed = true
        } else {
            log.Infof("GPU assigned flag for pod %s exists in namespace %s and its assigned status is %s, so it's not a GPUShared assumed pod.",
                pod.Name,
                pod.Namespace,
                assigned)
        }
    } else {
        log.Warningf("No GPU assigned flag for pod %s in namespace %s, so it's not a GPUShared assumed pod.",
            pod.Name,
            pod.Namespace)
    }

    return assumed
}

// Get the GPU Memory of the Pod
func getGPUMemoryFromPodResource(pod *v1.Pod) uint {
    var total uint
    containers := pod.Spec.Containers
    for _, container := range containers {
        if val, ok := container.Resources.Limits[resourceName]; ok {
            total += uint(val.Value())
        }
    }
    return total
}

func podIsNotRunning(pod v1.Pod) bool {
    status := pod.Status
    // deletionTimestamp
    if pod.DeletionTimestamp != nil {
        return true
    }

    // pod is scheduled but not initialized
    if status.Phase == v1.PodPending && podConditionTrueOnly(status.Conditions, v1.PodScheduled) {
        log.Infof("Pod %s only has PodScheduled, is not running", pod.Name)
        return true
    }

    return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) || (status.Phase == v1.PodPending && podConditionTrueOnly(status.Conditions, v1.PodScheduled))
}

// notRunning returns true if every status is terminated or waiting, or the status list
// is empty.
func notRunning(statuses []v1.ContainerStatus) bool {
    for _, status := range statuses {
        if status.State.Terminated == nil && status.State.Waiting == nil {
            return false
        }
    }
    return true
}

func podConditionTrue(conditions []v1.PodCondition, expect v1.PodConditionType) bool {
    for _, condition := range conditions {
        if condition.Type == expect && condition.Status == v1.ConditionTrue {
            return true
        }
    }

    return false
}

func podConditionTrueOnly(conditions []v1.PodCondition, expect v1.PodConditionType) bool {
    if len(conditions) != 1 {
        return false
    }

    for _, condition := range conditions {
        if condition.Type == expect && condition.Status == v1.ConditionTrue {
            return true
        }
    }

    return false
}
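For reference, a sketch of the strategic-merge patch body that patchPodAnnotationSpecAssigned produces and that Allocate sends to the apiserver to mark the assumed pod as assigned. The timestamp is fixed here only so the output is deterministic.

package main

import (
    "encoding/json"
    "fmt"
)

func main() {
    // Same map shape as patchPodAnnotationSpecAssigned above.
    patch := map[string]interface{}{
        "metadata": map[string]map[string]string{"annotations": {
            "ALIYUN_COM_GPU_MEM_ASSIGNED":    "true",
            "ALIYUN_COM_GPU_MEM_ASSUME_TIME": "1600000000000000000",
        }},
    }
    b, _ := json.Marshal(patch)
    fmt.Println(string(b))
    // {"metadata":{"annotations":{"ALIYUN_COM_GPU_MEM_ASSIGNED":"true","ALIYUN_COM_GPU_MEM_ASSUME_TIME":"1600000000000000000"}}}
}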
gpushare-device-plugin/pkg/gpu/nvidia/server.go (new file, 241 lines)
@@ -0,0 +1,241 @@
package nvidia

import (
    "net"
    "os"
    "path"
    "sync"
    "time"

    "github.com/AliyunContainerService/gpushare-device-plugin/pkg/kubelet/client"
    log "github.com/golang/glog"
    "golang.org/x/net/context"
    "google.golang.org/grpc"
    pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

// NvidiaDevicePlugin implements the Kubernetes device plugin API
type NvidiaDevicePlugin struct {
    devs                 []*pluginapi.Device
    realDevNames         []string
    devNameMap           map[string]uint
    devIndxMap           map[uint]string
    socket               string
    mps                  bool
    healthCheck          bool
    disableCGPUIsolation bool
    stop                 chan struct{}
    health               chan *pluginapi.Device
    queryKubelet         bool
    kubeletClient        *client.KubeletClient

    server *grpc.Server
    sync.RWMutex
}

// NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
func NewNvidiaDevicePlugin(mps, healthCheck, queryKubelet bool, client *client.KubeletClient) (*NvidiaDevicePlugin, error) {
    devs, devNameMap := getDevices()
    devList := []string{}

    for dev := range devNameMap {
        devList = append(devList, dev)
    }

    log.Infof("Device Map: %v", devNameMap)
    log.Infof("Device List: %v", devList)

    err := patchGPUCount(len(devList))
    if err != nil {
        return nil, err
    }
    disableCGPUIsolation, err := disableCGPUIsolationOrNot()
    if err != nil {
        return nil, err
    }
    return &NvidiaDevicePlugin{
        devs:                 devs,
        realDevNames:         devList,
        devNameMap:           devNameMap,
        socket:               serverSock,
        mps:                  mps,
        healthCheck:          healthCheck,
        disableCGPUIsolation: disableCGPUIsolation,
        stop:                 make(chan struct{}),
        health:               make(chan *pluginapi.Device),
        queryKubelet:         queryKubelet,
        kubeletClient:        client,
    }, nil
}

func (m *NvidiaDevicePlugin) GetDeviceNameByIndex(index uint) (name string, found bool) {
    if len(m.devIndxMap) == 0 {
        m.devIndxMap = map[uint]string{}
        for k, v := range m.devNameMap {
            m.devIndxMap[v] = k
        }
        log.Infof("Get devIndexMap: %v", m.devIndxMap)
    }

    name, found = m.devIndxMap[index]
    return name, found
}

func (m *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
    return &pluginapi.DevicePluginOptions{}, nil
}

// dial establishes the gRPC communication with the registered device plugin.
func dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) {
    c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(),
        grpc.WithTimeout(timeout),
        grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
            return net.DialTimeout("unix", addr, timeout)
        }),
    )

    if err != nil {
        return nil, err
    }

    return c, nil
}

// Start starts the gRPC server of the device plugin
func (m *NvidiaDevicePlugin) Start() error {
    err := m.cleanup()
    if err != nil {
        return err
    }

    sock, err := net.Listen("unix", m.socket)
    if err != nil {
        return err
    }

    m.server = grpc.NewServer([]grpc.ServerOption{}...)
    pluginapi.RegisterDevicePluginServer(m.server, m)

    go m.server.Serve(sock)

    // Wait for the server to start by launching a blocking connection
    conn, err := dial(m.socket, 5*time.Second)
    if err != nil {
        return err
    }
    conn.Close()

    go m.healthcheck()

    lastAllocateTime = time.Now()

    return nil
}

// Stop stops the gRPC server
func (m *NvidiaDevicePlugin) Stop() error {
    if m.server == nil {
        return nil
    }

    m.server.Stop()
    m.server = nil
    close(m.stop)

    return m.cleanup()
}

// Register registers the device plugin for the given resourceName with Kubelet.
func (m *NvidiaDevicePlugin) Register(kubeletEndpoint, resourceName string) error {
    conn, err := dial(kubeletEndpoint, 5*time.Second)
    if err != nil {
        return err
    }
    defer conn.Close()

    client := pluginapi.NewRegistrationClient(conn)
    reqt := &pluginapi.RegisterRequest{
        Version:      pluginapi.Version,
        Endpoint:     path.Base(m.socket),
        ResourceName: resourceName,
    }

    _, err = client.Register(context.Background(), reqt)
    if err != nil {
        return err
    }
    return nil
}

// ListAndWatch lists devices and updates that list according to the health status
func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
    s.Send(&pluginapi.ListAndWatchResponse{Devices: m.devs})

    for {
        select {
        case <-m.stop:
            return nil
        case d := <-m.health:
            // FIXME: there is no way to recover from the Unhealthy state.
            d.Health = pluginapi.Unhealthy
            s.Send(&pluginapi.ListAndWatchResponse{Devices: m.devs})
        }
    }
}

func (m *NvidiaDevicePlugin) unhealthy(dev *pluginapi.Device) {
    m.health <- dev
}

func (m *NvidiaDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) {
    return &pluginapi.PreStartContainerResponse{}, nil
}

func (m *NvidiaDevicePlugin) cleanup() error {
    if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) {
        return err
    }

    return nil
}

func (m *NvidiaDevicePlugin) healthcheck() {
    ctx, cancel := context.WithCancel(context.Background())

    var xids chan *pluginapi.Device
    if m.healthCheck {
        xids = make(chan *pluginapi.Device)
        go watchXIDs(ctx, m.devs, xids)
    }

    for {
        select {
        case <-m.stop:
            cancel()
            return
        case dev := <-xids:
            m.unhealthy(dev)
        }
    }
}

// Serve starts the gRPC server and registers the device plugin with Kubelet
func (m *NvidiaDevicePlugin) Serve() error {
    err := m.Start()
    if err != nil {
        log.Infof("Could not start device plugin: %s", err)
        return err
    }
    log.Infoln("Starting to serve on", m.socket)

    err = m.Register(pluginapi.KubeletSocket, resourceName)
    if err != nil {
        log.Infof("Could not register device plugin: %s", err)
        m.Stop()
        return err
    }
    log.Infoln("Registered device plugin with Kubelet")

    return nil
}
gpushare-device-plugin/pkg/gpu/nvidia/watchers.go (new file, 32 lines)
@@ -0,0 +1,32 @@
package nvidia

import (
    "os"
    "os/signal"

    "github.com/fsnotify/fsnotify"
)

func newFSWatcher(files ...string) (*fsnotify.Watcher, error) {
    watcher, err := fsnotify.NewWatcher()
    if err != nil {
        return nil, err
    }

    for _, f := range files {
        err = watcher.Add(f)
        if err != nil {
            watcher.Close()
            return nil, err
        }
    }

    return watcher, nil
}

func newOSWatcher(sigs ...os.Signal) chan os.Signal {
    sigChan := make(chan os.Signal, 1)
    signal.Notify(sigChan, sigs...)

    return sigChan
}