synchronization
gpushare-device-plugin/pkg/gpu/nvidia/allocate.go (new file, 198 lines)
@@ -0,0 +1,198 @@
package nvidia

import (
	"fmt"
	"time"

	log "github.com/golang/glog"
	"golang.org/x/net/context"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

var (
	clientTimeout    = 30 * time.Second
	lastAllocateTime time.Time
)

// initialize the kubernetes client
func init() {
	kubeInit()
}

func buildErrResponse(reqs *pluginapi.AllocateRequest, podReqGPU uint) *pluginapi.AllocateResponse {
	responses := pluginapi.AllocateResponse{}
	for _, req := range reqs.ContainerRequests {
		response := pluginapi.ContainerAllocateResponse{
			Envs: map[string]string{
				envNVGPU:               fmt.Sprintf("no-gpu-has-%d%s-to-run", podReqGPU, metric),
				EnvResourceIndex:       "-1",
				EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
				EnvResourceByContainer: fmt.Sprintf("%d", uint(len(req.DevicesIDs))),
				EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
			},
		}
		responses.ContainerResponses = append(responses.ContainerResponses, &response)
	}
	return &responses
}

// Allocate returns the allocation response for the requested devices.
func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
	reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
	responses := pluginapi.AllocateResponse{}

	log.Infoln("----Allocating GPU for gpu mem is started----")
	var (
		podReqGPU uint
		found     bool
		assumePod *v1.Pod
	)

	// podReqGPU = uint(0)
	for _, req := range reqs.ContainerRequests {
		podReqGPU += uint(len(req.DevicesIDs))
	}
	log.Infof("RequestPodGPUs: %d", podReqGPU)

	m.Lock()
	defer m.Unlock()
	log.Infoln("checking...")
	pods, err := getCandidatePods(m.queryKubelet, m.kubeletClient)
	if err != nil {
		log.Infof("invalid allocation request: failed to find candidate pods due to %v", err)
		return buildErrResponse(reqs, podReqGPU), nil
	}

	if log.V(4) {
		for _, pod := range pods {
			log.Infof("Pod %s in ns %s request GPU Memory %d with timestamp %v",
				pod.Name,
				pod.Namespace,
				getGPUMemoryFromPodResource(pod),
				getAssumeTimeFromPodAnnotation(pod))
		}
	}

	for _, pod := range pods {
		if getGPUMemoryFromPodResource(pod) == podReqGPU {
			log.Infof("Found Assumed GPU shared Pod %s in ns %s with GPU Memory %d",
				pod.Name,
				pod.Namespace,
				podReqGPU)
			assumePod = pod
			found = true
			break
		}
	}

	if found {
		id := getGPUIDFromPodAnnotation(assumePod)
		if id < 0 {
			log.Warningf("Failed to get the dev for pod %v", assumePod)
		}

		candidateDevID := ""
		if id >= 0 {
			ok := false
			candidateDevID, ok = m.GetDeviceNameByIndex(uint(id))
			if !ok {
				log.Warningf("Failed to find the dev for pod %v because it's not able to find dev with index %d",
					assumePod,
					id)
				id = -1
			}
		}

		if id < 0 {
			return buildErrResponse(reqs, podReqGPU), nil
		}
		log.Infof("gpu index %v, uuid: %v", id, candidateDevID)
		// 1. Create container requests
		for _, req := range reqs.ContainerRequests {
			reqGPU := uint(len(req.DevicesIDs))
			response := pluginapi.ContainerAllocateResponse{
				Envs: map[string]string{
					envNVGPU:               fmt.Sprintf("%v", id),
					EnvResourceIndex:       fmt.Sprintf("%d", id),
					EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
					EnvResourceByContainer: fmt.Sprintf("%d", reqGPU),
					EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
				},
			}
			if m.disableCGPUIsolation {
				response.Envs["CGPU_DISABLE"] = "true"
			}
			responses.ContainerResponses = append(responses.ContainerResponses, &response)
		}

		// 2. Update Pod spec
		patchedAnnotationBytes, err := patchPodAnnotationSpecAssigned()
		if err != nil {
			return buildErrResponse(reqs, podReqGPU), nil
		}
		_, err = clientset.CoreV1().Pods(assumePod.Namespace).Patch(assumePod.Name, types.StrategicMergePatchType, patchedAnnotationBytes)
		if err != nil {
			// the object has been modified; please apply your changes to the latest version and try again
			if err.Error() == OptimisticLockErrorMsg {
				// retry
				_, err = clientset.CoreV1().Pods(assumePod.Namespace).Patch(assumePod.Name, types.StrategicMergePatchType, patchedAnnotationBytes)
				if err != nil {
					log.Warningf("Failed due to %v", err)
					return buildErrResponse(reqs, podReqGPU), nil
				}
			} else {
				log.Warningf("Failed due to %v", err)
				return buildErrResponse(reqs, podReqGPU), nil
			}
		}

	} else if len(m.devNameMap) == 1 {
		var devName string
		var devIndex uint
		for d, index := range m.devNameMap {
			devName = d
			devIndex = index
			break
		}
		log.Infof("this node has only one gpu device, skip searching pods and directly assign the device %v(%v) to the container", devIndex, devName)
		for _, req := range reqs.ContainerRequests {
			reqGPU := uint(len(req.DevicesIDs))
			response := pluginapi.ContainerAllocateResponse{
				Envs: map[string]string{
					envNVGPU:               devName,
					EnvResourceIndex:       fmt.Sprintf("%d", devIndex),
					EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
					EnvResourceByContainer: fmt.Sprintf("%d", reqGPU),
					EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
				},
			}
			if m.disableCGPUIsolation {
				response.Envs["CGPU_DISABLE"] = "true"
			}
			responses.ContainerResponses = append(responses.ContainerResponses, &response)
		}
		log.Infof("get allocated GPUs info %v", responses)
		return &responses, nil

	} else {
		log.Warningf("invalid allocation request: request GPU memory %d can't be satisfied.",
			podReqGPU)
		// return &responses, fmt.Errorf("invalid allocation request: request GPU memory %d can't be satisfied", reqGPU)
		return buildErrResponse(reqs, podReqGPU), nil
	}

	podName := ""
	if assumePod != nil {
		podName = assumePod.Name
	}
	log.Infof("pod %v, new allocated GPUs info %v", podName, &responses)
	log.Infof("----Allocating GPU for gpu mem for %v is ended----", podName)
	// // Add this to make sure the container is created at least
	// currentTime := time.Now()

	// currentTime.Sub(lastAllocateTime)

	return &responses, nil
}
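The allocation above is driven entirely by the pod's resource limits. As a hedged illustration (not part of this commit; the pod and image names are made up), this is roughly the client-go shape of a pod that requests 2 units of the shared GPU memory resource declared in const.go below, which kubelet translates into two fake device IDs passed to Allocate:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "cuda-app", Namespace: "default"}, // hypothetical names
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name:  "main",
				Image: "cuda-app:latest", // hypothetical image
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						// resourceName from const.go below
						"rainbond.com/gpu-mem": resource.MustParse("2"),
					},
				},
			}},
		},
	}
	fmt.Println(pod.Spec.Containers[0].Resources.Limits)
}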
							
								
								
									
gpushare-device-plugin/pkg/gpu/nvidia/const.go (new file, 36 lines)
@@ -0,0 +1,36 @@
package nvidia

import (
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

// MemoryUnit describes the unit of GPU memory; only GiB and MiB are supported for now
type MemoryUnit string

const (
	resourceName  = "rainbond.com/gpu-mem"
	resourceCount = "rainbond.com/gpu-count"
	serverSock    = pluginapi.DevicePluginPath + "aliyungpushare.sock"

	OptimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"

	allHealthChecks             = "xids"
	containerTypeLabelKey       = "io.kubernetes.docker.type"
	containerTypeLabelSandbox   = "podsandbox"
	containerTypeLabelContainer = "container"
	containerLogPathLabelKey    = "io.kubernetes.container.logpath"
	sandboxIDLabelKey           = "io.kubernetes.sandbox.id"

	envNVGPU                   = "NVIDIA_VISIBLE_DEVICES"
	EnvResourceIndex           = "ALIYUN_COM_GPU_MEM_IDX"
	EnvResourceByPod           = "ALIYUN_COM_GPU_MEM_POD"
	EnvResourceByContainer     = "ALIYUN_COM_GPU_MEM_CONTAINER"
	EnvResourceByDev           = "ALIYUN_COM_GPU_MEM_DEV"
	EnvAssignedFlag            = "ALIYUN_COM_GPU_MEM_ASSIGNED"
	EnvResourceAssumeTime      = "ALIYUN_COM_GPU_MEM_ASSUME_TIME"
	EnvResourceAssignTime      = "ALIYUN_COM_GPU_MEM_ASSIGN_TIME"
	EnvNodeLabelForDisableCGPU = "cgpu.disable.isolation"

	GiBPrefix = MemoryUnit("GiB")
	MiBPrefix = MemoryUnit("MiB")
)
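As a minimal sketch, assuming a workload container that wants to honor its memory quota, the environment injected by Allocate can be read back with os.Getenv using the exported constant values above; the printing is purely illustrative:

package main

import (
	"fmt"
	"os"
)

func main() {
	idx := os.Getenv("ALIYUN_COM_GPU_MEM_IDX") // EnvResourceIndex: GPU index, "-1" on failure
	pod := os.Getenv("ALIYUN_COM_GPU_MEM_POD") // EnvResourceByPod: memory units granted to the pod
	dev := os.Getenv("ALIYUN_COM_GPU_MEM_DEV") // EnvResourceByDev: total memory units of the device
	fmt.Printf("gpu index=%s pod quota=%s device total=%s\n", idx, pod, dev)
}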
							
								
								
									
gpushare-device-plugin/pkg/gpu/nvidia/coredump.go (new file, 30 lines)
@@ -0,0 +1,30 @@
package nvidia

import (
	"io/ioutil"
	"runtime"

	log "github.com/golang/glog"
)

// StackTrace returns the stack trace of the current goroutine, or of all
// goroutines when all is true, doubling the buffer until the trace fits.
func StackTrace(all bool) string {
	buf := make([]byte, 10240)

	for {
		size := runtime.Stack(buf, all)

		if size == len(buf) {
			buf = make([]byte, len(buf)<<1)
			continue
		}
		break
	}

	return string(buf)
}

// coredump writes the stack traces of all goroutines to fileName.
func coredump(fileName string) {
	log.Infoln("Dump stacktrace to ", fileName)
	ioutil.WriteFile(fileName, []byte(StackTrace(true)), 0644)
}
							
								
								
									
gpushare-device-plugin/pkg/gpu/nvidia/gpumanager.go (new file, 111 lines)
@@ -0,0 +1,111 @@
package nvidia

import (
	"fmt"
	"os"
	"syscall"
	"time"

	"github.com/AliyunContainerService/gpushare-device-plugin/pkg/kubelet/client"
	"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
	"github.com/fsnotify/fsnotify"
	log "github.com/golang/glog"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

type sharedGPUManager struct {
	enableMPS     bool
	healthCheck   bool
	queryKubelet  bool
	kubeletClient *client.KubeletClient
}

func NewSharedGPUManager(enableMPS, healthCheck, queryKubelet bool, bp MemoryUnit, client *client.KubeletClient) *sharedGPUManager {
	metric = bp
	return &sharedGPUManager{
		enableMPS:     enableMPS,
		healthCheck:   healthCheck,
		queryKubelet:  queryKubelet,
		kubeletClient: client,
	}
}

func (ngm *sharedGPUManager) Run() error {
	log.V(1).Infoln("Loading NVML")

	if err := nvml.Init(); err != nil {
		log.V(1).Infof("Failed to initialize NVML: %s.", err)
		log.V(1).Infof("If this is a GPU node, did you set the docker default runtime to `nvidia`?")
		select {}
	}
	defer func() { log.V(1).Infoln("Shutdown of NVML returned:", nvml.Shutdown()) }()

	log.V(1).Infoln("Fetching devices.")
	if getDeviceCount() == uint(0) {
		log.V(1).Infoln("No devices found. Waiting indefinitely.")
		select {}
	}

	log.V(1).Infoln("Starting FS watcher.")
	watcher, err := newFSWatcher(pluginapi.DevicePluginPath)
	if err != nil {
		log.V(1).Infoln("Failed to create FS watcher.")
		return err
	}
	defer watcher.Close()

	log.V(1).Infoln("Starting OS watcher.")
	sigs := newOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	restart := true
	var devicePlugin *NvidiaDevicePlugin

L:
	for {
		if restart {
			if devicePlugin != nil {
				devicePlugin.Stop()
			}

			devicePlugin, err = NewNvidiaDevicePlugin(ngm.enableMPS, ngm.healthCheck, ngm.queryKubelet, ngm.kubeletClient)
			if err != nil {
				log.Warningf("Failed to get device plugin due to %v", err)
				os.Exit(1)
			} else if err = devicePlugin.Serve(); err != nil {
				log.Warningf("Failed to start device plugin due to %v", err)
				os.Exit(2)
			} else {
				restart = false
			}
		}

		select {
		case event := <-watcher.Events:
			if event.Name == pluginapi.KubeletSocket && event.Op&fsnotify.Create == fsnotify.Create {
				log.V(1).Infof("inotify: %s created, restarting.", pluginapi.KubeletSocket)
				restart = true
			}

		case err := <-watcher.Errors:
			log.Warningf("inotify: %s", err)

		case s := <-sigs:
			switch s {
			case syscall.SIGHUP:
				log.V(1).Infoln("Received SIGHUP, restarting.")
				restart = true
			case syscall.SIGQUIT:
				t := time.Now()
				timestamp := fmt.Sprint(t.Format("20060102150405"))
				log.Infoln("generate core dump")
				coredump("/etc/kubernetes/go_" + timestamp + ".txt")
			default:
				log.V(1).Infof("Received signal \"%v\", shutting down.", s)
				devicePlugin.Stop()
				break L
			}
		}
	}

	return nil
}
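For orientation, a hedged sketch of how a main package might wire these pieces together; the plugin's real main.go is not part of this commit, so the option values and kubelet address below are assumptions:

package main

import (
	"time"

	"github.com/AliyunContainerService/gpushare-device-plugin/pkg/gpu/nvidia"
	"github.com/AliyunContainerService/gpushare-device-plugin/pkg/kubelet/client"
	"k8s.io/client-go/rest"
)

func main() {
	// Build a kubelet client for the local node (address/port are assumptions).
	kc, err := client.NewKubeletClient(&client.KubeletClientConfig{
		Address:         "127.0.0.1",
		Port:            10250,
		TLSClientConfig: rest.TLSClientConfig{Insecure: true},
		HTTPTimeout:     10 * time.Second,
	})
	if err != nil {
		panic(err)
	}
	// enableMPS=false, healthCheck=true, queryKubelet=true are illustrative choices.
	ngm := nvidia.NewSharedGPUManager(false, true, true, nvidia.GiBPrefix, kc)
	if err := ngm.Run(); err != nil {
		panic(err)
	}
}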
							
								
								
									
gpushare-device-plugin/pkg/gpu/nvidia/nvidia.go (new file, 152 lines)
@@ -0,0 +1,152 @@
package nvidia

import (
	"fmt"
	"strings"

	log "github.com/golang/glog"

	"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"

	"golang.org/x/net/context"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

var (
	gpuMemory uint
	metric    MemoryUnit
)

func check(err error) {
	if err != nil {
		log.Fatalln("Fatal:", err)
	}
}

func generateFakeDeviceID(realID string, fakeCounter uint) string {
	return fmt.Sprintf("%s-_-%d", realID, fakeCounter)
}

func extractRealDeviceID(fakeDeviceID string) string {
	return strings.Split(fakeDeviceID, "-_-")[0]
}

func setGPUMemory(raw uint) {
	v := raw
	if metric == GiBPrefix {
		v = raw / 1024
	}
	gpuMemory = v
	log.Infof("set gpu memory: %d", gpuMemory)
}

func getGPUMemory() uint {
	return gpuMemory
}

func getDeviceCount() uint {
	n, err := nvml.GetDeviceCount()
	check(err)
	return n
}

func getDevices() ([]*pluginapi.Device, map[string]uint) {
	n, err := nvml.GetDeviceCount()
	check(err)

	var devs []*pluginapi.Device
	realDevNames := map[string]uint{}
	for i := uint(0); i < n; i++ {
		d, err := nvml.NewDevice(i)
		check(err)
		// realDevNames = append(realDevNames, d.UUID)
		var id uint
		log.Infof("Device %s's Path is %s", d.UUID, d.Path)
		_, err = fmt.Sscanf(d.Path, "/dev/nvidia%d", &id)
		check(err)
		realDevNames[d.UUID] = id
		// var KiB uint64 = 1024
		log.Infof("# device Memory: %d", uint(*d.Memory))
		if getGPUMemory() == uint(0) {
			setGPUMemory(uint(*d.Memory))
		}
		for j := uint(0); j < getGPUMemory(); j++ {
			fakeID := generateFakeDeviceID(d.UUID, j)
			if j == 0 {
				log.Infoln("# Add first device ID: " + fakeID)
			}
			if j == getGPUMemory()-1 {
				log.Infoln("# Add last device ID: " + fakeID)
			}
			devs = append(devs, &pluginapi.Device{
				ID:     fakeID,
				Health: pluginapi.Healthy,
			})
		}
	}

	return devs, realDevNames
}

func deviceExists(devs []*pluginapi.Device, id string) bool {
	for _, d := range devs {
		if d.ID == id {
			return true
		}
	}
	return false
}

func watchXIDs(ctx context.Context, devs []*pluginapi.Device, xids chan<- *pluginapi.Device) {
	eventSet := nvml.NewEventSet()
	defer nvml.DeleteEventSet(eventSet)

	for _, d := range devs {
		realDeviceID := extractRealDeviceID(d.ID)
		err := nvml.RegisterEventForDevice(eventSet, nvml.XidCriticalError, realDeviceID)
		if err != nil && strings.HasSuffix(err.Error(), "Not Supported") {
			log.Infof("Warning: %s (%s) is too old to support healthchecking: %s. Marking it unhealthy.", realDeviceID, d.ID, err)

			xids <- d
			continue
		}

		if err != nil {
			log.Fatalf("Fatal error: %v", err)
		}
	}

	for {
		select {
		case <-ctx.Done():
			return
		default:
		}

		e, err := nvml.WaitForEvent(eventSet, 5000)
		if err != nil && e.Etype != nvml.XidCriticalError {
			continue
		}

		// FIXME: formalize the full list and document it.
		// http://docs.nvidia.com/deploy/xid-errors/index.html#topic_4
		// Application errors: the GPU should still be healthy
		if e.Edata == 31 || e.Edata == 43 || e.Edata == 45 {
			continue
		}

		if e.UUID == nil || len(*e.UUID) == 0 {
			// All devices are unhealthy
			for _, d := range devs {
				xids <- d
			}
			continue
		}

		for _, d := range devs {
			if extractRealDeviceID(d.ID) == *e.UUID {
				xids <- d
			}
		}
	}
}
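A small illustrative helper (same package, not part of the commit) showing the fake-device-ID scheme above: a device with a made-up UUID and 15 memory units fans out into 15 fake devices, and extractRealDeviceID always recovers the physical UUID:

package nvidia

// exampleFakeDeviceIDs is illustrative only; the UUID is hypothetical.
func exampleFakeDeviceIDs() []string {
	uuid := "GPU-0aaa1111-2222-3333-4444-555566667777"
	ids := make([]string, 0, 15)
	for j := uint(0); j < 15; j++ {
		id := generateFakeDeviceID(uuid, j) // e.g. "GPU-0aaa...-_-3"
		if extractRealDeviceID(id) != uuid {
			panic("round trip must recover the real UUID")
		}
		ids = append(ids, id)
	}
	return ids
}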
							
								
								
									
gpushare-device-plugin/pkg/gpu/nvidia/podmanager.go (new file, 262 lines)
@@ -0,0 +1,262 @@
package nvidia

import (
	"encoding/json"
	"fmt"
	"os"
	"sort"
	"time"

	"github.com/AliyunContainerService/gpushare-device-plugin/pkg/kubelet/client"
	log "github.com/golang/glog"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/clientcmd"
	nodeutil "k8s.io/kubernetes/pkg/util/node"
)

var (
	clientset *kubernetes.Clientset
	nodeName  string
	retries   = 8
)

func kubeInit() {
	kubeconfigFile := os.Getenv("KUBECONFIG")
	var err error
	var config *rest.Config

	if _, err = os.Stat(kubeconfigFile); err != nil {
		log.V(5).Infof("Failed to find kubeconfig %s due to %v, falling back to in-cluster config", kubeconfigFile, err)
		config, err = rest.InClusterConfig()
		if err != nil {
			log.Fatalf("Failed due to %v", err)
		}
	} else {
		config, err = clientcmd.BuildConfigFromFlags("", kubeconfigFile)
		if err != nil {
			log.Fatalf("Failed due to %v", err)
		}
	}

	clientset, err = kubernetes.NewForConfig(config)
	if err != nil {
		log.Fatalf("Failed due to %v", err)
	}

	nodeName = os.Getenv("NODE_NAME")
	if nodeName == "" {
		log.Fatalln("Please set env NODE_NAME")
	}

}

func disableCGPUIsolationOrNot() (bool, error) {
	disable := false
	node, err := clientset.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
	if err != nil {
		return disable, err
	}
	labels := node.ObjectMeta.Labels
	value, ok := labels[EnvNodeLabelForDisableCGPU]
	if ok && value == "true" {
		log.Infof("enable gpusharing mode and disable cgpu mode")
		disable = true
	}
	return disable, nil
}

func patchGPUCount(gpuCount int) error {
	node, err := clientset.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}

	if val, ok := node.Status.Capacity[resourceCount]; ok {
		if val.Value() == int64(gpuCount) {
			log.Infof("No need to update Capacity %s", resourceCount)
			return nil
		}
	}

	newNode := node.DeepCopy()
	newNode.Status.Capacity[resourceCount] = *resource.NewQuantity(int64(gpuCount), resource.DecimalSI)
	newNode.Status.Allocatable[resourceCount] = *resource.NewQuantity(int64(gpuCount), resource.DecimalSI)
	// content := fmt.Sprintf(`[{"op": "add", "path": "/status/capacity/aliyun.com~gpu-count", "value": "%d"}]`, gpuCount)
	// _, err = clientset.CoreV1().Nodes().PatchStatus(nodeName, []byte(content))
	_, _, err = nodeutil.PatchNodeStatus(clientset.CoreV1(), types.NodeName(nodeName), node, newNode)
	if err != nil {
		log.Infof("Failed to update Capacity %s.", resourceCount)
	} else {
		log.Infof("Updated Capacity %s successfully.", resourceCount)
	}
	return err
}

func getPodList(kubeletClient *client.KubeletClient) (*v1.PodList, error) {
	podList, err := kubeletClient.GetNodeRunningPods()
	if err != nil {
		return nil, err
	}

	list, _ := json.Marshal(podList)
	log.V(8).Infof("get pods list %v", string(list))

	resultPodList := &v1.PodList{}
	for _, metaPod := range podList.Items {
		if metaPod.Status.Phase != v1.PodPending {
			continue
		}
		resultPodList.Items = append(resultPodList.Items, metaPod)
	}

	if len(resultPodList.Items) == 0 {
		return nil, fmt.Errorf("no pending pod found")
	}

	return resultPodList, nil
}

func getPodListsByQueryKubelet(kubeletClient *client.KubeletClient) (*v1.PodList, error) {
	podList, err := getPodList(kubeletClient)
	for i := 0; i < retries && err != nil; i++ {
		podList, err = getPodList(kubeletClient)
		log.Warningf("failed to get pending pod list, retry")
		time.Sleep(100 * time.Millisecond)
	}
	if err != nil {
		log.Warningf("not found from kubelet /pods api, start to list apiserver")
		podList, err = getPodListsByListAPIServer()
		if err != nil {
			return nil, err
		}
	}
	return podList, nil
}

func getPodListsByListAPIServer() (*v1.PodList, error) {
	selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName, "status.phase": "Pending"})
	podList, err := clientset.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
		FieldSelector: selector.String(),
		LabelSelector: labels.Everything().String(),
	})
	for i := 0; i < 3 && err != nil; i++ {
		podList, err = clientset.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
			FieldSelector: selector.String(),
			LabelSelector: labels.Everything().String(),
		})
		time.Sleep(1 * time.Second)
	}
	if err != nil {
		return nil, fmt.Errorf("failed to get Pods assigned to node %v", nodeName)
	}

	return podList, nil
}

func getPendingPodsInNode(queryKubelet bool, kubeletClient *client.KubeletClient) ([]v1.Pod, error) {
	// pods, err := m.lister.List(labels.Everything())
	// if err != nil {
	// 	return nil, err
	// }
	pods := []v1.Pod{}

	podIDMap := map[types.UID]bool{}

	var podList *v1.PodList
	var err error
	if queryKubelet {
		podList, err = getPodListsByQueryKubelet(kubeletClient)
		if err != nil {
			return nil, err
		}
	} else {
		podList, err = getPodListsByListAPIServer()
		if err != nil {
			return nil, err
		}
	}

	log.V(5).Infof("all pod list %v", podList.Items)

	// if log.V(5) {
	for _, pod := range podList.Items {
		if pod.Spec.NodeName != nodeName {
			log.Warningf("Pod name %s in ns %s is not assigned to node %s as expected, it's placed on node %s ",
				pod.Name,
				pod.Namespace,
				nodeName,
				pod.Spec.NodeName)
		} else {
			log.Infof("list pod %s in ns %s in node %s and status is %s",
				pod.Name,
				pod.Namespace,
				nodeName,
				pod.Status.Phase,
			)
			if _, ok := podIDMap[pod.UID]; !ok {
				pods = append(pods, pod)
				podIDMap[pod.UID] = true
			}
		}

	}
	// }

	return pods, nil
}

// getCandidatePods picks up the GPU share pods that are assumed but not yet assigned, ordered by assume time
func getCandidatePods(queryKubelet bool, client *client.KubeletClient) ([]*v1.Pod, error) {
	candidatePods := []*v1.Pod{}
	allPods, err := getPendingPodsInNode(queryKubelet, client)
	if err != nil {
		return candidatePods, err
	}
	for _, pod := range allPods {
		current := pod
		if isGPUMemoryAssumedPod(&current) {
			candidatePods = append(candidatePods, &current)
		}
	}

	if log.V(4) {
		for _, pod := range candidatePods {
			log.Infof("candidate pod %s in ns %s with timestamp %d is found.",
				pod.Name,
				pod.Namespace,
				getAssumeTimeFromPodAnnotation(pod))
		}
	}

	return makePodOrderdByAge(candidatePods), nil
}

// make the pods ordered by GPU assume time
func makePodOrderdByAge(pods []*v1.Pod) []*v1.Pod {
	newPodList := make(orderedPodByAssumeTime, 0, len(pods))
	for _, v := range pods {
		newPodList = append(newPodList, v)
	}
	sort.Sort(newPodList)
	return []*v1.Pod(newPodList)
}

type orderedPodByAssumeTime []*v1.Pod

func (this orderedPodByAssumeTime) Len() int {
	return len(this)
}

func (this orderedPodByAssumeTime) Less(i, j int) bool {
	return getAssumeTimeFromPodAnnotation(this[i]) <= getAssumeTimeFromPodAnnotation(this[j])
}

func (this orderedPodByAssumeTime) Swap(i, j int) {
	this[i], this[j] = this[j], this[i]
}
							
								
								
									
gpushare-device-plugin/pkg/gpu/nvidia/podutils.go (new file, 182 lines)
@@ -0,0 +1,182 @@
package nvidia

import (
	"encoding/json"
	"fmt"
	"strconv"
	"time"

	log "github.com/golang/glog"
	v1 "k8s.io/api/core/v1"
)

// update pod annotations with assigned status
func updatePodAnnotations(oldPod *v1.Pod) (newPod *v1.Pod) {
	newPod = oldPod.DeepCopy()
	if len(newPod.ObjectMeta.Annotations) == 0 {
		newPod.ObjectMeta.Annotations = map[string]string{}
	}

	now := time.Now()
	newPod.ObjectMeta.Annotations[EnvAssignedFlag] = "true"
	newPod.ObjectMeta.Annotations[EnvResourceAssumeTime] = fmt.Sprintf("%d", now.UnixNano())

	return newPod
}

func patchPodAnnotationSpecAssigned() ([]byte, error) {
	now := time.Now()
	patchAnnotations := map[string]interface{}{
		"metadata": map[string]map[string]string{"annotations": {
			EnvAssignedFlag:       "true",
			EnvResourceAssumeTime: fmt.Sprintf("%d", now.UnixNano()),
		}}}
	return json.Marshal(patchAnnotations)
}

func getGPUIDFromPodAnnotation(pod *v1.Pod) (id int) {
	var err error
	id = -1

	if len(pod.ObjectMeta.Annotations) > 0 {
		value, found := pod.ObjectMeta.Annotations[EnvResourceIndex]
		if found {
			id, err = strconv.Atoi(value)
			if err != nil {
				log.Warningf("Failed to parse dev id %s due to %v for pod %s in ns %s",
					value,
					err,
					pod.Name,
					pod.Namespace)
				id = -1
			}
		} else {
			log.Warningf("Failed to get dev id for pod %s in ns %s",
				pod.Name,
				pod.Namespace)
		}
	}

	return id
}

// get assumed timestamp
func getAssumeTimeFromPodAnnotation(pod *v1.Pod) (assumeTime uint64) {
	if assumeTimeStr, ok := pod.ObjectMeta.Annotations[EnvResourceAssumeTime]; ok {
		u64, err := strconv.ParseUint(assumeTimeStr, 10, 64)
		if err != nil {
			log.Warningf("Failed to parse assume Timestamp %s due to %v", assumeTimeStr, err)
		} else {
			assumeTime = u64
		}
	}

	return assumeTime
}

// determine if the pod is a GPU share pod that is already assumed but not yet assigned
func isGPUMemoryAssumedPod(pod *v1.Pod) (assumed bool) {
	log.V(6).Infof("Determine if the pod %v is GPUSharedAssumed pod", pod)
	var ok bool

	// 1. Check if it's for GPU share
	if getGPUMemoryFromPodResource(pod) <= 0 {
		log.V(6).Infof("Pod %s in namespace %s has no GPU Memory Request, so it's not a GPUSharedAssumed pod.",
			pod.Name,
			pod.Namespace)
		return assumed
	}

	// 2. Check if it already has an assume time
	if _, ok = pod.ObjectMeta.Annotations[EnvResourceAssumeTime]; !ok {
		log.V(4).Infof("No assume timestamp for pod %s in namespace %s, so it's not a GPUSharedAssumed pod.",
			pod.Name,
			pod.Namespace)
		return assumed
	}

	// 3. Check if it has been assigned already
	if assigned, ok := pod.ObjectMeta.Annotations[EnvAssignedFlag]; ok {

		if assigned == "false" {
			log.V(4).Infof("Found GPUSharedAssumed pod %s in namespace %s.",
				pod.Name,
				pod.Namespace)
			assumed = true
		} else {
			log.Infof("GPU assigned Flag for pod %s exists in namespace %s and its assigned status is %s, so it's not a GPUSharedAssumed pod.",
				pod.Name,
				pod.Namespace,
				assigned)
		}
	} else {
		log.Warningf("No GPU assigned Flag for pod %s in namespace %s, so it's not a GPUSharedAssumed pod.",
			pod.Name,
			pod.Namespace)
	}

	return assumed
}

// getGPUMemoryFromPodResource returns the total GPU memory requested by the Pod
func getGPUMemoryFromPodResource(pod *v1.Pod) uint {
	var total uint
	containers := pod.Spec.Containers
	for _, container := range containers {
		if val, ok := container.Resources.Limits[resourceName]; ok {
			total += uint(val.Value())
		}
	}
	return total
}

func podIsNotRunning(pod v1.Pod) bool {
	status := pod.Status
	// deletionTimestamp
	if pod.DeletionTimestamp != nil {
		return true
	}

	// pod is scheduled but not initialized
	if status.Phase == v1.PodPending && podConditionTrueOnly(status.Conditions, v1.PodScheduled) {
		log.Infof("Pod %s only has PodScheduled, is not running", pod.Name)
		return true
	}

	return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) || (status.Phase == v1.PodPending && podConditionTrueOnly(status.Conditions, v1.PodScheduled))
}

// notRunning returns true if every status is terminated or waiting, or the status list
// is empty.
func notRunning(statuses []v1.ContainerStatus) bool {
	for _, status := range statuses {
		if status.State.Terminated == nil && status.State.Waiting == nil {
			return false
		}
	}
	return true
}

func podConditionTrue(conditions []v1.PodCondition, expect v1.PodConditionType) bool {
	for _, condition := range conditions {
		if condition.Type == expect && condition.Status == v1.ConditionTrue {
			return true
		}
	}

	return false
}

func podConditionTrueOnly(conditions []v1.PodCondition, expect v1.PodConditionType) bool {
	if len(conditions) != 1 {
		return false
	}

	for _, condition := range conditions {
		if condition.Type == expect && condition.Status == v1.ConditionTrue {
			return true
		}
	}

	return false
}
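For reference, a sketch of what the strategic-merge patch built by patchPodAnnotationSpecAssigned looks like on the wire; the timestamp in the comment is illustrative:

package nvidia

import "fmt"

// examplePatch prints the patch JSON, which is shaped like:
// {"metadata":{"annotations":{"ALIYUN_COM_GPU_MEM_ASSIGNED":"true",
//  "ALIYUN_COM_GPU_MEM_ASSUME_TIME":"1620000000000000000"}}}
func examplePatch() {
	if b, err := patchPodAnnotationSpecAssigned(); err == nil {
		fmt.Println(string(b))
	}
}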
							
								
								
									
gpushare-device-plugin/pkg/gpu/nvidia/server.go (new file, 241 lines)
@@ -0,0 +1,241 @@
package nvidia

import (
	"net"
	"os"
	"path"
	"sync"
	"time"

	"github.com/AliyunContainerService/gpushare-device-plugin/pkg/kubelet/client"
	log "github.com/golang/glog"

	"golang.org/x/net/context"
	"google.golang.org/grpc"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)

// NvidiaDevicePlugin implements the Kubernetes device plugin API
type NvidiaDevicePlugin struct {
	devs                 []*pluginapi.Device
	realDevNames         []string
	devNameMap           map[string]uint
	devIndxMap           map[uint]string
	socket               string
	mps                  bool
	healthCheck          bool
	disableCGPUIsolation bool
	stop                 chan struct{}
	health               chan *pluginapi.Device
	queryKubelet         bool
	kubeletClient        *client.KubeletClient

	server *grpc.Server
	sync.RWMutex
}

// NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
func NewNvidiaDevicePlugin(mps, healthCheck, queryKubelet bool, client *client.KubeletClient) (*NvidiaDevicePlugin, error) {
	devs, devNameMap := getDevices()
	devList := []string{}

	for dev := range devNameMap {
		devList = append(devList, dev)
	}

	log.Infof("Device Map: %v", devNameMap)
	log.Infof("Device List: %v", devList)

	err := patchGPUCount(len(devList))
	if err != nil {
		return nil, err
	}
	disableCGPUIsolation, err := disableCGPUIsolationOrNot()
	if err != nil {
		return nil, err
	}
	return &NvidiaDevicePlugin{
		devs:                 devs,
		realDevNames:         devList,
		devNameMap:           devNameMap,
		socket:               serverSock,
		mps:                  mps,
		healthCheck:          healthCheck,
		disableCGPUIsolation: disableCGPUIsolation,
		stop:                 make(chan struct{}),
		health:               make(chan *pluginapi.Device),
		queryKubelet:         queryKubelet,
		kubeletClient:        client,
	}, nil
}

func (m *NvidiaDevicePlugin) GetDeviceNameByIndex(index uint) (name string, found bool) {
	if len(m.devIndxMap) == 0 {
		m.devIndxMap = map[uint]string{}
		for k, v := range m.devNameMap {
			m.devIndxMap[v] = k
		}
		log.Infof("Get devIndexMap: %v", m.devIndxMap)
	}

	name, found = m.devIndxMap[index]
	return name, found
}

func (m *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
	return &pluginapi.DevicePluginOptions{}, nil
}

// dial establishes the gRPC communication with the registered device plugin.
func dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) {
	c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(),
		grpc.WithTimeout(timeout),
		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
			return net.DialTimeout("unix", addr, timeout)
		}),
	)

	if err != nil {
		return nil, err
	}

	return c, nil
}

// Start starts the gRPC server of the device plugin
func (m *NvidiaDevicePlugin) Start() error {
	err := m.cleanup()
	if err != nil {
		return err
	}

	sock, err := net.Listen("unix", m.socket)
	if err != nil {
		return err
	}

	m.server = grpc.NewServer([]grpc.ServerOption{}...)
	pluginapi.RegisterDevicePluginServer(m.server, m)

	go m.server.Serve(sock)

	// Wait for server to start by launching a blocking connection
	conn, err := dial(m.socket, 5*time.Second)
	if err != nil {
		return err
	}
	conn.Close()

	go m.healthcheck()

	lastAllocateTime = time.Now()

	return nil
}

// Stop stops the gRPC server
func (m *NvidiaDevicePlugin) Stop() error {
	if m.server == nil {
		return nil
	}

	m.server.Stop()
	m.server = nil
	close(m.stop)

	return m.cleanup()
}

// Register registers the device plugin for the given resourceName with Kubelet.
func (m *NvidiaDevicePlugin) Register(kubeletEndpoint, resourceName string) error {
	conn, err := dial(kubeletEndpoint, 5*time.Second)
	if err != nil {
		return err
	}
	defer conn.Close()

	client := pluginapi.NewRegistrationClient(conn)
	reqt := &pluginapi.RegisterRequest{
		Version:      pluginapi.Version,
		Endpoint:     path.Base(m.socket),
		ResourceName: resourceName,
	}

	_, err = client.Register(context.Background(), reqt)
	if err != nil {
		return err
	}
	return nil
}

// ListAndWatch lists devices and updates that list according to the health status
func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
	s.Send(&pluginapi.ListAndWatchResponse{Devices: m.devs})

	for {
		select {
		case <-m.stop:
			return nil
		case d := <-m.health:
			// FIXME: there is no way to recover from the Unhealthy state.
			d.Health = pluginapi.Unhealthy
			s.Send(&pluginapi.ListAndWatchResponse{Devices: m.devs})
		}
	}
}

func (m *NvidiaDevicePlugin) unhealthy(dev *pluginapi.Device) {
	m.health <- dev
}

func (m *NvidiaDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) {
	return &pluginapi.PreStartContainerResponse{}, nil
}

func (m *NvidiaDevicePlugin) cleanup() error {
	if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) {
		return err
	}

	return nil
}

func (m *NvidiaDevicePlugin) healthcheck() {
	ctx, cancel := context.WithCancel(context.Background())

	var xids chan *pluginapi.Device
	if m.healthCheck {
		xids = make(chan *pluginapi.Device)
		go watchXIDs(ctx, m.devs, xids)
	}

	for {
		select {
		case <-m.stop:
			cancel()
			return
		case dev := <-xids:
			m.unhealthy(dev)
		}
	}
}

// Serve starts the gRPC server and registers the device plugin with Kubelet
func (m *NvidiaDevicePlugin) Serve() error {
	err := m.Start()
	if err != nil {
		log.Infof("Could not start device plugin: %s", err)
		return err
	}
	log.Infoln("Starting to serve on", m.socket)

	err = m.Register(pluginapi.KubeletSocket, resourceName)
	if err != nil {
		log.Infof("Could not register device plugin: %s", err)
		m.Stop()
		return err
	}
	log.Infoln("Registered device plugin with Kubelet")

	return nil
}
							
								
								
									
gpushare-device-plugin/pkg/gpu/nvidia/watchers.go (new file, 32 lines)
@@ -0,0 +1,32 @@
package nvidia

import (
	"os"
	"os/signal"

	"github.com/fsnotify/fsnotify"
)

func newFSWatcher(files ...string) (*fsnotify.Watcher, error) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, err
	}

	for _, f := range files {
		err = watcher.Add(f)
		if err != nil {
			watcher.Close()
			return nil, err
		}
	}

	return watcher, nil
}

func newOSWatcher(sigs ...os.Signal) chan os.Signal {
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, sigs...)

	return sigChan
}
							
								
								
									
gpushare-device-plugin/pkg/kubelet/client/client.go (new file, 134 lines)
@@ -0,0 +1,134 @@
package client

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"

	v1 "k8s.io/api/core/v1"
	utilnet "k8s.io/apimachinery/pkg/util/net"
	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/transport"
)

// KubeletClientConfig defines config parameters for the kubelet client
type KubeletClientConfig struct {
	// Address specifies the kubelet address
	Address string

	// Port specifies the default port - used if no information about Kubelet port can be found in Node.NodeStatus.DaemonEndpoints.
	Port uint

	// TLSClientConfig contains settings to enable transport layer security
	restclient.TLSClientConfig

	// Server requires Bearer authentication
	BearerToken string

	// HTTPTimeout is used by the client to timeout http requests to Kubelet.
	HTTPTimeout time.Duration
}

type KubeletClient struct {
	defaultPort uint
	host        string
	client      *http.Client
}

func NewKubeletClient(config *KubeletClientConfig) (*KubeletClient, error) {
	trans, err := makeTransport(config, true)
	if err != nil {
		return nil, err
	}
	client := &http.Client{
		Transport: trans,
		Timeout:   config.HTTPTimeout,
	}
	return &KubeletClient{
		host:        config.Address,
		defaultPort: config.Port,
		client:      client,
	}, nil
}

// transportConfig converts a client config to an appropriate transport config.
func (c *KubeletClientConfig) transportConfig() *transport.Config {
	cfg := &transport.Config{
		TLS: transport.TLSConfig{
			CAFile:   c.CAFile,
			CAData:   c.CAData,
			CertFile: c.CertFile,
			CertData: c.CertData,
			KeyFile:  c.KeyFile,
			KeyData:  c.KeyData,
		},
		BearerToken: c.BearerToken,
	}
	if !cfg.HasCA() {
		cfg.TLS.Insecure = true
	}
	return cfg
}

// makeTransport creates a RoundTripper for HTTP Transport.
func makeTransport(config *KubeletClientConfig, insecureSkipTLSVerify bool) (http.RoundTripper, error) {
	// do the insecureSkipTLSVerify on the pre-transport *before* we go get a potentially cached connection.
	// transportConfig always produces a new struct pointer.
	preTLSConfig := config.transportConfig()
	if insecureSkipTLSVerify && preTLSConfig != nil {
		preTLSConfig.TLS.Insecure = true
		preTLSConfig.TLS.CAData = nil
		preTLSConfig.TLS.CAFile = ""
	}

	tlsConfig, err := transport.TLSConfigFor(preTLSConfig)
	if err != nil {
		return nil, err
	}

	rt := http.DefaultTransport
	if tlsConfig != nil {
		// If SSH Tunnel is turned on
		rt = utilnet.SetOldTransportDefaults(&http.Transport{
			TLSClientConfig: tlsConfig,
		})
	}

	return transport.HTTPWrappersForConfig(config.transportConfig(), rt)
}

// ReadAll reads from r until EOF, growing the buffer as needed.
func ReadAll(r io.Reader) ([]byte, error) {
	b := make([]byte, 0, 512)
	for {
		if len(b) == cap(b) {
			// Add more capacity (let append pick how much).
			b = append(b, 0)[:len(b)]
		}
		n, err := r.Read(b[len(b):cap(b)])
		b = b[:len(b)+n]
		if err != nil {
			if err == io.EOF {
				err = nil
			}
			return b, err
		}
	}
}

// GetNodeRunningPods queries the kubelet /pods endpoint and returns the pod list.
func (k *KubeletClient) GetNodeRunningPods() (*v1.PodList, error) {
	resp, err := k.client.Get(fmt.Sprintf("https://%v:%d/pods/", k.host, k.defaultPort))
	if err != nil {
		return nil, err
	}

	body, err := ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	podLists := &v1.PodList{}
	if err = json.Unmarshal(body, podLists); err != nil {
		return nil, err
	}
	return podLists, err
}
							
								
								
									
gpushare-device-plugin/pkg/kubelet/client/client_test.go (new file, 57 lines)
@@ -0,0 +1,57 @@
package client

import (
	"flag"
	"fmt"
	"io/ioutil"
	"testing"
	"time"

	"k8s.io/client-go/rest"
)

var (
	clientCert string
	clientKey  string
	token      string
	timeout    int
)

func TestNewKubeletClient(t *testing.T) {
	flag.StringVar(&clientCert, "client-cert", "", "")
	flag.StringVar(&clientKey, "client-key", "", "")
	flag.StringVar(&token, "token", "", "")
	flag.IntVar(&timeout, "timeout", 10, "")

	flag.Parse()

	if clientCert == "" && clientKey == "" && token == "" {
		tokenByte, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
		if err != nil {
			panic(fmt.Errorf("in cluster mode, find token failed, error: %v", err))
		}
		token = string(tokenByte)
	}

	c, err := NewKubeletClient(&KubeletClientConfig{
		Address: "127.0.0.1",
		Port:    10250,
		TLSClientConfig: rest.TLSClientConfig{
			Insecure:   true,
			ServerName: "kubelet",
			CertFile:   clientCert,
			KeyFile:    clientKey,
		},
		BearerToken: token,
		HTTPTimeout: time.Duration(timeout) * time.Second,
	})
	if err != nil {
		fmt.Println(err)
		return
	}
	podsList, err := c.GetNodeRunningPods()
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(podsList)
}