// Rainbond/gpushare-device-plugin/pkg/gpu/nvidia/allocate.go
package nvidia

import (
	"fmt"
	"time"

	log "github.com/golang/glog"
	"golang.org/x/net/context"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)
var (
	clientTimeout    = 30 * time.Second
	lastAllocateTime time.Time
)

// initialize the kubernetes client used to query and patch pods
func init() {
	kubeInit()
}
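// buildErrResponse returns a placeholder allocation whose env vars mark that no GPU
// has enough free memory for this request, so Allocate can answer without an error.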
func buildErrResponse(reqs *pluginapi.AllocateRequest, podReqGPU uint) *pluginapi.AllocateResponse {
	responses := pluginapi.AllocateResponse{}
	for _, req := range reqs.ContainerRequests {
		response := pluginapi.ContainerAllocateResponse{
			Envs: map[string]string{
				envNVGPU:               fmt.Sprintf("no-gpu-has-%d%s-to-run", podReqGPU, metric),
				EnvResourceIndex:       "-1",
				EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
				EnvResourceByContainer: fmt.Sprintf("%d", uint(len(req.DevicesIDs))),
				EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
			},
		}
		responses.ContainerResponses = append(responses.ContainerResponses, &response)
	}
	return &responses
}
// Allocate returns the list of devices and env vars for the requesting containers.
func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
	reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
	responses := pluginapi.AllocateResponse{}
	log.Infoln("----Allocating GPU for gpu mem is started----")
	var (
		podReqGPU uint
		found     bool
		assumePod *v1.Pod
	)
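	// In the GPU-memory-sharing model each advertised device ID stands for one unit of
	// GPU memory (the unit named by metric), so counting the requested IDs gives the
	// pod's total GPU memory request.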
	for _, req := range reqs.ContainerRequests {
		podReqGPU += uint(len(req.DevicesIDs))
	}
	log.Infof("RequestPodGPUs: %d", podReqGPU)
	m.Lock()
	defer m.Unlock()
	log.Infoln("checking...")
	pods, err := getCandidatePods(m.queryKubelet, m.kubeletClient)
	if err != nil {
		log.Infof("invalid allocation request: failed to find candidate pods due to %v", err)
		return buildErrResponse(reqs, podReqGPU), nil
	}

	if log.V(4) {
		for _, pod := range pods {
			log.Infof("Pod %s in ns %s requests GPU memory %d with timestamp %v",
				pod.Name,
				pod.Namespace,
				getGPUMemoryFromPodResource(pod),
				getAssumeTimeFromPodAnnotation(pod))
		}
	}
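	// Match this allocate request to a candidate pod by comparing its total requested
	// GPU memory; the matched pod is expected to carry the chosen GPU index in its
	// annotations (presumably written by the gpushare scheduler extender).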
	for _, pod := range pods {
		if getGPUMemoryFromPodResource(pod) == podReqGPU {
			log.Infof("Found Assumed GPU shared Pod %s in ns %s with GPU Memory %d",
				pod.Name,
				pod.Namespace,
				podReqGPU)
			assumePod = pod
			found = true
			break
		}
	}
	if found {
		id := getGPUIDFromPodAnnotation(assumePod)
		if id < 0 {
			log.Warningf("Failed to get the GPU ID from the annotation of pod %v", assumePod)
		}

		candidateDevID := ""
		if id >= 0 {
			ok := false
			candidateDevID, ok = m.GetDeviceNameByIndex(uint(id))
			if !ok {
				log.Warningf("Failed to find the dev for pod %v because there is no dev with index %d",
					assumePod,
					id)
				id = -1
			}
		}

		if id < 0 {
			return buildErrResponse(reqs, podReqGPU), nil
		}
		log.Infof("gpu index %v, uuid: %v", id, candidateDevID)
		// 1. Create the per-container allocate responses
		for _, req := range reqs.ContainerRequests {
			reqGPU := uint(len(req.DevicesIDs))
			response := pluginapi.ContainerAllocateResponse{
				Envs: map[string]string{
					envNVGPU:               fmt.Sprintf("%v", id),
					EnvResourceIndex:       fmt.Sprintf("%d", id),
					EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
					EnvResourceByContainer: fmt.Sprintf("%d", reqGPU),
					EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
				},
			}
			if m.disableCGPUIsolation {
				response.Envs["CGPU_DISABLE"] = "true"
			}
			responses.ContainerResponses = append(responses.ContainerResponses, &response)
		}
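		// Record the assignment back on the pod so this candidate is not matched again;
		// the patch is retried once when the apiserver reports an optimistic-lock conflict.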
		// 2. Update Pod spec
		patchedAnnotationBytes, err := patchPodAnnotationSpecAssigned()
		if err != nil {
			return buildErrResponse(reqs, podReqGPU), nil
		}
		_, err = clientset.CoreV1().Pods(assumePod.Namespace).Patch(assumePod.Name, types.StrategicMergePatchType, patchedAnnotationBytes)
		if err != nil {
			// "the object has been modified; please apply your changes to the latest version and try again"
			if err.Error() == OptimisticLockErrorMsg {
				// retry
				_, err = clientset.CoreV1().Pods(assumePod.Namespace).Patch(assumePod.Name, types.StrategicMergePatchType, patchedAnnotationBytes)
				if err != nil {
					log.Warningf("Failed due to %v", err)
					return buildErrResponse(reqs, podReqGPU), nil
				}
			} else {
				log.Warningf("Failed due to %v", err)
				return buildErrResponse(reqs, podReqGPU), nil
			}
		}
	} else if len(m.devNameMap) == 1 {
		var devName string
		var devIndex uint
		for d, index := range m.devNameMap {
			devName = d
			devIndex = index
			break
		}
		log.Infof("this node has only one GPU device, skip searching for a pod and directly assign device %v(%v) to the container", devIndex, devName)
		for _, req := range reqs.ContainerRequests {
			reqGPU := uint(len(req.DevicesIDs))
			response := pluginapi.ContainerAllocateResponse{
				Envs: map[string]string{
					envNVGPU:               devName,
					EnvResourceIndex:       fmt.Sprintf("%d", devIndex),
					EnvResourceByPod:       fmt.Sprintf("%d", podReqGPU),
					EnvResourceByContainer: fmt.Sprintf("%d", reqGPU),
					EnvResourceByDev:       fmt.Sprintf("%d", getGPUMemory()),
				},
			}
			if m.disableCGPUIsolation {
				response.Envs["CGPU_DISABLE"] = "true"
			}
			responses.ContainerResponses = append(responses.ContainerResponses, &response)
		}
		log.Infof("get allocated GPUs info %v", responses)
		return &responses, nil
	} else {
		log.Warningf("invalid allocation request: request GPU memory %d can't be satisfied.",
			podReqGPU)
		// return &responses, fmt.Errorf("invalid allocation request: request GPU memory %d can't be satisfied", reqGPU)
		return buildErrResponse(reqs, podReqGPU), nil
	}
podName := ""
if assumePod != nil {
podName = assumePod.Name
}
log.Infof("pod %v, new allocated GPUs info %v", podName, &responses)
log.Infof("----Allocating GPU for gpu mem for %v is ended----", podName)
// // Add this to make sure the container is created at least
// currentTime := time.Now()
// currentTime.Sub(lastAllocateTime)
return &responses, nil
}