synchronization
gpushare-scheduler-extender/pkg/utils/const.go (new file, 13 lines added)
@@ -0,0 +1,13 @@
package utils

const (
	ResourceName = "rainbond.com/gpu-mem"
	CountName    = "rainbond.com/gpu-count"

	EnvNVGPU              = "NVIDIA_VISIBLE_DEVICES"
	EnvResourceIndex      = "ALIYUN_COM_GPU_MEM_IDX"
	EnvResourceByPod      = "ALIYUN_COM_GPU_MEM_POD"
	EnvResourceByDev      = "ALIYUN_COM_GPU_MEM_DEV"
	EnvAssignedFlag       = "ALIYUN_COM_GPU_MEM_ASSIGNED"
	EnvResourceAssumeTime = "ALIYUN_COM_GPU_MEM_ASSUME_TIME"
)
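For illustration only, not part of this commit: a minimal sketch of a workload reading the env keys defined above at runtime. It assumes the scheduler/device plugin has injected ALIYUN_COM_GPU_MEM_IDX, ALIYUN_COM_GPU_MEM_POD and ALIYUN_COM_GPU_MEM_DEV into the container (see GetUpdatedPodEnvSpec in pod.go below); the printed message is invented.

package main

import (
	"fmt"
	"os"
	"strconv"
)

func main() {
	// These names match EnvResourceIndex, EnvResourceByPod and EnvResourceByDev above.
	idx := os.Getenv("ALIYUN_COM_GPU_MEM_IDX")
	podMem, _ := strconv.Atoi(os.Getenv("ALIYUN_COM_GPU_MEM_POD"))
	devMem, _ := strconv.Atoi(os.Getenv("ALIYUN_COM_GPU_MEM_DEV"))
	fmt.Printf("assigned GPU %s: %d of %d GPU memory units reserved for this pod\n", idx, podMem, devMem)
}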
gpushare-scheduler-extender/pkg/utils/node.go (new file, 30 lines added)
@@ -0,0 +1,30 @@
package utils

import "k8s.io/api/core/v1"

// IsGPUSharingNode reports whether the node exposes GPU memory for sharing
func IsGPUSharingNode(node *v1.Node) bool {
	return GetTotalGPUMemory(node) > 0
}

// GetTotalGPUMemory gets the total GPU memory of the node
func GetTotalGPUMemory(node *v1.Node) int {
	val, ok := node.Status.Capacity[ResourceName]

	if !ok {
		return 0
	}

	return int(val.Value())
}

// GetGPUCountInNode gets the GPU count of the node
func GetGPUCountInNode(node *v1.Node) int {
	val, ok := node.Status.Capacity[CountName]

	if !ok {
		return 0
	}

	return int(val.Value())
}
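For illustration only, not part of this commit: a minimal sketch that feeds a fabricated node through these helpers. The node name and capacity figures are made up, and the pkg/utils import path is assumed from the pkg/log import used in pod.go below.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
)

func main() {
	// Fabricated node advertising 2 GPUs and 16 units of shareable GPU memory.
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
		Status: v1.NodeStatus{
			Capacity: v1.ResourceList{
				utils.ResourceName: resource.MustParse("16"),
				utils.CountName:    resource.MustParse("2"),
			},
		},
	}

	if utils.IsGPUSharingNode(node) {
		fmt.Printf("%s: %d GPU(s), %d GPU memory in total\n",
			node.Name, utils.GetGPUCountInNode(node), utils.GetTotalGPUMemory(node))
	}
}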
gpushare-scheduler-extender/pkg/utils/pod.go (new file, 219 lines added)
@@ -0,0 +1,219 @@
package utils

import (
	"encoding/json"
	"fmt"
	"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
	v1 "k8s.io/api/core/v1"
	"strconv"
	"time"
)

// AssignedNonTerminatedPod selects pods that are assigned and non-terminal (scheduled and running).
func AssignedNonTerminatedPod(pod *v1.Pod) bool {
	if pod.DeletionTimestamp != nil {
		return false
	}

	if len(pod.Spec.NodeName) == 0 {
		return false
	}
	if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
		return false
	}
	return true
}

// IsCompletePod determines if the pod is complete
func IsCompletePod(pod *v1.Pod) bool {
	if pod.DeletionTimestamp != nil {
		return true
	}

	if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
		return true
	}
	return false
}

// IsGPUsharingPod determines if it's the pod for GPU sharing
func IsGPUsharingPod(pod *v1.Pod) bool {
	return GetGPUMemoryFromPodResource(pod) > 0
}

// GetGPUIDFromAnnotation gets GPU ID from Annotation
func GetGPUIDFromAnnotation(pod *v1.Pod) int {
	id := -1
	if len(pod.ObjectMeta.Annotations) > 0 {
		value, found := pod.ObjectMeta.Annotations[EnvResourceIndex]
		if found {
			var err error
			id, err = strconv.Atoi(value)
			if err != nil {
				log.V(9).Info("warn: Failed due to %v for pod %s in ns %s", err, pod.Name, pod.Namespace)
				id = -1
			}
		}
	}

	return id
}

// GetGPUIDFromEnv gets GPU ID from Env
func GetGPUIDFromEnv(pod *v1.Pod) int {
	id := -1
	for _, container := range pod.Spec.Containers {
		id = getGPUIDFromContainer(container)
		if id >= 0 {
			return id
		}
	}

	return id
}

func getGPUIDFromContainer(container v1.Container) (devIdx int) {
	devIdx = -1
	var err error
loop:
	for _, env := range container.Env {
		if env.Name == EnvResourceIndex {
			devIdx, err = strconv.Atoi(env.Value)
			if err != nil {
				log.V(9).Info("warn: Failed due to %v for %s", err, container.Name)
				devIdx = -1
			}
			break loop
		}
	}

	return devIdx
}

// GetGPUMemoryFromPodAnnotation gets the GPU memory requested by the pod from the EnvResourceByPod annotation
func GetGPUMemoryFromPodAnnotation(pod *v1.Pod) (gpuMemory uint) {
	if len(pod.ObjectMeta.Annotations) > 0 {
		value, found := pod.ObjectMeta.Annotations[EnvResourceByPod]
		if found {
			s, _ := strconv.Atoi(value)
			if s < 0 {
				s = 0
			}

			gpuMemory += uint(s)
		}
	}

	log.V(100).Info("debug: pod %s in ns %s with status %v has GPU Mem %d",
		pod.Name,
		pod.Namespace,
		pod.Status.Phase,
		gpuMemory)
	return gpuMemory
}

// GetGPUMemoryFromPodEnv gets the GPU memory requested by the pod by summing the EnvResourceByPod env var across its containers
func GetGPUMemoryFromPodEnv(pod *v1.Pod) (gpuMemory uint) {
	for _, container := range pod.Spec.Containers {
		gpuMemory += getGPUMemoryFromContainerEnv(container)
	}
	log.V(100).Info("debug: pod %s in ns %s with status %v has GPU Mem %d",
		pod.Name,
		pod.Namespace,
		pod.Status.Phase,
		gpuMemory)
	return gpuMemory
}

func getGPUMemoryFromContainerEnv(container v1.Container) (gpuMemory uint) {
	gpuMemory = 0
loop:
	for _, env := range container.Env {
		if env.Name == EnvResourceByPod {
			s, _ := strconv.Atoi(env.Value)
			if s < 0 {
				s = 0
			}
			gpuMemory = uint(s)
			break loop
		}
	}

	return gpuMemory
}

// GetGPUMemoryFromPodResource gets GPU Memory of the Pod
func GetGPUMemoryFromPodResource(pod *v1.Pod) int {
	var total int
	containers := pod.Spec.Containers
	for _, container := range containers {
		if val, ok := container.Resources.Limits[ResourceName]; ok {
			total += int(val.Value())
		}
	}
	return total
}

// GetGPUMemoryFromContainerResource gets GPU Memory of the Container
func GetGPUMemoryFromContainerResource(container v1.Container) int {
	var total int
	if val, ok := container.Resources.Limits[ResourceName]; ok {
		total += int(val.Value())
	}
	return total
}

// GetUpdatedPodEnvSpec updates the pod's container env with devId and the GPU memory assignment
func GetUpdatedPodEnvSpec(oldPod *v1.Pod, devId int, totalGPUMemByDev int) (newPod *v1.Pod) {
	newPod = oldPod.DeepCopy()
	for i, c := range newPod.Spec.Containers {
		gpuMem := GetGPUMemoryFromContainerResource(c)

		if gpuMem > 0 {
			envs := []v1.EnvVar{
				// {Name: EnvNVGPU, Value: fmt.Sprintf("%d", devId)},
				{Name: EnvResourceIndex, Value: fmt.Sprintf("%d", devId)},
				{Name: EnvResourceByPod, Value: fmt.Sprintf("%d", gpuMem)},
				{Name: EnvResourceByDev, Value: fmt.Sprintf("%d", totalGPUMemByDev)},
				{Name: EnvAssignedFlag, Value: "false"},
			}

			for _, env := range envs {
				newPod.Spec.Containers[i].Env = append(newPod.Spec.Containers[i].Env,
					env)
			}
		}
	}

	return newPod
}

// GetUpdatedPodAnnotationSpec updates the pod's annotations with devId and the GPU memory assignment
func GetUpdatedPodAnnotationSpec(oldPod *v1.Pod, devId int, totalGPUMemByDev int) (newPod *v1.Pod) {
	newPod = oldPod.DeepCopy()
	if len(newPod.ObjectMeta.Annotations) == 0 {
		newPod.ObjectMeta.Annotations = map[string]string{}
	}

	now := time.Now()
	newPod.ObjectMeta.Annotations[EnvResourceIndex] = fmt.Sprintf("%d", devId)
	newPod.ObjectMeta.Annotations[EnvResourceByDev] = fmt.Sprintf("%d", totalGPUMemByDev)
	newPod.ObjectMeta.Annotations[EnvResourceByPod] = fmt.Sprintf("%d", GetGPUMemoryFromPodResource(newPod))
	newPod.ObjectMeta.Annotations[EnvAssignedFlag] = "false"
	newPod.ObjectMeta.Annotations[EnvResourceAssumeTime] = fmt.Sprintf("%d", now.UnixNano())

	return newPod
}

// PatchPodAnnotationSpec builds the JSON patch payload that records the GPU assignment in the pod's annotations
func PatchPodAnnotationSpec(oldPod *v1.Pod, devId int, totalGPUMemByDev int) ([]byte, error) {
	now := time.Now()
	patchAnnotations := map[string]interface{}{
		"metadata": map[string]map[string]string{"annotations": {
			EnvResourceIndex:      fmt.Sprintf("%d", devId),
			EnvResourceByDev:      fmt.Sprintf("%d", totalGPUMemByDev),
			EnvResourceByPod:      fmt.Sprintf("%d", GetGPUMemoryFromPodResource(oldPod)),
			EnvAssignedFlag:       "false",
			EnvResourceAssumeTime: fmt.Sprintf("%d", now.UnixNano()),
		}}}
	return json.Marshal(patchAnnotations)
}
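For illustration only, not part of this commit: a hedged sketch of how the annotation patch might be produced for a pod that requests the shared-GPU resource. The pod, the chosen device ID (0) and the per-device total (16) are invented; sending the resulting bytes to the API server (e.g. via a Pods().Patch call) is left to the extender's binding code.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
)

func main() {
	// Fabricated pod asking for 4 units of rainbond.com/gpu-mem in its only container.
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "gpu-share-demo", Namespace: "default"},
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name:  "app",
				Image: "cuda-app:latest",
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{utils.ResourceName: resource.MustParse("4")},
				},
			}},
		},
	}

	// Suppose the scheduler picked device 0, which exposes 16 units in total.
	patch, err := utils.PatchPodAnnotationSpec(pod, 0, 16)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(patch))
}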
gpushare-scheduler-extender/pkg/utils/signals/signal.go (new file, 30 lines added)
@@ -0,0 +1,30 @@
package signals

import (
	"os"
	"os/signal"
	"syscall"
)

var onlyOneSignalHandler = make(chan struct{})

var shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM}

// SetupSignalHandler registers handlers for SIGTERM and SIGINT. A stop channel is returned
// which is closed on one of these signals. If a second signal is caught, the program
// is terminated with exit code 1.
func SetupSignalHandler() (stopCh <-chan struct{}) {
	close(onlyOneSignalHandler) // panics when called twice

	stop := make(chan struct{})
	c := make(chan os.Signal, 2)
	signal.Notify(c, shutdownSignals...)
	go func() {
		<-c
		close(stop)
		<-c
		os.Exit(1) // second signal. Exit directly.
	}()

	return stop
}
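For illustration only, not part of this commit: a minimal sketch of wiring SetupSignalHandler into a main function. runController stands in for whatever loop the binary actually runs, and the import path for the signals package is assumed from the module layout above.

package main

import (
	"fmt"
	"time"

	"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils/signals"
)

// runController is a placeholder work loop that exits when stopCh is closed.
func runController(stopCh <-chan struct{}) {
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-stopCh:
			fmt.Println("received shutdown signal, exiting")
			return
		case <-ticker.C:
			// periodic work would go here
		}
	}
}

func main() {
	// SetupSignalHandler may only be called once per process; the returned
	// channel is closed on the first SIGINT/SIGTERM.
	stopCh := signals.SetupSignalHandler()
	runController(stopCh)
}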