package cache

import (
	"sync"

	"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
	"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
)
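
// DeviceInfo describes a single GPU device on a node: its index, its total
// memory capacity, and the set of pods currently assigned to it.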
type DeviceInfo struct {
	idx    int                   // index of the GPU device on the node
	podMap map[types.UID]*v1.Pod // pods assigned to this device, keyed by pod UID
	// usedGPUMem uint
	totalGPUMem uint          // total memory capacity of the device
	rwmu        *sync.RWMutex // guards podMap
}
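
// GetPods returns the pods currently assigned to this device.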
func (d *DeviceInfo) GetPods() []*v1.Pod {
	// Hold the read lock while iterating: addPod/removePod mutate podMap concurrently.
	d.rwmu.RLock()
	defer d.rwmu.RUnlock()
	pods := []*v1.Pod{}
	for _, pod := range d.podMap {
		pods = append(pods, pod)
	}
	return pods
}
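
// newDeviceInfo builds a DeviceInfo for the device at the given index
// with the given total GPU memory.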
func newDeviceInfo(index int, totalGPUMem uint) *DeviceInfo {
	return &DeviceInfo{
		idx:         index,
		totalGPUMem: totalGPUMem,
		podMap:      map[types.UID]*v1.Pod{},
		rwmu:        new(sync.RWMutex),
	}
}
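
// GetTotalGPUMemory returns the total memory capacity of the device.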
func (d *DeviceInfo) GetTotalGPUMemory() uint {
	return d.totalGPUMem
}
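
// GetUsedGPUMemory sums the GPU memory recorded in the annotations of the
// pods assigned to this device, skipping pods that have already finished.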
func (d *DeviceInfo) GetUsedGPUMemory() (gpuMem uint) {
	d.rwmu.RLock()
	defer d.rwmu.RUnlock()
	log.V(100).Info("debug: GetUsedGPUMemory() podMap %v, and its address is %p", d.podMap, d)
	for _, pod := range d.podMap {
		// Pods that have finished no longer occupy GPU memory, so skip them.
		if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
			log.V(100).Info("debug: skip pod %s in ns %s because its status is %s", pod.Name, pod.Namespace, pod.Status.Phase)
			continue
		}
		// gpuMem += utils.GetGPUMemoryFromPodEnv(pod)
		gpuMem += utils.GetGPUMemoryFromPodAnnotation(pod)
	}
	return gpuMem
}
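
// addPod records a pod as assigned to this device.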
func (d *DeviceInfo) addPod(pod *v1.Pod) {
	log.V(100).Info("debug: dev.addPod() Pod %s in ns %s with the GPU ID %d will be added to device map",
		pod.Name,
		pod.Namespace,
		d.idx)
	d.rwmu.Lock()
	defer d.rwmu.Unlock()
	d.podMap[pod.UID] = pod
	log.V(100).Info("debug: dev.addPod() podMap after update is %v, and its address is %p",
		d.podMap,
		d)
}
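
// removePod releases a pod's assignment from this device.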
func (d *DeviceInfo) removePod(pod *v1.Pod) {
	log.V(100).Info("debug: dev.removePod() Pod %s in ns %s with the GPU ID %d will be removed from device map",
		pod.Name,
		pod.Namespace,
		d.idx)
	d.rwmu.Lock()
	defer d.rwmu.Unlock()
	delete(d.podMap, pod.UID)
	log.V(100).Info("debug: dev.removePod() podMap after update is %v, and its address is %p",
		d.podMap,
		d)
}