81 lines
2.0 KiB
Go
81 lines
2.0 KiB
Go
package cache
|
|
|
|
import (
|
|
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
|
|
"sync"
|
|
|
|
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
|
|
"k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
)
|
|
|
|
type DeviceInfo struct {
|
|
idx int
|
|
podMap map[types.UID]*v1.Pod
|
|
// usedGPUMem uint
|
|
totalGPUMem uint
|
|
rwmu *sync.RWMutex
|
|
}
|
|
|
|
func (d *DeviceInfo) GetPods() []*v1.Pod {
|
|
pods := []*v1.Pod{}
|
|
for _, pod := range d.podMap {
|
|
pods = append(pods, pod)
|
|
}
|
|
return pods
|
|
}
|
|
|
|
func newDeviceInfo(index int, totalGPUMem uint) *DeviceInfo {
|
|
return &DeviceInfo{
|
|
idx: index,
|
|
totalGPUMem: totalGPUMem,
|
|
podMap: map[types.UID]*v1.Pod{},
|
|
rwmu: new(sync.RWMutex),
|
|
}
|
|
}
|
|
|
|
func (d *DeviceInfo) GetTotalGPUMemory() uint {
|
|
return d.totalGPUMem
|
|
}
|
|
|
|
func (d *DeviceInfo) GetUsedGPUMemory() (gpuMem uint) {
|
|
log.V(100).Info("debug: GetUsedGPUMemory() podMap %v, and its address is %p", d.podMap, d)
|
|
d.rwmu.RLock()
|
|
defer d.rwmu.RUnlock()
|
|
for _, pod := range d.podMap {
|
|
if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
|
|
log.V(100).Info("debug: skip the pod %s in ns %s due to its status is %s", pod.Name, pod.Namespace, pod.Status.Phase)
|
|
continue
|
|
}
|
|
// gpuMem += utils.GetGPUMemoryFromPodEnv(pod)
|
|
gpuMem += utils.GetGPUMemoryFromPodAnnotation(pod)
|
|
}
|
|
return gpuMem
|
|
}
|
|
|
|
func (d *DeviceInfo) addPod(pod *v1.Pod) {
|
|
log.V(100).Info("debug: dev.addPod() Pod %s in ns %s with the GPU ID %d will be added to device map",
|
|
pod.Name,
|
|
pod.Namespace,
|
|
d.idx)
|
|
d.rwmu.Lock()
|
|
defer d.rwmu.Unlock()
|
|
d.podMap[pod.UID] = pod
|
|
log.V(100).Info("debug: dev.addPod() after updated is %v, and its address is %p",
|
|
d.podMap,
|
|
d)
|
|
}
|
|
|
|
func (d *DeviceInfo) removePod(pod *v1.Pod) {
|
|
log.V(100).Info("debug: dev.removePod() Pod %s in ns %s with the GPU ID %d will be removed from device map",
|
|
pod.Name,
|
|
pod.Namespace,
|
|
d.idx)
|
|
d.rwmu.Lock()
|
|
defer d.rwmu.Unlock()
|
|
delete(d.podMap, pod.UID)
|
|
log.V(100).Info("debug: dev.removePod() after updated is %v, and its address is %p",
|
|
d.podMap,
|
|
d)
|
|
}
|