synchronization
This commit is contained in:
80
gpushare-scheduler-extender/pkg/cache/deviceinfo.go
vendored
Normal file
80
gpushare-scheduler-extender/pkg/cache/deviceinfo.go
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
package cache
|
||||
|
||||
import (
|
||||
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
|
||||
"sync"
|
||||
|
||||
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
)
|
||||
|
||||
type DeviceInfo struct {
|
||||
idx int
|
||||
podMap map[types.UID]*v1.Pod
|
||||
// usedGPUMem uint
|
||||
totalGPUMem uint
|
||||
rwmu *sync.RWMutex
|
||||
}
|
||||
|
||||
func (d *DeviceInfo) GetPods() []*v1.Pod {
|
||||
pods := []*v1.Pod{}
|
||||
for _, pod := range d.podMap {
|
||||
pods = append(pods, pod)
|
||||
}
|
||||
return pods
|
||||
}
|
||||
|
||||
func newDeviceInfo(index int, totalGPUMem uint) *DeviceInfo {
|
||||
return &DeviceInfo{
|
||||
idx: index,
|
||||
totalGPUMem: totalGPUMem,
|
||||
podMap: map[types.UID]*v1.Pod{},
|
||||
rwmu: new(sync.RWMutex),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DeviceInfo) GetTotalGPUMemory() uint {
|
||||
return d.totalGPUMem
|
||||
}
|
||||
|
||||
func (d *DeviceInfo) GetUsedGPUMemory() (gpuMem uint) {
|
||||
log.V(100).Info("debug: GetUsedGPUMemory() podMap %v, and its address is %p", d.podMap, d)
|
||||
d.rwmu.RLock()
|
||||
defer d.rwmu.RUnlock()
|
||||
for _, pod := range d.podMap {
|
||||
if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
|
||||
log.V(100).Info("debug: skip the pod %s in ns %s due to its status is %s", pod.Name, pod.Namespace, pod.Status.Phase)
|
||||
continue
|
||||
}
|
||||
// gpuMem += utils.GetGPUMemoryFromPodEnv(pod)
|
||||
gpuMem += utils.GetGPUMemoryFromPodAnnotation(pod)
|
||||
}
|
||||
return gpuMem
|
||||
}
|
||||
|
||||
func (d *DeviceInfo) addPod(pod *v1.Pod) {
|
||||
log.V(100).Info("debug: dev.addPod() Pod %s in ns %s with the GPU ID %d will be added to device map",
|
||||
pod.Name,
|
||||
pod.Namespace,
|
||||
d.idx)
|
||||
d.rwmu.Lock()
|
||||
defer d.rwmu.Unlock()
|
||||
d.podMap[pod.UID] = pod
|
||||
log.V(100).Info("debug: dev.addPod() after updated is %v, and its address is %p",
|
||||
d.podMap,
|
||||
d)
|
||||
}
|
||||
|
||||
func (d *DeviceInfo) removePod(pod *v1.Pod) {
|
||||
log.V(100).Info("debug: dev.removePod() Pod %s in ns %s with the GPU ID %d will be removed from device map",
|
||||
pod.Name,
|
||||
pod.Namespace,
|
||||
d.idx)
|
||||
d.rwmu.Lock()
|
||||
defer d.rwmu.Unlock()
|
||||
delete(d.podMap, pod.UID)
|
||||
log.V(100).Info("debug: dev.removePod() after updated is %v, and its address is %p",
|
||||
d.podMap,
|
||||
d)
|
||||
}
|
Reference in New Issue
Block a user