synchronization
gpushare-scheduler-extender/pkg/cache/cache.go (vendored, new file, 177 lines)
@@ -0,0 +1,177 @@
package cache

import (
    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
    "sync"

    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/labels"
    "k8s.io/apimachinery/pkg/types"
    corelisters "k8s.io/client-go/listers/core/v1"
)

type SchedulerCache struct {

    // a map from node name to NodeInfo
    nodes map[string]*NodeInfo

    // nodeLister can list/get nodes from the shared informer's store.
    nodeLister corelisters.NodeLister

    // podLister can list/get pods from the shared informer's store.
    podLister corelisters.PodLister

    // knownPods records pods whose ALIYUN_GPU_ID annotation has been set;
    // a pod is removed again once it completes and is deleted.
    knownPods map[types.UID]*v1.Pod
    nLock     *sync.RWMutex
}

func NewSchedulerCache(nLister corelisters.NodeLister, pLister corelisters.PodLister) *SchedulerCache {
    return &SchedulerCache{
        nodes:      make(map[string]*NodeInfo),
        nodeLister: nLister,
        podLister:  pLister,
        knownPods:  make(map[types.UID]*v1.Pod),
        nLock:      new(sync.RWMutex),
    }
}

func (cache *SchedulerCache) GetNodeinfos() []*NodeInfo {
    nodes := []*NodeInfo{}
    for _, n := range cache.nodes {
        nodes = append(nodes, n)
    }
    return nodes
}

// BuildCache builds the cache when the extender initializes.
func (cache *SchedulerCache) BuildCache() error {
    log.V(5).Info("debug: begin to build scheduler cache")
    pods, err := cache.podLister.List(labels.Everything())
    if err != nil {
        return err
    }

    for _, pod := range pods {
        if utils.GetGPUMemoryFromPodAnnotation(pod) <= uint(0) {
            continue
        }

        if len(pod.Spec.NodeName) == 0 {
            continue
        }

        err = cache.AddOrUpdatePod(pod)
        if err != nil {
            return err
        }
    }

    return nil
}

func (cache *SchedulerCache) GetPod(name, namespace string) (*v1.Pod, error) {
    return cache.podLister.Pods(namespace).Get(name)
}

// KnownPod reports whether the pod with the given UID is known to the cache.
func (cache *SchedulerCache) KnownPod(podUID types.UID) bool {
    cache.nLock.RLock()
    defer cache.nLock.RUnlock()

    _, found := cache.knownPods[podUID]
    return found
}

func (cache *SchedulerCache) AddOrUpdatePod(pod *v1.Pod) error {
    log.V(100).Info("debug: Add or update pod info: %v", pod)
    log.V(100).Info("debug: Node %v", cache.nodes)
    if len(pod.Spec.NodeName) == 0 {
        log.V(100).Info("debug: pod %s in ns %s is not assigned to any node, skip", pod.Name, pod.Namespace)
        return nil
    }

    n, err := cache.GetNodeInfo(pod.Spec.NodeName)
    if err != nil {
        return err
    }
    podCopy := pod.DeepCopy()
    if n.addOrUpdatePod(podCopy) {
        // record it as a known pod
        cache.rememberPod(pod.UID, podCopy)
    } else {
        log.V(100).Info("debug: pod %s in ns %s has an invalid GPU ID %d, skip",
            pod.Name,
            pod.Namespace,
            utils.GetGPUIDFromAnnotation(pod))
    }

    return nil
}

// RemovePod removes the pod from the cache; the per-node lock lives in NodeInfo.
func (cache *SchedulerCache) RemovePod(pod *v1.Pod) {
    log.V(100).Info("debug: Remove pod info: %v", pod)
    log.V(100).Info("debug: Node %v", cache.nodes)
    n, err := cache.GetNodeInfo(pod.Spec.NodeName)
    if err == nil {
        n.removePod(pod)
    } else {
        log.V(10).Info("debug: Failed to get node %s due to %v", pod.Spec.NodeName, err)
    }

    cache.forgetPod(pod.UID)
}

// GetNodeInfo gets the NodeInfo for the named node, building it if it doesn't exist yet.
func (cache *SchedulerCache) GetNodeInfo(name string) (*NodeInfo, error) {
    node, err := cache.nodeLister.Get(name)
    if err != nil {
        return nil, err
    }

    cache.nLock.Lock()
    defer cache.nLock.Unlock()
    n, ok := cache.nodes[name]

    if !ok {
        n = NewNodeInfo(node)
        cache.nodes[name] = n
    } else {
        // Refresh the cached entry when the existing node turns from non-gpushare
        // to gpushare (or back), e.g. the previous condition:
        // if (utils.GetTotalGPUMemory(n.node) <= 0 && utils.GetTotalGPUMemory(node) > 0) ||
        //     (utils.GetGPUCountInNode(n.node) <= 0 && utils.GetGPUCountInNode(node) > 0) ||
        //     (utils.GetTotalGPUMemory(n.node) > 0 && utils.GetTotalGPUMemory(node) <= 0) ||
        //     (utils.GetGPUCountInNode(n.node) > 0 && utils.GetGPUCountInNode(node) <= 0) {
        if len(cache.nodes[name].devs) == 0 ||
            utils.GetTotalGPUMemory(n.node) <= 0 ||
            utils.GetGPUCountInNode(n.node) <= 0 {
            log.V(10).Info("info: GetNodeInfo() needs to update node %s", name)

            // fix the scenario where the number of devices changes from 0 to a positive number
            cache.nodes[name].Reset(node)
            log.V(10).Info("info: node: %s, labels from cache after being updated: %v", n.node.Name, n.node.Labels)
        } else {
            log.V(10).Info("info: GetNodeInfo() uses the existing nodeInfo for %s", name)
        }
        log.V(100).Info("debug: node %s with devices %v", name, n.devs)
    }
    return n, nil
}

func (cache *SchedulerCache) forgetPod(uid types.UID) {
    cache.nLock.Lock()
    defer cache.nLock.Unlock()
    delete(cache.knownPods, uid)
}

func (cache *SchedulerCache) rememberPod(uid types.UID, pod *v1.Pod) {
    cache.nLock.Lock()
    defer cache.nLock.Unlock()
    cache.knownPods[pod.UID] = pod
}
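For context, a minimal sketch of how this cache could be wired up by the extender's entry point, assuming a shared informer factory from k8s.io/client-go/informers; the helper name, the clientset variable, and the stop channel are illustrative, not part of this commit:

package main

import (
    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes"

    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
)

// buildSchedulerCache is an illustrative helper, not part of this commit.
func buildSchedulerCache(clientset *kubernetes.Clientset, stopCh <-chan struct{}) (*cache.SchedulerCache, error) {
    factory := informers.NewSharedInformerFactory(clientset, 0)
    nodeLister := factory.Core().V1().Nodes().Lister()
    podLister := factory.Core().V1().Pods().Lister()

    factory.Start(stopCh)
    // Wait for the informer caches to fill before reading from the listers.
    factory.WaitForCacheSync(stopCh)

    schedulerCache := cache.NewSchedulerCache(nodeLister, podLister)
    // Seed the cache with pods that already carry a GPU memory annotation.
    if err := schedulerCache.BuildCache(); err != nil {
        return nil, err
    }
    return schedulerCache, nil
}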
gpushare-scheduler-extender/pkg/cache/configmap.go (vendored, new file, 33 lines)
@@ -0,0 +1,33 @@
package cache

import (
    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    corelisters "k8s.io/client-go/listers/core/v1"
    clientgocache "k8s.io/client-go/tools/cache"

    "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var (
    ConfigMapLister         corelisters.ConfigMapLister
    ConfigMapInformerSynced clientgocache.InformerSynced
)

func getConfigMap(name string) *v1.ConfigMap {
    configMap, err := ConfigMapLister.ConfigMaps(metav1.NamespaceSystem).Get(name)

    // If we can't get the configmap just return nil. The resync will eventually
    // sync things up.
    if err != nil {
        if !apierrors.IsNotFound(err) {
            log.V(10).Info("warn: failed to find configmap: %v", err)
            utilruntime.HandleError(err)
        }
        return nil
    }

    return configMap
}
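getConfigMap is consumed by getUnhealthyGPUs in nodeinfo.go below, which looks up a ConfigMap named unhealthy-gpu-<nodename> in kube-system and reads a comma-separated device list from its gpus key. A hedged sketch of creating such a ConfigMap with client-go; the helper name and the sample device list are assumptions, not part of this commit:

package main

import (
    "context"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
)

// markGPUsUnhealthy is an illustrative helper, not part of this commit.
func markGPUsUnhealthy(ctx context.Context, clientset *kubernetes.Clientset, nodeName string) error {
    cm := &v1.ConfigMap{
        ObjectMeta: metav1.ObjectMeta{
            // getUnhealthyGPUs expects the name "unhealthy-gpu-<nodename>" in kube-system.
            Name:      "unhealthy-gpu-" + nodeName,
            Namespace: metav1.NamespaceSystem,
        },
        // Comma-separated GPU indices, parsed with strconv.Atoi by getUnhealthyGPUs.
        Data: map[string]string{"gpus": "0,3"},
    }
    _, err := clientset.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(ctx, cm, metav1.CreateOptions{})
    return err
}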
gpushare-scheduler-extender/pkg/cache/deviceinfo.go (vendored, new file, 80 lines)
@@ -0,0 +1,80 @@
package cache

import (
    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
    "sync"

    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/types"
)

type DeviceInfo struct {
    idx    int
    podMap map[types.UID]*v1.Pod
    // usedGPUMem uint
    totalGPUMem uint
    rwmu        *sync.RWMutex
}

func (d *DeviceInfo) GetPods() []*v1.Pod {
    pods := []*v1.Pod{}
    for _, pod := range d.podMap {
        pods = append(pods, pod)
    }
    return pods
}

func newDeviceInfo(index int, totalGPUMem uint) *DeviceInfo {
    return &DeviceInfo{
        idx:         index,
        totalGPUMem: totalGPUMem,
        podMap:      map[types.UID]*v1.Pod{},
        rwmu:        new(sync.RWMutex),
    }
}

func (d *DeviceInfo) GetTotalGPUMemory() uint {
    return d.totalGPUMem
}

func (d *DeviceInfo) GetUsedGPUMemory() (gpuMem uint) {
    log.V(100).Info("debug: GetUsedGPUMemory() podMap %v, and its address is %p", d.podMap, d)
    d.rwmu.RLock()
    defer d.rwmu.RUnlock()
    for _, pod := range d.podMap {
        if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
            log.V(100).Info("debug: skip the pod %s in ns %s because its status is %s", pod.Name, pod.Namespace, pod.Status.Phase)
            continue
        }
        // gpuMem += utils.GetGPUMemoryFromPodEnv(pod)
        gpuMem += utils.GetGPUMemoryFromPodAnnotation(pod)
    }
    return gpuMem
}

func (d *DeviceInfo) addPod(pod *v1.Pod) {
    log.V(100).Info("debug: dev.addPod() Pod %s in ns %s with the GPU ID %d will be added to device map",
        pod.Name,
        pod.Namespace,
        d.idx)
    d.rwmu.Lock()
    defer d.rwmu.Unlock()
    d.podMap[pod.UID] = pod
    log.V(100).Info("debug: dev.addPod() podMap after update is %v, and its address is %p",
        d.podMap,
        d)
}

func (d *DeviceInfo) removePod(pod *v1.Pod) {
    log.V(100).Info("debug: dev.removePod() Pod %s in ns %s with the GPU ID %d will be removed from device map",
        pod.Name,
        pod.Namespace,
        d.idx)
    d.rwmu.Lock()
    defer d.rwmu.Unlock()
    delete(d.podMap, pod.UID)
    log.V(100).Info("debug: dev.removePod() podMap after update is %v, and its address is %p",
        d.podMap,
        d)
}
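As a small illustration of how DeviceInfo's accessors compose: per-device free memory is the device's total memory minus the annotation-declared memory of its pods that are still active, which is what getAvailableGPUs in nodeinfo.go computes internally. A hedged sketch using only the exported methods; the helper name is an assumption:

package main

import "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"

// freeGPUMemoryByDevice is an illustrative helper, not part of this commit.
func freeGPUMemoryByDevice(n *cache.NodeInfo) map[int]uint {
    free := map[int]uint{}
    for i, dev := range n.GetDevs() {
        // Total device memory minus memory claimed by pods that are not Succeeded/Failed.
        free[i] = dev.GetTotalGPUMemory() - dev.GetUsedGPUMemory()
    }
    return free
}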
gpushare-scheduler-extender/pkg/cache/nodeinfo.go (vendored, new file, 362 lines)
@@ -0,0 +1,362 @@
package cache

import (
    "context"
    "fmt"
    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
    "strconv"
    "strings"
    "sync"

    v1 "k8s.io/api/core/v1"

    "k8s.io/apimachinery/pkg/types"

    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
)

const (
    OptimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
)

// NodeInfo is node-level aggregated information.
type NodeInfo struct {
    ctx            context.Context
    name           string
    node           *v1.Node
    devs           map[int]*DeviceInfo
    gpuCount       int
    gpuTotalMemory int
    rwmu           *sync.RWMutex
}

// NewNodeInfo creates the node-level cache entry.
func NewNodeInfo(node *v1.Node) *NodeInfo {
    log.V(10).Info("debug: NewNodeInfo() creates nodeInfo for %s", node.Name)

    devMap := map[int]*DeviceInfo{}
    for i := 0; i < utils.GetGPUCountInNode(node); i++ {
        devMap[i] = newDeviceInfo(i, uint(utils.GetTotalGPUMemory(node)/utils.GetGPUCountInNode(node)))
    }

    if len(devMap) == 0 {
        log.V(3).Info("warn: node %s with nodeinfo %v has no devices", node.Name, node)
    }

    return &NodeInfo{
        ctx:            context.Background(),
        name:           node.Name,
        node:           node,
        devs:           devMap,
        gpuCount:       utils.GetGPUCountInNode(node),
        gpuTotalMemory: utils.GetTotalGPUMemory(node),
        rwmu:           new(sync.RWMutex),
    }
}

// Reset refreshes the cached node; the devices are rebuilt only when devs is empty.
func (n *NodeInfo) Reset(node *v1.Node) {
    n.gpuCount = utils.GetGPUCountInNode(node)
    n.gpuTotalMemory = utils.GetTotalGPUMemory(node)
    n.node = node
    if n.gpuCount == 0 {
        log.V(3).Info("warn: Reset for node %s but the gpu count is 0", node.Name)
    }

    if n.gpuTotalMemory == 0 {
        log.V(3).Info("warn: Reset for node %s but the gpu total memory is 0", node.Name)
    }

    if len(n.devs) == 0 && n.gpuCount > 0 {
        devMap := map[int]*DeviceInfo{}
        for i := 0; i < utils.GetGPUCountInNode(node); i++ {
            devMap[i] = newDeviceInfo(i, uint(n.gpuTotalMemory/n.gpuCount))
        }
        n.devs = devMap
    }
    log.V(3).Info("info: Reset() updates nodeInfo for %s with devs %v", node.Name, n.devs)
}

func (n *NodeInfo) GetName() string {
    return n.name
}

func (n *NodeInfo) GetDevs() []*DeviceInfo {
    devs := make([]*DeviceInfo, n.gpuCount)
    for i, dev := range n.devs {
        devs[i] = dev
    }
    return devs
}

func (n *NodeInfo) GetNode() *v1.Node {
    return n.node
}

func (n *NodeInfo) GetTotalGPUMemory() int {
    return n.gpuTotalMemory
}

func (n *NodeInfo) GetGPUCount() int {
    return n.gpuCount
}

func (n *NodeInfo) removePod(pod *v1.Pod) {
    n.rwmu.Lock()
    defer n.rwmu.Unlock()

    id := utils.GetGPUIDFromAnnotation(pod)
    if id >= 0 {
        dev, found := n.devs[id]
        if !found {
            log.V(3).Info("warn: Pod %s in ns %s failed to find the GPU ID %d in node %s", pod.Name, pod.Namespace, id, n.name)
        } else {
            dev.removePod(pod)
        }
    } else {
        log.V(3).Info("warn: Pod %s in ns %s has no valid GPU ID (%d) set for node %s", pod.Name, pod.Namespace, id, n.name)
    }
}

// addOrUpdatePod adds the pod that carries a GPU ID annotation to the node.
func (n *NodeInfo) addOrUpdatePod(pod *v1.Pod) (added bool) {
    n.rwmu.Lock()
    defer n.rwmu.Unlock()

    id := utils.GetGPUIDFromAnnotation(pod)
    log.V(3).Info("debug: addOrUpdatePod() Pod %s in ns %s with the GPU ID %d should be added to device map",
        pod.Name,
        pod.Namespace,
        id)
    if id >= 0 {
        dev, found := n.devs[id]
        if !found {
            log.V(3).Info("warn: Pod %s in ns %s failed to find the GPU ID %d in node %s", pod.Name, pod.Namespace, id, n.name)
        } else {
            dev.addPod(pod)
            added = true
        }
    } else {
        log.V(3).Info("warn: Pod %s in ns %s has no valid GPU ID (%d) set for node %s", pod.Name, pod.Namespace, id, n.name)
    }
    return added
}

// Assume checks whether the pod can be allocated on the node.
func (n *NodeInfo) Assume(pod *v1.Pod) (allocatable bool) {
    allocatable = false

    n.rwmu.RLock()
    defer n.rwmu.RUnlock()

    availableGPUs := n.getAvailableGPUs()
    reqGPU := uint(utils.GetGPUMemoryFromPodResource(pod))
    log.V(10).Info("debug: AvailableGPUs: %v in node %s", availableGPUs, n.name)

    if len(availableGPUs) > 0 {
        for devID := 0; devID < len(n.devs); devID++ {
            availableGPU, ok := availableGPUs[devID]
            if ok {
                if availableGPU >= reqGPU {
                    allocatable = true
                    break
                }
            }
        }
    }

    return allocatable
}

func (n *NodeInfo) Allocate(clientset *kubernetes.Clientset, pod *v1.Pod) (err error) {
    var newPod *v1.Pod
    n.rwmu.Lock()
    defer n.rwmu.Unlock()
    log.V(3).Info("info: Allocate() ----Begin to allocate GPU memory for pod %s in ns %s----", pod.Name, pod.Namespace)
    // 1. Update the pod spec
    devId, found := n.allocateGPUID(pod)
    if found {
        log.V(3).Info("info: Allocate() 1. Allocate GPU ID %d to pod %s in ns %s.----", devId, pod.Name, pod.Namespace)
        // newPod := utils.GetUpdatedPodEnvSpec(pod, devId, nodeInfo.GetTotalGPUMemory()/nodeInfo.GetGPUCount())
        // newPod = utils.GetUpdatedPodAnnotationSpec(pod, devId, n.GetTotalGPUMemory()/n.GetGPUCount())
        patchedAnnotationBytes, err := utils.PatchPodAnnotationSpec(pod, devId, n.GetTotalGPUMemory()/n.GetGPUCount())
        if err != nil {
            return fmt.Errorf("failed to generate patched annotations, reason: %v", err)
        }
        newPod, err = clientset.CoreV1().Pods(pod.Namespace).Patch(n.ctx, pod.Name, types.StrategicMergePatchType, patchedAnnotationBytes, metav1.PatchOptions{})
        // _, err = clientset.CoreV1().Pods(newPod.Namespace).Update(newPod)
        if err != nil {
            // "the object has been modified; please apply your changes to the latest version and try again"
            if err.Error() == OptimisticLockErrorMsg {
                // retry after refetching the latest pod
                pod, err = clientset.CoreV1().Pods(pod.Namespace).Get(n.ctx, pod.Name, metav1.GetOptions{})
                if err != nil {
                    return err
                }
                // newPod = utils.GetUpdatedPodAnnotationSpec(pod, devId, n.GetTotalGPUMemory()/n.GetGPUCount())
                // _, err = clientset.CoreV1().Pods(newPod.Namespace).Update(newPod)
                newPod, err = clientset.CoreV1().Pods(pod.Namespace).Patch(n.ctx, pod.Name, types.StrategicMergePatchType, patchedAnnotationBytes, metav1.PatchOptions{})
                if err != nil {
                    return err
                }
            } else {
                log.V(3).Info("failed to patch pod %v", pod)
                return err
            }
        }
    } else {
        err = fmt.Errorf("the node %s can't place the pod %s in ns %s, and the pod spec is %v", pod.Spec.NodeName, pod.Name, pod.Namespace, pod)
    }

    // 2. Bind the pod to the node
    if err == nil {
        binding := &v1.Binding{
            ObjectMeta: metav1.ObjectMeta{Name: pod.Name, UID: pod.UID},
            Target:     v1.ObjectReference{Kind: "Node", Name: n.name},
        }
        log.V(3).Info("info: Allocate() 2. Try to bind pod %s in %s namespace to node %s with %v",
            pod.Name,
            pod.Namespace,
            pod.Spec.NodeName,
            binding)
        err = clientset.CoreV1().Pods(pod.Namespace).Bind(n.ctx, binding, metav1.CreateOptions{})
        if err != nil {
            log.V(3).Info("warn: Failed to bind the pod %s in ns %s due to %v", pod.Name, pod.Namespace, err)
            return err
        }
    }

    // 3. Update the device info if the pod was patched successfully
    if err == nil {
        log.V(3).Info("info: Allocate() 3. Try to add pod %s in ns %s to dev %d",
            pod.Name,
            pod.Namespace,
            devId)
        dev, found := n.devs[devId]
        if !found {
            log.V(3).Info("warn: Pod %s in ns %s failed to find the GPU ID %d in node %s", pod.Name, pod.Namespace, devId, n.name)
        } else {
            dev.addPod(newPod)
        }
    }
    log.V(3).Info("info: Allocate() ----End to allocate GPU memory for pod %s in ns %s----", pod.Name, pod.Namespace)
    return err
}

// allocateGPUID picks the GPU ID to allocate to the pod: the device with the
// least free memory that still satisfies the request (best fit).
func (n *NodeInfo) allocateGPUID(pod *v1.Pod) (candidateDevID int, found bool) {
    reqGPU := uint(0)
    found = false
    candidateDevID = -1
    candidateGPUMemory := uint(0)
    availableGPUs := n.getAvailableGPUs()

    reqGPU = uint(utils.GetGPUMemoryFromPodResource(pod))

    if reqGPU > uint(0) {
        log.V(3).Info("info: reqGPU for pod %s in ns %s: %d", pod.Name, pod.Namespace, reqGPU)
        log.V(3).Info("info: AvailableGPUs: %v in node %s", availableGPUs, n.name)
        if len(availableGPUs) > 0 {
            for devID := 0; devID < len(n.devs); devID++ {
                availableGPU, ok := availableGPUs[devID]
                if ok {
                    if availableGPU >= reqGPU {
                        if candidateDevID == -1 || candidateGPUMemory > availableGPU {
                            candidateDevID = devID
                            candidateGPUMemory = availableGPU
                        }

                        found = true
                    }
                }
            }
        }

        if found {
            log.V(3).Info("info: Find candidate dev id %d for pod %s in ns %s successfully.",
                candidateDevID,
                pod.Name,
                pod.Namespace)
        } else {
            log.V(3).Info("warn: Failed to find a GPU with %d available memory for the pod %s in the namespace %s",
                reqGPU,
                pod.Name,
                pod.Namespace)
        }
    }

    return candidateDevID, found
}

func (n *NodeInfo) getAvailableGPUs() (availableGPUs map[int]uint) {
    allGPUs := n.getAllGPUs()
    usedGPUs := n.getUsedGPUs()
    unhealthyGPUs := n.getUnhealthyGPUs()
    availableGPUs = map[int]uint{}
    for id, totalGPUMem := range allGPUs {
        if usedGPUMem, found := usedGPUs[id]; found {
            availableGPUs[id] = totalGPUMem - usedGPUMem
        }
    }
    log.V(3).Info("info: available GPU list %v before removing unhealthy GPUs", availableGPUs)
    for id := range unhealthyGPUs {
        log.V(3).Info("info: delete dev %d from available GPU list", id)
        delete(availableGPUs, id)
    }
    log.V(3).Info("info: available GPU list %v after removing unhealthy GPUs", availableGPUs)

    return availableGPUs
}

// getUsedGPUs maps device index to used GPU memory.
func (n *NodeInfo) getUsedGPUs() (usedGPUs map[int]uint) {
    usedGPUs = map[int]uint{}
    for _, dev := range n.devs {
        usedGPUs[dev.idx] = dev.GetUsedGPUMemory()
    }
    log.V(3).Info("info: getUsedGPUs: %v in node %s, and devs %v", usedGPUs, n.name, n.devs)
    return usedGPUs
}

// getAllGPUs maps device index to total GPU memory.
func (n *NodeInfo) getAllGPUs() (allGPUs map[int]uint) {
    allGPUs = map[int]uint{}
    for _, dev := range n.devs {
        allGPUs[dev.idx] = dev.totalGPUMem
    }
    log.V(3).Info("info: getAllGPUs: %v in node %s, and dev %v", allGPUs, n.name, n.devs)
    return allGPUs
}

// getUnhealthyGPUs gets the unhealthy GPUs from the configmap.
func (n *NodeInfo) getUnhealthyGPUs() (unhealthyGPUs map[int]bool) {
    unhealthyGPUs = map[int]bool{}
    name := fmt.Sprintf("unhealthy-gpu-%s", n.GetName())
    log.V(3).Info("info: try to find unhealthy node %s", name)
    cm := getConfigMap(name)
    if cm == nil {
        return
    }

    if devicesStr, found := cm.Data["gpus"]; found {
        log.V(3).Info("warn: the unhealthy gpus %s", devicesStr)
        idsStr := strings.Split(devicesStr, ",")
        for _, sid := range idsStr {
            id, err := strconv.Atoi(sid)
            if err != nil {
                log.V(3).Info("warn: failed to parse id %s due to %v", sid, err)
                // skip unparsable entries instead of marking device 0 unhealthy
                continue
            }
            unhealthyGPUs[id] = true
        }
    } else {
        log.V(3).Info("info: skip, because there are no unhealthy gpus")
    }

    return
}
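Taken together, Assume is the read-only feasibility check used while filtering nodes, and Allocate patches the chosen pod's GPU annotations, binds it, and records it on the device. A minimal sketch of that call order from an extender's bind path, assuming the SchedulerCache and clientset from the earlier sketch; the helper name and error handling are illustrative, not part of this commit:

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    "k8s.io/client-go/kubernetes"

    "github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
)

// bindPodToNode is an illustrative helper, not part of this commit.
func bindPodToNode(c *cache.SchedulerCache, clientset *kubernetes.Clientset, pod *v1.Pod, nodeName string) error {
    nodeInfo, err := c.GetNodeInfo(nodeName)
    if err != nil {
        return err
    }
    // Read-only check: is there a single GPU with enough free memory for the request?
    if !nodeInfo.Assume(pod) {
        return fmt.Errorf("node %s cannot fit pod %s/%s", nodeName, pod.Namespace, pod.Name)
    }
    // Patch the GPU ID annotation, bind the pod, and add it to the chosen device.
    return nodeInfo.Allocate(clientset, pod)
}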