Files
Rainbond/gpushare-scheduler-extender/pkg/scheduler/predicate.go
2025-08-25 16:04:00 +08:00

88 lines
2.5 KiB
Go

package scheduler
import (
"fmt"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kube-scheduler/extender/v1"
)
// Predicate is a scheduler-extender filter. Its Handler method takes the
// candidate nodes for a pod and splits them into nodes the pod can be
// scheduled on and nodes that must be rejected.
type Predicate struct {
// Name labels this predicate. It is not read anywhere in this file;
// presumably it identifies the predicate to whoever registers the handler
// (confirm against callers).
Name string
// cache is the scheduler cache that Handler hands to checkNode, which uses
// it to look up per-node information by node name.
cache *cache.SchedulerCache
}
// checkNode decides whether pod may be placed on the node called nodeName,
// using c to look up the node's cached information.
//
// It returns the node object and a nil error when the pod fits. Otherwise it
// returns a nil node and an error describing the first check that failed:
//   - the node info lookup in c failed (that error is returned unchanged),
//   - the cached info carries no node object,
//   - utils.IsGPUSharingNode reports the node is not a GPU sharing node,
//   - nodeInfo.Assume reports that the pod cannot be accommodated.
func (p Predicate) checkNode(pod *v1.Pod, nodeName string, c *cache.SchedulerCache) (*v1.Node, error) {
	log.V(10).Info("info: check if the pod name %s can be scheduled on node %s", pod.Name, nodeName)

	info, lookupErr := c.GetNodeInfo(nodeName)
	if lookupErr != nil {
		return nil, lookupErr
	}

	candidate := info.GetNode()

	// Cases are evaluated top to bottom and stop at the first match, so each
	// check only runs once every earlier one has passed.
	switch {
	case candidate == nil:
		return nil, fmt.Errorf("failed get node with name %s", nodeName)
	case !utils.IsGPUSharingNode(candidate):
		return nil, fmt.Errorf("The node %s is not for GPU share, need skip", nodeName)
	case !info.Assume(pod):
		return nil, fmt.Errorf("Insufficient GPU Memory in one device")
	}

	log.V(10).Info("info: The pod %s in the namespace %s can be scheduled on %s",
		pod.Name,
		pod.Namespace,
		nodeName)
	return candidate, nil
}
// Handler filters the candidate nodes carried in args for args.Pod.
//
// Candidate names are taken from args.NodeNames when it is set, otherwise from
// the names of args.Nodes.Items. Each candidate is evaluated with checkNode
// against p.cache.
//
// The returned result always has Error empty on the normal path and carries:
//   - NodeNames: names of the nodes that passed,
//   - Nodes: the node objects of the nodes that passed,
//   - FailedNodes: rejected node name -> reason (the checkNode error text).
//
// If args or args.Pod is nil, or neither NodeNames nor Nodes is provided, a
// result containing only an Error message is returned.
func (p Predicate) Handler(args *schedulerapi.ExtenderArgs) *schedulerapi.ExtenderFilterResult {
	if args == nil || args.Pod == nil {
		return &schedulerapi.ExtenderFilterResult{Error: "arg or pod is nil"}
	}
	pod := args.Pod

	var nodeNames []string
	switch {
	case args.NodeNames != nil:
		nodeNames = *args.NodeNames
		log.V(3).Info("extender args NodeNames is not nil, result %+v", nodeNames)
	case args.Nodes != nil:
		nodeNames = make([]string, 0, len(args.Nodes.Items))
		// Index instead of ranging by value: only the name is needed, so
		// there is no reason to copy every Node struct.
		for i := range args.Nodes.Items {
			nodeNames = append(nodeNames, args.Nodes.Items[i].Name)
		}
		log.V(3).Info("extender args Nodes is not nil, names is %+v", nodeNames)
	default:
		return &schedulerapi.ExtenderFilterResult{Error: "cannot get node names"}
	}

	canSchedule := make([]string, 0, len(nodeNames))
	canNotSchedule := make(map[string]string)
	canScheduleNodes := &v1.NodeList{}

	for _, nodeName := range nodeNames {
		node, err := p.checkNode(pod, nodeName, p.cache)
		if err != nil {
			canNotSchedule[nodeName] = err.Error()
			continue
		}
		if node == nil {
			// checkNode does not return a nil node together with a nil
			// error; this guard only protects the dereference below.
			continue
		}
		canSchedule = append(canSchedule, nodeName)
		canScheduleNodes.Items = append(canScheduleNodes.Items, *node)
	}

	result := &schedulerapi.ExtenderFilterResult{
		NodeNames:   &canSchedule,
		Nodes:       canScheduleNodes,
		FailedNodes: canNotSchedule,
	}

	// Log the names and failure reasons directly: formatting the result
	// struct with %+v would print NodeNames and Nodes as pointer addresses.
	log.V(100).Info("predicate result for %s, schedulable nodes %+v, failed nodes %+v",
		pod.Name, canSchedule, canNotSchedule)
	return result
}