88 lines
2.5 KiB
Go
88 lines
2.5 KiB
Go
package scheduler
|
|
|
|
import (
|
|
"fmt"
|
|
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
|
|
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
|
|
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
|
|
"k8s.io/api/core/v1"
|
|
schedulerapi "k8s.io/kube-scheduler/extender/v1"
|
|
)
|
|
|
|
type Predicate struct {
|
|
Name string
|
|
cache *cache.SchedulerCache
|
|
}
|
|
|
|
func (p Predicate) checkNode(pod *v1.Pod, nodeName string, c *cache.SchedulerCache) (*v1.Node, error) {
|
|
log.V(10).Info("info: check if the pod name %s can be scheduled on node %s", pod.Name, nodeName)
|
|
nodeInfo, err := c.GetNodeInfo(nodeName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
node := nodeInfo.GetNode()
|
|
if node == nil {
|
|
return nil, fmt.Errorf("failed get node with name %s", nodeName)
|
|
}
|
|
if !utils.IsGPUSharingNode(node) {
|
|
return nil, fmt.Errorf("The node %s is not for GPU share, need skip", nodeName)
|
|
}
|
|
|
|
allocatable := nodeInfo.Assume(pod)
|
|
if !allocatable {
|
|
return nil, fmt.Errorf("Insufficient GPU Memory in one device")
|
|
} else {
|
|
log.V(10).Info("info: The pod %s in the namespace %s can be scheduled on %s",
|
|
pod.Name,
|
|
pod.Namespace,
|
|
nodeName)
|
|
}
|
|
return node, nil
|
|
}
|
|
|
|
func (p Predicate) Handler(args *schedulerapi.ExtenderArgs) *schedulerapi.ExtenderFilterResult {
|
|
if args == nil || args.Pod == nil {
|
|
return &schedulerapi.ExtenderFilterResult{Error: fmt.Sprintf("arg or pod is nil")}
|
|
}
|
|
|
|
pod := args.Pod
|
|
var nodeNames []string
|
|
if args.NodeNames != nil {
|
|
nodeNames = *args.NodeNames
|
|
log.V(3).Info("extender args NodeNames is not nil, result %+v", nodeNames)
|
|
} else if args.Nodes != nil {
|
|
for _, n := range args.Nodes.Items {
|
|
nodeNames = append(nodeNames, n.Name)
|
|
}
|
|
log.V(3).Info("extender args Nodes is not nil, names is %+v", nodeNames)
|
|
} else {
|
|
return &schedulerapi.ExtenderFilterResult{Error: fmt.Sprintf("cannot get node names")}
|
|
}
|
|
canSchedule := make([]string, 0, len(nodeNames))
|
|
canNotSchedule := make(map[string]string)
|
|
canScheduleNodes := &v1.NodeList{}
|
|
|
|
for _, nodeName := range nodeNames {
|
|
node, err := p.checkNode(pod, nodeName, p.cache)
|
|
if err != nil {
|
|
canNotSchedule[nodeName] = err.Error()
|
|
} else {
|
|
if node != nil {
|
|
canSchedule = append(canSchedule, nodeName)
|
|
canScheduleNodes.Items = append(canScheduleNodes.Items, *node)
|
|
}
|
|
}
|
|
}
|
|
|
|
result := schedulerapi.ExtenderFilterResult{
|
|
NodeNames: &canSchedule,
|
|
Nodes: canScheduleNodes,
|
|
FailedNodes: canNotSchedule,
|
|
Error: "",
|
|
}
|
|
|
|
log.V(100).Info("predicate result for %s, is %+v", pod.Name, result)
|
|
return &result
|
|
}
|