synchronization

This commit is contained in:
2025-08-25 16:04:00 +08:00
commit 33f9b3ce46
1951 changed files with 854396 additions and 0 deletions

View File

@@ -0,0 +1,134 @@
package main
import (
"fmt"
"os"
"path"
"time"
log "github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
)
// Package-level Kubernetes client state shared by the helpers in this file.
var (
// clientConfig is declared here but is not assigned anywhere in this part of
// the file. NOTE(review): confirm it is still used elsewhere.
clientConfig clientcmd.ClientConfig
// clientset is the typed Kubernetes API client; kubeInit builds it from
// restConfig.
clientset *kubernetes.Clientset
// restConfig is the REST client configuration kubeInit loads from the
// kubeconfig file.
restConfig *rest.Config
// retries bounds how many additional List attempts the pod-listing helpers
// make after an initial failure.
retries = 5
)
// kubeInit populates the package-level restConfig and clientset from a
// kubeconfig file.
//
// The file location is taken from the KUBECONFIG environment variable; when
// that is empty it falls back to $HOME/.kube/config. Any failure (missing
// file, unparsable config, client construction error) is fatal and terminates
// the process through log.Fatalf.
func kubeInit() {
	cfgPath := os.Getenv("KUBECONFIG")
	if len(cfgPath) == 0 {
		home := os.Getenv("HOME")
		cfgPath = path.Join(home, "/.kube/config")
	}

	// Fail early with an actionable message when the file cannot be stat'ed.
	_, statErr := os.Stat(cfgPath)
	if statErr != nil {
		log.Fatalf("kubeconfig %s failed to find due to %v, please set KUBECONFIG env", cfgPath, statErr)
	}

	cfg, err := clientcmd.BuildConfigFromFlags("", cfgPath)
	restConfig = cfg
	if err != nil {
		log.Fatalf("Failed due to %v", err)
	}

	cs, err := kubernetes.NewForConfig(restConfig)
	clientset = cs
	if err != nil {
		log.Fatalf("Failed due to %v", err)
	}
}
// podInfo identifies a pod by its name and the namespace it lives in.
type podInfo struct {
	name      string
	namespace string
}

// equal reports whether p and p1 refer to the same pod, i.e. both the
// namespace and the name match.
func (p podInfo) equal(p1 podInfo) bool {
	if p.namespace != p1.namespace {
		return false
	}
	return p.name == p1.name
}
// getActivePodsByNode lists the pods bound to nodeName across all namespaces
// and returns the ones filterActivePods keeps.
//
// The List call is made once and, if it fails, retried up to `retries` more
// times with a 100ms pause before each retry. When every attempt fails, an
// empty slice is returned together with an error that names the node and
// carries the last List failure.
func getActivePodsByNode(nodeName string) ([]v1.Pod, error) {
	// Build the options once; they are identical for every attempt.
	opts := metav1.ListOptions{
		FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}).String(),
		LabelSelector: labels.Everything().String(),
	}

	pods, err := clientset.CoreV1().Pods(v1.NamespaceAll).List(opts)
	for i := 0; i < retries && err != nil; i++ {
		// Pause before retrying rather than after: the retry is not fired
		// back-to-back with the failed call, and a successful retry returns
		// without an extra delay.
		time.Sleep(100 * time.Millisecond)
		pods, err = clientset.CoreV1().Pods(v1.NamespaceAll).List(opts)
	}
	if err != nil {
		// Keep the underlying cause so callers can see why listing failed.
		return []v1.Pod{}, fmt.Errorf("failed to get Pods in node %v: %v", nodeName, err)
	}
	return filterActivePods(pods.Items), nil
}
// getActivePodsInAllNodes lists the pods of every namespace, regardless of
// node, and returns the ones filterActivePods keeps.
//
// The List call is made once and, if it fails, retried up to `retries` more
// times with a 100ms pause before each retry. When every attempt fails, an
// empty slice is returned together with an error that carries the last List
// failure.
func getActivePodsInAllNodes() ([]v1.Pod, error) {
	// Build the options once; they are identical for every attempt.
	opts := metav1.ListOptions{
		LabelSelector: labels.Everything().String(),
	}

	pods, err := clientset.CoreV1().Pods(v1.NamespaceAll).List(opts)
	for i := 0; i < retries && err != nil; i++ {
		// Pause before retrying rather than after: the retry is not fired
		// back-to-back with the failed call, and a successful retry returns
		// without an extra delay.
		time.Sleep(100 * time.Millisecond)
		pods, err = clientset.CoreV1().Pods(v1.NamespaceAll).List(opts)
	}
	if err != nil {
		// Keep the underlying cause so callers can see why listing failed.
		return []v1.Pod{}, fmt.Errorf("failed to get Pods: %v", err)
	}
	return filterActivePods(pods.Items), nil
}
// filterActivePods returns, in their original order, the pods whose phase is
// neither Succeeded nor Failed. The result is always a non-nil slice, even
// when no pod qualifies.
func filterActivePods(pods []v1.Pod) (activePods []v1.Pod) {
	activePods = []v1.Pod{}
	for i := range pods {
		switch pods[i].Status.Phase {
		case v1.PodSucceeded, v1.PodFailed:
			// Pod has finished; leave it out of the result.
		default:
			activePods = append(activePods, pods[i])
		}
	}
	return activePods
}
// getAllSharedGPUNode lists every node in the cluster and returns those for
// which isGPUSharingNode reports true, preserving the API's ordering.
//
// If the List call fails, the error is returned unchanged together with an
// empty (non-nil) slice.
func getAllSharedGPUNode() ([]v1.Node, error) {
	sharedNodes := []v1.Node{}

	nodeList, listErr := clientset.CoreV1().Nodes().List(metav1.ListOptions{})
	if listErr != nil {
		return sharedNodes, listErr
	}

	for idx := range nodeList.Items {
		candidate := nodeList.Items[idx]
		if !isGPUSharingNode(candidate) {
			continue
		}
		sharedNodes = append(sharedNodes, candidate)
	}
	return sharedNodes, nil
}
// gpuMemoryInPod sums the resourceName limit over the pod's containers
// (pod.Spec.Containers) and returns the total as an int. Containers that do
// not declare that limit contribute nothing.
func gpuMemoryInPod(pod v1.Pod) int {
	sum := 0
	for i := range pod.Spec.Containers {
		limit, found := pod.Spec.Containers[i].Resources.Limits[resourceName]
		if !found {
			continue
		}
		sum += int(limit.Value())
	}
	return sum
}