package main import ( "bytes" "fmt" "os" "strconv" "text/tabwriter" log "github.com/golang/glog" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" ) func displayDetails(nodeInfos []*NodeInfo) { w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) var ( totalGPUMemInCluster int64 usedGPUMemInCluster int64 prtLineLen int ) for _, nodeInfo := range nodeInfos { address := "unknown" if len(nodeInfo.node.Status.Addresses) > 0 { //address = nodeInfo.node.Status.Addresses[0].Address for _, addr := range nodeInfo.node.Status.Addresses { if addr.Type == v1.NodeInternalIP { address = addr.Address break } } } totalGPUMemInNode := nodeInfo.gpuTotalMemory if totalGPUMemInNode <= 0 { continue } fmt.Fprintf(w, "\n") fmt.Fprintf(w, "NAME:\t%s\n", nodeInfo.node.Name) fmt.Fprintf(w, "IPADDRESS:\t%s\n", address) fmt.Fprintf(w, "\n") usedGPUMemInNode := 0 var buf bytes.Buffer buf.WriteString("NAME\tNAMESPACE\t") for i := 0; i < nodeInfo.gpuCount; i++ { buf.WriteString(fmt.Sprintf("GPU%d(Allocated)\t", i)) } if nodeInfo.hasPendingGPUMemory() { buf.WriteString("Pending(Allocated)\t") } buf.WriteString("\n") fmt.Fprintf(w, buf.String()) var buffer bytes.Buffer exists := map[types.UID]bool{} for i, dev := range nodeInfo.devs { usedGPUMemInNode += dev.usedGPUMem for _, pod := range dev.pods { if _,ok := exists[pod.UID]; ok { continue } buffer.WriteString(fmt.Sprintf("%s\t%s\t", pod.Name, pod.Namespace)) count := nodeInfo.gpuCount if nodeInfo.hasPendingGPUMemory() { count += 1 } for k := 0; k < count; k++ { allocation := GetAllocation(&pod) if len(allocation) != 0 { buffer.WriteString(fmt.Sprintf("%d\t", allocation[k])) continue } if k == i || (i == -1 && k == nodeInfo.gpuCount) { buffer.WriteString(fmt.Sprintf("%d\t", getGPUMemoryInPod(pod))) } else { buffer.WriteString("0\t") } } buffer.WriteString("\n") exists[pod.UID] = true } } if prtLineLen == 0 { prtLineLen = buffer.Len() + 10 } fmt.Fprintf(w, buffer.String()) var gpuUsageInNode float64 = 0 if totalGPUMemInNode > 0 { gpuUsageInNode = float64(usedGPUMemInNode) / float64(totalGPUMemInNode) * 100 } else { fmt.Fprintf(w, "\n") } fmt.Fprintf(w, "Allocated :\t%d (%d%%)\t\n", usedGPUMemInNode, int64(gpuUsageInNode)) fmt.Fprintf(w, "Total :\t%d \t\n", nodeInfo.gpuTotalMemory) // fmt.Fprintf(w, "-----------------------------------------------------------------------------------------\n") var prtLine bytes.Buffer for i := 0; i < prtLineLen; i++ { prtLine.WriteString("-") } prtLine.WriteString("\n") fmt.Fprintf(w, prtLine.String()) totalGPUMemInCluster += int64(totalGPUMemInNode) usedGPUMemInCluster += int64(usedGPUMemInNode) } fmt.Fprintf(w, "\n") fmt.Fprintf(w, "\n") fmt.Fprintf(w, "Allocated/Total GPU Memory In Cluster:\t") log.V(2).Infof("gpu: %s, allocated GPU Memory %s", strconv.FormatInt(totalGPUMemInCluster, 10), strconv.FormatInt(usedGPUMemInCluster, 10)) var gpuUsage float64 = 0 if totalGPUMemInCluster > 0 { gpuUsage = float64(usedGPUMemInCluster) / float64(totalGPUMemInCluster) * 100 } fmt.Fprintf(w, "%s/%s (%d%%)\t\n", strconv.FormatInt(usedGPUMemInCluster, 10), strconv.FormatInt(totalGPUMemInCluster, 10), int64(gpuUsage)) // fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", ...) _ = w.Flush() } func getMaxGPUCount(nodeInfos []*NodeInfo) (max int) { for _, node := range nodeInfos { if node.gpuCount > max { max = node.gpuCount } } return max } func displaySummary(nodeInfos []*NodeInfo) { w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) var ( maxGPUCount int totalGPUMemInCluster int64 usedGPUMemInCluster int64 prtLineLen int ) hasPendingGPU := hasPendingGPUMemory(nodeInfos) maxGPUCount = getMaxGPUCount(nodeInfos) var buffer bytes.Buffer buffer.WriteString("NAME\tIPADDRESS\t") for i := 0; i < maxGPUCount; i++ { buffer.WriteString(fmt.Sprintf("GPU%d(Allocated/Total)\t", i)) } if hasPendingGPU { buffer.WriteString("PENDING(Allocated)\t") } buffer.WriteString(fmt.Sprintf("GPU Memory(%s)\n", memoryUnit)) // fmt.Fprintf(w, "NAME\tIPADDRESS\tROLE\tGPU(Allocated/Total)\tPENDING(Allocated)\n") fmt.Fprintf(w, buffer.String()) for _, nodeInfo := range nodeInfos { address := "unknown" if len(nodeInfo.node.Status.Addresses) > 0 { // address = nodeInfo.node.Status.Addresses[0].Address for _, addr := range nodeInfo.node.Status.Addresses { if addr.Type == v1.NodeInternalIP { address = addr.Address break } } } gpuMemInfos := []string{} pendingGPUMemInfo := "" usedGPUMemInNode := 0 totalGPUMemInNode := nodeInfo.gpuTotalMemory if totalGPUMemInNode <= 0 { continue } for i := 0; i < maxGPUCount; i++ { gpuMemInfo := "0/0" if dev, ok := nodeInfo.devs[i]; ok { gpuMemInfo = dev.String() usedGPUMemInNode += dev.usedGPUMem } gpuMemInfos = append(gpuMemInfos, gpuMemInfo) } // check if there is pending dev if dev, ok := nodeInfo.devs[-1]; ok { pendingGPUMemInfo = fmt.Sprintf("%d", dev.usedGPUMem) usedGPUMemInNode += dev.usedGPUMem } nodeGPUMemInfo := fmt.Sprintf("%d/%d", usedGPUMemInNode, totalGPUMemInNode) var buf bytes.Buffer buf.WriteString(fmt.Sprintf("%s\t%s\t", nodeInfo.node.Name, address)) for i := 0; i < maxGPUCount; i++ { buf.WriteString(fmt.Sprintf("%s\t", gpuMemInfos[i])) } if hasPendingGPU { buf.WriteString(fmt.Sprintf("%s\t", pendingGPUMemInfo)) } buf.WriteString(fmt.Sprintf("%s\n", nodeGPUMemInfo)) fmt.Fprintf(w, buf.String()) if prtLineLen == 0 { prtLineLen = buf.Len() + 20 } usedGPUMemInCluster += int64(usedGPUMemInNode) totalGPUMemInCluster += int64(totalGPUMemInNode) } // fmt.Fprintf(w, "-----------------------------------------------------------------------------------------\n") var prtLine bytes.Buffer for i := 0; i < prtLineLen; i++ { prtLine.WriteString("-") } prtLine.WriteString("\n") fmt.Fprint(w, prtLine.String()) fmt.Fprintf(w, "Allocated/Total GPU Memory In Cluster:\n") log.V(2).Infof("gpu: %s, allocated GPU Memory %s", strconv.FormatInt(totalGPUMemInCluster, 10), strconv.FormatInt(usedGPUMemInCluster, 10)) var gpuUsage float64 = 0 if totalGPUMemInCluster > 0 { gpuUsage = float64(usedGPUMemInCluster) / float64(totalGPUMemInCluster) * 100 } fmt.Fprintf(w, "%s/%s (%d%%)\t\n", strconv.FormatInt(usedGPUMemInCluster, 10), strconv.FormatInt(totalGPUMemInCluster, 10), int64(gpuUsage)) // fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", ...) _ = w.Flush() } func getGPUMemoryInPod(pod v1.Pod) int { gpuMem := 0 for _, container := range pod.Spec.Containers { if val, ok := container.Resources.Limits[resourceName]; ok { gpuMem += int(val.Value()) } } return gpuMem }