package main

import (
	"bytes"
	"fmt"
	"os"
	"strconv"
	"text/tabwriter"

	log "github.com/golang/glog"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
)
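
// NOTE (assumption, inferred from how they are used below; not a definitive
// description of the rest of the package): this file relies on identifiers
// defined elsewhere, roughly:
//   - NodeInfo: wraps a *v1.Node together with gpuCount, gpuTotalMemory, a devs
//     map keyed by GPU index (index -1 collects "pending" allocations), and a
//     hasPendingGPUMemory() method.
//   - each device entry in devs: tracks usedGPUMem, the pods placed on that GPU,
//     and a String() method that renders "allocated/total".
//   - GetAllocation(*v1.Pod): returns the pod's recorded per-GPU allocation, if any.
//   - hasPendingGPUMemory(nodeInfos): reports whether any node has pending GPU memory.
//   - memoryUnit, resourceName: the display unit and the name of the
//     shared-GPU-memory extended resource.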

// displayDetails prints a per-node breakdown: the pods scheduled on each node,
// how much GPU memory each pod has allocated on each GPU, and the node and
// cluster totals.
func displayDetails(nodeInfos []*NodeInfo) {
	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	var (
		totalGPUMemInCluster int64
		usedGPUMemInCluster  int64
		prtLineLen           int
	)

	for _, nodeInfo := range nodeInfos {
		// Prefer the node's internal IP; fall back to "unknown".
		address := "unknown"
		if len(nodeInfo.node.Status.Addresses) > 0 {
			// address = nodeInfo.node.Status.Addresses[0].Address
			for _, addr := range nodeInfo.node.Status.Addresses {
				if addr.Type == v1.NodeInternalIP {
					address = addr.Address
					break
				}
			}
		}

		totalGPUMemInNode := nodeInfo.gpuTotalMemory
		if totalGPUMemInNode <= 0 {
			// Skip nodes that expose no shareable GPU memory.
			continue
		}

		fmt.Fprintf(w, "\n")
		fmt.Fprintf(w, "NAME:\t%s\n", nodeInfo.node.Name)
		fmt.Fprintf(w, "IPADDRESS:\t%s\n", address)
		fmt.Fprintf(w, "\n")

		// Header row: one column per GPU, plus a pending column if needed.
		usedGPUMemInNode := 0
		var buf bytes.Buffer
		buf.WriteString("NAME\tNAMESPACE\t")
		for i := 0; i < nodeInfo.gpuCount; i++ {
			buf.WriteString(fmt.Sprintf("GPU%d(Allocated)\t", i))
		}

		if nodeInfo.hasPendingGPUMemory() {
			buf.WriteString("Pending(Allocated)\t")
		}
		buf.WriteString("\n")
		fmt.Fprint(w, buf.String())

		// One row per pod; a pod bound to several GPUs is printed only once.
		var buffer bytes.Buffer
		exists := map[types.UID]bool{}
		for i, dev := range nodeInfo.devs {
			usedGPUMemInNode += dev.usedGPUMem
			for _, pod := range dev.pods {
				if _, ok := exists[pod.UID]; ok {
					continue
				}
				buffer.WriteString(fmt.Sprintf("%s\t%s\t", pod.Name, pod.Namespace))
				count := nodeInfo.gpuCount
				if nodeInfo.hasPendingGPUMemory() {
					count += 1
				}

				for k := 0; k < count; k++ {
					allocation := GetAllocation(&pod)
					if len(allocation) != 0 {
						buffer.WriteString(fmt.Sprintf("%d\t", allocation[k]))
						continue
					}
					// Without an explicit allocation, charge the pod's GPU memory
					// to the device it is listed under (index -1 means pending).
					if k == i || (i == -1 && k == nodeInfo.gpuCount) {
						buffer.WriteString(fmt.Sprintf("%d\t", getGPUMemoryInPod(pod)))
					} else {
						buffer.WriteString("0\t")
					}
				}
				buffer.WriteString("\n")
				exists[pod.UID] = true
			}
		}
		if prtLineLen == 0 {
			prtLineLen = buffer.Len() + 10
		}
		fmt.Fprint(w, buffer.String())

		var gpuUsageInNode float64 = 0
		if totalGPUMemInNode > 0 {
			gpuUsageInNode = float64(usedGPUMemInNode) / float64(totalGPUMemInNode) * 100
		} else {
			fmt.Fprintf(w, "\n")
		}

		fmt.Fprintf(w, "Allocated :\t%d (%d%%)\t\n", usedGPUMemInNode, int64(gpuUsageInNode))
		fmt.Fprintf(w, "Total :\t%d \t\n", nodeInfo.gpuTotalMemory)
		// fmt.Fprintf(w, "-----------------------------------------------------------------------------------------\n")
		var prtLine bytes.Buffer
		for i := 0; i < prtLineLen; i++ {
			prtLine.WriteString("-")
		}
		prtLine.WriteString("\n")
		fmt.Fprint(w, prtLine.String())
		totalGPUMemInCluster += int64(totalGPUMemInNode)
		usedGPUMemInCluster += int64(usedGPUMemInNode)
	}
	fmt.Fprintf(w, "\n")
	fmt.Fprintf(w, "\n")
	fmt.Fprintf(w, "Allocated/Total GPU Memory In Cluster:\t")
	log.V(2).Infof("gpu: %s, allocated GPU Memory %s", strconv.FormatInt(totalGPUMemInCluster, 10),
		strconv.FormatInt(usedGPUMemInCluster, 10))

	var gpuUsage float64 = 0
	if totalGPUMemInCluster > 0 {
		gpuUsage = float64(usedGPUMemInCluster) / float64(totalGPUMemInCluster) * 100
	}
	fmt.Fprintf(w, "%s/%s (%d%%)\t\n",
		strconv.FormatInt(usedGPUMemInCluster, 10),
		strconv.FormatInt(totalGPUMemInCluster, 10),
		int64(gpuUsage))
	// fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", ...)

	_ = w.Flush()
}
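
// Illustrative output shape for displayDetails, reconstructed from the format
// strings above (node, pod, and memory values are hypothetical):
//
//	NAME:       node-1
//	IPADDRESS:  192.168.1.10
//
//	NAME  NAMESPACE  GPU0(Allocated)  GPU1(Allocated)
//	demo  default    4                0
//	Allocated : 4 (12%)
//	Total :     32
//	--------------------------------------------------
//
//	Allocated/Total GPU Memory In Cluster:  4/32 (12%)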

// getMaxGPUCount returns the largest GPU count found across the given nodes;
// it sizes the per-GPU columns in the summary table.
func getMaxGPUCount(nodeInfos []*NodeInfo) (max int) {
	for _, node := range nodeInfos {
		if node.gpuCount > max {
			max = node.gpuCount
		}
	}

	return max
}

// displaySummary prints one row per node with per-GPU allocated/total memory,
// an optional pending column, and the cluster-wide totals.
func displaySummary(nodeInfos []*NodeInfo) {
	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	var (
		maxGPUCount          int
		totalGPUMemInCluster int64
		usedGPUMemInCluster  int64
		prtLineLen           int
	)

	hasPendingGPU := hasPendingGPUMemory(nodeInfos)

	maxGPUCount = getMaxGPUCount(nodeInfos)

	// Header row: one column per GPU index up to the widest node.
	var buffer bytes.Buffer
	buffer.WriteString("NAME\tIPADDRESS\t")
	for i := 0; i < maxGPUCount; i++ {
		buffer.WriteString(fmt.Sprintf("GPU%d(Allocated/Total)\t", i))
	}

	if hasPendingGPU {
		buffer.WriteString("PENDING(Allocated)\t")
	}
	buffer.WriteString(fmt.Sprintf("GPU Memory(%s)\n", memoryUnit))

	// fmt.Fprintf(w, "NAME\tIPADDRESS\tROLE\tGPU(Allocated/Total)\tPENDING(Allocated)\n")
	fmt.Fprint(w, buffer.String())
	for _, nodeInfo := range nodeInfos {
		// Prefer the node's internal IP; fall back to "unknown".
		address := "unknown"
		if len(nodeInfo.node.Status.Addresses) > 0 {
			// address = nodeInfo.node.Status.Addresses[0].Address
			for _, addr := range nodeInfo.node.Status.Addresses {
				if addr.Type == v1.NodeInternalIP {
					address = addr.Address
					break
				}
			}
		}

		gpuMemInfos := []string{}
		pendingGPUMemInfo := ""
		usedGPUMemInNode := 0
		totalGPUMemInNode := nodeInfo.gpuTotalMemory
		if totalGPUMemInNode <= 0 {
			// Skip nodes that expose no shareable GPU memory.
			continue
		}

		for i := 0; i < maxGPUCount; i++ {
			gpuMemInfo := "0/0"
			if dev, ok := nodeInfo.devs[i]; ok {
				gpuMemInfo = dev.String()
				usedGPUMemInNode += dev.usedGPUMem
			}
			gpuMemInfos = append(gpuMemInfos, gpuMemInfo)
		}

		// check if there is pending dev
		if dev, ok := nodeInfo.devs[-1]; ok {
			pendingGPUMemInfo = fmt.Sprintf("%d", dev.usedGPUMem)
			usedGPUMemInNode += dev.usedGPUMem
		}

		nodeGPUMemInfo := fmt.Sprintf("%d/%d", usedGPUMemInNode, totalGPUMemInNode)

		var buf bytes.Buffer
		buf.WriteString(fmt.Sprintf("%s\t%s\t", nodeInfo.node.Name, address))
		for i := 0; i < maxGPUCount; i++ {
			buf.WriteString(fmt.Sprintf("%s\t", gpuMemInfos[i]))
		}
		if hasPendingGPU {
			buf.WriteString(fmt.Sprintf("%s\t", pendingGPUMemInfo))
		}

		buf.WriteString(fmt.Sprintf("%s\n", nodeGPUMemInfo))
		fmt.Fprint(w, buf.String())

		if prtLineLen == 0 {
			prtLineLen = buf.Len() + 20
		}

		usedGPUMemInCluster += int64(usedGPUMemInNode)
		totalGPUMemInCluster += int64(totalGPUMemInNode)
	}
	// fmt.Fprintf(w, "-----------------------------------------------------------------------------------------\n")
	var prtLine bytes.Buffer
	for i := 0; i < prtLineLen; i++ {
		prtLine.WriteString("-")
	}
	prtLine.WriteString("\n")
	fmt.Fprint(w, prtLine.String())

	fmt.Fprintf(w, "Allocated/Total GPU Memory In Cluster:\n")
	log.V(2).Infof("gpu: %s, allocated GPU Memory %s", strconv.FormatInt(totalGPUMemInCluster, 10),
		strconv.FormatInt(usedGPUMemInCluster, 10))
	var gpuUsage float64 = 0
	if totalGPUMemInCluster > 0 {
		gpuUsage = float64(usedGPUMemInCluster) / float64(totalGPUMemInCluster) * 100
	}
	fmt.Fprintf(w, "%s/%s (%d%%)\t\n",
		strconv.FormatInt(usedGPUMemInCluster, 10),
		strconv.FormatInt(totalGPUMemInCluster, 10),
		int64(gpuUsage))
	// fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", ...)

	_ = w.Flush()
}
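
// Illustrative output shape for displaySummary, reconstructed from the format
// strings above (names, addresses, and values are hypothetical; the memory
// column is labelled with whatever memoryUnit is set to):
//
//	NAME    IPADDRESS     GPU0(Allocated/Total)  GPU1(Allocated/Total)  GPU Memory(GiB)
//	node-1  192.168.1.10  4/16                   0/16                   4/32
//	--------------------------------------------------------------------------------
//	Allocated/Total GPU Memory In Cluster:
//	4/32 (12%)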

// getGPUMemoryInPod sums the shared-GPU-memory resource limits requested by
// every container in the pod.
func getGPUMemoryInPod(pod v1.Pod) int {
	gpuMem := 0
	for _, container := range pod.Spec.Containers {
		if val, ok := container.Resources.Limits[resourceName]; ok {
			gpuMem += int(val.Value())
		}
	}
	return gpuMem
}
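
// Hypothetical usage sketch (not part of the original code): getGPUMemoryInPod
// reads the extended-resource limit named by resourceName, which is defined
// elsewhere in this package. The pod below and the value "4" are illustrative;
// resource here is k8s.io/apimachinery/pkg/api/resource.
//
//	pod := v1.Pod{
//		Spec: v1.PodSpec{
//			Containers: []v1.Container{{
//				Name: "cuda-job",
//				Resources: v1.ResourceRequirements{
//					Limits: v1.ResourceList{
//						resourceName: resource.MustParse("4"),
//					},
//				},
//			}},
//		},
//	}
//	gpuMem := getGPUMemoryInPod(pod) // 4, in the unit named by memoryUnit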