synchronization

2025-08-25 16:04:00 +08:00
commit 33f9b3ce46
1951 changed files with 854396 additions and 0 deletions


@@ -0,0 +1,25 @@
Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -0,0 +1,36 @@
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: #PACKAGE#
Source: https://github.com/NVIDIA/nvidia-docker

Files: *
Copyright: #YEAR# #USERNAME# <#EMAIL#>
License: BSD-3-Clause

Files: debian/*
Copyright: #YEAR# #USERNAME# <#EMAIL#>
License: BSD-3-Clause

License: BSD-3-Clause
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of #USERNAME# nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -0,0 +1 @@
../common/nvidia-docker.service


@@ -0,0 +1,25 @@
Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -0,0 +1 @@
../../common/nvidia-docker.service


@@ -0,0 +1,311 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.

// Package nvml provides cgo bindings to the NVIDIA Management Library
// (NVML). Getters return nil pointers together with a nil error when a
// query is not supported by the device or driver.
package nvml
// #cgo LDFLAGS: -ldl -Wl,--unresolved-symbols=ignore-in-object-files
// #include "nvml_dl.h"
import "C"
import (
"errors"
"fmt"
)
const (
	szDriver   = C.NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE
	szName     = C.NVML_DEVICE_NAME_BUFFER_SIZE
	szUUID     = C.NVML_DEVICE_UUID_BUFFER_SIZE
	szProcs    = 32 // maximum number of processes queried per device
	szProcName = 64 // maximum process-name length
)
type handle struct{ dev C.nvmlDevice_t }
func uintPtr(c C.uint) *uint {
i := uint(c)
return &i
}
func uint64Ptr(c C.ulonglong) *uint64 {
i := uint64(c)
return &i
}
func stringPtr(c *C.char) *string {
s := C.GoString(c)
return &s
}
// errorString converts an nvmlReturn_t into a Go error, nil on success.
func errorString(ret C.nvmlReturn_t) error {
	if ret == C.NVML_SUCCESS {
		return nil
	}
	err := C.GoString(C.nvmlErrorString(ret))
	return fmt.Errorf("nvml: %v", err)
}
func init_() error {
r := C.nvmlInit_dl()
if r == C.NVML_ERROR_LIBRARY_NOT_FOUND {
return errors.New("could not load NVML library")
}
return errorString(r)
}
func shutdown() error {
return errorString(C.nvmlShutdown_dl())
}
func systemGetDriverVersion() (string, error) {
var driver [szDriver]C.char
r := C.nvmlSystemGetDriverVersion(&driver[0], szDriver)
return C.GoString(&driver[0]), errorString(r)
}
func systemGetProcessName(pid uint) (string, error) {
var proc [szProcName]C.char
r := C.nvmlSystemGetProcessName(C.uint(pid), &proc[0], szProcName)
return C.GoString(&proc[0]), errorString(r)
}
func deviceGetCount() (uint, error) {
var n C.uint
r := C.nvmlDeviceGetCount(&n)
return uint(n), errorString(r)
}
func deviceGetHandleByIndex(idx uint) (handle, error) {
var dev C.nvmlDevice_t
r := C.nvmlDeviceGetHandleByIndex(C.uint(idx), &dev)
return handle{dev}, errorString(r)
}
func deviceGetTopologyCommonAncestor(h1, h2 handle) (*uint, error) {
var level C.nvmlGpuTopologyLevel_t
r := C.nvmlDeviceGetTopologyCommonAncestor_dl(h1.dev, h2.dev, &level)
if r == C.NVML_ERROR_FUNCTION_NOT_FOUND || r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(C.uint(level)), errorString(r)
}
func (h handle) deviceGetName() (*string, error) {
var name [szName]C.char
r := C.nvmlDeviceGetName(h.dev, &name[0], szName)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return stringPtr(&name[0]), errorString(r)
}
func (h handle) deviceGetUUID() (*string, error) {
var uuid [szUUID]C.char
r := C.nvmlDeviceGetUUID(h.dev, &uuid[0], szUUID)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return stringPtr(&uuid[0]), errorString(r)
}
func (h handle) deviceGetPciInfo() (*string, error) {
var pci C.nvmlPciInfo_t
r := C.nvmlDeviceGetPciInfo(h.dev, &pci)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return stringPtr(&pci.busId[0]), errorString(r)
}
func (h handle) deviceGetMinorNumber() (*uint, error) {
var minor C.uint
r := C.nvmlDeviceGetMinorNumber(h.dev, &minor)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(minor), errorString(r)
}
func (h handle) deviceGetBAR1MemoryInfo() (*uint64, *uint64, error) {
var bar1 C.nvmlBAR1Memory_t
r := C.nvmlDeviceGetBAR1MemoryInfo(h.dev, &bar1)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
return uint64Ptr(bar1.bar1Total), uint64Ptr(bar1.bar1Used), errorString(r)
}
func (h handle) deviceGetPowerManagementLimit() (*uint, error) {
var power C.uint
r := C.nvmlDeviceGetPowerManagementLimit(h.dev, &power)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(power), errorString(r)
}
func (h handle) deviceGetMaxClockInfo() (*uint, *uint, error) {
var sm, mem C.uint
r := C.nvmlDeviceGetMaxClockInfo(h.dev, C.NVML_CLOCK_SM, &sm)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetMaxClockInfo(h.dev, C.NVML_CLOCK_MEM, &mem)
}
return uintPtr(sm), uintPtr(mem), errorString(r)
}
func (h handle) deviceGetMaxPcieLinkGeneration() (*uint, error) {
var link C.uint
r := C.nvmlDeviceGetMaxPcieLinkGeneration(h.dev, &link)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(link), errorString(r)
}
func (h handle) deviceGetMaxPcieLinkWidth() (*uint, error) {
var width C.uint
r := C.nvmlDeviceGetMaxPcieLinkWidth(h.dev, &width)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(width), errorString(r)
}
func (h handle) deviceGetPowerUsage() (*uint, error) {
var power C.uint
r := C.nvmlDeviceGetPowerUsage(h.dev, &power)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(power), errorString(r)
}
func (h handle) deviceGetTemperature() (*uint, error) {
var temp C.uint
r := C.nvmlDeviceGetTemperature(h.dev, C.NVML_TEMPERATURE_GPU, &temp)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(temp), errorString(r)
}
func (h handle) deviceGetUtilizationRates() (*uint, *uint, error) {
var usage C.nvmlUtilization_t
r := C.nvmlDeviceGetUtilizationRates(h.dev, &usage)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
return uintPtr(usage.gpu), uintPtr(usage.memory), errorString(r)
}
func (h handle) deviceGetEncoderUtilization() (*uint, error) {
var usage, sampling C.uint
r := C.nvmlDeviceGetEncoderUtilization(h.dev, &usage, &sampling)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(usage), errorString(r)
}
func (h handle) deviceGetDecoderUtilization() (*uint, error) {
var usage, sampling C.uint
r := C.nvmlDeviceGetDecoderUtilization(h.dev, &usage, &sampling)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(usage), errorString(r)
}
func (h handle) deviceGetMemoryInfo() (*uint64, error) {
var mem C.nvmlMemory_t
r := C.nvmlDeviceGetMemoryInfo(h.dev, &mem)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uint64Ptr(mem.used), errorString(r)
}
func (h handle) deviceGetClockInfo() (*uint, *uint, error) {
var sm, mem C.uint
r := C.nvmlDeviceGetClockInfo(h.dev, C.NVML_CLOCK_SM, &sm)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetClockInfo(h.dev, C.NVML_CLOCK_MEM, &mem)
}
return uintPtr(sm), uintPtr(mem), errorString(r)
}
func (h handle) deviceGetMemoryErrorCounter() (*uint64, *uint64, *uint64, error) {
var l1, l2, mem C.ulonglong
r := C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_L1_CACHE, &l1)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil, nil
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_L2_CACHE, &l2)
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_DEVICE_MEMORY, &mem)
}
return uint64Ptr(l1), uint64Ptr(l2), uint64Ptr(mem), errorString(r)
}
func (h handle) deviceGetPcieThroughput() (*uint, *uint, error) {
var rx, tx C.uint
r := C.nvmlDeviceGetPcieThroughput(h.dev, C.NVML_PCIE_UTIL_RX_BYTES, &rx)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetPcieThroughput(h.dev, C.NVML_PCIE_UTIL_TX_BYTES, &tx)
}
return uintPtr(rx), uintPtr(tx), errorString(r)
}
func (h handle) deviceGetComputeRunningProcesses() ([]uint, []uint64, error) {
	var procs [szProcs]C.nvmlProcessInfo_t
	var count = C.uint(szProcs)

	r := C.nvmlDeviceGetComputeRunningProcesses(h.dev, &count, &procs[0])
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil, nil
	}
	n := int(count)
	if n > szProcs {
		// NVML sets count to the required size when the buffer is too
		// small; clamp so we only index what was actually written.
		n = szProcs
	}
	pids := make([]uint, n)
	mems := make([]uint64, n)
	for i := 0; i < n; i++ {
		pids[i] = uint(procs[i].pid)
		mems[i] = uint64(procs[i].usedGpuMemory)
	}
	return pids, mems, errorString(r)
}

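Every getter in the file above follows one convention: on NVML_ERROR_NOT_SUPPORTED it returns nil pointers with a nil error, so callers can skip a metric without treating it as a failure. A minimal in-package sketch of how a caller is expected to branch (the helper name is hypothetical, for illustration only):

func printTemperature(h handle) {
	temp, err := h.deviceGetTemperature()
	if err != nil {
		// a real NVML failure
		fmt.Println(err)
		return
	}
	if temp == nil {
		// query unsupported on this device; skip the metric
		return
	}
	fmt.Printf("GPU temperature: %d C\n", *temp)
}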

@@ -0,0 +1,381 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package nvml
// #include "nvml_dl.h"
import "C"
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"strconv"
"strings"
)
var (
ErrCPUAffinity = errors.New("failed to retrieve CPU affinity")
ErrUnsupportedP2PLink = errors.New("unsupported P2P link type")
ErrUnsupportedGPU = errors.New("unsupported GPU device")
)
type P2PLinkType uint
const (
P2PLinkUnknown P2PLinkType = iota
P2PLinkCrossCPU
P2PLinkSameCPU
P2PLinkHostBridge
P2PLinkMultiSwitch
P2PLinkSingleSwitch
P2PLinkSameBoard
)
type P2PLink struct {
BusID string
Link P2PLinkType
}
func (t P2PLinkType) String() string {
switch t {
case P2PLinkCrossCPU:
return "Cross CPU socket"
case P2PLinkSameCPU:
return "Same CPU socket"
case P2PLinkHostBridge:
return "Host PCI bridge"
case P2PLinkMultiSwitch:
return "Multiple PCI switches"
case P2PLinkSingleSwitch:
return "Single PCI switch"
case P2PLinkSameBoard:
return "Same board"
case P2PLinkUnknown:
}
return "N/A"
}
type ClockInfo struct {
Cores *uint
Memory *uint
}
type PCIInfo struct {
BusID string
BAR1 *uint64
Bandwidth *uint
}
type Device struct {
handle
UUID string
Path string
Model *string
Power *uint
CPUAffinity *uint
PCI PCIInfo
Clocks ClockInfo
Topology []P2PLink
}
type UtilizationInfo struct {
GPU *uint
Memory *uint
Encoder *uint
Decoder *uint
}
type PCIThroughputInfo struct {
RX *uint
TX *uint
}
type PCIStatusInfo struct {
BAR1Used *uint64
Throughput PCIThroughputInfo
}
type ECCErrorsInfo struct {
L1Cache *uint64
L2Cache *uint64
Global *uint64
}
type MemoryInfo struct {
GlobalUsed *uint64
ECCErrors ECCErrorsInfo
}
type ProcessInfo struct {
PID uint
Name string
MemoryUsed uint64
}
type DeviceStatus struct {
Power *uint
Temperature *uint
Utilization UtilizationInfo
Memory MemoryInfo
Clocks ClockInfo
PCI PCIStatusInfo
Processes []ProcessInfo
}
// assert panics on a non-nil error; NewDevice, NewDeviceLite and Status
// recover the panic and return it as their error value.
func assert(err error) {
	if err != nil {
		panic(err)
	}
}
func Init() error {
return init_()
}
func Shutdown() error {
return shutdown()
}
func GetDeviceCount() (uint, error) {
return deviceGetCount()
}
func GetDriverVersion() (string, error) {
return systemGetDriverVersion()
}
func numaNode(busid string) (uint, error) {
b, err := ioutil.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", strings.ToLower(busid)))
if err != nil {
// XXX report node 0 if NUMA support isn't enabled
return 0, nil
}
node, err := strconv.ParseInt(string(bytes.TrimSpace(b)), 10, 8)
if err != nil {
return 0, fmt.Errorf("%v: %v", ErrCPUAffinity, err)
}
if node < 0 {
node = 0 // XXX report node 0 instead of NUMA_NO_NODE
}
return uint(node), nil
}
func pciBandwidth(gen, width *uint) *uint {
	// Approximate per-lane PCIe throughput by link generation, in MB/s.
	m := map[uint]uint{
		1: 250,
		2: 500,
		3: 985,
		4: 1969,
	}
if gen == nil || width == nil {
return nil
}
bw := m[*gen] * *width
return &bw
}
func NewDevice(idx uint) (device *Device, err error) {
defer func() {
if r := recover(); r != nil {
err = r.(error)
}
}()
h, err := deviceGetHandleByIndex(idx)
assert(err)
model, err := h.deviceGetName()
assert(err)
uuid, err := h.deviceGetUUID()
assert(err)
minor, err := h.deviceGetMinorNumber()
assert(err)
power, err := h.deviceGetPowerManagementLimit()
assert(err)
busid, err := h.deviceGetPciInfo()
assert(err)
bar1, _, err := h.deviceGetBAR1MemoryInfo()
assert(err)
pcig, err := h.deviceGetMaxPcieLinkGeneration()
assert(err)
pciw, err := h.deviceGetMaxPcieLinkWidth()
assert(err)
ccore, cmem, err := h.deviceGetMaxClockInfo()
assert(err)
if minor == nil || busid == nil || uuid == nil {
return nil, ErrUnsupportedGPU
}
path := fmt.Sprintf("/dev/nvidia%d", *minor)
node, err := numaNode(*busid)
assert(err)
device = &Device{
handle: h,
UUID: *uuid,
Path: path,
Model: model,
Power: power,
CPUAffinity: &node,
PCI: PCIInfo{
BusID: *busid,
BAR1: bar1,
Bandwidth: pciBandwidth(pcig, pciw), // MB/s
},
Clocks: ClockInfo{
Cores: ccore, // MHz
Memory: cmem, // MHz
},
}
if power != nil {
*device.Power /= 1000 // W
}
if bar1 != nil {
*device.PCI.BAR1 /= 1024 * 1024 // MiB
}
return
}
func NewDeviceLite(idx uint) (device *Device, err error) {
defer func() {
if r := recover(); r != nil {
err = r.(error)
}
}()
h, err := deviceGetHandleByIndex(idx)
assert(err)
uuid, err := h.deviceGetUUID()
assert(err)
minor, err := h.deviceGetMinorNumber()
assert(err)
busid, err := h.deviceGetPciInfo()
assert(err)
if minor == nil || busid == nil || uuid == nil {
return nil, ErrUnsupportedGPU
}
path := fmt.Sprintf("/dev/nvidia%d", *minor)
device = &Device{
handle: h,
UUID: *uuid,
Path: path,
PCI: PCIInfo{
BusID: *busid,
},
}
return
}
func (d *Device) Status() (status *DeviceStatus, err error) {
defer func() {
if r := recover(); r != nil {
err = r.(error)
}
}()
power, err := d.deviceGetPowerUsage()
assert(err)
temp, err := d.deviceGetTemperature()
assert(err)
ugpu, umem, err := d.deviceGetUtilizationRates()
assert(err)
uenc, err := d.deviceGetEncoderUtilization()
assert(err)
udec, err := d.deviceGetDecoderUtilization()
assert(err)
mem, err := d.deviceGetMemoryInfo()
assert(err)
ccore, cmem, err := d.deviceGetClockInfo()
assert(err)
_, bar1, err := d.deviceGetBAR1MemoryInfo()
assert(err)
pids, pmems, err := d.deviceGetComputeRunningProcesses()
assert(err)
el1, el2, emem, err := d.deviceGetMemoryErrorCounter()
assert(err)
pcirx, pcitx, err := d.deviceGetPcieThroughput()
assert(err)
status = &DeviceStatus{
Power: power,
Temperature: temp, // °C
Utilization: UtilizationInfo{
GPU: ugpu, // %
Memory: umem, // %
Encoder: uenc, // %
Decoder: udec, // %
},
Memory: MemoryInfo{
GlobalUsed: mem,
ECCErrors: ECCErrorsInfo{
L1Cache: el1,
L2Cache: el2,
Global: emem,
},
},
Clocks: ClockInfo{
Cores: ccore, // MHz
Memory: cmem, // MHz
},
PCI: PCIStatusInfo{
BAR1Used: bar1,
Throughput: PCIThroughputInfo{
RX: pcirx,
TX: pcitx,
},
},
}
if power != nil {
*status.Power /= 1000 // W
}
if mem != nil {
*status.Memory.GlobalUsed /= 1024 * 1024 // MiB
}
if bar1 != nil {
*status.PCI.BAR1Used /= 1024 * 1024 // MiB
}
if pcirx != nil {
*status.PCI.Throughput.RX /= 1000 // MB/s
}
if pcitx != nil {
*status.PCI.Throughput.TX /= 1000 // MB/s
}
for i := range pids {
name, err := systemGetProcessName(pids[i])
assert(err)
status.Processes = append(status.Processes, ProcessInfo{
PID: pids[i],
Name: name,
MemoryUsed: pmems[i] / (1024 * 1024), // MiB
})
}
return
}
func GetP2PLink(dev1, dev2 *Device) (link P2PLinkType, err error) {
level, err := deviceGetTopologyCommonAncestor(dev1.handle, dev2.handle)
if err != nil || level == nil {
return P2PLinkUnknown, err
}
switch *level {
case C.NVML_TOPOLOGY_INTERNAL:
link = P2PLinkSameBoard
case C.NVML_TOPOLOGY_SINGLE:
link = P2PLinkSingleSwitch
case C.NVML_TOPOLOGY_MULTIPLE:
link = P2PLinkMultiSwitch
case C.NVML_TOPOLOGY_HOSTBRIDGE:
link = P2PLinkHostBridge
case C.NVML_TOPOLOGY_CPU:
link = P2PLinkSameCPU
case C.NVML_TOPOLOGY_SYSTEM:
link = P2PLinkCrossCPU
default:
err = ErrUnsupportedP2PLink
}
return
}

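Taken together, the two Go files expose a small public surface: Init/Shutdown, GetDeviceCount, GetDriverVersion, NewDevice/NewDeviceLite, Device.Status and GetP2PLink. A minimal caller sketch, assuming the package is vendored somewhere importable (the import path below is a placeholder, not the canonical one):

package main

import (
	"fmt"
	"log"

	"nvml" // placeholder import path; point it at wherever this package lives
)

func main() {
	if err := nvml.Init(); err != nil {
		log.Fatal(err)
	}
	defer nvml.Shutdown()

	count, err := nvml.GetDeviceCount()
	if err != nil {
		log.Fatal(err)
	}
	for i := uint(0); i < count; i++ {
		dev, err := nvml.NewDevice(i)
		if err != nil {
			log.Fatal(err)
		}
		status, err := dev.Status()
		if err != nil {
			log.Fatal(err)
		}
		// Pointer fields are nil when a metric is unsupported on this GPU.
		if status.Temperature != nil {
			fmt.Printf("GPU %d (%s): %d C\n", i, dev.UUID, *status.Temperature)
		}
	}
}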

@@ -0,0 +1,46 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
#include <stddef.h>
#include <dlfcn.h>
#include "nvml_dl.h"
/* DLSYM resolves an NVML symbol at run time, failing soft with
 * NVML_ERROR_FUNCTION_NOT_FOUND when the loaded driver library does
 * not export it (e.g. on older drivers). */
#define DLSYM(x, sym)                                   \
do {                                                    \
        dlerror();                                      \
        x = dlsym(handle, #sym);                        \
        if (dlerror() != NULL) {                        \
                return (NVML_ERROR_FUNCTION_NOT_FOUND); \
        }                                               \
} while (0)

typedef nvmlReturn_t (*nvmlSym_t)();

static void *handle;
nvmlReturn_t NVML_DL(nvmlInit)(void)
{
        /* Load the driver's NVML at run time so the binary carries no
         * link-time dependency on libnvidia-ml; this pairs with the cgo
         * flag --unresolved-symbols=ignore-in-object-files. */
        handle = dlopen("libnvidia-ml.so.1", RTLD_LAZY | RTLD_GLOBAL);
        if (handle == NULL) {
                return (NVML_ERROR_LIBRARY_NOT_FOUND);
        }
        return (nvmlInit());
}
nvmlReturn_t NVML_DL(nvmlShutdown)(void)
{
nvmlReturn_t r = nvmlShutdown();
if (r != NVML_SUCCESS) {
return (r);
}
return (dlclose(handle) ? NVML_ERROR_UNKNOWN : NVML_SUCCESS);
}
nvmlReturn_t NVML_DL(nvmlDeviceGetTopologyCommonAncestor)(
nvmlDevice_t dev1, nvmlDevice_t dev2, nvmlGpuTopologyLevel_t *info)
{
nvmlSym_t sym;
DLSYM(sym, nvmlDeviceGetTopologyCommonAncestor);
return ((*sym)(dev1, dev2, info));
}


@@ -0,0 +1,15 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
#ifndef _NVML_DL_H_
#define _NVML_DL_H_
#include <nvml.h>
#define NVML_DL(x) x##_dl
extern nvmlReturn_t NVML_DL(nvmlInit)(void);
extern nvmlReturn_t NVML_DL(nvmlShutdown)(void);
extern nvmlReturn_t NVML_DL(nvmlDeviceGetTopologyCommonAncestor)(
nvmlDevice_t, nvmlDevice_t, nvmlGpuTopologyLevel_t *);
#endif // _NVML_DL_H_