Catch case where user has multiple of the same kind of GPU (#15903)

This commit is contained in:
Nicolas Mowen 2025-01-08 16:17:57 -07:00 committed by GitHub
parent d57a61b50f
commit b265b6b190
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -390,12 +390,22 @@ def try_get_info(f, h, default="N/A"):
def get_nvidia_gpu_stats() -> dict[int, dict]: def get_nvidia_gpu_stats() -> dict[int, dict]:
names: dict[str, int] = {}
results = {} results = {}
try: try:
nvml.nvmlInit() nvml.nvmlInit()
deviceCount = nvml.nvmlDeviceGetCount() deviceCount = nvml.nvmlDeviceGetCount()
for i in range(deviceCount): for i in range(deviceCount):
handle = nvml.nvmlDeviceGetHandleByIndex(i) handle = nvml.nvmlDeviceGetHandleByIndex(i)
gpu_name = nvml.nvmlDeviceGetName(handle)
# handle the case where the user has several GPUs of the same model: the first keeps its plain name, later ones get a " (N)" suffix
if gpu_name in names:
names[gpu_name] += 1
gpu_name += f" ({names.get(gpu_name)})"
else:
names[gpu_name] = 1
meminfo = try_get_info(nvml.nvmlDeviceGetMemoryInfo, handle) meminfo = try_get_info(nvml.nvmlDeviceGetMemoryInfo, handle)
util = try_get_info(nvml.nvmlDeviceGetUtilizationRates, handle) util = try_get_info(nvml.nvmlDeviceGetUtilizationRates, handle)
enc = try_get_info(nvml.nvmlDeviceGetEncoderUtilization, handle) enc = try_get_info(nvml.nvmlDeviceGetEncoderUtilization, handle)
@ -423,7 +433,7 @@ def get_nvidia_gpu_stats() -> dict[int, dict]:
dec_util = -1 dec_util = -1
results[i] = { results[i] = {
"name": nvml.nvmlDeviceGetName(handle), "name": gpu_name,
"gpu": gpu_util, "gpu": gpu_util,
"mem": gpu_mem_util, "mem": gpu_mem_util,
"enc": enc_util, "enc": enc_util,