Check nvidia gpu features on init

When a required feature was not supported, gBar would crash inside the
query functions, with no way to manually bypass NvidiaGPU. Now
NvidiaGPU checks during initialization whether all required queries are
supported and disables the module if any of them returns an error.

Fixes https://github.com/scorpion-26/gBar/issues/52
This commit is contained in:
scorpion-26 2023-09-21 19:30:42 +08:00 committed by GitHub
parent 471bc6e719
commit 96485f408e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -7,10 +7,30 @@
#ifdef WITH_NVIDIA
namespace NvidiaGPU
{
// Result record for the utilization query: a pointer to it is passed as the
// output argument of nvmlDeviceGetUtilizationRates.
// NOTE(review): the layout presumably has to mirror the library's own
// utilization struct (two 32-bit fields, in this order) — confirm against the
// NVML headers before reordering or resizing anything.
struct GPUUtilization
{
// GPU utilization. Presumably a percentage (0-100) — TODO confirm.
uint32_t gpu;
// Memory utilization. Presumably a percentage (0-100) — TODO confirm.
uint32_t vram;
};
// Result record for the memory query: a pointer to it is passed as the
// output argument of nvmlDeviceGetMemoryInfo.
// NOTE(review): the layout presumably has to mirror the library's own memory
// struct (three 64-bit fields, in this order) — confirm against the NVML
// headers before reordering or resizing anything.
struct VRAM
{
// Total memory. The "B" suffix suggests bytes — TODO confirm.
uint64_t totalB;
// Free memory. The "B" suffix suggests bytes — TODO confirm.
uint64_t freeB;
// Used memory. The "B" suffix suggests bytes — TODO confirm.
uint64_t usedB;
};
// Handle of the dynamically loaded library; used with dlsym to resolve
// symbols and released with dlclose in Shutdown. Init returns early when this
// is already non-null.
static void* nvmldl;
// Opaque device handle, filled in by nvmlDeviceGetHandleByIndex for index 0
// during Init and passed as the first argument to every query below.
static void* nvmlGPUHandle;

// Preloaded Query functions
// Each pointer is resolved once in Init via dlsym. The functions return an
// int status; callers treat 0 as success and any other value as an error.

// Writes utilization data for the given device handle into the GPUUtilization.
typedef int (*PFN_nvmlDeviceGetUtilizationRates)(void*, struct GPUUtilization*);
static PFN_nvmlDeviceGetUtilizationRates nvmlDeviceGetUtilizationRates;

// Writes a temperature reading into the last argument. Callers pass 0 as the
// second argument.
// NOTE(review): the second argument is presumably a sensor selector — confirm.
typedef int (*PFN_nvmlDeviceGetTemperature)(void*, uint32_t, uint32_t*);
static PFN_nvmlDeviceGetTemperature nvmlDeviceGetTemperature;

// Writes memory information for the given device handle into the VRAM struct.
typedef int (*PFN_nvmlDeviceGetMemoryInfo)(void*, struct VRAM*);
static PFN_nvmlDeviceGetMemoryInfo nvmlDeviceGetMemoryInfo;
inline void Init()
{
if (nvmldl || !RuntimeConfig::Get().hasNvidia)
@ -40,6 +60,37 @@ namespace NvidiaGPU
auto nvmlDeviceGetHandle = (PFN_nvmlDeviceGetHandle)dlsym(nvmldl, "nvmlDeviceGetHandleByIndex");
res = nvmlDeviceGetHandle(0, &nvmlGPUHandle);
ASSERT(res == 0, "Failed getting device (Error: " << res << ")!");
// Dynamically load functions
nvmlDeviceGetUtilizationRates = (PFN_nvmlDeviceGetUtilizationRates)dlsym(nvmldl, "nvmlDeviceGetUtilizationRates");
nvmlDeviceGetTemperature = (PFN_nvmlDeviceGetTemperature)dlsym(nvmldl, "nvmlDeviceGetTemperature");
nvmlDeviceGetMemoryInfo = (PFN_nvmlDeviceGetMemoryInfo)dlsym(nvmldl, "nvmlDeviceGetMemoryInfo");
// Check if all information is available
GPUUtilization util;
res = nvmlDeviceGetUtilizationRates(nvmlGPUHandle, &util);
if (res != 0)
{
LOG("Failed querying utilization rates (Error: " << res << "), disabling Nvidia GPU!");
RuntimeConfig::Get().hasNvidia = false;
return;
}
uint32_t temp;
res = nvmlDeviceGetTemperature(nvmlGPUHandle, 0, &temp);
if (res != 0)
{
LOG("Failed querying temperature (Error: " << res << "), disabling Nvidia GPU!");
RuntimeConfig::Get().hasNvidia = false;
return;
}
VRAM mem;
res = nvmlDeviceGetMemoryInfo(nvmlGPUHandle, &mem);
if (res != 0)
{
LOG("Failed querying VRAM (Error: " << res << "), disabling Nvidia GPU!");
RuntimeConfig::Get().hasNvidia = false;
return;
}
}
inline void Shutdown()
@ -48,18 +99,6 @@ namespace NvidiaGPU
dlclose(nvmldl);
}
struct GPUUtilization
{
uint32_t gpu;
uint32_t vram;
};
struct VRAM
{
uint64_t totalB;
uint64_t freeB;
uint64_t usedB;
};
inline GPUUtilization GetUtilization()
{
if (!RuntimeConfig::Get().hasNvidia)
@ -69,9 +108,6 @@ namespace NvidiaGPU
}
GPUUtilization util;
typedef int (*PFN_nvmlDeviceGetUtilizationRates)(void*, GPUUtilization*);
auto nvmlDeviceGetUtilizationRates = (PFN_nvmlDeviceGetUtilizationRates)dlsym(nvmldl, "nvmlDeviceGetUtilizationRates");
int res = nvmlDeviceGetUtilizationRates(nvmlGPUHandle, &util);
ASSERT(res == 0, "Failed getting utilization (Error: " << res << ")!");
return util;
@ -85,8 +121,6 @@ namespace NvidiaGPU
return {};
}
typedef int (*PFN_nvmlDeviceGetTemperature)(void*, uint32_t, uint32_t*);
auto nvmlDeviceGetTemperature = (PFN_nvmlDeviceGetTemperature)dlsym(nvmldl, "nvmlDeviceGetTemperature");
uint32_t temp;
int res = nvmlDeviceGetTemperature(nvmlGPUHandle, 0, &temp);
ASSERT(res == 0, "Failed getting temperature (Error: " << res << ")!");
@ -101,8 +135,6 @@ namespace NvidiaGPU
return {};
}
typedef int (*PFN_nvmlDeviceGetMemoryInfo)(void*, VRAM*);
auto nvmlDeviceGetMemoryInfo = (PFN_nvmlDeviceGetMemoryInfo)dlsym(nvmldl, "nvmlDeviceGetMemoryInfo");
VRAM mem;
int res = nvmlDeviceGetMemoryInfo(nvmlGPUHandle, &mem);
ASSERT(res == 0, "Failed getting memory (Error: " << res << ")!");