Replaces sriov flag with explicit path to the Intel GPU device (#19136)

* Replaces `sriov` flag with an explicit path to the GPU device for intel GPUs

* Sort imports
This commit is contained in:
Manu Wallner 2025-07-14 05:11:25 -07:00 committed by GitHub
parent 4b72c86e77
commit 4c8f4ef9fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 25 additions and 11 deletions

View File

@ -176,16 +176,26 @@ For more information on the various values across different distributions, see h
Depending on your OS and kernel configuration, you may need to change the `/proc/sys/kernel/perf_event_paranoid` kernel tunable. You can test the change by running `sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid'` which will persist until a reboot. Make it permanent by running `sudo sh -c 'echo kernel.perf_event_paranoid=2 >> /etc/sysctl.d/local.conf'` Depending on your OS and kernel configuration, you may need to change the `/proc/sys/kernel/perf_event_paranoid` kernel tunable. You can test the change by running `sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid'` which will persist until a reboot. Make it permanent by running `sudo sh -c 'echo kernel.perf_event_paranoid=2 >> /etc/sysctl.d/local.conf'`
#### Stats for SR-IOV devices #### Stats for SR-IOV or other devices
When using virtualized GPUs via SR-IOV, additional args are needed for GPU stats to function. This can be enabled with the following config: When using virtualized GPUs via SR-IOV, you need to specify the device that `intel_gpu_top` should gather stats from. This example may work for some systems using SR-IOV:
```yaml ```yaml
telemetry: telemetry:
stats: stats:
sriov: True intel_gpu_device: "sriov"
``` ```
For other virtualized GPUs, try specifying the direct path to the device instead:
```yaml
telemetry:
stats:
intel_gpu_device: "drm:/dev/dri/card0"
```
If you are passing in a device path, make sure you've passed the device through to the container.
## AMD/ATI GPUs (Radeon HD 2000 and newer GPUs) via libva-mesa-driver ## AMD/ATI GPUs (Radeon HD 2000 and newer GPUs) via libva-mesa-driver
VAAPI supports automatic profile selection so it will work automatically with both H.264 and H.265 streams. VAAPI supports automatic profile selection so it will work automatically with both H.264 and H.265 streams.

View File

@ -903,7 +903,7 @@ telemetry:
# Optional: Enable Intel GPU stats (default: shown below) # Optional: Enable Intel GPU stats (default: shown below)
intel_gpu_stats: True intel_gpu_stats: True
# Optional: Treat GPU as SR-IOV to fix GPU stats (default: shown below) # Optional: Device for intel_gpu_top to gather stats from, e.g. "sriov" or "drm:/dev/dri/card0" (default: shown below)
sriov: False intel_gpu_device: None
# Optional: Enable network bandwidth stats monitoring for camera ffmpeg processes, go2rtc, and object detectors. (default: shown below) # Optional: Enable network bandwidth stats monitoring for camera ffmpeg processes, go2rtc, and object detectors. (default: shown below)
# NOTE: The container must either be privileged or have cap_net_admin, cap_net_raw capabilities enabled. # NOTE: The container must either be privileged or have cap_net_admin, cap_net_raw capabilities enabled.
network_bandwidth: False network_bandwidth: False

View File

@ -1,3 +1,5 @@
from typing import Optional
from pydantic import Field from pydantic import Field
from .base import FrigateBaseModel from .base import FrigateBaseModel
@ -11,8 +13,8 @@ class StatsConfig(FrigateBaseModel):
network_bandwidth: bool = Field( network_bandwidth: bool = Field(
default=False, title="Enable network bandwidth for ffmpeg processes." default=False, title="Enable network bandwidth for ffmpeg processes."
) )
sriov: bool = Field( intel_gpu_device: Optional[str] = Field(
default=False, title="Treat device as SR-IOV to support GPU stats." default=None, title="Define the device to use when gathering SR-IOV stats."
) )

View File

@ -201,7 +201,7 @@ async def set_gpu_stats(
continue continue
# intel QSV GPU # intel QSV GPU
intel_usage = get_intel_gpu_stats(config.telemetry.stats.sriov) intel_usage = get_intel_gpu_stats(config.telemetry.stats.intel_gpu_device)
if intel_usage is not None: if intel_usage is not None:
stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""} stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""}
@ -226,7 +226,9 @@ async def set_gpu_stats(
continue continue
# intel VAAPI GPU # intel VAAPI GPU
intel_usage = get_intel_gpu_stats(config.telemetry.stats.sriov) intel_usage = get_intel_gpu_stats(
config.telemetry.stats.intel_gpu_device
)
if intel_usage is not None: if intel_usage is not None:
stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""} stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""}

View File

@ -257,7 +257,7 @@ def get_amd_gpu_stats() -> Optional[dict[str, str]]:
return results return results
def get_intel_gpu_stats(sriov: bool) -> Optional[dict[str, str]]: def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, str]]:
"""Get stats using intel_gpu_top.""" """Get stats using intel_gpu_top."""
def get_stats_manually(output: str) -> dict[str, str]: def get_stats_manually(output: str) -> dict[str, str]:
@ -304,8 +304,8 @@ def get_intel_gpu_stats(sriov: bool) -> Optional[dict[str, str]]:
"1", "1",
] ]
if sriov: if intel_gpu_device:
intel_gpu_top_command += ["-d", "sriov"] intel_gpu_top_command += ["-d", intel_gpu_device]
try: try:
p = sp.run( p = sp.run(