diff --git a/docs/docs/configuration/hardware_acceleration.md b/docs/docs/configuration/hardware_acceleration.md index e70e57497..393350e62 100644 --- a/docs/docs/configuration/hardware_acceleration.md +++ b/docs/docs/configuration/hardware_acceleration.md @@ -175,6 +175,16 @@ For more information on the various values across different distributions, see h Depending on your OS and kernel configuration, you may need to change the `/proc/sys/kernel/perf_event_paranoid` kernel tunable. You can test the change by running `sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid'` which will persist until a reboot. Make it permanent by running `sudo sh -c 'echo kernel.perf_event_paranoid=2 >> /etc/sysctl.d/local.conf'` +#### Stats for SR-IOV devices + +When using virtualized GPUs via SR-IOV, `intel_gpu_top` needs additional arguments for GPU stats to function. Enable them with the following config: + +```yaml +telemetry: + stats: + sriov: True +``` + ## AMD/ATI GPUs (Radeon HD 2000 and newer GPUs) via libva-mesa-driver VAAPI supports automatic profile selection so it will work automatically with both H.264 and H.265 streams. diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md index ad7ec90c5..7b682e3de 100644 --- a/docs/docs/configuration/reference.md +++ b/docs/docs/configuration/reference.md @@ -817,11 +817,13 @@ telemetry: - lo # Optional: Configure system stats stats: - # Enable AMD GPU stats (default: shown below) + # Optional: Enable AMD GPU stats (default: shown below) amd_gpu_stats: True - # Enable Intel GPU stats (default: shown below) + # Optional: Enable Intel GPU stats (default: shown below) intel_gpu_stats: True - # Enable network bandwidth stats monitoring for camera ffmpeg processes, go2rtc, and object detectors. 
(default: shown below) + # Optional: Treat GPU as SR-IOV to fix GPU stats (default: shown below) + sriov: False + # Optional: Enable network bandwidth stats monitoring for camera ffmpeg processes, go2rtc, and object detectors. (default: shown below) # NOTE: The container must either be privileged or have cap_net_admin, cap_net_raw capabilities enabled. network_bandwidth: False # Optional: Enable the latest version outbound check (default: shown below) diff --git a/frigate/config/telemetry.py b/frigate/config/telemetry.py index 0610c1f06..628d93427 100644 --- a/frigate/config/telemetry.py +++ b/frigate/config/telemetry.py @@ -11,6 +11,9 @@ class StatsConfig(FrigateBaseModel): network_bandwidth: bool = Field( default=False, title="Enable network bandwidth for ffmpeg processes." ) + sriov: bool = Field( + default=False, title="Treat device as SR-IOV to support GPU stats." + ) class TelemetryConfig(FrigateBaseModel): diff --git a/frigate/stats/util.py b/frigate/stats/util.py index d8e93c6ca..189e019ca 100644 --- a/frigate/stats/util.py +++ b/frigate/stats/util.py @@ -195,7 +195,7 @@ async def set_gpu_stats( continue # intel QSV GPU - intel_usage = get_intel_gpu_stats() + intel_usage = get_intel_gpu_stats(config.telemetry.stats.sriov) if intel_usage is not None: stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""} @@ -220,7 +220,7 @@ async def set_gpu_stats( continue # intel VAAPI GPU - intel_usage = get_intel_gpu_stats() + intel_usage = get_intel_gpu_stats(config.telemetry.stats.sriov) if intel_usage is not None: stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""} diff --git a/frigate/test/test_gpu_stats.py b/frigate/test/test_gpu_stats.py index 7c1bc4618..fd0df94c4 100644 --- a/frigate/test/test_gpu_stats.py +++ b/frigate/test/test_gpu_stats.py @@ -38,7 +38,7 @@ class TestGpuStats(unittest.TestCase): process.returncode = 124 process.stdout = self.intel_results sp.return_value = process - intel_stats = get_intel_gpu_stats() + intel_stats = 
get_intel_gpu_stats(False) print(f"the intel stats are {intel_stats}") assert intel_stats == { "gpu": "1.13%", diff --git a/frigate/util/services.py b/frigate/util/services.py index 2fd701298..d54d1beb0 100644 --- a/frigate/util/services.py +++ b/frigate/util/services.py @@ -255,7 +255,7 @@ def get_amd_gpu_stats() -> dict[str, str]: return results -def get_intel_gpu_stats() -> dict[str, str]: +def get_intel_gpu_stats(sriov: bool) -> dict[str, str]: """Get stats using intel_gpu_top.""" def get_stats_manually(output: str) -> dict[str, str]: @@ -302,6 +302,9 @@ def get_intel_gpu_stats() -> dict[str, str]: "1", ] + if sriov: + intel_gpu_top_command += ["-d", "drm:/dev/dri/card0"] + p = sp.run( intel_gpu_top_command, encoding="ascii",