mirror of
https://github.com/blakeblackshear/frigate.git
synced 2025-02-05 00:15:51 +01:00
Add support for SR-IOV GPU stats (#15796)
* Add option to treat GPU as SRIOV in order for stats to work correctly * Add to intel docs * fix tests
This commit is contained in:
parent
f2cc16bf3c
commit
eb85079f74
@ -175,6 +175,16 @@ For more information on the various values across different distributions, see h
|
|||||||
|
|
||||||
Depending on your OS and kernel configuration, you may need to change the `/proc/sys/kernel/perf_event_paranoid` kernel tunable. You can test the change by running `sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid'` which will persist until a reboot. Make it permanent by running `sudo sh -c 'echo kernel.perf_event_paranoid=2 >> /etc/sysctl.d/local.conf'`
|
Depending on your OS and kernel configuration, you may need to change the `/proc/sys/kernel/perf_event_paranoid` kernel tunable. You can test the change by running `sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid'` which will persist until a reboot. Make it permanent by running `sudo sh -c 'echo kernel.perf_event_paranoid=2 >> /etc/sysctl.d/local.conf'`
|
||||||
|
|
||||||
|
#### Stats for SR-IOV devices
|
||||||
|
|
||||||
|
When using virtualized GPUs via SR-IOV, additional arguments must be passed to `intel_gpu_top` for GPU stats to function. Frigate adds these arguments when the `sriov` option is enabled with the following config:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
telemetry:
|
||||||
|
stats:
|
||||||
|
sriov: True
|
||||||
|
```
|
||||||
|
|
||||||
## AMD/ATI GPUs (Radeon HD 2000 and newer GPUs) via libva-mesa-driver
|
## AMD/ATI GPUs (Radeon HD 2000 and newer GPUs) via libva-mesa-driver
|
||||||
|
|
||||||
VAAPI supports automatic profile selection so it will work automatically with both H.264 and H.265 streams.
|
VAAPI supports automatic profile selection so it will work automatically with both H.264 and H.265 streams.
|
||||||
|
@ -817,11 +817,13 @@ telemetry:
|
|||||||
- lo
|
- lo
|
||||||
# Optional: Configure system stats
|
# Optional: Configure system stats
|
||||||
stats:
|
stats:
|
||||||
# Enable AMD GPU stats (default: shown below)
|
# Optional: Enable AMD GPU stats (default: shown below)
|
||||||
amd_gpu_stats: True
|
amd_gpu_stats: True
|
||||||
# Enable Intel GPU stats (default: shown below)
|
# Optional: Enable Intel GPU stats (default: shown below)
|
||||||
intel_gpu_stats: True
|
intel_gpu_stats: True
|
||||||
# Enable network bandwidth stats monitoring for camera ffmpeg processes, go2rtc, and object detectors. (default: shown below)
|
# Optional: Treat GPU as SR-IOV to fix GPU stats (default: shown below)
|
||||||
|
sriov: False
|
||||||
|
# Optional: Enable network bandwidth stats monitoring for camera ffmpeg processes, go2rtc, and object detectors. (default: shown below)
|
||||||
# NOTE: The container must either be privileged or have cap_net_admin, cap_net_raw capabilities enabled.
|
# NOTE: The container must either be privileged or have cap_net_admin, cap_net_raw capabilities enabled.
|
||||||
network_bandwidth: False
|
network_bandwidth: False
|
||||||
# Optional: Enable the latest version outbound check (default: shown below)
|
# Optional: Enable the latest version outbound check (default: shown below)
|
||||||
|
@ -11,6 +11,9 @@ class StatsConfig(FrigateBaseModel):
|
|||||||
network_bandwidth: bool = Field(
|
network_bandwidth: bool = Field(
|
||||||
default=False, title="Enable network bandwidth for ffmpeg processes."
|
default=False, title="Enable network bandwidth for ffmpeg processes."
|
||||||
)
|
)
|
||||||
|
sriov: bool = Field(
|
||||||
|
default=False, title="Treat device as SR-IOV to support GPU stats."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TelemetryConfig(FrigateBaseModel):
|
class TelemetryConfig(FrigateBaseModel):
|
||||||
|
@ -195,7 +195,7 @@ async def set_gpu_stats(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# intel QSV GPU
|
# intel QSV GPU
|
||||||
intel_usage = get_intel_gpu_stats()
|
intel_usage = get_intel_gpu_stats(config.telemetry.stats.sriov)
|
||||||
|
|
||||||
if intel_usage is not None:
|
if intel_usage is not None:
|
||||||
stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""}
|
stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""}
|
||||||
@ -220,7 +220,7 @@ async def set_gpu_stats(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# intel VAAPI GPU
|
# intel VAAPI GPU
|
||||||
intel_usage = get_intel_gpu_stats()
|
intel_usage = get_intel_gpu_stats(config.telemetry.stats.sriov)
|
||||||
|
|
||||||
if intel_usage is not None:
|
if intel_usage is not None:
|
||||||
stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""}
|
stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""}
|
||||||
|
@ -38,7 +38,7 @@ class TestGpuStats(unittest.TestCase):
|
|||||||
process.returncode = 124
|
process.returncode = 124
|
||||||
process.stdout = self.intel_results
|
process.stdout = self.intel_results
|
||||||
sp.return_value = process
|
sp.return_value = process
|
||||||
intel_stats = get_intel_gpu_stats()
|
intel_stats = get_intel_gpu_stats(False)
|
||||||
print(f"the intel stats are {intel_stats}")
|
print(f"the intel stats are {intel_stats}")
|
||||||
assert intel_stats == {
|
assert intel_stats == {
|
||||||
"gpu": "1.13%",
|
"gpu": "1.13%",
|
||||||
|
@ -255,7 +255,7 @@ def get_amd_gpu_stats() -> dict[str, str]:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def get_intel_gpu_stats() -> dict[str, str]:
|
def get_intel_gpu_stats(sriov: bool) -> dict[str, str]:
|
||||||
"""Get stats using intel_gpu_top."""
|
"""Get stats using intel_gpu_top."""
|
||||||
|
|
||||||
def get_stats_manually(output: str) -> dict[str, str]:
|
def get_stats_manually(output: str) -> dict[str, str]:
|
||||||
@ -302,6 +302,9 @@ def get_intel_gpu_stats() -> dict[str, str]:
|
|||||||
"1",
|
"1",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if sriov:
|
||||||
|
intel_gpu_top_command += ["-d", "drm:/dev/dri/card0"]
|
||||||
|
|
||||||
p = sp.run(
|
p = sp.run(
|
||||||
intel_gpu_top_command,
|
intel_gpu_top_command,
|
||||||
encoding="ascii",
|
encoding="ascii",
|
||||||
|
Loading…
Reference in New Issue
Block a user