Add support for NPU statistics in metrics page (#17806)

* Add npu usages as a statistic

* Support showing NPU stats in dashboard

* Add sys volume mount for npu usages

* Fix type

* Simplify check

* Cleanup

* Cleanup
This commit is contained in:
Nicolas Mowen 2025-04-19 08:20:22 -06:00 committed by GitHub
parent c8e22a3653
commit 80a13e43e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 219 additions and 95 deletions

View File

@ -165,6 +165,8 @@ devices:
- /dev/dma_heap
- /dev/rga
- /dev/mpp_service
volumes:
- /sys/:/sys/:ro
```
or add these options to your `docker run` command:
@ -175,7 +177,8 @@ or add these options to your `docker run` command:
--device /dev/dri \
--device /dev/dma_heap \
--device /dev/rga \
--device /dev/mpp_service
--device /dev/mpp_service \
--volume /sys/:/sys/:ro
```
#### Configuration

View File

@ -24,6 +24,7 @@ from frigate.util.services import (
get_intel_gpu_stats,
get_jetson_stats,
get_nvidia_gpu_stats,
get_rockchip_npu_stats,
is_vaapi_amd_driver,
)
from frigate.version import VERSION
@ -109,6 +110,7 @@ def get_processing_stats(
stats_tasks = [
asyncio.create_task(set_gpu_stats(config, stats, hwaccel_errors)),
asyncio.create_task(set_cpu_stats(stats)),
asyncio.create_task(set_npu_usages(config, stats)),
]
if config.telemetry.stats.network_bandwidth:
@ -238,6 +240,19 @@ async def set_gpu_stats(
all_stats["gpu_usages"] = stats
async def set_npu_usages(config: FrigateConfig, all_stats: dict[str, Any]) -> None:
    """Collect NPU utilization and publish it as ``all_stats["npu_usages"]``.

    Only the Rockchip NPU is probed, and only when at least one configured
    detector has type ``"rknn"``. The ``"npu_usages"`` key is left unset when
    no reading is available, so consumers never receive a ``None`` entry.

    Args:
        config: Frigate configuration; its ``detectors`` mapping is inspected.
        all_stats: Stats dictionary that is updated in place.
    """
    stats: dict[str, dict] = {}

    # The probe takes no arguments, so its result does not depend on which
    # rknn detector triggered it; one read is enough for any number of them.
    if any(detector.type == "rknn" for detector in config.detectors.values()):
        rk_usage = get_rockchip_npu_stats()

        # The probe returns None when no load could be read; skip the entry
        # instead of exporting {"rockchip": None}.
        if rk_usage:
            stats["rockchip"] = rk_usage

    if stats:
        all_stats["npu_usages"] = stats
def stats_snapshot(
config: FrigateConfig, stats_tracking: StatsTrackingTypes, hwaccel_errors: list[str]
) -> dict[str, Any]:

View File

@ -382,6 +382,23 @@ def get_intel_gpu_stats(sriov: bool) -> dict[str, str]:
return results
def get_rockchip_npu_stats() -> "dict[str, float | str] | None":
    """Get Rockchip NPU stats from ``/sys/kernel/debug/rknpu/load``.

    Every ``Core<N>: <X>%`` entry in the file is parsed and the per-core loads
    are averaged.

    Returns:
        ``{"npu": <mean load in percent, rounded to 2 decimals>, "mem": "-"}``,
        or ``None`` when the file cannot be read or contains no core entries.
    """
    try:
        with open("/sys/kernel/debug/rknpu/load", "r") as f:
            npu_output = f.read()
    except OSError:
        # FileNotFoundError is a subclass of OSError; catching the base class
        # also treats PermissionError and other read failures as "no reading".
        return None

    core_loads = re.findall(r"Core\d+:\s*(\d+)%", npu_output)

    if not core_loads:
        return None

    percentages = [int(load) for load in core_loads]
    mean = round(sum(percentages) / len(percentages), 2)
    return {"npu": mean, "mem": "-"}
def try_get_info(f, h, default="N/A"):
try:
if h:

View File

@ -72,7 +72,9 @@
"toast": {
"success": "Copied GPU info to clipboard"
}
}
},
"npuUsage": "NPU Usage",
"npuMemory": "NPU Memory"
},
"otherProcesses": {
"title": "Other Processes",

View File

@ -4,6 +4,7 @@ export interface FrigateStats {
detectors: { [detectorKey: string]: DetectorStats };
embeddings?: EmbeddingsStats;
gpu_usages?: { [gpuKey: string]: GpuStats };
npu_usages?: { [npuKey: string]: NpuStats };
processes: { [processKey: string]: ExtraProcessStats };
service: ServiceStats;
detection_fps: number;
@ -54,6 +55,11 @@ export type GpuStats = {
pstate?: string;
};
/** Usage reading for a single NPU as delivered in `npu_usages`. */
export type NpuStats = {
  /** Memory usage as a display string. */
  mem: string;
  /** NPU load as a number; charted with a "%" unit on the metrics page. */
  npu: number;
};
export type GpuInfo = "vainfo" | "nvinfo";
export type ServiceStats = {

View File

@ -34,7 +34,7 @@ export default function GeneralMetrics({
const { data: initialStats } = useSWR<FrigateStats[]>(
[
"stats/history",
{ keys: "cpu_usages,detectors,gpu_usages,processes,service" },
{ keys: "cpu_usages,detectors,gpu_usages,npu_usages,processes,service" },
],
{
revalidateOnFocus: false,
@ -369,8 +369,57 @@ export default function GeneralMetrics({
return Object.keys(series).length > 0 ? Object.values(series) : undefined;
}, [statsHistory]);
// npu stats
// Builds one chart series per NPU key from the stats history.
// Returns an empty array until at least one numeric NPU reading exists.
const npuSeries = useMemo(() => {
  if (!statsHistory) {
    return [];
  }

  const series: {
    [key: string]: { name: string; data: { x: number; y: number }[] };
  } = {};
  let hasValidNpu = false;

  statsHistory.forEach((stats, statsIdx) => {
    if (!stats) {
      return;
    }

    Object.entries(stats.npu_usages ?? {}).forEach(([key, npuStats]) => {
      if (!(key in series)) {
        series[key] = { name: key, data: [] };
      }

      // Check the type rather than truthiness: an entry may arrive as null
      // when no reading was available, and 0 is a valid load for an idle NPU.
      if (typeof npuStats?.npu === "number") {
        hasValidNpu = true;
        series[key].data.push({ x: statsIdx + 1, y: npuStats.npu });
      }
    });
  });

  // hasValidNpu implies at least one series was created.
  return hasValidNpu ? Object.values(series) : [];
}, [statsHistory]);
// other processes stats
// Section heading: names the one hardware kind that has chart data, or the
// combined label when both kinds (or neither) have data.
const hardwareType = useMemo(() => {
  const gpuPresent = gpuSeries.length > 0;
  const npuPresent = npuSeries.length > 0;

  if (gpuPresent === npuPresent) {
    return "GPUs / NPUs";
  }

  return gpuPresent ? "GPUs" : "NPUs";
}, [gpuSeries, npuSeries]);
const otherProcessCpuSeries = useMemo(() => {
if (!statsHistory) {
return [];
@ -533,11 +582,13 @@ export default function GeneralMetrics({
)}
</div>
{(statsHistory.length == 0 || statsHistory[0].gpu_usages) && (
{(statsHistory.length == 0 ||
statsHistory[0].gpu_usages ||
statsHistory[0].npu_usages) && (
<>
<div className="mt-4 flex items-center justify-between">
<div className="text-sm font-medium text-muted-foreground">
GPUs
{hardwareType}
</div>
{canGetGpuInfo && (
<Button
@ -556,97 +607,127 @@ export default function GeneralMetrics({
gpuEncSeries?.length && "md:grid-cols-4",
)}
>
{statsHistory.length != 0 ? (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuUsage")}
</div>
{gpuSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-gpu`}
name={series.name}
unit="%"
threshold={GPUUsageThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
{statsHistory[0]?.gpu_usages && (
<>
{statsHistory.length != 0 ? (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuUsage")}
</div>
{gpuSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-gpu`}
name={series.name}
unit="%"
threshold={GPUUsageThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
) : (
<Skeleton className="aspect-video w-full" />
)}
{statsHistory.length != 0 ? (
<>
{gpuMemSeries && (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuMemory")}
</div>
{gpuMemSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-mem`}
unit="%"
name={series.name}
threshold={GPUMemThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
)}
</>
) : (
<Skeleton className="aspect-video w-full" />
)}
{statsHistory.length != 0 ? (
<>
{gpuEncSeries && gpuEncSeries?.length != 0 && (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuEncoder")}
</div>
{gpuEncSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-enc`}
unit="%"
name={series.name}
threshold={GPUMemThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
)}
</>
) : (
<Skeleton className="aspect-video w-full" />
)}
{statsHistory.length != 0 ? (
<>
{gpuDecSeries && gpuDecSeries?.length != 0 && (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuDecoder")}
</div>
{gpuDecSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-dec`}
unit="%"
name={series.name}
threshold={GPUMemThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
)}
</>
) : (
<Skeleton className="aspect-video w-full" />
)}
</>
)}
{statsHistory[0]?.npu_usages && (
<div
className={cn("mt-4 grid grid-cols-1 gap-2 sm:grid-cols-2")}
>
{statsHistory.length != 0 ? (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.npuUsage")}
</div>
{npuSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-npu`}
name={series.name}
unit="%"
threshold={GPUUsageThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
) : (
<Skeleton className="aspect-video w-full" />
)}
</div>
) : (
<Skeleton className="aspect-video w-full" />
)}
{statsHistory.length != 0 ? (
<>
{gpuMemSeries && (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuMemory")}
</div>
{gpuMemSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-mem`}
unit="%"
name={series.name}
threshold={GPUMemThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
)}
</>
) : (
<Skeleton className="aspect-video w-full" />
)}
{statsHistory.length != 0 ? (
<>
{gpuEncSeries && gpuEncSeries?.length != 0 && (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuEncoder")}
</div>
{gpuEncSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-enc`}
unit="%"
name={series.name}
threshold={GPUMemThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
)}
</>
) : (
<Skeleton className="aspect-video w-full" />
)}
{statsHistory.length != 0 ? (
<>
{gpuDecSeries && gpuDecSeries?.length != 0 && (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuDecoder")}
</div>
{gpuDecSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-dec`}
unit="%"
name={series.name}
threshold={GPUMemThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
)}
</>
) : (
<Skeleton className="aspect-video w-full" />
)}
</div>
</>