Fix missing prometheus commit (#16415)

* Add prometheus metrics

* add docs for metrics

* sidebar

* lint

* lint

---------

Co-authored-by: Mitch Ross <mitchross@users.noreply.github.com>
This commit is contained in:
Nicolas Mowen 2025-02-09 10:04:39 -07:00 committed by GitHub
parent a42ad7ead9
commit c58d2add37
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 326 additions and 0 deletions

View File

@ -52,3 +52,4 @@ pywebpush == 2.0.*
# alpr # alpr
pyclipper == 1.3.* pyclipper == 1.3.*
shapely == 2.0.* shapely == 2.0.*
prometheus-client == 0.21.*

View File

@ -0,0 +1,99 @@
---
id: metrics
title: Metrics
---
# Metrics
Frigate exposes Prometheus metrics at the `/metrics` endpoint that can be used to monitor the performance and health of your Frigate instance.
## Available Metrics
### System Metrics
- `frigate_cpu_usage_percent{pid="", name="", process="", type="", cmdline=""}` - Process CPU usage percentage
- `frigate_mem_usage_percent{pid="", name="", process="", type="", cmdline=""}` - Process memory usage percentage
- `frigate_gpu_usage_percent{gpu_name=""}` - GPU utilization percentage
- `frigate_gpu_mem_usage_percent{gpu_name=""}` - GPU memory usage percentage
### Camera Metrics
- `frigate_camera_fps{camera_name=""}` - Frames per second being consumed from your camera
- `frigate_detection_fps{camera_name=""}` - Number of times detection is run per second
- `frigate_process_fps{camera_name=""}` - Frames per second being processed
- `frigate_skipped_fps{camera_name=""}` - Frames per second skipped for processing
- `frigate_detection_enabled{camera_name=""}` - Detection enabled status for camera
- `frigate_audio_dBFS{camera_name=""}` - Audio dBFS for camera
- `frigate_audio_rms{camera_name=""}` - Audio RMS for camera
### Detector Metrics
- `frigate_detector_inference_speed_seconds{name=""}` - Time spent running object detection in seconds
- `frigate_detection_start{name=""}` - Detector start time (unix timestamp)
### Storage Metrics
- `frigate_storage_free_bytes{storage=""}` - Storage free bytes
- `frigate_storage_total_bytes{storage=""}` - Storage total bytes
- `frigate_storage_used_bytes{storage=""}` - Storage used bytes
- `frigate_storage_mount_type{mount_type="", storage=""}` - Storage mount type info
### Service Metrics
- `frigate_service_uptime_seconds` - Uptime in seconds
- `frigate_service_last_updated_timestamp` - Stats recorded time (unix timestamp)
- `frigate_device_temperature{device=""}` - Device Temperature
### Event Metrics
- `frigate_camera_events{camera="", label=""}` - Count of camera events since exporter started
## Configuring Prometheus
To scrape metrics from Frigate, add the following to your Prometheus configuration:
```yaml
scrape_configs:
  - job_name: 'frigate'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['frigate:5000']
    scrape_interval: 15s
```
## Example Queries
Here are some example PromQL queries that might be useful:
```promql
# Average CPU usage across all processes
avg(frigate_cpu_usage_percent)
# Total GPU memory usage
sum(frigate_gpu_mem_usage_percent)
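# Average detection FPS for one camera over the last 5 minutes
# (frigate_detection_fps is a gauge, so use avg_over_time rather than rate)
avg_over_time(frigate_detection_fps{camera_name="front_door"}[5m])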
# Storage usage percentage
(frigate_storage_used_bytes / frigate_storage_total_bytes) * 100
# Event count by camera in last hour
# (counters are exposed by the Python client with a `_total` suffix)
sum by (camera) (increase(frigate_camera_events_total[1h]))
```
## Grafana Dashboard
You can use these metrics to create Grafana dashboards to monitor your Frigate instance. Here are some examples of metrics you might want to track:
- CPU, Memory and GPU usage over time
- Camera FPS and detection rates
- Storage usage and trends
- Event counts by camera
- System temperatures
A sample Grafana dashboard JSON will be provided in a future update.
## Metric Types
The metrics exposed by Frigate use the following Prometheus metric types:
- **Counter**: Cumulative values that only increase (e.g., `frigate_camera_events`)
- **Gauge**: Values that can go up and down (e.g., `frigate_cpu_usage_percent`)
- **Info**: Key-value pairs for metadata (e.g., `frigate_storage_mount_type`)
For more information about Prometheus metric types, see the [Prometheus documentation](https://prometheus.io/docs/concepts/metric_types/).

View File

@ -84,6 +84,7 @@ const sidebars: SidebarsConfig = {
items: frigateHttpApiSidebar, items: frigateHttpApiSidebar,
}, },
'integrations/mqtt', 'integrations/mqtt',
'configuration/metrics',
'integrations/third_party_extensions', 'integrations/third_party_extensions',
], ],
'Frigate+': [ 'Frigate+': [

View File

@ -18,6 +18,7 @@ from fastapi.params import Depends
from fastapi.responses import JSONResponse, PlainTextResponse from fastapi.responses import JSONResponse, PlainTextResponse
from markupsafe import escape from markupsafe import escape
from peewee import operator from peewee import operator
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
from pydantic import ValidationError from pydantic import ValidationError
from frigate.api.defs.query.app_query_parameters import AppTimelineHourlyQueryParameters from frigate.api.defs.query.app_query_parameters import AppTimelineHourlyQueryParameters
@ -108,6 +109,12 @@ def stats_history(request: Request, keys: str = None):
return JSONResponse(content=request.app.stats_emitter.get_stats_history(keys)) return JSONResponse(content=request.app.stats_emitter.get_stats_history(keys))
@router.get("/metrics")
def metrics():
    """Expose Prometheus metrics endpoint"""
    # Render the current state of the metrics registry, then hand it back
    # with the content type that matches the rendered format.
    payload = generate_latest()
    return Response(content=payload, media_type=CONTENT_TYPE_LATEST)
@router.get("/config") @router.get("/config")
def config(request: Request): def config(request: Request):
config_obj: FrigateConfig = request.app.frigate_config config_obj: FrigateConfig = request.app.frigate_config

View File

@ -11,6 +11,7 @@ from typing import Optional
from frigate.comms.inter_process import InterProcessRequestor from frigate.comms.inter_process import InterProcessRequestor
from frigate.config import FrigateConfig from frigate.config import FrigateConfig
from frigate.const import FREQUENCY_STATS_POINTS from frigate.const import FREQUENCY_STATS_POINTS
from frigate.stats.prometheus import update_metrics
from frigate.stats.util import stats_snapshot from frigate.stats.util import stats_snapshot
from frigate.types import StatsTrackingTypes from frigate.types import StatsTrackingTypes
@ -67,6 +68,16 @@ class StatsEmitter(threading.Thread):
return selected_stats return selected_stats
@staticmethod
def stats_init(config, camera_metrics, detectors, processes):
    """Build the initial stats dict and publish it to the Prometheus metrics.

    The function takes no ``self``; marking it as a static method keeps
    ``StatsEmitter.stats_init(...)`` working as before and also makes it
    callable through an instance without the instance being bound to
    ``config``.

    Args:
        config: Accepted for interface compatibility; not read here.
        camera_metrics: Stored under the ``"cameras"`` key.
        detectors: Stored under the ``"detectors"`` key.
        processes: Stored under the ``"processes"`` key.

    Returns:
        The assembled stats dict.
    """
    stats = {
        "cameras": camera_metrics,
        "detectors": detectors,
        "processes": processes,
    }
    # Update Prometheus metrics with initial stats
    update_metrics(stats)
    return stats
def run(self) -> None: def run(self) -> None:
time.sleep(10) time.sleep(10)
for counter in itertools.cycle( for counter in itertools.cycle(

207
frigate/stats/prometheus.py Normal file
View File

@ -0,0 +1,207 @@
import logging
from typing import Dict

from prometheus_client import (
    CONTENT_TYPE_LATEST,
    Counter,
    Gauge,
    Info,
    generate_latest,
)
# Label names shared by several metric families below.
_PROCESS_LABELNAMES = ["pid", "name", "process", "type", "cmdline"]
_CAMERA_LABELNAMES = ["camera_name"]

# ---------------------------------------------------------------------------
# System metrics
# ---------------------------------------------------------------------------
SYSTEM_INFO = Info(name="frigate_system", documentation="System information")
CPU_USAGE = Gauge(
    name="frigate_cpu_usage_percent",
    documentation="Process CPU usage %",
    labelnames=_PROCESS_LABELNAMES,
)
MEMORY_USAGE = Gauge(
    name="frigate_mem_usage_percent",
    documentation="Process memory usage %",
    labelnames=_PROCESS_LABELNAMES,
)

# ---------------------------------------------------------------------------
# Camera metrics (one time series per camera)
# ---------------------------------------------------------------------------
CAMERA_FPS = Gauge(
    name="frigate_camera_fps",
    documentation="Frames per second being consumed from your camera",
    labelnames=_CAMERA_LABELNAMES,
)
DETECTION_FPS = Gauge(
    name="frigate_detection_fps",
    documentation="Number of times detection is run per second",
    labelnames=_CAMERA_LABELNAMES,
)
PROCESS_FPS = Gauge(
    name="frigate_process_fps",
    documentation="Frames per second being processed by frigate",
    labelnames=_CAMERA_LABELNAMES,
)
SKIPPED_FPS = Gauge(
    name="frigate_skipped_fps",
    documentation="Frames per second skipped for processing",
    labelnames=_CAMERA_LABELNAMES,
)
DETECTION_ENABLED = Gauge(
    name="frigate_detection_enabled",
    documentation="Detection enabled for camera",
    labelnames=_CAMERA_LABELNAMES,
)
AUDIO_DBFS = Gauge(
    name="frigate_audio_dBFS",
    documentation="Audio dBFS for camera",
    labelnames=_CAMERA_LABELNAMES,
)
AUDIO_RMS = Gauge(
    name="frigate_audio_rms",
    documentation="Audio RMS for camera",
    labelnames=_CAMERA_LABELNAMES,
)

# ---------------------------------------------------------------------------
# Detector metrics
# ---------------------------------------------------------------------------
DETECTOR_INFERENCE = Gauge(
    name="frigate_detector_inference_speed_seconds",
    documentation="Time spent running object detection in seconds",
    labelnames=["name"],
)
DETECTOR_START = Gauge(
    name="frigate_detection_start",
    documentation="Detector start time (unix timestamp)",
    labelnames=["name"],
)

# ---------------------------------------------------------------------------
# GPU metrics
# ---------------------------------------------------------------------------
GPU_USAGE = Gauge(
    name="frigate_gpu_usage_percent",
    documentation="GPU utilisation %",
    labelnames=["gpu_name"],
)
GPU_MEMORY = Gauge(
    name="frigate_gpu_mem_usage_percent",
    documentation="GPU memory usage %",
    labelnames=["gpu_name"],
)

# ---------------------------------------------------------------------------
# Storage metrics
# ---------------------------------------------------------------------------
STORAGE_FREE = Gauge(
    name="frigate_storage_free_bytes",
    documentation="Storage free bytes",
    labelnames=["storage"],
)
STORAGE_TOTAL = Gauge(
    name="frigate_storage_total_bytes",
    documentation="Storage total bytes",
    labelnames=["storage"],
)
STORAGE_USED = Gauge(
    name="frigate_storage_used_bytes",
    documentation="Storage used bytes",
    labelnames=["storage"],
)
STORAGE_MOUNT = Info(
    name="frigate_storage_mount_type",
    documentation="Storage mount type",
    labelnames=["mount_type", "storage"],
)

# ---------------------------------------------------------------------------
# Service metrics
# ---------------------------------------------------------------------------
UPTIME = Gauge(
    name="frigate_service_uptime_seconds",
    documentation="Uptime seconds",
)
LAST_UPDATE = Gauge(
    name="frigate_service_last_updated_timestamp",
    documentation="Stats recorded time (unix timestamp)",
)
TEMPERATURE = Gauge(
    name="frigate_device_temperature",
    documentation="Device Temperature",
    labelnames=["device"],
)

# ---------------------------------------------------------------------------
# Event metrics
# ---------------------------------------------------------------------------
CAMERA_EVENTS = Counter(
    name="frigate_camera_events",
    documentation="Count of camera events since exporter started",
    labelnames=["camera", "label"],
)
logger = logging.getLogger(__name__)


def _update_process_metrics(stats: Dict) -> None:
    """Publish per-process CPU and memory usage from ``stats["cpu_usages"]``."""
    for pid, proc_stats in stats.get("cpu_usages", {}).items():
        cmdline = proc_stats.get("cmdline", "")
        # The command line doubles as the process name and every process is
        # reported with the generic "Other" type.
        labels = {
            "pid": pid,
            "name": cmdline,
            "process": cmdline,
            "type": "Other",
            "cmdline": cmdline,
        }
        CPU_USAGE.labels(**labels).set(float(proc_stats["cpu"]))
        MEMORY_USAGE.labels(**labels).set(float(proc_stats["mem"]))


def _update_camera_metrics(stats: Dict) -> None:
    """Publish the per-camera gauges from ``stats["cameras"]``."""
    # Stats key -> gauge that receives its value; keys absent from a
    # camera's stats are skipped.
    camera_gauges = (
        ("camera_fps", CAMERA_FPS),
        ("detection_fps", DETECTION_FPS),
        ("process_fps", PROCESS_FPS),
        ("skipped_fps", SKIPPED_FPS),
        ("detection_enabled", DETECTION_ENABLED),
        ("audio_dBFS", AUDIO_DBFS),
        ("audio_rms", AUDIO_RMS),
    )
    for camera_name, camera_stats in stats.get("cameras", {}).items():
        for key, gauge in camera_gauges:
            if key in camera_stats:
                gauge.labels(camera_name=camera_name).set(camera_stats[key])


def _update_detector_metrics(stats: Dict) -> None:
    """Publish inference speed and start time from ``stats["detectors"]``."""
    for name, detector in stats.get("detectors", {}).items():
        if "inference_speed" in detector:
            # ms to seconds
            DETECTOR_INFERENCE.labels(name=name).set(
                detector["inference_speed"] * 0.001
            )
        if "detection_start" in detector:
            DETECTOR_START.labels(name=name).set(detector["detection_start"])


def _update_gpu_metrics(stats: Dict) -> None:
    """Publish GPU utilisation and memory usage from ``stats["gpu_usages"]``."""
    for gpu_name, gpu_stats in stats.get("gpu_usages", {}).items():
        if "gpu" in gpu_stats:
            GPU_USAGE.labels(gpu_name=gpu_name).set(float(gpu_stats["gpu"]))
        if "mem" in gpu_stats:
            GPU_MEMORY.labels(gpu_name=gpu_name).set(float(gpu_stats["mem"]))


def _update_service_metrics(stats: Dict) -> None:
    """Publish uptime, storage, temperature and version from ``stats["service"]``."""
    if "service" not in stats:
        return
    service = stats["service"]

    if "uptime" in service:
        UPTIME.set(service["uptime"])
    if "last_updated" in service:
        LAST_UPDATE.set(service["last_updated"])

    # Storage metrics
    for path, storage in service.get("storage", {}).items():
        if "free" in storage:
            STORAGE_FREE.labels(storage=path).set(storage["free"] * 1e6)  # MB to bytes
        if "total" in storage:
            STORAGE_TOTAL.labels(storage=path).set(storage["total"] * 1e6)
        if "used" in storage:
            STORAGE_USED.labels(storage=path).set(storage["used"] * 1e6)
        if "mount_type" in storage:
            # A labelled metric's child must be selected with *all* declared
            # label names, and an Info payload may not reuse a label name.
            # The labels already carry the data, so the payload stays empty.
            STORAGE_MOUNT.labels(
                mount_type=storage["mount_type"], storage=path
            ).info({})

    # Temperature metrics
    for device, temp in service.get("temperatures", {}).items():
        TEMPERATURE.labels(device=device).set(temp)

    # Version info
    if "version" in service and "latest_version" in service:
        SYSTEM_INFO.info(
            {
                "version": service["version"],
                "latest_version": service["latest_version"],
            }
        )


def update_metrics(stats: Dict) -> None:
    """Update Prometheus metrics based on Frigate stats.

    Each top-level section of ``stats`` (``cpu_usages``, ``cameras``,
    ``detectors``, ``gpu_usages``, ``service``) is optional and is handled by
    its own helper.

    Args:
        stats: Stats dict; missing sections and missing per-item keys are
            skipped.

    Returns:
        None. Errors are logged, never raised: exporting metrics is
        best-effort and must not disturb the caller.
    """
    updaters = (
        _update_process_metrics,
        _update_camera_metrics,
        _update_detector_metrics,
        _update_gpu_metrics,
        _update_service_metrics,
    )
    for updater in updaters:
        # Guard each section separately so that malformed data in one
        # section does not prevent the remaining sections from updating.
        try:
            updater(stats)
        except Exception:
            logger.exception(
                "Error updating Prometheus metrics (%s)", updater.__name__
            )
def get_metrics() -> tuple[bytes, str]:
    """Get Prometheus metrics in text format.

    Returns:
        A ``(payload, content_type)`` pair: the bytes produced by
        ``generate_latest()`` and the matching ``CONTENT_TYPE_LATEST``
        value to send as the response content type.
    """
    return generate_latest(), CONTENT_TYPE_LATEST