diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt index 76e5d276e..905769b49 100644 --- a/docker/main/requirements-wheels.txt +++ b/docker/main/requirements-wheels.txt @@ -52,3 +52,4 @@ pywebpush == 2.0.* # alpr pyclipper == 1.3.* shapely == 2.0.* +prometheus-client == 0.21.* diff --git a/docs/docs/configuration/metrics.md b/docs/docs/configuration/metrics.md new file mode 100644 index 000000000..a12238f0a --- /dev/null +++ b/docs/docs/configuration/metrics.md @@ -0,0 +1,99 @@ +--- +id: metrics +title: Metrics +--- + +# Metrics + +Frigate exposes Prometheus metrics at the `/metrics` endpoint that can be used to monitor the performance and health of your Frigate instance. + +## Available Metrics + +### System Metrics +- `frigate_cpu_usage_percent{pid="", name="", process="", type="", cmdline=""}` - Process CPU usage percentage +- `frigate_mem_usage_percent{pid="", name="", process="", type="", cmdline=""}` - Process memory usage percentage +- `frigate_gpu_usage_percent{gpu_name=""}` - GPU utilization percentage +- `frigate_gpu_mem_usage_percent{gpu_name=""}` - GPU memory usage percentage + +### Camera Metrics +- `frigate_camera_fps{camera_name=""}` - Frames per second being consumed from your camera +- `frigate_detection_fps{camera_name=""}` - Number of times detection is run per second +- `frigate_process_fps{camera_name=""}` - Frames per second being processed +- `frigate_skipped_fps{camera_name=""}` - Frames per second skipped for processing +- `frigate_detection_enabled{camera_name=""}` - Detection enabled status for camera +- `frigate_audio_dBFS{camera_name=""}` - Audio dBFS for camera +- `frigate_audio_rms{camera_name=""}` - Audio RMS for camera + +### Detector Metrics +- `frigate_detector_inference_speed_seconds{name=""}` - Time spent running object detection in seconds +- `frigate_detection_start{name=""}` - Detector start time (unix timestamp) + +### Storage Metrics +- `frigate_storage_free_bytes{storage=""}` - Storage 
free bytes +- `frigate_storage_total_bytes{storage=""}` - Storage total bytes +- `frigate_storage_used_bytes{storage=""}` - Storage used bytes +- `frigate_storage_mount_type_info{mount_type="", storage=""}` - Storage mount type info + +### Service Metrics +- `frigate_service_uptime_seconds` - Uptime in seconds +- `frigate_service_last_updated_timestamp` - Stats recorded time (unix timestamp) +- `frigate_device_temperature{device=""}` - Device Temperature + +### Event Metrics +- `frigate_camera_events_total{camera="", label=""}` - Count of camera events since exporter started + +## Configuring Prometheus + +To scrape metrics from Frigate, add the following to your Prometheus configuration: + +```yaml +scrape_configs: + - job_name: 'frigate' + metrics_path: '/metrics' + static_configs: + - targets: ['frigate:5000'] + scrape_interval: 15s +``` + +## Example Queries + +Here are some example PromQL queries that might be useful: + +```promql +# Average CPU usage across all processes +avg(frigate_cpu_usage_percent) + +# Total GPU memory usage +sum(frigate_gpu_mem_usage_percent) + +# Average detection FPS for a camera over the last 5 minutes (gauge, so use avg_over_time rather than rate) +avg_over_time(frigate_detection_fps{camera_name="front_door"}[5m]) + +# Storage usage percentage +(frigate_storage_used_bytes / frigate_storage_total_bytes) * 100 + +# Event count by camera in last hour +sum by (camera) (increase(frigate_camera_events_total[1h])) +``` + +## Grafana Dashboard + +You can use these metrics to create Grafana dashboards to monitor your Frigate instance. Here are some examples of metrics you might want to track: + +- CPU, Memory and GPU usage over time +- Camera FPS and detection rates +- Storage usage and trends +- Event counts by camera +- System temperatures + +A sample Grafana dashboard JSON will be provided in a future update. 
@router.get("/metrics")
def metrics():
    """Serve the current Prometheus metrics.

    The body is whatever ``generate_latest()`` renders, returned with the
    ``CONTENT_TYPE_LATEST`` media type so scrapers can parse it.
    """
    payload = generate_latest()
    return Response(content=payload, media_type=CONTENT_TYPE_LATEST)
@staticmethod
def stats_init(config, camera_metrics, detectors, processes):
    """Build the initial stats mapping and push it to the Prometheus metrics.

    This is declared as a static method because the signature has no
    ``self``: without the decorator, calling it on a ``StatsEmitter``
    instance would bind the instance to ``config`` and fail with a
    ``TypeError`` for the extra positional argument.

    Args:
        config: Accepted for interface compatibility; not used here.
        camera_metrics: Value stored under the ``"cameras"`` key.
        detectors: Value stored under the ``"detectors"`` key.
        processes: Value stored under the ``"processes"`` key.

    Returns:
        The dict that was handed to ``update_metrics``.
    """
    # NOTE(review): assumes camera_metrics/detectors are mappings in the
    # shape update_metrics expects (name -> dict of stats) - confirm
    # against the caller.
    stats = {
        "cameras": camera_metrics,
        "detectors": detectors,
        "processes": processes,
    }
    # Update Prometheus metrics with initial stats
    update_metrics(stats)
    return stats
"Audio dBFS for camera", ["camera_name"]) +AUDIO_RMS = Gauge("frigate_audio_rms", "Audio RMS for camera", ["camera_name"]) + +# Detector metrics +DETECTOR_INFERENCE = Gauge( + "frigate_detector_inference_speed_seconds", + "Time spent running object detection in seconds", + ["name"], +) +DETECTOR_START = Gauge( + "frigate_detection_start", "Detector start time (unix timestamp)", ["name"] +) + +# GPU metrics +GPU_USAGE = Gauge("frigate_gpu_usage_percent", "GPU utilisation %", ["gpu_name"]) +GPU_MEMORY = Gauge("frigate_gpu_mem_usage_percent", "GPU memory usage %", ["gpu_name"]) + +# Storage metrics +STORAGE_FREE = Gauge("frigate_storage_free_bytes", "Storage free bytes", ["storage"]) +STORAGE_TOTAL = Gauge("frigate_storage_total_bytes", "Storage total bytes", ["storage"]) +STORAGE_USED = Gauge("frigate_storage_used_bytes", "Storage used bytes", ["storage"]) +STORAGE_MOUNT = Info( + "frigate_storage_mount_type", "Storage mount type", ["mount_type", "storage"] +) + +# Service metrics +UPTIME = Gauge("frigate_service_uptime_seconds", "Uptime seconds") +LAST_UPDATE = Gauge( + "frigate_service_last_updated_timestamp", "Stats recorded time (unix timestamp)" +) +TEMPERATURE = Gauge("frigate_device_temperature", "Device Temperature", ["device"]) + +# Event metrics +CAMERA_EVENTS = Counter( + "frigate_camera_events", + "Count of camera events since exporter started", + ["camera", "label"], +) + + +def update_metrics(stats: Dict) -> None: + """Update Prometheus metrics based on Frigate stats""" + try: + # Update process metrics + if "cpu_usages" in stats: + for pid, proc_stats in stats["cpu_usages"].items(): + cmdline = proc_stats.get("cmdline", "") + process_type = "Other" + process_name = cmdline + + CPU_USAGE.labels( + pid=pid, + name=process_name, + process=process_name, + type=process_type, + cmdline=cmdline, + ).set(float(proc_stats["cpu"])) + + MEMORY_USAGE.labels( + pid=pid, + name=process_name, + process=process_name, + type=process_type, + cmdline=cmdline, + 
).set(float(proc_stats["mem"])) + + # Update camera metrics + if "cameras" in stats: + for camera_name, camera_stats in stats["cameras"].items(): + if "camera_fps" in camera_stats: + CAMERA_FPS.labels(camera_name=camera_name).set( + camera_stats["camera_fps"] + ) + if "detection_fps" in camera_stats: + DETECTION_FPS.labels(camera_name=camera_name).set( + camera_stats["detection_fps"] + ) + if "process_fps" in camera_stats: + PROCESS_FPS.labels(camera_name=camera_name).set( + camera_stats["process_fps"] + ) + if "skipped_fps" in camera_stats: + SKIPPED_FPS.labels(camera_name=camera_name).set( + camera_stats["skipped_fps"] + ) + if "detection_enabled" in camera_stats: + DETECTION_ENABLED.labels(camera_name=camera_name).set( + camera_stats["detection_enabled"] + ) + if "audio_dBFS" in camera_stats: + AUDIO_DBFS.labels(camera_name=camera_name).set( + camera_stats["audio_dBFS"] + ) + if "audio_rms" in camera_stats: + AUDIO_RMS.labels(camera_name=camera_name).set( + camera_stats["audio_rms"] + ) + + # Update detector metrics + if "detectors" in stats: + for name, detector in stats["detectors"].items(): + if "inference_speed" in detector: + DETECTOR_INFERENCE.labels(name=name).set( + detector["inference_speed"] * 0.001 + ) # ms to seconds + if "detection_start" in detector: + DETECTOR_START.labels(name=name).set(detector["detection_start"]) + + # Update GPU metrics + if "gpu_usages" in stats: + for gpu_name, gpu_stats in stats["gpu_usages"].items(): + if "gpu" in gpu_stats: + GPU_USAGE.labels(gpu_name=gpu_name).set(float(gpu_stats["gpu"])) + if "mem" in gpu_stats: + GPU_MEMORY.labels(gpu_name=gpu_name).set(float(gpu_stats["mem"])) + + # Update service metrics + if "service" in stats: + service = stats["service"] + + if "uptime" in service: + UPTIME.set(service["uptime"]) + if "last_updated" in service: + LAST_UPDATE.set(service["last_updated"]) + + # Storage metrics + if "storage" in service: + for path, storage in service["storage"].items(): + if "free" in storage: + 
STORAGE_FREE.labels(storage=path).set( + storage["free"] * 1e6 + ) # MB to bytes + if "total" in storage: + STORAGE_TOTAL.labels(storage=path).set(storage["total"] * 1e6) + if "used" in storage: + STORAGE_USED.labels(storage=path).set(storage["used"] * 1e6) + if "mount_type" in storage: + STORAGE_MOUNT.labels(storage=path).info( + {"mount_type": storage["mount_type"], "storage": path} + ) + + # Temperature metrics + if "temperatures" in service: + for device, temp in service["temperatures"].items(): + TEMPERATURE.labels(device=device).set(temp) + + # Version info + if "version" in service and "latest_version" in service: + SYSTEM_INFO.info( + { + "version": service["version"], + "latest_version": service["latest_version"], + } + ) + + except Exception as e: + print(f"Error updating Prometheus metrics: {str(e)}") + + +def get_metrics() -> tuple[str, str]: + """Get Prometheus metrics in text format""" + return generate_latest(), CONTENT_TYPE_LATEST