Refactor processors and add LPR postprocessing (#16722)

* recordings data pub/sub

* function to process recording stream frames

* model runner

* lpr model runner

* refactor to mixin class and use model runner

* separate out realtime and post processors

* move model and mixin folders

* basic postprocessor

* clean up

* docs

* postprocessing logic

* clean up

* return none if recordings are disabled

* run postprocessor handle_requests too

* tweak expansion

* add put endpoint

* postprocessor tweaks with endpoint
This commit is contained in:
Josh Hawkins
2025-02-21 07:51:37 -06:00
committed by GitHub
parent e773d63c16
commit 60b34bcfca
16 changed files with 568 additions and 104 deletions

View File

@@ -13,34 +13,21 @@ from Levenshtein import distance
from pyclipper import ET_CLOSEDPOLYGON, JT_ROUND, PyclipperOffset
from shapely.geometry import Polygon
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config import FrigateConfig
from frigate.const import FRIGATE_LOCALHOST
from frigate.embeddings.onnx.lpr_embedding import (
LicensePlateDetector,
PaddleOCRClassification,
PaddleOCRDetection,
PaddleOCRRecognition,
)
from frigate.util.image import area
from ..types import DataProcessorMetrics
from .api import RealTimeProcessorApi
logger = logging.getLogger(__name__)
WRITE_DEBUG_IMAGES = False
class LicensePlateProcessor(RealTimeProcessorApi):
def __init__(self, config: FrigateConfig, metrics: DataProcessorMetrics):
super().__init__(config, metrics)
self.requestor = InterProcessRequestor()
self.lpr_config = config.lpr
class LicensePlateProcessingMixin:
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.requires_license_plate_detection = (
"license_plate" not in self.config.objects.all_objects
)
self.detected_license_plates: dict[str, dict[str, any]] = {}
self.ctc_decoder = CTCDecoder()
@@ -52,42 +39,6 @@ class LicensePlateProcessor(RealTimeProcessorApi):
self.box_thresh = 0.8
self.mask_thresh = 0.8
self.lpr_detection_model = None
self.lpr_classification_model = None
self.lpr_recognition_model = None
if self.config.lpr.enabled:
self.detection_model = PaddleOCRDetection(
model_size="large",
requestor=self.requestor,
device="CPU",
)
self.classification_model = PaddleOCRClassification(
model_size="large",
requestor=self.requestor,
device="CPU",
)
self.recognition_model = PaddleOCRRecognition(
model_size="large",
requestor=self.requestor,
device="CPU",
)
self.yolov9_detection_model = LicensePlateDetector(
model_size="large",
requestor=self.requestor,
device="CPU",
)
if self.lpr_config.enabled:
# all models need to be loaded to run LPR
self.detection_model._load_model_and_utils()
self.classification_model._load_model_and_utils()
self.recognition_model._load_model_and_utils()
self.yolov9_detection_model._load_model_and_utils()
def _detect(self, image: np.ndarray) -> List[np.ndarray]:
"""
Detect possible license plates in the input image by first resizing and normalizing it,
@@ -114,7 +65,7 @@ class LicensePlateProcessor(RealTimeProcessorApi):
resized_image,
)
outputs = self.detection_model([normalized_image])[0]
outputs = self.model_runner.detection_model([normalized_image])[0]
outputs = outputs[0, :, :]
boxes, _ = self._boxes_from_bitmap(outputs, outputs > self.mask_thresh, w, h)
@@ -143,7 +94,7 @@ class LicensePlateProcessor(RealTimeProcessorApi):
norm_img = norm_img[np.newaxis, :]
norm_images.append(norm_img)
outputs = self.classification_model(norm_images)
outputs = self.model_runner.classification_model(norm_images)
return self._process_classification_output(images, outputs)
@@ -183,7 +134,7 @@ class LicensePlateProcessor(RealTimeProcessorApi):
norm_image = norm_image[np.newaxis, :]
norm_images.append(norm_image)
outputs = self.recognition_model(norm_images)
outputs = self.model_runner.recognition_model(norm_images)
return self.ctc_decoder(outputs)
def _process_license_plate(
@@ -199,9 +150,9 @@ class LicensePlateProcessor(RealTimeProcessorApi):
Tuple[List[str], List[float], List[int]]: Detected license plate texts, confidence scores, and areas of the plates.
"""
if (
self.detection_model.runner is None
or self.classification_model.runner is None
or self.recognition_model.runner is None
self.model_runner.detection_model.runner is None
or self.model_runner.classification_model.runner is None
or self.model_runner.recognition_model.runner is None
):
# we might still be downloading the models
logger.debug("Model runners not loaded")
@@ -665,7 +616,9 @@ class LicensePlateProcessor(RealTimeProcessorApi):
input_w = int(input_h * max_wh_ratio)
# check for model-specific input width
model_input_w = self.recognition_model.runner.ort.get_inputs()[0].shape[3]
model_input_w = self.model_runner.recognition_model.runner.ort.get_inputs()[
0
].shape[3]
if isinstance(model_input_w, int) and model_input_w > 0:
input_w = model_input_w
@@ -732,19 +685,13 @@ class LicensePlateProcessor(RealTimeProcessorApi):
image = np.rot90(image, k=3)
return image
def __update_metrics(self, duration: float) -> None:
"""
Update inference metrics.
"""
self.metrics.alpr_pps.value = (self.metrics.alpr_pps.value * 9 + duration) / 10
def _detect_license_plate(self, input: np.ndarray) -> tuple[int, int, int, int]:
"""
Use a lightweight YOLOv9 model to detect license plates for users without Frigate+
Return the dimensions of the detected plate as [x1, y1, x2, y2].
"""
predictions = self.yolov9_detection_model(input)
predictions = self.model_runner.yolov9_detection_model(input)
confidence_threshold = self.lpr_config.detection_threshold
@@ -770,8 +717,8 @@ class LicensePlateProcessor(RealTimeProcessorApi):
# Return the top scoring bounding box if found
if top_box is not None:
# expand box by 15% to help with OCR
expansion = (top_box[2:] - top_box[:2]) * 0.1
# expand box by 30% to help with OCR
expansion = (top_box[2:] - top_box[:2]) * 0.30
# Expand box
expanded_box = np.array(
@@ -869,9 +816,8 @@ class LicensePlateProcessor(RealTimeProcessorApi):
# 5. Return True if we should keep the previous plate (i.e., if it scores higher)
return prev_score > curr_score
def process_frame(self, obj_data: dict[str, any], frame: np.ndarray):
def lpr_process(self, obj_data: dict[str, any], frame: np.ndarray):
"""Look for license plates in image."""
start = datetime.datetime.now().timestamp()
id = obj_data["id"]
@@ -934,7 +880,7 @@ class LicensePlateProcessor(RealTimeProcessorApi):
# check that license plate is valid
# double the value because we've doubled the size of the car
if license_plate_area < self.config.lpr.min_area * 2:
if license_plate_area < self.lpr_config.min_area * 2:
logger.debug("License plate is less than min_area")
return
@@ -972,7 +918,7 @@ class LicensePlateProcessor(RealTimeProcessorApi):
# check that license plate is valid
if (
not license_plate_box
or area(license_plate_box) < self.config.lpr.min_area
or area(license_plate_box) < self.lpr_config.min_area
):
logger.debug(f"Invalid license plate box {license_plate}")
return
@@ -1078,10 +1024,9 @@ class LicensePlateProcessor(RealTimeProcessorApi):
"plate": top_plate,
"char_confidences": top_char_confidences,
"area": top_area,
"obj_data": obj_data,
}
self.__update_metrics(datetime.datetime.now().timestamp() - start)
def handle_request(self, topic, request_data) -> dict[str, any] | None:
return

View File

@@ -0,0 +1,31 @@
from frigate.embeddings.onnx.lpr_embedding import (
LicensePlateDetector,
PaddleOCRClassification,
PaddleOCRDetection,
PaddleOCRRecognition,
)
from ...types import DataProcessorModelRunner
class LicensePlateModelRunner(DataProcessorModelRunner):
def __init__(self, requestor, device: str = "CPU", model_size: str = "large"):
super().__init__(requestor, device, model_size)
self.detection_model = PaddleOCRDetection(
model_size=model_size, requestor=requestor, device=device
)
self.classification_model = PaddleOCRClassification(
model_size=model_size, requestor=requestor, device=device
)
self.recognition_model = PaddleOCRRecognition(
model_size=model_size, requestor=requestor, device=device
)
self.yolov9_detection_model = LicensePlateDetector(
model_size=model_size, requestor=requestor, device=device
)
# Load all models once
self.detection_model._load_model_and_utils()
self.classification_model._load_model_and_utils()
self.recognition_model._load_model_and_utils()
self.yolov9_detection_model._load_model_and_utils()

View File

@@ -5,16 +5,22 @@ from abc import ABC, abstractmethod
from frigate.config import FrigateConfig
from ..types import DataProcessorMetrics, PostProcessDataEnum
from ..types import DataProcessorMetrics, DataProcessorModelRunner, PostProcessDataEnum
logger = logging.getLogger(__name__)
class PostProcessorApi(ABC):
@abstractmethod
def __init__(self, config: FrigateConfig, metrics: DataProcessorMetrics) -> None:
def __init__(
self,
config: FrigateConfig,
metrics: DataProcessorMetrics,
model_runner: DataProcessorModelRunner,
) -> None:
self.config = config
self.metrics = metrics
self.model_runner = model_runner
pass
@abstractmethod

View File

@@ -0,0 +1,231 @@
"""Handle post processing for license plate recognition."""
import datetime
import logging
import cv2
import numpy as np
from peewee import DoesNotExist
from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
from frigate.config import FrigateConfig
from frigate.data_processing.common.license_plate.mixin import (
WRITE_DEBUG_IMAGES,
LicensePlateProcessingMixin,
)
from frigate.data_processing.common.license_plate.model import (
LicensePlateModelRunner,
)
from frigate.data_processing.types import PostProcessDataEnum
from frigate.models import Recordings
from frigate.util.image import get_image_from_recording
from ..types import DataProcessorMetrics
from .api import PostProcessorApi
logger = logging.getLogger(__name__)
class LicensePlatePostProcessor(LicensePlateProcessingMixin, PostProcessorApi):
def __init__(
self,
config: FrigateConfig,
metrics: DataProcessorMetrics,
model_runner: LicensePlateModelRunner,
detected_license_plates: dict[str, dict[str, any]],
):
self.detected_license_plates = detected_license_plates
self.model_runner = model_runner
self.lpr_config = config.lpr
self.config = config
super().__init__(config, metrics, model_runner)
def __update_metrics(self, duration: float) -> None:
"""
Update inference metrics.
"""
self.metrics.alpr_pps.value = (self.metrics.alpr_pps.value * 9 + duration) / 10
def process_data(
self, data: dict[str, any], data_type: PostProcessDataEnum
) -> None:
"""Look for license plates in recording stream image
Args:
data (dict): containing data about the input.
data_type (enum): Describing the data that is being processed.
Returns:
None.
"""
start = datetime.datetime.now().timestamp()
event_id = data["event_id"]
camera_name = data["camera"]
if data_type == PostProcessDataEnum.recording:
obj_data = data["obj_data"]
frame_time = obj_data["frame_time"]
recordings_available_through = data["recordings_available"]
if frame_time > recordings_available_through:
logger.debug(
f"LPR post processing: No recordings available for this frame time {frame_time}, available through {recordings_available_through}"
)
elif data_type == PostProcessDataEnum.tracked_object:
# non-functional, need to think about snapshot time
obj_data = data["event"]["data"]
obj_data["id"] = data["event"]["id"]
obj_data["camera"] = data["event"]["camera"]
# TODO: snapshot time?
frame_time = data["event"]["start_time"]
else:
logger.error("No data type passed to LPR postprocessing")
return
recording_query = (
Recordings.select(
Recordings.path,
Recordings.start_time,
)
.where(
(
(frame_time >= Recordings.start_time)
& (frame_time <= Recordings.end_time)
)
)
.where(Recordings.camera == camera_name)
.order_by(Recordings.start_time.desc())
.limit(1)
)
try:
recording: Recordings = recording_query.get()
time_in_segment = frame_time - recording.start_time
codec = "mjpeg"
image_data = get_image_from_recording(
self.config.ffmpeg, recording.path, time_in_segment, codec, None
)
if not image_data:
logger.debug(
"LPR post processing: Unable to fetch license plate from recording"
)
# Convert bytes to numpy array
image_array = np.frombuffer(image_data, dtype=np.uint8)
if len(image_array) == 0:
logger.debug("LPR post processing: No image")
return
image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
except DoesNotExist:
logger.debug("Error fetching license plate for postprocessing")
return
if WRITE_DEBUG_IMAGES:
cv2.imwrite(f"debug/frames/lpr_post_{start}.jpg", image)
# convert to yuv for processing
frame = cv2.cvtColor(image, cv2.COLOR_BGR2YUV_I420)
detect_width = self.config.cameras[camera_name].detect.width
detect_height = self.config.cameras[camera_name].detect.height
# Scale the boxes based on detect dimensions
scale_x = image.shape[1] / detect_width
scale_y = image.shape[0] / detect_height
# Determine which box to enlarge based on detection mode
if self.requires_license_plate_detection:
# Scale and enlarge the car box
box = obj_data.get("box")
if not box:
return
# Scale original car box to detection dimensions
left = int(box[0] * scale_x)
top = int(box[1] * scale_y)
right = int(box[2] * scale_x)
bottom = int(box[3] * scale_y)
box = [left, top, right, bottom]
else:
# Get the license plate box from attributes
if not obj_data.get("current_attributes"):
return
license_plate = None
for attr in obj_data["current_attributes"]:
if attr.get("label") != "license_plate":
continue
if license_plate is None or attr.get("score", 0.0) > license_plate.get(
"score", 0.0
):
license_plate = attr
if not license_plate or not license_plate.get("box"):
return
# Scale license plate box to detection dimensions
orig_box = license_plate["box"]
left = int(orig_box[0] * scale_x)
top = int(orig_box[1] * scale_y)
right = int(orig_box[2] * scale_x)
bottom = int(orig_box[3] * scale_y)
box = [left, top, right, bottom]
width_box = right - left
height_box = bottom - top
# Enlarge box slightly to account for drift in detect vs recording stream
enlarge_factor = 0.3
new_left = max(0, int(left - (width_box * enlarge_factor / 2)))
new_top = max(0, int(top - (height_box * enlarge_factor / 2)))
new_right = min(image.shape[1], int(right + (width_box * enlarge_factor / 2)))
new_bottom = min(
image.shape[0], int(bottom + (height_box * enlarge_factor / 2))
)
keyframe_obj_data = obj_data.copy()
if self.requires_license_plate_detection:
# car box
keyframe_obj_data["box"] = [new_left, new_top, new_right, new_bottom]
else:
# Update the license plate box in the attributes
new_attributes = []
for attr in obj_data["current_attributes"]:
if attr.get("label") == "license_plate":
new_attr = attr.copy()
new_attr["box"] = [new_left, new_top, new_right, new_bottom]
new_attributes.append(new_attr)
else:
new_attributes.append(attr)
keyframe_obj_data["current_attributes"] = new_attributes
# run the frame through lpr processing
logger.debug(f"Post processing plate: {event_id}, {frame_time}")
self.lpr_process(keyframe_obj_data, frame)
self.__update_metrics(datetime.datetime.now().timestamp() - start)
def handle_request(self, topic, request_data) -> dict[str, any] | None:
if topic == EmbeddingsRequestEnum.reprocess_plate.value:
event = request_data["event"]
self.process_data(
{
"event_id": event["id"],
"camera": event["camera"],
"event": event,
},
PostProcessDataEnum.tracked_object,
)
return {
"message": "Successfully requested reprocessing of license plate.",
"success": True,
}

View File

@@ -7,16 +7,22 @@ import numpy as np
from frigate.config import FrigateConfig
from ..types import DataProcessorMetrics
from ..types import DataProcessorMetrics, DataProcessorModelRunner
logger = logging.getLogger(__name__)
class RealTimeProcessorApi(ABC):
@abstractmethod
def __init__(self, config: FrigateConfig, metrics: DataProcessorMetrics) -> None:
def __init__(
self,
config: FrigateConfig,
metrics: DataProcessorMetrics,
model_runner: DataProcessorModelRunner,
) -> None:
self.config = config
self.metrics = metrics
self.model_runner = model_runner
pass
@abstractmethod

View File

@@ -22,7 +22,7 @@ except ModuleNotFoundError:
logger = logging.getLogger(__name__)
class BirdProcessor(RealTimeProcessorApi):
class BirdRealTimeProcessor(RealTimeProcessorApi):
def __init__(self, config: FrigateConfig, metrics: DataProcessorMetrics):
super().__init__(config, metrics)
self.interpreter: Interpreter = None

View File

@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
MIN_MATCHING_FACES = 2
class FaceProcessor(RealTimeProcessorApi):
class FaceRealTimeProcessor(RealTimeProcessorApi):
def __init__(self, config: FrigateConfig, metrics: DataProcessorMetrics):
super().__init__(config, metrics)
self.face_config = config.face_recognition

View File

@@ -0,0 +1,53 @@
"""Handle processing images for face detection and recognition."""
import datetime
import logging
import numpy as np
from frigate.config import FrigateConfig
from frigate.data_processing.common.license_plate.mixin import (
LicensePlateProcessingMixin,
)
from frigate.data_processing.common.license_plate.model import (
LicensePlateModelRunner,
)
from ..types import DataProcessorMetrics
from .api import RealTimeProcessorApi
logger = logging.getLogger(__name__)
class LicensePlateRealTimeProcessor(LicensePlateProcessingMixin, RealTimeProcessorApi):
def __init__(
self,
config: FrigateConfig,
metrics: DataProcessorMetrics,
model_runner: LicensePlateModelRunner,
detected_license_plates: dict[str, dict[str, any]],
):
self.detected_license_plates = detected_license_plates
self.model_runner = model_runner
self.lpr_config = config.lpr
self.config = config
super().__init__(config, metrics, model_runner)
def __update_metrics(self, duration: float) -> None:
"""
Update inference metrics.
"""
self.metrics.alpr_pps.value = (self.metrics.alpr_pps.value * 9 + duration) / 10
def process_frame(self, obj_data: dict[str, any], frame: np.ndarray):
"""Look for license plates in image."""
start = datetime.datetime.now().timestamp()
self.lpr_process(obj_data, frame)
self.__update_metrics(datetime.datetime.now().timestamp() - start)
def handle_request(self, topic, request_data) -> dict[str, any] | None:
return
def expire_object(self, object_id: str):
if object_id in self.detected_license_plates:
self.detected_license_plates.pop(object_id)

View File

@@ -18,6 +18,13 @@ class DataProcessorMetrics:
self.alpr_pps = mp.Value("d", 0.01)
class DataProcessorModelRunner:
def __init__(self, requestor, device: str = "CPU", model_size: str = "large"):
self.requestor = requestor
self.device = device
self.model_size = model_size
class PostProcessDataEnum(str, Enum):
recording = "recording"
review = "review"