From 97bea8487da9bab8efb44fe8c237bdfc8d86416f Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sun, 1 Jun 2025 15:21:12 -0500 Subject: [PATCH] Upgrade PaddleOCR models to v4 (rec) and v5 (det) (#18505) The PP_OCRv5 text detection models have greatly improved over v3. The v5 recognition model makes improvements to challenging handwriting and uncommon characters, which are not necessary for LPR, so using v4 seemed like a better choice to continue to keep inference time as low as possible. Also included is the full dictionary for Chinese character support. --- .../common/license_plate/mixin.py | 226 +++++++++--------- frigate/embeddings/onnx/lpr_embedding.py | 11 +- 2 files changed, 126 insertions(+), 111 deletions(-) diff --git a/frigate/data_processing/common/license_plate/mixin.py b/frigate/data_processing/common/license_plate/mixin.py index 2c68ce374..2d63c1c69 100644 --- a/frigate/data_processing/common/license_plate/mixin.py +++ b/frigate/data_processing/common/license_plate/mixin.py @@ -22,7 +22,7 @@ from frigate.comms.event_metadata_updater import ( EventMetadataPublisher, EventMetadataTypeEnum, ) -from frigate.const import CLIPS_DIR +from frigate.const import CLIPS_DIR, MODEL_CACHE_DIR from frigate.embeddings.onnx.lpr_embedding import LPR_EMBEDDING_SIZE from frigate.types import TrackedObjectUpdateTypesEnum from frigate.util.builtin import EventsPerSecond, InferenceSpeed @@ -43,7 +43,11 @@ class LicensePlateProcessingMixin: self.plates_det_second = EventsPerSecond() self.plates_det_second.start() self.event_metadata_publisher = EventMetadataPublisher() - self.ctc_decoder = CTCDecoder() + self.ctc_decoder = CTCDecoder( + character_dict_path=os.path.join( + MODEL_CACHE_DIR, "paddleocr-onnx", "ppocr_keys_v1.txt" + ) + ) self.batch_size = 6 # Detection specific parameters @@ -1595,113 +1599,121 @@ class CTCDecoder: for each decoded character sequence. """ - def __init__(self): + def __init__(self, character_dict_path=None): """ - Initialize the CTCDecoder with a list of characters and a character map. + Initializes the CTCDecoder. + :param character_dict_path: Path to the character dictionary file. + If None, a default (English-focused) list is used. + For Chinese models, this should point to the correct + character dictionary file provided with the model. + """ + self.characters = [] + if character_dict_path and os.path.exists(character_dict_path): + with open(character_dict_path, "r", encoding="utf-8") as f: + self.characters = ["blank"] + [ + line.strip() for line in f if line.strip() + ] + else: + self.characters = [ + "blank", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "[", + "\\", + "]", + "^", + "_", + "`", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "{", + "|", + "}", + "~", + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + " ", + " ", + ] - The character set includes digits, letters, special characters, and a "blank" token - (used by the CTC model for decoding purposes). A character map is created to map - indices to characters. - """ - self.characters = [ - "blank", - "0", - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - ":", - ";", - "<", - "=", - ">", - "?", - "@", - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "[", - "\\", - "]", - "^", - "_", - "`", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "{", - "|", - "}", - "~", - "!", - '"', - "#", - "$", - "%", - "&", - "'", - "(", - ")", - "*", - "+", - ",", - "-", - ".", - "/", - " ", - " ", - ] self.char_map = {i: char for i, char in enumerate(self.characters)} def __call__( diff --git a/frigate/embeddings/onnx/lpr_embedding.py b/frigate/embeddings/onnx/lpr_embedding.py index ac981da8d..1b5b9acd0 100644 --- a/frigate/embeddings/onnx/lpr_embedding.py +++ b/frigate/embeddings/onnx/lpr_embedding.py @@ -32,13 +32,15 @@ class PaddleOCRDetection(BaseEmbedding): device: str = "AUTO", ): model_file = ( - "detection-large.onnx" if model_size == "large" else "detection-small.onnx" + "detection_v5-large.onnx" + if model_size == "large" + else "detection_v5-small.onnx" ) super().__init__( model_name="paddleocr-onnx", model_file=model_file, download_urls={ - model_file: f"https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/{model_file}" + model_file: f"https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v5/{model_file}" }, ) self.requestor = requestor @@ -156,9 +158,10 @@ class PaddleOCRRecognition(BaseEmbedding): ): super().__init__( model_name="paddleocr-onnx", - model_file="recognition.onnx", + model_file="recognition_v4.onnx", download_urls={ - "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx" + "recognition_v4.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v4/recognition_v4.onnx", + "ppocr_keys_v1.txt": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v4/ppocr_keys_v1.txt", }, ) self.requestor = requestor