Upgrade PaddleOCR models to v4 (rec) and v5 (det) (#18505)

The PP-OCRv5 text detection models are greatly improved over v3. The v5 recognition model's gains are mainly in challenging handwriting and uncommon characters, which are not needed for LPR, so v4 remains the better choice to keep inference time as low as possible. The full character dictionary is also included for Chinese character support.
Josh Hawkins 2025-06-01 15:21:12 -05:00 committed by Nicolas Mowen
parent 21807ca255
commit 97bea8487d
2 changed files with 126 additions and 111 deletions

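The "full dictionary" referenced in the commit message is ppocr_keys_v1.txt, a plain-text file with one character per line. Below is a minimal sketch of how such a dictionary becomes the decoder's character table, mirroring the CTCDecoder change in the diff that follows; the MODEL_CACHE_DIR value is an assumption for illustration.

```python
import os

# Assumption for illustration: Frigate's model cache root.
MODEL_CACHE_DIR = "/config/model_cache"

# Path this commit uses for the downloaded dictionary.
dict_path = os.path.join(MODEL_CACHE_DIR, "paddleocr-onnx", "ppocr_keys_v1.txt")

# One character per line; index 0 is reserved for the CTC "blank" token.
with open(dict_path, "r", encoding="utf-8") as f:
    characters = ["blank"] + [line.strip() for line in f if line.strip()]

char_map = {i: c for i, c in enumerate(characters)}
print(len(char_map), "characters loaded")
```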

@@ -22,7 +22,7 @@ from frigate.comms.event_metadata_updater import (
     EventMetadataPublisher,
     EventMetadataTypeEnum,
 )
-from frigate.const import CLIPS_DIR
+from frigate.const import CLIPS_DIR, MODEL_CACHE_DIR
 from frigate.embeddings.onnx.lpr_embedding import LPR_EMBEDDING_SIZE
 from frigate.types import TrackedObjectUpdateTypesEnum
 from frigate.util.builtin import EventsPerSecond, InferenceSpeed
@@ -43,7 +43,11 @@ class LicensePlateProcessingMixin:
         self.plates_det_second = EventsPerSecond()
         self.plates_det_second.start()
         self.event_metadata_publisher = EventMetadataPublisher()
-        self.ctc_decoder = CTCDecoder()
+        self.ctc_decoder = CTCDecoder(
+            character_dict_path=os.path.join(
+                MODEL_CACHE_DIR, "paddleocr-onnx", "ppocr_keys_v1.txt"
+            )
+        )
         self.batch_size = 6

         # Detection specific parameters
@@ -1595,113 +1599,121 @@ class CTCDecoder:
     for each decoded character sequence.
     """

-    def __init__(self):
-        """
-        Initialize the CTCDecoder with a list of characters and a character map.
-
-        The character set includes digits, letters, special characters, and a "blank" token
-        (used by the CTC model for decoding purposes). A character map is created to map
-        indices to characters.
-        """
-        self.characters = [
-            "blank",
-            "0",
-            "1",
-            "2",
-            "3",
-            "4",
-            "5",
-            "6",
-            "7",
-            "8",
-            "9",
-            ":",
-            ";",
-            "<",
-            "=",
-            ">",
-            "?",
-            "@",
-            "A",
-            "B",
-            "C",
-            "D",
-            "E",
-            "F",
-            "G",
-            "H",
-            "I",
-            "J",
-            "K",
-            "L",
-            "M",
-            "N",
-            "O",
-            "P",
-            "Q",
-            "R",
-            "S",
-            "T",
-            "U",
-            "V",
-            "W",
-            "X",
-            "Y",
-            "Z",
-            "[",
-            "\\",
-            "]",
-            "^",
-            "_",
-            "`",
-            "a",
-            "b",
-            "c",
-            "d",
-            "e",
-            "f",
-            "g",
-            "h",
-            "i",
-            "j",
-            "k",
-            "l",
-            "m",
-            "n",
-            "o",
-            "p",
-            "q",
-            "r",
-            "s",
-            "t",
-            "u",
-            "v",
-            "w",
-            "x",
-            "y",
-            "z",
-            "{",
-            "|",
-            "}",
-            "~",
-            "!",
-            '"',
-            "#",
-            "$",
-            "%",
-            "&",
-            "'",
-            "(",
-            ")",
-            "*",
-            "+",
-            ",",
-            "-",
-            ".",
-            "/",
-            " ",
-            " ",
-        ]
+    def __init__(self, character_dict_path=None):
+        """
+        Initializes the CTCDecoder.
+
+        :param character_dict_path: Path to the character dictionary file.
+            If None, a default (English-focused) list is used.
+            For Chinese models, this should point to the correct
+            character dictionary file provided with the model.
+        """
+        self.characters = []
+        if character_dict_path and os.path.exists(character_dict_path):
+            with open(character_dict_path, "r", encoding="utf-8") as f:
+                self.characters = ["blank"] + [
+                    line.strip() for line in f if line.strip()
+                ]
+        else:
+            self.characters = [
+                "blank",
+                "0",
+                "1",
+                "2",
+                "3",
+                "4",
+                "5",
+                "6",
+                "7",
+                "8",
+                "9",
+                ":",
+                ";",
+                "<",
+                "=",
+                ">",
+                "?",
+                "@",
+                "A",
+                "B",
+                "C",
+                "D",
+                "E",
+                "F",
+                "G",
+                "H",
+                "I",
+                "J",
+                "K",
+                "L",
+                "M",
+                "N",
+                "O",
+                "P",
+                "Q",
+                "R",
+                "S",
+                "T",
+                "U",
+                "V",
+                "W",
+                "X",
+                "Y",
+                "Z",
+                "[",
+                "\\",
+                "]",
+                "^",
+                "_",
+                "`",
+                "a",
+                "b",
+                "c",
+                "d",
+                "e",
+                "f",
+                "g",
+                "h",
+                "i",
+                "j",
+                "k",
+                "l",
+                "m",
+                "n",
+                "o",
+                "p",
+                "q",
+                "r",
+                "s",
+                "t",
+                "u",
+                "v",
+                "w",
+                "x",
+                "y",
+                "z",
+                "{",
+                "|",
+                "}",
+                "~",
+                "!",
+                '"',
+                "#",
+                "$",
+                "%",
+                "&",
+                "'",
+                "(",
+                ")",
+                "*",
+                "+",
+                ",",
+                "-",
+                ".",
+                "/",
+                " ",
+                " ",
+            ]
         self.char_map = {i: char for i, char in enumerate(self.characters)}

     def __call__(

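For context on how the character map above is consumed: the recognition model emits per-timestep class scores, and a greedy CTC decode collapses repeated indices and drops the blank index to produce the plate text. The sketch below illustrates that technique only; it is not Frigate's __call__ implementation, and the function name and array shapes are assumptions.

```python
import numpy as np


def greedy_ctc_decode(logits: np.ndarray, char_map: dict[int, str]) -> str:
    """Greedy CTC decode: argmax per timestep, collapse repeats, drop blanks.

    logits: array of shape (sequence_length, num_classes) from the
    recognition model, where class 0 is the CTC blank token.
    """
    indices = logits.argmax(axis=1)
    decoded = []
    prev = -1
    for idx in indices:
        if idx != 0 and idx != prev:  # skip blank and repeated indices
            decoded.append(char_map[int(idx)])
        prev = int(idx)
    return "".join(decoded)
```

If the character table and the model's class count disagree, decoding produces garbage, which is why the dictionary now ships alongside the v4 recognition model.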

@@ -32,13 +32,15 @@ class PaddleOCRDetection(BaseEmbedding):
         device: str = "AUTO",
     ):
         model_file = (
-            "detection-large.onnx" if model_size == "large" else "detection-small.onnx"
+            "detection_v5-large.onnx"
+            if model_size == "large"
+            else "detection_v5-small.onnx"
         )
         super().__init__(
             model_name="paddleocr-onnx",
             model_file=model_file,
             download_urls={
-                model_file: f"https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/{model_file}"
+                model_file: f"https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v5/{model_file}"
             },
         )
         self.requestor = requestor
@@ -156,9 +158,10 @@ class PaddleOCRRecognition(BaseEmbedding):
     ):
         super().__init__(
             model_name="paddleocr-onnx",
-            model_file="recognition.onnx",
+            model_file="recognition_v4.onnx",
             download_urls={
-                "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx"
+                "recognition_v4.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v4/recognition_v4.onnx",
+                "ppocr_keys_v1.txt": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v4/ppocr_keys_v1.txt",
             },
         )
         self.requestor = requestor
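
To sanity-check that the renamed v4/v5 files download and load cleanly, a quick onnxruntime smoke test such as the sketch below can be run inside the container; the cache directory is an assumption based on the dictionary path used earlier, not something this diff defines.

```python
import os

import onnxruntime as ort

# Assumed cache location, matching MODEL_CACHE_DIR / "paddleocr-onnx" above.
cache_dir = "/config/model_cache/paddleocr-onnx"

for name in ("detection_v5-small.onnx", "recognition_v4.onnx"):
    path = os.path.join(cache_dir, name)
    session = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
    first_input = session.get_inputs()[0]
    print(f"{name}: input {first_input.name} shape {first_input.shape}")
```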