Catch invalid character index in lpr CTC decoder (#18825)

This commit is contained in:
Josh Hawkins 2025-06-21 21:44:37 -05:00 committed by GitHub
parent 55c6008ff0
commit 93a3cf8fb9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1615,9 +1615,9 @@ class CTCDecoder:
self.characters = []
if character_dict_path and os.path.exists(character_dict_path):
with open(character_dict_path, "r", encoding="utf-8") as f:
-self.characters = ["blank"] + [
-line.strip() for line in f if line.strip()
-]
+self.characters = (
+["blank"] + [line.strip() for line in f if line.strip()] + [" "]
+)
else:
self.characters = [
"blank",
@@ -1752,7 +1752,7 @@ class CTCDecoder:
merged_path.append(char_index)
merged_probs.append(seq_log_probs[t, char_index])
-result = "".join(self.char_map[idx] for idx in merged_path)
+result = "".join(self.char_map.get(idx, "") for idx in merged_path)
results.append(result)
confidence = np.exp(merged_probs).tolist()