Auto convert ONNX models to RKNN format (#19674)

* Implement base rknn conversion

* Remove unused

* Formatting

* Add model conversion lock so it doesn't break when multiple detectors are defined

* Ignore unused import
This commit is contained in:
Nicolas Mowen 2025-08-20 15:15:57 -06:00 committed by GitHub
parent 6e3b40eaee
commit 2236ecf23f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 427 additions and 1 deletions

View File

@ -12,6 +12,7 @@ from frigate.const import MODEL_CACHE_DIR
from frigate.detectors.detection_api import DetectionApi
from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
from frigate.util.model import post_process_yolo
from frigate.util.rknn_converter import auto_convert_model
logger = logging.getLogger(__name__)
@ -94,6 +95,30 @@ class Rknn(DetectionApi):
# user provided models should be a path and contain a "/"
if "/" in model_path:
model_props["preset"] = False
# Check if this is an ONNX model or model without extension that needs conversion
if model_path.endswith(".onnx") or not os.path.splitext(model_path)[1]:
# Try to auto-convert to RKNN format
logger.info(
f"Attempting to auto-convert {model_path} to RKNN format..."
)
# Determine model type from config
model_type = self.detector_config.model.model_type
# Auto-convert the model
converted_path = auto_convert_model(model_path, model_type.value)
if converted_path:
model_props["path"] = converted_path
logger.info(f"Successfully converted model to: {converted_path}")
else:
# Fall back to original path if conversion fails
logger.warning(
f"Failed to convert {model_path} to RKNN format, using original path"
)
model_props["path"] = model_path
else:
model_props["path"] = model_path
else:
model_props["preset"] = True

View File

@ -0,0 +1,401 @@
"""RKNN model conversion utility for Frigate."""
import fcntl
import logging
import os
import subprocess
import sys
import time
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# Per-model-type keyword arguments that convert_onnx_to_rknn() copies and
# unpacks into ``RKNN.config``. Every supported type currently uses the same
# normalization; each entry is still a separate dict with its own lists.
MODEL_TYPE_CONFIGS = {
    supported_type: {
        "mean_values": [[0, 0, 0]],
        "std_values": [[255, 255, 255]],
        "target_platform": None,  # Will be set dynamically
    }
    for supported_type in ("yolo-generic", "yolonas", "yolox")
}
def ensure_torch_dependencies() -> bool:
    """Make sure PyTorch can be imported, installing it with pip if needed.

    When ``import torch`` fails, ``torch`` and ``torchvision`` are installed
    with ``pip`` through the running interpreter (pip's output is discarded)
    and the import is retried.

    Returns:
        True if ``torch`` is importable, either because it was already
        present or because the installation succeeded; False if pip failed
        or ``torch`` still cannot be imported afterwards.
    """
    try:
        import torch  # type: ignore # noqa: F401
    except ImportError:
        logger.info("PyTorch not found, attempting to install...")
    else:
        logger.debug("PyTorch is already available")
        return True

    try:
        subprocess.check_call(
            [
                sys.executable,
                "-m",
                "pip",
                "install",
                "--break-system-packages",
                "torch",
                "torchvision",
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # The import system caches directory contents. Packages installed
        # while the interpreter is running may stay invisible until those
        # caches are invalidated, which would make the retry below fail even
        # though the installation succeeded.
        import importlib

        importlib.invalidate_caches()

        import torch  # type: ignore # noqa: F401
    except (subprocess.CalledProcessError, ImportError) as e:
        logger.error(f"Failed to install PyTorch: {e}")
        return False

    logger.info("PyTorch installed successfully")
    return True
def ensure_rknn_toolkit() -> bool:
    """Report whether the RKNN toolkit can be imported.

    Returns:
        True when both ``rknn`` and ``rknn.api.RKNN`` import without raising
        ImportError; False (after logging an error) otherwise.
    """
    try:
        import rknn  # type: ignore # noqa: F401
        from rknn.api import RKNN  # type: ignore # noqa: F401
    except ImportError:
        logger.error("RKNN toolkit not found. Please ensure it's installed.")
        return False
    else:
        logger.debug("RKNN toolkit is already available")
        return True
def get_soc_type() -> Optional[str]:
    """Read the SoC name from the device tree.

    The text following the last comma in ``/proc/device-tree/compatible`` is
    used as the SoC name, with NUL bytes stripped from both ends.

    Returns:
        The SoC name, or None (after logging a warning) when the file cannot
        be read or the extracted name is empty.
    """
    try:
        with open("/proc/device-tree/compatible") as file:
            compatible = file.read()
    except OSError:
        # OSError covers a missing file (FileNotFoundError) as well as other
        # read failures such as missing permissions, so callers get None
        # instead of an unexpected exception.
        logger.warning("Could not determine SoC type from device tree")
        return None

    soc = compatible.split(",")[-1].strip("\x00")
    if not soc:
        # An empty name is not a usable target platform; report it as unknown.
        logger.warning("Could not determine SoC type from device tree")
        return None
    return soc
def convert_onnx_to_rknn(
    onnx_path: str,
    output_path: str,
    model_type: str,
    quantization: bool = False,
    soc: Optional[str] = None,
) -> bool:
    """
    Convert an ONNX model to RKNN format.

    Args:
        onnx_path: Path to the input ONNX model
        output_path: Path for the output RKNN model
        model_type: Type of model; must be a key of MODEL_TYPE_CONFIGS
            (yolo-generic, yolonas, yolox)
        quantization: Passed to ``RKNN.build`` as ``do_quantization``
        soc: Target SoC platform (auto-detected via get_soc_type() if None)

    Returns:
        True if the model was loaded, built and exported successfully,
        False otherwise. Failures are logged, not raised.
    """
    # Validate the cheap preconditions first so that an unsupported model
    # type or an unknown SoC does not trigger a PyTorch installation attempt.
    if model_type not in MODEL_TYPE_CONFIGS:
        logger.error(f"Unsupported model type: {model_type}")
        return False

    if soc is None:
        soc = get_soc_type()
        if soc is None:
            logger.error("Could not determine SoC type")
            return False

    if not ensure_torch_dependencies():
        logger.error("PyTorch dependencies not available")
        return False

    if not ensure_rknn_toolkit():
        logger.error("RKNN toolkit not available")
        return False

    # Shallow copy: only the top-level "target_platform" key is replaced, so
    # the shared module-level config is left untouched.
    config = MODEL_TYPE_CONFIGS[model_type].copy()
    config["target_platform"] = soc

    rknn = None
    try:
        from rknn.api import RKNN  # type: ignore

        logger.info(f"Converting {onnx_path} to RKNN format for {soc}")
        rknn = RKNN(verbose=True)
        rknn.config(**config)

        # Each toolkit step reports failure through a non-zero return code.
        if rknn.load_onnx(model=onnx_path) != 0:
            logger.error("Failed to load ONNX model")
            return False

        if rknn.build(do_quantization=quantization) != 0:
            logger.error("Failed to build RKNN model")
            return False

        if rknn.export_rknn(output_path) != 0:
            logger.error("Failed to export RKNN model")
            return False

        logger.info(f"Successfully converted model to {output_path}")
        return True
    except Exception as e:
        logger.error(f"Error during RKNN conversion: {e}")
        return False
    finally:
        # Release the toolkit object on every path (success, failed step or
        # exception) so its resources are not kept for the process lifetime.
        if rknn is not None:
            try:
                rknn.release()
            except Exception as e:
                logger.warning(f"Failed to release RKNN resources: {e}")
def cleanup_stale_lock(lock_file_path: Path) -> bool:
    """
    Delete the lock file when its modification time is more than 10 minutes old.

    Args:
        lock_file_path: Path to the lock file

    Returns:
        True if the lock file was removed; False if it does not exist, is
        not old enough, or an error occurred (errors are logged).
    """
    stale_after_seconds = 600  # 10 minutes

    try:
        if not lock_file_path.exists():
            return False

        lock_age = time.time() - lock_file_path.stat().st_mtime
        if lock_age <= stale_after_seconds:
            return False

        logger.warning(
            f"Removing stale lock file: {lock_file_path} (age: {lock_age:.1f}s)"
        )
        lock_file_path.unlink()
        return True
    except Exception as e:
        logger.error(f"Error cleaning up stale lock: {e}")
        return False
# Descriptors of the lock files this process currently holds, keyed by the
# lock path as a string. An flock stays in place for as long as the locked
# descriptor is open, so the descriptor is kept here while the lock is held
# and closed by release_conversion_lock().
_held_conversion_lock_fds = {}


def _lock_fd_matches_path(lock_fd: int, lock_file_path: Path) -> bool:
    """Return True if lock_fd refers to the file currently at lock_file_path."""
    try:
        on_disk = os.stat(lock_file_path)
    except OSError:
        return False
    opened = os.fstat(lock_fd)
    return (opened.st_dev, opened.st_ino) == (on_disk.st_dev, on_disk.st_ino)


def acquire_conversion_lock(lock_file_path: Path, timeout: int = 300) -> bool:
    """
    Acquire a file-based lock for model conversion.

    The lock is an exclusive, non-blocking ``flock`` on the lock file, retried
    once per second. On success the locked descriptor is remembered so that
    release_conversion_lock() can close it; on every other path it is closed
    before returning.

    Args:
        lock_file_path: Path to the lock file
        timeout: Maximum time to wait for the lock in seconds. No attempt is
            made when this is zero or negative.

    Returns:
        True if the lock was acquired, False on timeout or error
    """
    lock_fd = None
    try:
        lock_file_path.parent.mkdir(parents=True, exist_ok=True)
        cleanup_stale_lock(lock_file_path)

        start_time = time.time()
        while time.time() - start_time < timeout:
            # Open the path afresh on every attempt: the holder removes the
            # file on release, so a descriptor opened earlier may point at a
            # file that is no longer the one at lock_file_path.
            lock_fd = os.open(lock_file_path, os.O_CREAT | os.O_RDWR)

            try:
                fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                locked = True
            except OSError:
                # Lock is held through another open descriptor
                locked = False

            if locked and _lock_fd_matches_path(lock_fd, lock_file_path):
                _held_conversion_lock_fds[str(lock_file_path)] = lock_fd
                lock_fd = None  # now owned by the registry, do not close
                logger.debug(f"Acquired conversion lock: {lock_file_path}")
                return True

            os.close(lock_fd)
            lock_fd = None

            if locked:
                # We locked a file that was removed or replaced between
                # open() and flock(); that lock protects nothing, retry now.
                continue

            logger.debug("Waiting for conversion lock to be released...")
            time.sleep(1)

        logger.warning(f"Timeout waiting for conversion lock: {lock_file_path}")
        return False

    except Exception as e:
        logger.error(f"Error acquiring conversion lock: {e}")
        return False
    finally:
        # Reached with an open descriptor only when an exception interrupted
        # an attempt; make sure it is not leaked.
        if lock_fd is not None:
            os.close(lock_fd)


def release_conversion_lock(lock_file_path: Path) -> None:
    """
    Release the conversion lock.

    Removes the lock file if it exists and closes the descriptor that
    acquire_conversion_lock() stored for this path, which drops the flock.
    Errors are logged, not raised.

    Args:
        lock_file_path: Path to the lock file
    """
    lock_fd = _held_conversion_lock_fds.pop(str(lock_file_path), None)
    try:
        # Remove the path before dropping the flock: a waiter that then locks
        # the old file will notice it no longer matches the path and retry
        # with a fresh file instead of having its lock file deleted under it.
        if lock_file_path.exists():
            lock_file_path.unlink()
            logger.debug(f"Released conversion lock: {lock_file_path}")
    except Exception as e:
        logger.error(f"Error releasing conversion lock: {e}")
    finally:
        if lock_fd is not None:
            try:
                os.close(lock_fd)
            except OSError as e:
                logger.error(f"Error releasing conversion lock: {e}")
def is_lock_stale(lock_file_path: Path, max_age: int = 600) -> bool:
    """
    Tell whether the lock file was last modified more than max_age seconds ago.

    Args:
        lock_file_path: Path to the lock file
        max_age: Age in seconds above which the lock counts as stale

    Returns:
        True if the file exists and is older than max_age; False if it is
        missing, recent enough, or could not be inspected.
    """
    try:
        if not lock_file_path.exists():
            return False
        modified_at = lock_file_path.stat().st_mtime
        return (time.time() - modified_at) > max_age
    except Exception:
        # Best effort: a lock that cannot be inspected is not reported stale.
        return False
def wait_for_conversion_completion(
    rknn_path: Path,
    lock_file_path: Path,
    timeout: int = 300,
    model_type: str = "yolo-generic",
    quantization: bool = False,
) -> bool:
    """
    Wait for another process to complete the conversion.

    Polls once per second. If the lock file turns stale while waiting, it is
    cleaned up and this process tries to take the lock and run the conversion
    itself.

    Args:
        rknn_path: Path to the expected RKNN model
        lock_file_path: Path to the lock file to monitor
        timeout: Maximum time to wait in seconds
        model_type: Model type used if the conversion has to be retried here
        quantization: Quantization setting used if the conversion is retried

    Returns:
        True if the RKNN model exists (produced by another process or by the
        retry here); False on timeout, if the lock disappeared without a
        model being produced, or if the retry failed.
    """
    start_time = time.time()

    while time.time() - start_time < timeout:
        # Check if RKNN model appeared
        if rknn_path.exists():
            logger.info(f"RKNN model appeared: {rknn_path}")
            return True

        # Check if lock file is gone (conversion completed or failed)
        if not lock_file_path.exists():
            logger.info("Lock file removed, checking for RKNN model...")
            if rknn_path.exists():
                logger.info(f"RKNN model found after lock removal: {rknn_path}")
                return True
            logger.warning(
                "Lock file removed but RKNN model not found, conversion may have failed"
            )
            return False

        # Check if lock is stale
        if is_lock_stale(lock_file_path):
            logger.warning("Lock file is stale, attempting to clean up and retry...")
            cleanup_stale_lock(lock_file_path)

            # Try to acquire lock again
            if acquire_conversion_lock(lock_file_path, timeout=60):
                try:
                    # Check if RKNN file appeared while waiting
                    if rknn_path.exists():
                        logger.info(f"RKNN model appeared while waiting: {rknn_path}")
                        return True

                    logger.info(
                        f"Retrying conversion of {rknn_path} after stale lock cleanup..."
                    )

                    # Recover the source model from the RKNN path by swapping
                    # only the final suffix, so names that contain dots are
                    # kept intact. auto_convert_model() accepts both ".onnx"
                    # and extensionless sources, so both are tried.
                    source_candidates = (
                        rknn_path.with_suffix(".onnx"),
                        rknn_path.with_suffix(""),
                    )
                    for source_path in source_candidates:
                        if source_path.exists() and convert_onnx_to_rknn(
                            str(source_path), str(rknn_path), model_type, quantization
                        ):
                            return True

                    logger.error("Failed to convert model after stale lock cleanup")
                    return False
                finally:
                    release_conversion_lock(lock_file_path)

        logger.debug("Waiting for RKNN model to appear...")
        time.sleep(1)

    logger.warning(f"Timeout waiting for RKNN model: {rknn_path}")
    return False
def auto_convert_model(
    model_path: str, model_type: str, quantization: bool = False
) -> Optional[str]:
    """
    Return the path of an RKNN model for model_path, converting it if required.

    A path ending in ".rknn" is returned unchanged. For ".onnx" or
    extensionless paths, an existing sibling ".rknn" file is reused; otherwise
    the conversion runs under a per-model lock file, or this call waits for
    the process that already holds that lock.

    Args:
        model_path: Path to the model file
        model_type: Type of the model
        quantization: Whether to use quantization

    Returns:
        Path to the RKNN model if one is available, None otherwise
    """
    if model_path.endswith(".rknn"):
        return model_path

    source = Path(model_path)
    if source.suffix.lower() not in [".onnx", ""]:
        return None

    base_name = source.stem if source.suffix else source.name
    target = source.parent / f"{base_name}.rknn"

    if target.exists():
        logger.info(f"Found existing RKNN model: {target}")
        return str(target)

    lock_file_path = source.parent / f"{base_name}.conversion.lock"

    if not acquire_conversion_lock(lock_file_path):
        # Someone else holds the lock: wait for their result instead.
        logger.info(
            f"Another process is converting {model_path}, waiting for completion..."
        )
        if wait_for_conversion_completion(target, lock_file_path):
            return str(target)
        logger.error(f"Timeout waiting for conversion of {model_path}")
        return None

    try:
        # The model may have been produced while we were waiting for the lock.
        if target.exists():
            logger.info(f"RKNN model appeared while waiting for lock: {target}")
            return str(target)

        logger.info(f"Converting {model_path} to RKNN format...")
        target.parent.mkdir(parents=True, exist_ok=True)

        converted = convert_onnx_to_rknn(
            str(source), str(target), model_type, quantization
        )
        if not converted:
            logger.error(f"Failed to convert {model_path} to RKNN format")
            return None
        return str(target)
    finally:
        release_conversion_lock(lock_file_path)