Auto convert ONNX models to RKNN format (#19674)

* Implement base rknn conversion * Remove unused * Formatting * Add model conversion lock so it doesn't break when multiple detectors are defined * Ignore unused import
2025-09-14 17:52:10 +02:00 · 2025-08-20 15:15:57 -06:00 · 2025-08-20 15:15:57 -06:00 · 2236ecf23f
commit 2236ecf23f
parent 6e3b40eaee
2 changed files with 427 additions and 1 deletions
--- a/frigate/detectors/plugins/rknn.py
+++ b/frigate/detectors/plugins/rknn.py
@ -12,6 +12,7 @@ from frigate.const import MODEL_CACHE_DIR
 from frigate.detectors.detection_api import DetectionApi
 from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
 from frigate.util.model import post_process_yolo
+from frigate.util.rknn_converter import auto_convert_model

 logger = logging.getLogger(__name__)

@ -94,6 +95,30 @@ class Rknn(DetectionApi):
        # user provided models should be a path and contain a "/"
        if "/" in model_path:
            model_props["preset"] = False
+
+            # Check if this is an ONNX model or model without extension that needs conversion
+            if model_path.endswith(".onnx") or not os.path.splitext(model_path)[1]:
+                # Try to auto-convert to RKNN format
+                logger.info(
+                    f"Attempting to auto-convert {model_path} to RKNN format..."
+                )
+
+                # Determine model type from config
+                model_type = self.detector_config.model.model_type
+
+                # Auto-convert the model
+                converted_path = auto_convert_model(model_path, model_type.value)
+
+                if converted_path:
+                    model_props["path"] = converted_path
+                    logger.info(f"Successfully converted model to: {converted_path}")
+                else:
+                    # Fall back to original path if conversion fails
+                    logger.warning(
+                        f"Failed to convert {model_path} to RKNN format, using original path"
+                    )
+                    model_props["path"] = model_path
+            else:
                model_props["path"] = model_path
        else:
            model_props["preset"] = True
--- a/frigate/util/rknn_converter.py
+++ b/frigate/util/rknn_converter.py
@ -0,0 +1,401 @@
+"""RKNN model conversion utility for Frigate."""
+
+import fcntl
+import logging
+import os
+import subprocess
+import sys
+import time
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# Keyword arguments for ``RKNN.config``, keyed by model type.
+# All supported types currently share the same normalisation values; each key
+# still gets its own dict and lists so entries can be adjusted independently.
+# ``target_platform`` is a placeholder that the converter fills in with the
+# target SoC before use.
+MODEL_TYPE_CONFIGS = {
+    supported_type: {
+        "mean_values": [[0, 0, 0]],
+        "std_values": [[255, 255, 255]],
+        "target_platform": None,  # Will be set dynamically
+    }
+    for supported_type in ("yolo-generic", "yolonas", "yolox")
+}
+
+
+def ensure_torch_dependencies() -> bool:
+    """Make sure PyTorch can be imported, installing it with pip if needed.
+
+    If ``import torch`` fails, ``torch`` and ``torchvision`` are installed
+    into the running interpreter's environment with
+    ``pip install --break-system-packages`` (pip output is discarded) and the
+    import is retried.
+
+    Returns:
+        True if ``torch`` is importable (already present or freshly
+        installed), False if the pip install or the retried import failed.
+    """
+    import importlib
+
+    try:
+        import torch  # type: ignore # noqa: F401
+
+        logger.debug("PyTorch is already available")
+        return True
+    except ImportError:
+        logger.info("PyTorch not found, attempting to install...")
+
+    try:
+        subprocess.check_call(
+            [
+                sys.executable,
+                "-m",
+                "pip",
+                "install",
+                "--break-system-packages",
+                "torch",
+                "torchvision",
+            ],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+
+        # The import system caches directory contents; without this the
+        # package that pip just wrote may not be visible to this process.
+        importlib.invalidate_caches()
+
+        import torch  # type: ignore # noqa: F401
+
+        logger.info("PyTorch installed successfully")
+        return True
+    except (subprocess.CalledProcessError, ImportError) as e:
+        logger.error(f"Failed to install PyTorch: {e}")
+        return False
+
+
+def ensure_rknn_toolkit() -> bool:
+    """Report whether the RKNN toolkit can be imported.
+
+    Nothing is installed here; the function only probes ``rknn`` and
+    ``rknn.api.RKNN`` and logs the outcome.
+
+    Returns:
+        True when both imports succeed, False when either raises ImportError.
+    """
+    try:
+        import rknn  # type: ignore  # noqa: F401
+        from rknn.api import RKNN  # type: ignore # noqa: F401
+    except ImportError:
+        logger.error("RKNN toolkit not found. Please ensure it's installed.")
+        return False
+    else:
+        logger.debug("RKNN toolkit is already available")
+        return True
+
+
+def get_soc_type() -> Optional[str]:
+    """Read the SoC identifier from the device tree.
+
+    Returns:
+        The text after the last comma in ``/proc/device-tree/compatible``
+        with leading/trailing NUL characters stripped, or None (after logging
+        a warning) when that file does not exist.
+    """
+    try:
+        with open("/proc/device-tree/compatible") as dt_file:
+            compatible = dt_file.read()
+    except FileNotFoundError:
+        logger.warning("Could not determine SoC type from device tree")
+        return None
+
+    # Only the final comma-separated field is of interest.
+    _, _, last_field = compatible.rpartition(",")
+    return last_field.strip("\x00")
+
+
+def convert_onnx_to_rknn(
+    onnx_path: str,
+    output_path: str,
+    model_type: str,
+    quantization: bool = False,
+    soc: Optional[str] = None,
+) -> bool:
+    """
+    Convert ONNX model to RKNN format.
+
+    Args:
+        onnx_path: Path to input ONNX model
+        output_path: Path for output RKNN model
+        model_type: Type of model; must be a key of MODEL_TYPE_CONFIGS
+            (yolo-generic, yolonas, yolox)
+        quantization: Passed to the RKNN build step as ``do_quantization``
+        soc: Target SoC platform (auto-detected if None)
+
+    Returns:
+        True if conversion successful, False otherwise. Failures are logged
+        and never raised to the caller.
+    """
+    # Reject unsupported model types first: the check is free, whereas the
+    # dependency check below may trigger a pip install of PyTorch.
+    if model_type not in MODEL_TYPE_CONFIGS:
+        logger.error(f"Unsupported model type: {model_type}")
+        return False
+
+    if not ensure_torch_dependencies():
+        logger.error("PyTorch dependencies not available")
+        return False
+
+    if not ensure_rknn_toolkit():
+        logger.error("RKNN toolkit not available")
+        return False
+
+    # Get SoC type if not provided
+    if soc is None:
+        soc = get_soc_type()
+        if soc is None:
+            logger.error("Could not determine SoC type")
+            return False
+
+    # Copy the shared template so the module-level config is never mutated.
+    config = {**MODEL_TYPE_CONFIGS[model_type], "target_platform": soc}
+
+    rknn = None
+    try:
+        from rknn.api import RKNN  # type: ignore
+
+        logger.info(f"Converting {onnx_path} to RKNN format for {soc}")
+        rknn = RKNN(verbose=True)
+        rknn.config(**config)
+
+        if rknn.load_onnx(model=onnx_path) != 0:
+            logger.error("Failed to load ONNX model")
+            return False
+
+        if rknn.build(do_quantization=quantization) != 0:
+            logger.error("Failed to build RKNN model")
+            return False
+
+        if rknn.export_rknn(output_path) != 0:
+            logger.error("Failed to export RKNN model")
+            return False
+
+        logger.info(f"Successfully converted model to {output_path}")
+        return True
+
+    except Exception as e:
+        logger.error(f"Error during RKNN conversion: {e}")
+        return False
+
+    finally:
+        # Free the toolkit's resources on every exit path; a failure while
+        # releasing must not mask the conversion result.
+        if rknn is not None:
+            try:
+                rknn.release()
+            except Exception as e:
+                logger.debug(f"Error releasing RKNN context: {e}")
+
+
+def cleanup_stale_lock(lock_file_path: Path) -> bool:
+    """
+    Delete the lock file when its modification time is over 10 minutes old.
+
+    Args:
+        lock_file_path: Path to the lock file
+
+    Returns:
+        True if the file was deleted; False if it is missing, still fresh,
+        or an error occurred (errors are logged, not raised)
+    """
+    try:
+        if not lock_file_path.exists():
+            return False
+
+        lock_age = time.time() - lock_file_path.stat().st_mtime
+        if lock_age <= 600:  # younger than 10 minutes: leave it alone
+            return False
+
+        logger.warning(
+            f"Removing stale lock file: {lock_file_path} (age: {lock_age:.1f}s)"
+        )
+        lock_file_path.unlink()
+        return True
+    except Exception as e:
+        logger.error(f"Error cleaning up stale lock: {e}")
+        return False
+
+
+def acquire_conversion_lock(lock_file_path: Path, timeout: int = 300) -> bool:
+    """
+    Acquire a file-based lock for model conversion.
+
+    The lock file (and its parent directory) is created if needed, a stale
+    lock file is removed first, and a non-blocking exclusive ``flock`` is
+    retried once per second until it succeeds or ``timeout`` elapses.
+
+    On success the descriptor is deliberately left open: closing it would
+    drop the ``flock``. On every failure path the descriptor is closed.
+
+    Args:
+        lock_file_path: Path to the lock file
+        timeout: Maximum time to wait for lock in seconds
+
+    Returns:
+        True if lock acquired, False if timeout or error
+    """
+    lock_fd = None
+    acquired = False
+    try:
+        lock_file_path.parent.mkdir(parents=True, exist_ok=True)
+        cleanup_stale_lock(lock_file_path)
+        lock_fd = os.open(lock_file_path, os.O_CREAT | os.O_RDWR)
+
+        # A monotonic clock keeps the timeout correct across wall-clock jumps.
+        deadline = time.monotonic() + timeout
+        while time.monotonic() < deadline:
+            try:
+                fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            except OSError:
+                # Lock is held by another process, wait and retry
+                logger.debug("Waiting for conversion lock to be released...")
+                time.sleep(1)
+                continue
+
+            acquired = True
+            logger.debug(f"Acquired conversion lock: {lock_file_path}")
+            return True
+
+        logger.warning(f"Timeout waiting for conversion lock: {lock_file_path}")
+        return False
+
+    except Exception as e:
+        logger.error(f"Error acquiring conversion lock: {e}")
+        return False
+
+    finally:
+        # Close the descriptor unless it is the one holding the lock.
+        if lock_fd is not None and not acquired:
+            os.close(lock_fd)
+
+
+def release_conversion_lock(lock_file_path: Path) -> None:
+    """
+    Release the conversion lock by deleting its lock file.
+
+    A missing file is a silent no-op; any error is logged, never raised.
+
+    Args:
+        lock_file_path: Path to the lock file
+    """
+    try:
+        if not lock_file_path.exists():
+            return
+
+        lock_file_path.unlink()
+        logger.debug(f"Released conversion lock: {lock_file_path}")
+    except Exception as e:
+        logger.error(f"Error releasing conversion lock: {e}")
+
+
+def is_lock_stale(lock_file_path: Path, max_age: int = 600) -> bool:
+    """
+    Tell whether the lock file was last modified more than max_age seconds ago.
+
+    Args:
+        lock_file_path: Path to the lock file
+        max_age: Maximum age in seconds before considering lock stale
+
+    Returns:
+        True if the file exists and is older than max_age; False if it is
+        missing, recent enough, or could not be inspected
+    """
+    try:
+        if not lock_file_path.exists():
+            return False
+
+        modified_at = lock_file_path.stat().st_mtime
+        return (time.time() - modified_at) > max_age
+    except Exception:
+        # Best effort: a lock that cannot be examined is treated as not stale.
+        return False
+
+
+def wait_for_conversion_completion(
+    rknn_path: Path,
+    lock_file_path: Path,
+    timeout: int = 300,
+    model_type: str = "yolo-generic",
+    quantization: bool = False,
+) -> bool:
+    """
+    Wait for another process to complete the conversion.
+
+    Polls once per second. If the lock file goes stale while waiting, this
+    process removes it, takes the lock itself and retries the conversion.
+
+    Args:
+        rknn_path: Path to the expected RKNN model
+        lock_file_path: Path to the lock file to monitor
+        timeout: Maximum time to wait in seconds
+        model_type: Model type used if the conversion is retried here
+        quantization: Quantization flag used if the conversion is retried here
+
+    Returns:
+        True if the RKNN model exists when the wait ends, False on timeout or
+        when the conversion failed. Always a bool.
+    """
+    start_time = time.time()
+    while time.time() - start_time < timeout:
+        # Check if RKNN model appeared
+        if rknn_path.exists():
+            logger.info(f"RKNN model appeared: {rknn_path}")
+            return True
+
+        # Check if lock file is gone (conversion completed or failed)
+        if not lock_file_path.exists():
+            logger.info("Lock file removed, checking for RKNN model...")
+            if rknn_path.exists():
+                logger.info(f"RKNN model found after lock removal: {rknn_path}")
+                return True
+
+            logger.warning(
+                "Lock file removed but RKNN model not found, conversion may have failed"
+            )
+            return False
+
+        # Check if lock is stale
+        if is_lock_stale(lock_file_path):
+            logger.warning("Lock file is stale, attempting to clean up and retry...")
+            cleanup_stale_lock(lock_file_path)
+            # Try to acquire lock again
+            if acquire_conversion_lock(lock_file_path, timeout=60):
+                try:
+                    # Check if RKNN file appeared while waiting
+                    if rknn_path.exists():
+                        logger.info(f"RKNN model appeared while waiting: {rknn_path}")
+                        return True
+
+                    logger.info(
+                        f"Retrying conversion of {rknn_path} after stale lock cleanup..."
+                    )
+
+                    # Swap only the final ".rknn" suffix so dotted names such
+                    # as "model.v2.rknn" map back to "model.v2.onnx"; also
+                    # accept a source model that has no extension.
+                    candidates = (
+                        rknn_path.with_suffix(".onnx"),
+                        rknn_path.with_suffix(""),
+                    )
+                    source_path = next(
+                        (path for path in candidates if path.is_file()), None
+                    )
+
+                    if source_path is not None and convert_onnx_to_rknn(
+                        str(source_path), str(rknn_path), model_type, quantization
+                    ):
+                        return True
+
+                    logger.error("Failed to convert model after stale lock cleanup")
+                    return False
+
+                finally:
+                    release_conversion_lock(lock_file_path)
+
+        logger.debug("Waiting for RKNN model to appear...")
+        time.sleep(1)
+
+    logger.warning(f"Timeout waiting for RKNN model: {rknn_path}")
+    return False
+
+
+def auto_convert_model(
+    model_path: str, model_type: str, quantization: bool = False
+) -> Optional[str]:
+    """
+    Return an RKNN model path for model_path, converting it when required.
+
+    A ``.rknn`` path is returned unchanged. A ``.onnx`` or extension-less
+    path is mapped to a sibling ``<name>.rknn`` file, which is reused if it
+    already exists and otherwise built under a ``<name>.conversion.lock``
+    file lock. Any other extension is not handled.
+
+    Args:
+        model_path: Path to the model file
+        model_type: Type of the model
+        quantization: Whether to use quantization
+
+    Returns:
+        Path to the RKNN model if successful, None otherwise
+    """
+    if model_path.endswith(".rknn"):
+        return model_path
+
+    base_path = Path(model_path)
+    if base_path.suffix.lower() not in [".onnx", ""]:
+        return None
+
+    # Derive the sibling output and lock file names from the model name.
+    base_name = base_path.stem if base_path.suffix else base_path.name
+    rknn_path = base_path.parent / f"{base_name}.rknn"
+
+    if rknn_path.exists():
+        logger.info(f"Found existing RKNN model: {rknn_path}")
+        return str(rknn_path)
+
+    lock_file_path = base_path.parent / f"{base_name}.conversion.lock"
+
+    if not acquire_conversion_lock(lock_file_path):
+        # Someone else holds the lock: wait for their result instead.
+        logger.info(
+            f"Another process is converting {model_path}, waiting for completion..."
+        )
+
+        if wait_for_conversion_completion(rknn_path, lock_file_path):
+            return str(rknn_path)
+
+        logger.error(f"Timeout waiting for conversion of {model_path}")
+        return None
+
+    try:
+        # Another process may have finished while this one was acquiring.
+        if rknn_path.exists():
+            logger.info(
+                f"RKNN model appeared while waiting for lock: {rknn_path}"
+            )
+            return str(rknn_path)
+
+        logger.info(f"Converting {model_path} to RKNN format...")
+        rknn_path.parent.mkdir(parents=True, exist_ok=True)
+
+        converted = convert_onnx_to_rknn(
+            str(base_path), str(rknn_path), model_type, quantization
+        )
+        if converted:
+            return str(rknn_path)
+
+        logger.error(f"Failed to convert {model_path} to RKNN format")
+        return None
+
+    finally:
+        release_conversion_lock(lock_file_path)