Implement common post_processing (#11408)

* implement common post_processing
* fix formatting
* rename yolonas to post_process_yolonas
2025-09-14 17:52:10 +02:00 · 2024-05-17 18:50:45 +02:00 · 2024-05-17 18:50:45 +02:00 · a70dd02788
commit a70dd02788
parent 97f5ba0145
3 changed files with 66 additions and 2 deletions
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@@ -80,7 +80,7 @@ model:
  # Valid values are nhwc or nchw (default: shown below)
  input_tensor: nhwc
  # Optional: Object detection model type, currently only used with the OpenVINO detector
-  # Valid values are ssd, yolox (default: shown below)
+  # Valid values are ssd, yolox, yolonas (default: shown below)
  model_type: ssd
  # Optional: Label name modifications. These are merged into the standard labelmap.
  labelmap:
--- a/frigate/detectors/detection_api.py
+++ b/frigate/detectors/detection_api.py
@@ -1,6 +1,10 @@
 import logging
 from abc import ABC, abstractmethod

+import numpy as np
+
+from frigate.detectors.detector_config import ModelTypeEnum
+
 logger = logging.getLogger(__name__)


@@ -9,8 +13,67 @@ class DetectionApi(ABC):

    @abstractmethod
    def __init__(self, detector_config):
-        pass
+        # Keep the whole config: post_process reads model.model_type from it
+        self.detector_config = detector_config
+        # Score a candidate must exceed to be kept by post_process_yolonas
+        self.thresh = 0.5
+        # Configured model height/width; post_process_yolonas divides box
+        # coordinates by these values
+        self.height = detector_config.model.height
+        self.width = detector_config.model.width

    @abstractmethod
    def detect_raw(self, tensor_input):
+        """Abstract hook with no base behavior; subclasses must override it."""
        pass
+
+    def post_process_yolonas(self, output):
+        """
+        Convert raw YOLO-NAS inference output into a fixed-size detection array.
+
+        @param output: output of inference
+        expected shape: [np.array(1, N, 4), np.array(1, N, C)]
+        where N depends on the input size e.g. N=2100 for 320x320 images
+        and C is the number of classes (e.g. C=80)
+
+        @return: best results: np.array(20, 6) of float32 where each row is
+        in this order (class_id, score, y1/height, x1/width, y2/height, x2/width).
+        At most 20 candidates scoring above self.thresh are kept; rows that
+        are not needed stay all-zero.
+        """
+
+        max_results = 20
+        N = output[0].shape[1]
+
+        boxes = output[0].reshape(N, 4)
+        # take the class count from the tensor instead of assuming 80
+        scores = output[1].reshape(N, output[1].shape[-1])
+
+        # keep only the best-scoring class of every candidate box
+        class_ids = np.argmax(scores, axis=1)
+        scores = scores[np.arange(N), class_ids]
+
+        # same dtype and shape on every return path
+        results = np.zeros((max_results, 6), np.float32)
+
+        args_best = np.flatnonzero(scores > self.thresh)
+        num_matches = len(args_best)
+        if num_matches == 0:
+            return results
+        if num_matches > max_results:
+            # argpartition picks the highest scores without a full sort
+            top = np.argpartition(scores[args_best], -max_results)[-max_results:]
+            args_best = args_best[top]
+            num_matches = max_results
+
+        boxes = boxes[args_best]
+
+        # Fill only the leading rows. np.resize would pad the array with
+        # repeated copies of the detections, producing duplicate results.
+        results[:num_matches, 0] = class_ids[args_best]
+        results[:num_matches, 1] = scores[args_best]
+        # box columns are read as (x1, y1, x2, y2) and emitted as (y1, x1, y2, x2)
+        results[:num_matches, 2] = boxes[:, 1] / self.height
+        results[:num_matches, 3] = boxes[:, 0] / self.width
+        results[:num_matches, 4] = boxes[:, 3] / self.height
+        results[:num_matches, 5] = boxes[:, 2] / self.width
+
+        return results
+
+    def post_process(self, output):
+        """
+        Run the post-processing that matches the configured model type.
+
+        @param output: output of inference, handed on unchanged
+        @return: result of the model specific post-processing method
+        @raise ValueError: if the configured model type is not handled here
+        """
+        if self.detector_config.model.model_type == ModelTypeEnum.yolonas:
+            return self.post_process_yolonas(output)
+        else:
+            raise ValueError(
+                f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
+            )
--- a/frigate/detectors/detector_config.py
+++ b/frigate/detectors/detector_config.py
@@ -30,6 +30,7 @@ class InputTensorEnum(str, Enum):
 class ModelTypeEnum(str, Enum):
+    # Values accepted for the model_type configuration option
    ssd = "ssd"
    yolox = "yolox"
+    yolonas = "yolonas"


 class ModelConfig(BaseModel):