blakeblackshear.frigate/frigate/util/model.py

"""Model Utils"""

import logging
import os
from typing import Any, Optional

import numpy as np
import onnxruntime as ort
from playhouse.sqliteq import SqliteQueueDatabase
from sklearn.preprocessing import LabelEncoder, Normalizer
from sklearn.svm import SVC

from frigate.util.builtin import deserialize

try:
    import openvino as ov
except ImportError:
    # openvino is not included
    pass

logger = logging.getLogger(__name__)


def get_ort_providers(
    force_cpu: bool = False, device: str = "AUTO", requires_fp16: bool = False
) -> tuple[list[str], list[dict[str, Any]]]:
    """Pick the ONNX Runtime execution providers to use, with their options.

    Args:
        force_cpu: When True, provider discovery is skipped entirely and only
            the CPU provider is returned.
        device: Device selector. A digit-only string is used as the CUDA
            device id, the exact value "Tensorrt" opts in to the TensorRT
            provider, and the value is forwarded unchanged as the OpenVINO
            ``device_type``.
        requires_fp16: Whether the model wants FP16. Only consulted for the
            TensorRT provider, and only honored when the ``USE_FP_16``
            environment variable is not set to "False".

    Returns:
        A ``(providers, options)`` tuple of two parallel lists: provider names
        in the order reported by ``ort.get_available_providers()`` and the
        option dict for the provider at the same index.

    Side effects:
        Creates the on-disk cache directories under ``/config/model_cache``
        for the TensorRT and OpenVINO providers when they are selected.
    """
    if force_cpu:
        return (
            ["CPUExecutionProvider"],
            [
                {
                    "enable_cpu_mem_arena": False,
                }
            ],
        )

    providers: list[str] = []
    options: list[dict[str, Any]] = []

    for provider in ort.get_available_providers():
        if provider == "CUDAExecutionProvider":
            # a purely numeric device string selects the GPU, otherwise use GPU 0
            device_id = int(device) if device.isdigit() else 0
            providers.append(provider)
            options.append(
                {
                    "arena_extend_strategy": "kSameAsRequested",
                    "device_id": device_id,
                }
            )
        elif provider == "TensorrtExecutionProvider":
            # TensorrtExecutionProvider uses too much memory without options to control it
            # so it is not enabled by default
            if device != "Tensorrt":
                continue

            os.makedirs("/config/model_cache/tensorrt/ort/trt-engines", exist_ok=True)
            providers.append(provider)
            options.append(
                {
                    # NOTE: this branch requires device == "Tensorrt", which can
                    # never be a numeric string, so the device id is always 0.
                    "device_id": 0,
                    "trt_fp16_enable": requires_fp16
                    and os.environ.get("USE_FP_16", "True") != "False",
                    "trt_timing_cache_enable": True,
                    "trt_engine_cache_enable": True,
                    "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
                    "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
                }
            )
        elif provider == "OpenVINOExecutionProvider":
            os.makedirs("/config/model_cache/openvino/ort", exist_ok=True)
            providers.append(provider)
            options.append(
                {
                    "arena_extend_strategy": "kSameAsRequested",
                    "cache_dir": "/config/model_cache/openvino/ort",
                    "device_type": device,
                }
            )
        elif provider == "CPUExecutionProvider":
            providers.append(provider)
            options.append(
                {
                    "enable_cpu_mem_arena": False,
                }
            )
        else:
            # unknown provider: keep it enabled with default options
            providers.append(provider)
            options.append({})

    return (providers, options)


class ONNXModelRunner:
    """Load an ONNX model and run it through OpenVINO or ONNX Runtime.

    ``self.type`` records which backend ended up being used: "ov" when the
    model was compiled directly with OpenVINO, "ort" when an ONNX Runtime
    session was created instead.
    """

    def __init__(self, model_path: str, device: str, requires_fp16: bool = False):
        """Build the backend for ``model_path``.

        OpenVINO is attempted first whenever its execution provider is among
        the selected providers; any failure there is logged and ONNX Runtime
        is used as the fallback.
        """
        self.model_path = model_path
        self.ort: ort.InferenceSession = None
        self.ov: ov.Core = None
        # device "CPU" forces the CPU-only provider list
        providers, options = get_ort_providers(device == "CPU", device, requires_fp16)
        self.interpreter = None

        if "OpenVINOExecutionProvider" in providers:
            try:
                # use OpenVINO directly
                self.type = "ov"
                self.ov = ov.Core()
                cache_settings = {
                    ov.properties.cache_dir: "/config/model_cache/openvino"
                }
                self.ov.set_property(cache_settings)
                compiled = self.ov.compile_model(model=model_path, device_name=device)
                self.interpreter = compiled
            except Exception as e:
                logger.warning(
                    f"OpenVINO failed to build model, using CPU instead: {e}"
                )
                self.interpreter = None

        if self.interpreter is not None:
            # OpenVINO compiled the model, nothing else to set up
            return

        # Use ONNXRuntime
        self.type = "ort"
        self.ort = ort.InferenceSession(
            model_path,
            providers=providers,
            provider_options=options,
        )

    def get_input_names(self) -> list[str]:
        """Return the model's input names for the active backend."""
        if self.type == "ov":
            # each OpenVINO input exposes a collection of names; flatten them
            return [
                name for port in self.interpreter.inputs for name in port.names
            ]

        if self.type == "ort":
            return [node.name for node in self.ort.get_inputs()]

        return None

    def run(self, input: dict[str, Any]) -> Any:
        """Run inference on ``input`` (input name -> value) and return the outputs.

        For OpenVINO only the values of ``input`` are used and the result is a
        one-element list holding the data of the request's output tensor.
        For ONNX Runtime the dict is passed to the session and all outputs
        are returned.
        """
        if self.type == "ort":
            return self.ort.run(None, input)

        if self.type != "ov":
            return None

        request = self.interpreter.create_infer_request()
        values = list(input.values())
        # a single input is wrapped directly, several inputs are wrapped together
        payload = values[0] if len(values) == 1 else values
        tensor = ov.Tensor(array=payload)
        request.infer(tensor)
        return [request.get_output_tensor().data]


class FaceClassificationModel:
    """Classify face embeddings with a lazily trained linear SVC.

    The classifier is built on first use from the rows of the ``vec_faces``
    table and kept until ``clear_classifier`` is called.
    """

    def __init__(self, db: SqliteQueueDatabase):
        self.db = db
        # both stay None until a classifier has been trained successfully
        self.labeler: Optional[LabelEncoder] = None
        self.classifier: Optional[SVC] = None

    def __build_classifier(self) -> None:
        """Train the SVC from the stored face embeddings.

        Leaves ``self.classifier`` and ``self.labeler`` as None when fewer
        than two distinct labels are stored, since an SVC cannot be fit on a
        single class (or on no data at all).
        """
        faces: list[tuple[str, bytes]] = self.db.execute_sql(
            "SELECT id, face_embedding FROM vec_faces"
        ).fetchall()
        # the label is the part of the row id before the first "-"
        names = [face[0].split("-")[0] for face in faces]

        if len(set(names)) < 2:
            self.clear_classifier()
            return

        embeddings = np.array([deserialize(face[1]) for face in faces])
        norms = Normalizer(norm="l2").transform(embeddings)
        labeler = LabelEncoder()
        labels = labeler.fit_transform(names)
        classifier = SVC(kernel="linear", probability=True)
        classifier.fit(norms, labels)

        # only publish the pair once training succeeded
        self.labeler = labeler
        self.classifier = classifier

    def clear_classifier(self) -> None:
        """Drop the trained classifier so it is rebuilt on the next use."""
        self.classifier = None
        self.labeler = None

    def classify_face(self, embedding: np.ndarray) -> Optional[tuple[str, float]]:
        """Return ``(label, probability)`` for ``embedding``.

        Returns None when no classifier can be trained from the stored faces.
        The probability is rounded to two decimals.
        """
        if self.classifier is None:
            self.__build_classifier()

        if self.classifier is None or self.labeler is None:
            return None

        # The SVC was trained on L2-normalized embeddings, so the query must be
        # normalized the same way; this is a no-op for unit-length input.
        sample = Normalizer(norm="l2").transform([embedding])

        label = self.classifier.predict(sample)[0]
        probabilities = self.classifier.predict_proba(sample)[0]
        return (
            self.labeler.inverse_transform([label])[0],
            round(probabilities[label], 2),
        )
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`"""Model Utils"""`

Various fixes (#14786) * Catch openvino error * Remove clip deletion * Update deletion text * Fix timeline not respecting timezone config * Tweaks * More timezone fixes * Fix * More timezone fixes * Fix shm docs 2024-11-04 15:07:57 +01:00			`import logging`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`import os`
Use SVC to normalize and classify faces for recognition (#14835) * Add margin to detected faces for embeddings * Standardize pixel values for face input * Use SVC to classify faces * Clear classifier when new face is added * Formatting * Add dependency 2024-11-06 17:07:29 +01:00			`from typing import Any, Optional`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00
Use SVC to normalize and classify faces for recognition (#14835) * Add margin to detected faces for embeddings * Standardize pixel values for face input * Use SVC to classify faces * Clear classifier when new face is added * Formatting * Add dependency 2024-11-06 17:07:29 +01:00			`import numpy as np`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`import onnxruntime as ort`
Use SVC to normalize and classify faces for recognition (#14835) * Add margin to detected faces for embeddings * Standardize pixel values for face input * Use SVC to classify faces * Clear classifier when new face is added * Formatting * Add dependency 2024-11-06 17:07:29 +01:00			`from playhouse.sqliteq import SqliteQueueDatabase`
			`from sklearn.preprocessing import LabelEncoder, Normalizer`
			`from sklearn.svm import SVC`

			`from frigate.util.builtin import deserialize`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00
Openvino models (#14283) * Enable model conversion cache for openvino * Use openvino directly for onnx embeddings if available * Don't fail if zmq is busy 2024-10-11 18:47:23 +02:00			`try:`
			`import openvino as ov`
			`except ImportError:`
			`# openvino is not included`
			`pass`

Various fixes (#14786) * Catch openvino error * Remove clip deletion * Update deletion text * Fix timeline not respecting timezone config * Tweaks * More timezone fixes * Fix * More timezone fixes * Fix shm docs 2024-11-04 15:07:57 +01:00			`logger = logging.getLogger(__name__)`

Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00
			`def get_ort_providers(`
Add ability to configure model input dtype (#14659) * Add input type for dtype * Add ability to manually enable TRT execution provider * Formatting 2024-10-29 16:28:05 +01:00			`force_cpu: bool = False, device: str = "AUTO", requires_fp16: bool = False`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`) -> tuple[list[str], list[dict[str, any]]]:`
			`if force_cpu:`
Reduce onnx memory usage (#14285) 2024-10-11 20:03:47 +02:00			`return (`
			`["CPUExecutionProvider"],`
			`[`
			`{`
Model and genai fixes (#14481) * disable mem arena in options for cpu only * add try/except around ollama initialization * update docs 2024-10-21 17:00:45 +02:00			`"enable_cpu_mem_arena": False,`
Reduce onnx memory usage (#14285) 2024-10-11 20:03:47 +02:00			`}`
			`],`
			`)`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00
Detection adjustments (#14329) 2024-10-14 04:34:51 +02:00			`providers = []`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`options = []`

Detection adjustments (#14329) 2024-10-14 04:34:51 +02:00			`for provider in ort.get_available_providers():`
			`if provider == "CUDAExecutionProvider":`
Add support for specifying tensorrt device (#14898) 2024-11-10 15:43:24 +01:00			`device_id = 0 if not device.isdigit() else int(device)`
Detection adjustments (#14329) 2024-10-14 04:34:51 +02:00			`providers.append(provider)`
			`options.append(`
			`{`
			`"arena_extend_strategy": "kSameAsRequested",`
Add support for specifying tensorrt device (#14898) 2024-11-10 15:43:24 +01:00			`"device_id": device_id,`
Detection adjustments (#14329) 2024-10-14 04:34:51 +02:00			`}`
			`)`
			`elif provider == "TensorrtExecutionProvider":`
			`# TensorrtExecutionProvider uses too much memory without options to control it`
Add ability to configure model input dtype (#14659) * Add input type for dtype * Add ability to manually enable TRT execution provider * Formatting 2024-10-29 16:28:05 +01:00			`# so it is not enabled by default`
			`if device == "Tensorrt":`
			`os.makedirs(`
			`"/config/model_cache/tensorrt/ort/trt-engines", exist_ok=True`
			`)`
Add support for specifying tensorrt device (#14898) 2024-11-10 15:43:24 +01:00			`device_id = 0 if not device.isdigit() else int(device)`
Add ability to configure model input dtype (#14659) * Add input type for dtype * Add ability to manually enable TRT execution provider * Formatting 2024-10-29 16:28:05 +01:00			`providers.append(provider)`
			`options.append(`
			`{`
Add support for specifying tensorrt device (#14898) 2024-11-10 15:43:24 +01:00			`"device_id": device_id,`
Add ability to configure model input dtype (#14659) * Add input type for dtype * Add ability to manually enable TRT execution provider * Formatting 2024-10-29 16:28:05 +01:00			`"trt_fp16_enable": requires_fp16`
			`and os.environ.get("USE_FP_16", "True") != "False",`
			`"trt_timing_cache_enable": True,`
			`"trt_engine_cache_enable": True,`
			`"trt_timing_cache_path": "/config/model_cache/tensorrt/ort",`
			`"trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",`
			`}`
			`)`
			`else:`
			`continue`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`elif provider == "OpenVINOExecutionProvider":`
			`os.makedirs("/config/model_cache/openvino/ort", exist_ok=True)`
Detection adjustments (#14329) 2024-10-14 04:34:51 +02:00			`providers.append(provider)`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`options.append(`
			`{`
Reduce onnx memory usage (#14285) 2024-10-11 20:03:47 +02:00			`"arena_extend_strategy": "kSameAsRequested",`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`"cache_dir": "/config/model_cache/openvino/ort",`
Add ability to configure model input dtype (#14659) * Add input type for dtype * Add ability to manually enable TRT execution provider * Formatting 2024-10-29 16:28:05 +01:00			`"device_type": device,`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`}`
			`)`
Reduce onnx memory usage (#14285) 2024-10-11 20:03:47 +02:00			`elif provider == "CPUExecutionProvider":`
Detection adjustments (#14329) 2024-10-14 04:34:51 +02:00			`providers.append(provider)`
Reduce onnx memory usage (#14285) 2024-10-11 20:03:47 +02:00			`options.append(`
			`{`
Model and genai fixes (#14481) * disable mem arena in options for cpu only * add try/except around ollama initialization * update docs 2024-10-21 17:00:45 +02:00			`"enable_cpu_mem_arena": False,`
Reduce onnx memory usage (#14285) 2024-10-11 20:03:47 +02:00			`}`
			`)`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`else:`
Detection adjustments (#14329) 2024-10-14 04:34:51 +02:00			`providers.append(provider)`
Fixes & Tweaks (#14013) * Rework to create util for onnx initialization * Fix shm log * Fix onClick exceptoins 2024-09-27 15:41:48 +02:00			`options.append({})`

			`return (providers, options)`
Openvino models (#14283) * Enable model conversion cache for openvino * Use openvino directly for onnx embeddings if available * Don't fail if zmq is busy 2024-10-11 18:47:23 +02:00

			`class ONNXModelRunner:`
			`"""Run onnx models optimally based on available hardware."""`

			`def __init__(self, model_path: str, device: str, requires_fp16: bool = False):`
			`self.model_path = model_path`
			`self.ort: ort.InferenceSession = None`
			`self.ov: ov.Core = None`
			`providers, options = get_ort_providers(device == "CPU", device, requires_fp16)`
Various fixes (#14786) * Catch openvino error * Remove clip deletion * Update deletion text * Fix timeline not respecting timezone config * Tweaks * More timezone fixes * Fix * More timezone fixes * Fix shm docs 2024-11-04 15:07:57 +01:00			`self.interpreter = None`
Openvino models (#14283) * Enable model conversion cache for openvino * Use openvino directly for onnx embeddings if available * Don't fail if zmq is busy 2024-10-11 18:47:23 +02:00
			`if "OpenVINOExecutionProvider" in providers:`
Various fixes (#14786) * Catch openvino error * Remove clip deletion * Update deletion text * Fix timeline not respecting timezone config * Tweaks * More timezone fixes * Fix * More timezone fixes * Fix shm docs 2024-11-04 15:07:57 +01:00			`try:`
			`# use OpenVINO directly`
			`self.type = "ov"`
			`self.ov = ov.Core()`
			`self.ov.set_property(`
			`{ov.properties.cache_dir: "/config/model_cache/openvino"}`
			`)`
			`self.interpreter = self.ov.compile_model(`
			`model=model_path, device_name=device`
			`)`
			`except Exception as e:`
			`logger.warning(`
			`f"OpenVINO failed to build model, using CPU instead: {e}"`
			`)`
			`self.interpreter = None`

			`# Use ONNXRuntime`
			`if self.interpreter is None:`
Openvino models (#14283) * Enable model conversion cache for openvino * Use openvino directly for onnx embeddings if available * Don't fail if zmq is busy 2024-10-11 18:47:23 +02:00			`self.type = "ort"`
			`self.ort = ort.InferenceSession(`
Misc bugfixes and improvements (#14460) * only save a fixed number of thumbnails if genai is enabled * disable cpu_mem_arena to save on memory until its actually needed * fix search settings pane so it actually saves to the config 2024-10-20 22:14:51 +02:00			`model_path,`
			`providers=providers,`
			`provider_options=options,`
Openvino models (#14283) * Enable model conversion cache for openvino * Use openvino directly for onnx embeddings if available * Don't fail if zmq is busy 2024-10-11 18:47:23 +02:00			`)`

			`def get_input_names(self) -> list[str]:`
			`if self.type == "ov":`
			`input_names = []`

			`for input in self.interpreter.inputs:`
			`input_names.extend(input.names)`

			`return input_names`
			`elif self.type == "ort":`
			`return [input.name for input in self.ort.get_inputs()]`

			`def run(self, input: dict[str, Any]) -> Any:`
			`if self.type == "ov":`
			`infer_request = self.interpreter.create_infer_request()`
			`input_tensor = list(input.values())`

			`if len(input_tensor) == 1:`
			`input_tensor = ov.Tensor(array=input_tensor[0])`
			`else:`
			`input_tensor = ov.Tensor(array=input_tensor)`

			`infer_request.infer(input_tensor)`
			`return [infer_request.get_output_tensor().data]`
			`elif self.type == "ort":`
			`return self.ort.run(None, input)`
Use SVC to normalize and classify faces for recognition (#14835) * Add margin to detected faces for embeddings * Standardize pixel values for face input * Use SVC to classify faces * Clear classifier when new face is added * Formatting * Add dependency 2024-11-06 17:07:29 +01:00

			`class FaceClassificationModel:`
			`def __init__(self, db: SqliteQueueDatabase):`
			`self.db = db`
			`self.labeler: Optional[LabelEncoder] = None`
			`self.classifier: Optional[SVC] = None`

			`def __build_classifier(self) -> None:`
			`faces: list[tuple[str, bytes]] = self.db.execute_sql(`
			`"SELECT id, face_embedding FROM vec_faces"`
			`).fetchall()`
			`embeddings = np.array([deserialize(f[1]) for f in faces])`
			`self.labeler = LabelEncoder()`
			`norms = Normalizer(norm="l2").transform(embeddings)`
			`labels = self.labeler.fit_transform([f[0].split("-")[0] for f in faces])`
			`self.classifier = SVC(kernel="linear", probability=True)`
			`self.classifier.fit(norms, labels)`

			`def clear_classifier(self) -> None:`
			`self.classifier = None`
			`self.labeler = None`

			`def classify_face(self, embedding: np.ndarray) -> Optional[tuple[str, float]]:`
			`if not self.classifier:`
			`self.__build_classifier()`

			`res = self.classifier.predict([embedding])`

Fix check 2024-11-08 00:26:49 +01:00			`if res is None:`
Use SVC to normalize and classify faces for recognition (#14835) * Add margin to detected faces for embeddings * Standardize pixel values for face input * Use SVC to classify faces * Clear classifier when new face is added * Formatting * Add dependency 2024-11-06 17:07:29 +01:00			`return None`

			`label = res[0]`
			`probabilities = self.classifier.predict_proba([embedding])[0]`
			`return (`
			`self.labeler.inverse_transform([label])[0],`
			`round(probabilities[label], 2),`
			`)`