blakeblackshear.frigate/frigate/util/downloader.py

import logging
import os
import threading
import time
from pathlib import Path
from typing import Callable, List

import requests

from frigate.comms.inter_process import InterProcessRequestor
from frigate.const import UPDATE_MODEL_STATE
from frigate.types import ModelStatusTypesEnum

logger = logging.getLogger(__name__)


class FileLock:
    """Cooperative lock represented by a ``<path>.lock`` marker file.

    The lock is held while the marker file exists. ``acquire`` creates the
    marker with exclusive-create semantics and polls until it succeeds;
    ``release`` deletes it. Instances can also be used as context managers::

        with FileLock(path):
            ...

    NOTE: constructing a ``FileLock`` unconditionally deletes any existing
    marker for the same path, so a marker left over from an interrupted run
    cannot block ``acquire`` forever. The same deletion also applies to a
    marker currently held through another ``FileLock`` instance.
    """

    def __init__(self, path):
        """Remember ``path`` and clear any pre-existing ``<path>.lock`` marker."""
        self.path = path
        self.lock_file = f"{path}.lock"

        # we have not acquired the lock yet so it should not exist
        try:
            os.remove(self.lock_file)
        except (OSError, ValueError):
            # best effort: OSError covers "nothing to remove" and "could not
            # be removed"; ValueError covers a path the OS cannot represent
            pass

    def acquire(self):
        """Block until the marker file has been created by this call.

        Missing parent directories are created first. There is no timeout:
        while the marker exists the call retries every 0.1 seconds.
        """
        parent_dir = os.path.dirname(self.lock_file)
        # dirname is "" for a bare filename; os.makedirs("") raises, and the
        # current directory needs no creation anyway
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        while True:
            try:
                # mode "x" fails with FileExistsError if the marker exists
                with open(self.lock_file, "x"):
                    return
            except FileExistsError:
                time.sleep(0.1)

    def release(self):
        """Delete the marker file; a marker that is already gone is ignored."""
        try:
            os.remove(self.lock_file)
        except FileNotFoundError:
            pass

    def __enter__(self):
        """Acquire the lock and return this instance."""
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the lock; exceptions from the ``with`` body propagate."""
        self.release()


class ModelDownloader:
    """Fetch a model's files on a background thread and publish their state.

    For every entry in ``file_names`` the worker checks whether
    ``download_path/<file_name>`` exists and, if not, calls ``download_func``
    with that destination path. State changes are published through
    ``UPDATE_MODEL_STATE`` messages keyed ``"<model_name>-<file_name>"``.
    """

    def __init__(
        self,
        model_name: str,
        download_path: str,
        file_names: List[str],
        download_func: Callable[[str], None],
        silent: bool = False,
    ):
        """Store the download configuration; no download is started here.

        Args:
            model_name: Prefix used in the published model state keys.
            download_path: Directory the files are expected in.
            file_names: File names (relative to ``download_path``) to ensure.
            download_func: Called with the full destination path of each
                missing file; expected to create that file.
            silent: Stored on the instance; not read by any method of this
                class.
        """
        self.model_name = model_name
        self.download_path = download_path
        self.file_names = file_names
        self.download_func = download_func
        self.silent = silent
        # this requestor is stopped by the worker thread when it finishes
        self.requestor = InterProcessRequestor()
        self.download_thread = None
        self.download_complete = threading.Event()

    def ensure_model_files(self):
        """Publish ``downloading`` for every file and start the worker thread.

        Returns immediately; use ``wait_for_download`` to block until the
        worker has finished.
        """
        self.mark_files_state(
            self.requestor,
            self.model_name,
            self.file_names,
            ModelStatusTypesEnum.downloading,
        )
        self.download_thread = threading.Thread(
            target=self._download_models,
            name=f"_download_model_{self.model_name}",
            daemon=True,
        )
        self.download_thread.start()

    def _download_models(self):
        """Worker body: download each missing file, then publish ``downloaded``.

        An exception from ``download_func`` still propagates out of the
        thread and the remaining files are not processed, but the requestor
        is always stopped and ``download_complete`` is always set so that
        ``wait_for_download`` cannot block forever on a failed download.
        """
        try:
            for file_name in self.file_names:
                path = os.path.join(self.download_path, file_name)
                lock = FileLock(path)

                if not os.path.exists(path):
                    lock.acquire()
                    try:
                        # re-check: the file may have appeared while we were
                        # waiting for the lock
                        if not os.path.exists(path):
                            self.download_func(path)
                    finally:
                        lock.release()

                self.mark_files_state(
                    self.requestor,
                    self.model_name,
                    [file_name],
                    ModelStatusTypesEnum.downloaded,
                )
        finally:
            try:
                self.requestor.stop()
            finally:
                # set even if stopping the requestor failed
                self.download_complete.set()

    @staticmethod
    def download_from_url(url: str, save_path: str, silent: bool = False):
        """Stream ``url`` to ``save_path`` via a sibling ``.part`` file.

        The response body is written to ``<save_path>.part`` and renamed to
        ``save_path`` only after the whole body has been written, so
        ``save_path`` never holds a partial download.

        Args:
            url: Source URL; redirects are followed.
            save_path: Final destination; parent directories are created.
            silent: When true, the start/complete info messages are skipped
                (errors are still logged).

        Raises:
            Exception: Any error from the request, the write or the rename is
                logged and re-raised after the ``.part`` file is removed.
        """
        temporary_filename = Path(save_path).with_name(
            os.path.basename(save_path) + ".part"
        )
        temporary_filename.parent.mkdir(parents=True, exist_ok=True)

        if not silent:
            logger.info(f"Downloading model file from: {url}")

        try:
            # NOTE(review): no timeout is passed, so a stalled server can
            # block this call indefinitely - consider adding one.
            with requests.get(url, stream=True, allow_redirects=True) as r:
                r.raise_for_status()
                with open(temporary_filename, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

            temporary_filename.rename(save_path)
        except Exception as e:
            logger.error(f"Error downloading model: {str(e)}")
            # best effort: do not leave a truncated .part file behind, and do
            # not let a cleanup failure mask the original error
            try:
                temporary_filename.unlink()
            except OSError:
                pass
            raise

        if not silent:
            logger.info(f"Downloading complete: {url}")

    @staticmethod
    def mark_files_state(
        requestor: InterProcessRequestor,
        model_name: str,
        files: list[str],
        state: ModelStatusTypesEnum,
    ) -> None:
        """Publish ``state`` for each file under the key ``"<model_name>-<file>"``.

        Args:
            requestor: Requestor used to send the ``UPDATE_MODEL_STATE`` messages.
            model_name: Prefix of the published model key.
            files: File names to publish a state for.
            state: State value sent for every file.
        """
        for file_name in files:
            requestor.send_data(
                UPDATE_MODEL_STATE,
                {
                    "model": f"{model_name}-{file_name}",
                    "state": state,
                },
            )

    def wait_for_download(self):
        """Block until the worker thread has signalled ``download_complete``."""
        self.download_complete.wait()
Use sqlite-vec extension instead of chromadb for embeddings (#14163) * swap sqlite_vec for chroma in requirements * load sqlite_vec in embeddings manager * remove chroma and revamp Embeddings class for sqlite_vec * manual minilm onnx inference * remove chroma in clip model * migrate api from chroma to sqlite_vec * migrate event cleanup from chroma to sqlite_vec * migrate embedding maintainer from chroma to sqlite_vec * genai description for sqlite_vec * load sqlite_vec in main thread db * extend the SqliteQueueDatabase class and use peewee db.execute_sql * search with Event type for similarity * fix similarity search * install and add comment about transformers * fix normalization * add id filter * clean up * clean up * fully remove chroma and add transformers env var * readd uvicorn for fastapi * readd tokenizer parallelism env var * remove chroma from docs * remove chroma from UI * try removing custom pysqlite3 build * hard code limit * optimize queries * revert explore query * fix query * keep building pysqlite3 * single pass fetch and process * remove unnecessary re-embed * update deps * move SqliteVecQueueDatabase to db directory * make search thumbnail take up full size of results box * improve typing * improve model downloading and add status screen * daemon downloading thread * catch case when semantic search is disabled * fix typing * build sqlite_vec from source * resolve conflict * file permissions * try build deps * remove sources * sources * fix thread start * include git in build * reorder embeddings after detectors are started * build with sqlite amalgamation * non-platform specific * use wget instead of curl * remove unzip -d * remove sqlite_vec from requirements and load the compiled version * fix build * avoid race in db connection * add scale_factor and bias to description zscore normalization 2024-10-07 22:30:45 +02:00			`import logging`
			`import os`
			`import threading`
			`import time`
			`from pathlib import Path`
			`from typing import Callable, List`

			`import requests`

			`from frigate.comms.inter_process import InterProcessRequestor`
			`from frigate.const import UPDATE_MODEL_STATE`
			`from frigate.types import ModelStatusTypesEnum`

			`logger = logging.getLogger(__name__)`


			`class FileLock:`
			`def __init__(self, path):`
			`self.path = path`
			`self.lock_file = f"{path}.lock"`

Fixes for model downloading (#14305) * Use different requestor for downloaders * Handle case where lock is left over from failed partial download * close requestor * Formatting 2024-10-12 20:36:10 +02:00			`# we have not acquired the lock yet so it should not exist`
			`if os.path.exists(self.lock_file):`
			`try:`
			`os.remove(self.lock_file)`
			`except Exception:`
			`pass`

Use sqlite-vec extension instead of chromadb for embeddings (#14163) * swap sqlite_vec for chroma in requirements * load sqlite_vec in embeddings manager * remove chroma and revamp Embeddings class for sqlite_vec * manual minilm onnx inference * remove chroma in clip model * migrate api from chroma to sqlite_vec * migrate event cleanup from chroma to sqlite_vec * migrate embedding maintainer from chroma to sqlite_vec * genai description for sqlite_vec * load sqlite_vec in main thread db * extend the SqliteQueueDatabase class and use peewee db.execute_sql * search with Event type for similarity * fix similarity search * install and add comment about transformers * fix normalization * add id filter * clean up * clean up * fully remove chroma and add transformers env var * readd uvicorn for fastapi * readd tokenizer parallelism env var * remove chroma from docs * remove chroma from UI * try removing custom pysqlite3 build * hard code limit * optimize queries * revert explore query * fix query * keep building pysqlite3 * single pass fetch and process * remove unnecessary re-embed * update deps * move SqliteVecQueueDatabase to db directory * make search thumbnail take up full size of results box * improve typing * improve model downloading and add status screen * daemon downloading thread * catch case when semantic search is disabled * fix typing * build sqlite_vec from source * resolve conflict * file permissions * try build deps * remove sources * sources * fix thread start * include git in build * reorder embeddings after detectors are started * build with sqlite amalgamation * non-platform specific * use wget instead of curl * remove unzip -d * remove sqlite_vec from requirements and load the compiled version * fix build * avoid race in db connection * add scale_factor and bias to description zscore normalization 2024-10-07 22:30:45 +02:00			`def acquire(self):`
			`parent_dir = os.path.dirname(self.lock_file)`
			`os.makedirs(parent_dir, exist_ok=True)`

			`while True:`
			`try:`
			`with open(self.lock_file, "x"):`
			`return`
			`except FileExistsError:`
			`time.sleep(0.1)`

			`def release(self):`
			`try:`
			`os.remove(self.lock_file)`
			`except FileNotFoundError:`
			`pass`


			`class ModelDownloader:`
			`def __init__(`
			`self,`
			`model_name: str,`
			`download_path: str,`
			`file_names: List[str],`
			`download_func: Callable[[str], None],`
			`silent: bool = False,`
			`):`
			`self.model_name = model_name`
			`self.download_path = download_path`
			`self.file_names = file_names`
			`self.download_func = download_func`
			`self.silent = silent`
Fixes for model downloading (#14305) * Use different requestor for downloaders * Handle case where lock is left over from failed partial download * close requestor * Formatting 2024-10-12 20:36:10 +02:00			`self.requestor = InterProcessRequestor()`
Use sqlite-vec extension instead of chromadb for embeddings (#14163) * swap sqlite_vec for chroma in requirements * load sqlite_vec in embeddings manager * remove chroma and revamp Embeddings class for sqlite_vec * manual minilm onnx inference * remove chroma in clip model * migrate api from chroma to sqlite_vec * migrate event cleanup from chroma to sqlite_vec * migrate embedding maintainer from chroma to sqlite_vec * genai description for sqlite_vec * load sqlite_vec in main thread db * extend the SqliteQueueDatabase class and use peewee db.execute_sql * search with Event type for similarity * fix similarity search * install and add comment about transformers * fix normalization * add id filter * clean up * clean up * fully remove chroma and add transformers env var * readd uvicorn for fastapi * readd tokenizer parallelism env var * remove chroma from docs * remove chroma from UI * try removing custom pysqlite3 build * hard code limit * optimize queries * revert explore query * fix query * keep building pysqlite3 * single pass fetch and process * remove unnecessary re-embed * update deps * move SqliteVecQueueDatabase to db directory * make search thumbnail take up full size of results box * improve typing * improve model downloading and add status screen * daemon downloading thread * catch case when semantic search is disabled * fix typing * build sqlite_vec from source * resolve conflict * file permissions * try build deps * remove sources * sources * fix thread start * include git in build * reorder embeddings after detectors are started * build with sqlite amalgamation * non-platform specific * use wget instead of curl * remove unzip -d * remove sqlite_vec from requirements and load the compiled version * fix build * avoid race in db connection * add scale_factor and bias to description zscore normalization 2024-10-07 22:30:45 +02:00			`self.download_thread = None`
			`self.download_complete = threading.Event()`

			`def ensure_model_files(self):`
Embeddings fixes (#14269) * Add debugging logs for more info * Improve timeout handling * Fix event cleanup * Handle zmq error and empty data * Don't run download * Remove unneeded embeddings creations * Update timouts * Init models immediately * Fix order of init * Cleanup 2024-10-10 23:37:43 +02:00			`self.mark_files_state(`
			`self.requestor,`
			`self.model_name,`
			`self.file_names,`
			`ModelStatusTypesEnum.downloading,`
			`)`
Use sqlite-vec extension instead of chromadb for embeddings (#14163) * swap sqlite_vec for chroma in requirements * load sqlite_vec in embeddings manager * remove chroma and revamp Embeddings class for sqlite_vec * manual minilm onnx inference * remove chroma in clip model * migrate api from chroma to sqlite_vec * migrate event cleanup from chroma to sqlite_vec * migrate embedding maintainer from chroma to sqlite_vec * genai description for sqlite_vec * load sqlite_vec in main thread db * extend the SqliteQueueDatabase class and use peewee db.execute_sql * search with Event type for similarity * fix similarity search * install and add comment about transformers * fix normalization * add id filter * clean up * clean up * fully remove chroma and add transformers env var * readd uvicorn for fastapi * readd tokenizer parallelism env var * remove chroma from docs * remove chroma from UI * try removing custom pysqlite3 build * hard code limit * optimize queries * revert explore query * fix query * keep building pysqlite3 * single pass fetch and process * remove unnecessary re-embed * update deps * move SqliteVecQueueDatabase to db directory * make search thumbnail take up full size of results box * improve typing * improve model downloading and add status screen * daemon downloading thread * catch case when semantic search is disabled * fix typing * build sqlite_vec from source * resolve conflict * file permissions * try build deps * remove sources * sources * fix thread start * include git in build * reorder embeddings after detectors are started * build with sqlite amalgamation * non-platform specific * use wget instead of curl * remove unzip -d * remove sqlite_vec from requirements and load the compiled version * fix build * avoid race in db connection * add scale_factor and bias to description zscore normalization 2024-10-07 22:30:45 +02:00			`self.download_thread = threading.Thread(`
			`target=self._download_models,`
			`name=f"_download_model_{self.model_name}",`
			`daemon=True,`
			`)`
			`self.download_thread.start()`

			`def _download_models(self):`
			`for file_name in self.file_names:`
			`path = os.path.join(self.download_path, file_name)`
			`lock = FileLock(path)`

			`if not os.path.exists(path):`
			`lock.acquire()`
			`try:`
			`if not os.path.exists(path):`
			`self.download_func(path)`
			`finally:`
			`lock.release()`

			`self.requestor.send_data(`
			`UPDATE_MODEL_STATE,`
			`{`
			`"model": f"{self.model_name}-{file_name}",`
			`"state": ModelStatusTypesEnum.downloaded,`
			`},`
			`)`

Fixes for model downloading (#14305) * Use different requestor for downloaders * Handle case where lock is left over from failed partial download * close requestor * Formatting 2024-10-12 20:36:10 +02:00			`self.requestor.stop()`
Use sqlite-vec extension instead of chromadb for embeddings (#14163) * swap sqlite_vec for chroma in requirements * load sqlite_vec in embeddings manager * remove chroma and revamp Embeddings class for sqlite_vec * manual minilm onnx inference * remove chroma in clip model * migrate api from chroma to sqlite_vec * migrate event cleanup from chroma to sqlite_vec * migrate embedding maintainer from chroma to sqlite_vec * genai description for sqlite_vec * load sqlite_vec in main thread db * extend the SqliteQueueDatabase class and use peewee db.execute_sql * search with Event type for similarity * fix similarity search * install and add comment about transformers * fix normalization * add id filter * clean up * clean up * fully remove chroma and add transformers env var * readd uvicorn for fastapi * readd tokenizer parallelism env var * remove chroma from docs * remove chroma from UI * try removing custom pysqlite3 build * hard code limit * optimize queries * revert explore query * fix query * keep building pysqlite3 * single pass fetch and process * remove unnecessary re-embed * update deps * move SqliteVecQueueDatabase to db directory * make search thumbnail take up full size of results box * improve typing * improve model downloading and add status screen * daemon downloading thread * catch case when semantic search is disabled * fix typing * build sqlite_vec from source * resolve conflict * file permissions * try build deps * remove sources * sources * fix thread start * include git in build * reorder embeddings after detectors are started * build with sqlite amalgamation * non-platform specific * use wget instead of curl * remove unzip -d * remove sqlite_vec from requirements and load the compiled version * fix build * avoid race in db connection * add scale_factor and bias to description zscore normalization 2024-10-07 22:30:45 +02:00			`self.download_complete.set()`

			`@staticmethod`
			`def download_from_url(url: str, save_path: str, silent: bool = False):`
			`temporary_filename = Path(save_path).with_name(`
			`os.path.basename(save_path) + ".part"`
			`)`
			`temporary_filename.parent.mkdir(parents=True, exist_ok=True)`

			`if not silent:`
			`logger.info(f"Downloading model file from: {url}")`

			`try:`
			`with requests.get(url, stream=True, allow_redirects=True) as r:`
			`r.raise_for_status()`
			`with open(temporary_filename, "wb") as f:`
			`for chunk in r.iter_content(chunk_size=8192):`
			`f.write(chunk)`

			`temporary_filename.rename(save_path)`
			`except Exception as e:`
			`logger.error(f"Error downloading model: {str(e)}")`
			`raise`

			`if not silent:`
			`logger.info(f"Downloading complete: {url}")`

Embeddings fixes (#14269) * Add debugging logs for more info * Improve timeout handling * Fix event cleanup * Handle zmq error and empty data * Don't run download * Remove unneeded embeddings creations * Update timouts * Init models immediately * Fix order of init * Cleanup 2024-10-10 23:37:43 +02:00			`@staticmethod`
			`def mark_files_state(`
			`requestor: InterProcessRequestor,`
			`model_name: str,`
			`files: list[str],`
			`state: ModelStatusTypesEnum,`
			`) -> None:`
			`for file_name in files:`
			`requestor.send_data(`
			`UPDATE_MODEL_STATE,`
			`{`
			`"model": f"{model_name}-{file_name}",`
			`"state": state,`
			`},`
			`)`

Use sqlite-vec extension instead of chromadb for embeddings (#14163) * swap sqlite_vec for chroma in requirements * load sqlite_vec in embeddings manager * remove chroma and revamp Embeddings class for sqlite_vec * manual minilm onnx inference * remove chroma in clip model * migrate api from chroma to sqlite_vec * migrate event cleanup from chroma to sqlite_vec * migrate embedding maintainer from chroma to sqlite_vec * genai description for sqlite_vec * load sqlite_vec in main thread db * extend the SqliteQueueDatabase class and use peewee db.execute_sql * search with Event type for similarity * fix similarity search * install and add comment about transformers * fix normalization * add id filter * clean up * clean up * fully remove chroma and add transformers env var * readd uvicorn for fastapi * readd tokenizer parallelism env var * remove chroma from docs * remove chroma from UI * try removing custom pysqlite3 build * hard code limit * optimize queries * revert explore query * fix query * keep building pysqlite3 * single pass fetch and process * remove unnecessary re-embed * update deps * move SqliteVecQueueDatabase to db directory * make search thumbnail take up full size of results box * improve typing * improve model downloading and add status screen * daemon downloading thread * catch case when semantic search is disabled * fix typing * build sqlite_vec from source * resolve conflict * file permissions * try build deps * remove sources * sources * fix thread start * include git in build * reorder embeddings after detectors are started * build with sqlite amalgamation * non-platform specific * use wget instead of curl * remove unzip -d * remove sqlite_vec from requirements and load the compiled version * fix build * avoid race in db connection * add scale_factor and bias to description zscore normalization 2024-10-07 22:30:45 +02:00			`def wait_for_download(self):`
			`self.download_complete.wait()`