blakeblackshear.frigate/frigate/util/downloader.py
Josh Hawkins 24ac9f3e5a
Use sqlite-vec extension instead of chromadb for embeddings (#14163)
* swap sqlite_vec for chroma in requirements

* load sqlite_vec in embeddings manager

* remove chroma and revamp Embeddings class for sqlite_vec

* manual minilm onnx inference

* remove chroma in clip model

* migrate api from chroma to sqlite_vec

* migrate event cleanup from chroma to sqlite_vec

* migrate embedding maintainer from chroma to sqlite_vec

* genai description for sqlite_vec

* load sqlite_vec in main thread db

* extend the SqliteQueueDatabase class and use peewee db.execute_sql

* search with Event type for similarity

* fix similarity search

* install and add comment about transformers

* fix normalization

* add id filter

* clean up

* clean up

* fully remove chroma and add transformers env var

* readd uvicorn for fastapi

* readd tokenizer parallelism env var

* remove chroma from docs

* remove chroma from UI

* try removing custom pysqlite3 build

* hard code limit

* optimize queries

* revert explore query

* fix query

* keep building pysqlite3

* single pass fetch and process

* remove unnecessary re-embed

* update deps

* move SqliteVecQueueDatabase to db directory

* make search thumbnail take up full size of results box

* improve typing

* improve model downloading and add status screen

* daemon downloading thread

* catch case when semantic search is disabled

* fix typing

* build sqlite_vec from source

* resolve conflict

* file permissions

* try build deps

* remove sources

* sources

* fix thread start

* include git in build

* reorder embeddings after detectors are started

* build with sqlite amalgamation

* non-platform specific

* use wget instead of curl

* remove unzip -d

* remove sqlite_vec from requirements and load the compiled version

* fix build

* avoid race in db connection

* add scale_factor and bias to description zscore normalization
2024-10-07 14:30:45 -06:00

124 lines
3.5 KiB
Python

import logging
import os
import threading
import time
from pathlib import Path
from typing import Callable, List
import requests
from frigate.comms.inter_process import InterProcessRequestor
from frigate.const import UPDATE_MODEL_STATE
from frigate.types import ModelStatusTypesEnum
logger = logging.getLogger(__name__)
class FileLock:
    """Cross-process lock based on atomic lock-file creation.

    ``acquire`` creates ``<path>.lock`` with the exclusive ``"x"`` open mode,
    which fails atomically if the file already exists; contenders poll until
    the holder removes the file. Usable as a context manager so the lock is
    always released, even when the guarded work raises.
    """

    def __init__(self, path: str):
        # path being protected; the lock file lives right next to it
        self.path = path
        self.lock_file = f"{path}.lock"

    def acquire(self) -> None:
        """Block until the lock file can be created exclusively."""
        parent_dir = os.path.dirname(self.lock_file)

        # dirname is "" for a bare filename; makedirs("") would raise
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        while True:
            try:
                # "x" mode raises FileExistsError if another process holds the lock
                with open(self.lock_file, "x"):
                    return
            except FileExistsError:
                time.sleep(0.1)

    def release(self) -> None:
        """Release the lock; a no-op if it is not currently held."""
        try:
            os.remove(self.lock_file)
        except FileNotFoundError:
            pass

    def __enter__(self) -> "FileLock":
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.release()
class ModelDownloader:
    """Downloads a set of model files on a daemon background thread.

    Publishes per-file state (downloading -> downloaded) over the
    inter-process requestor so the UI can show progress, and coordinates
    with other Frigate processes via FileLock so only one process
    downloads any given file.
    """

    def __init__(
        self,
        model_name: str,
        download_path: str,
        file_names: List[str],
        download_func: Callable[[str], None],
        silent: bool = False,
    ):
        self.model_name = model_name
        self.download_path = download_path
        self.file_names = file_names
        # invoked with the destination path of each missing file
        self.download_func = download_func
        self.silent = silent
        self.requestor = InterProcessRequestor()
        self.download_thread: threading.Thread = None
        # set once the background thread has processed every file
        self.download_complete = threading.Event()

    def ensure_model_files(self) -> None:
        """Mark every file as downloading and start the background thread."""
        for file in self.file_names:
            self.requestor.send_data(
                UPDATE_MODEL_STATE,
                {
                    "model": f"{self.model_name}-{file}",
                    "state": ModelStatusTypesEnum.downloading,
                },
            )
        self.download_thread = threading.Thread(
            target=self._download_models,
            name=f"_download_model_{self.model_name}",
            daemon=True,
        )
        self.download_thread.start()

    def _download_models(self) -> None:
        """Download each missing file under a cross-process lock, then signal completion."""
        try:
            for file_name in self.file_names:
                path = os.path.join(self.download_path, file_name)
                lock = FileLock(path)

                if not os.path.exists(path):
                    lock.acquire()
                    try:
                        # re-check under the lock: another process may have
                        # completed the download while we waited
                        if not os.path.exists(path):
                            self.download_func(path)
                    finally:
                        lock.release()

                self.requestor.send_data(
                    UPDATE_MODEL_STATE,
                    {
                        "model": f"{self.model_name}-{file_name}",
                        "state": ModelStatusTypesEnum.downloaded,
                    },
                )
        finally:
            # always unblock wait_for_download(), even if a download raised;
            # otherwise waiters would hang forever on a dead daemon thread
            self.download_complete.set()

    @staticmethod
    def download_from_url(url: str, save_path: str, silent: bool = False) -> None:
        """Stream ``url`` to ``save_path``.

        Writes to a ``.part`` temp file first and renames on success, so a
        partial download never appears at the final path. Raises on any
        network or HTTP error after logging it.
        """
        temporary_filename = Path(save_path).with_name(
            os.path.basename(save_path) + ".part"
        )
        temporary_filename.parent.mkdir(parents=True, exist_ok=True)

        if not silent:
            logger.info(f"Downloading model file from: {url}")

        try:
            # (connect, read) timeouts keep a hung connection from
            # stalling the download thread indefinitely
            with requests.get(
                url, stream=True, allow_redirects=True, timeout=(10, 60)
            ) as r:
                r.raise_for_status()
                with open(temporary_filename, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

            temporary_filename.rename(save_path)
        except Exception as e:
            logger.error(f"Error downloading model: {str(e)}")
            # don't leave a stale partial file behind for the next attempt
            temporary_filename.unlink(missing_ok=True)
            raise

        if not silent:
            logger.info(f"Downloading complete: {url}")

    def wait_for_download(self) -> None:
        """Block until the background thread has finished (or failed)."""
        self.download_complete.wait()