blakeblackshear.frigate/frigate/embeddings/util.py
Josh Hawkins 24ac9f3e5a
Use sqlite-vec extension instead of chromadb for embeddings (#14163)
* swap sqlite_vec for chroma in requirements

* load sqlite_vec in embeddings manager

* remove chroma and revamp Embeddings class for sqlite_vec

* manual minilm onnx inference

* remove chroma in clip model

* migrate api from chroma to sqlite_vec

* migrate event cleanup from chroma to sqlite_vec

* migrate embedding maintainer from chroma to sqlite_vec

* genai description for sqlite_vec

* load sqlite_vec in main thread db

* extend the SqliteQueueDatabase class and use peewee db.execute_sql

* search with Event type for similarity

* fix similarity search

* install and add comment about transformers

* fix normalization

* add id filter

* clean up

* clean up

* fully remove chroma and add transformers env var

* readd uvicorn for fastapi

* readd tokenizer parallelism env var

* remove chroma from docs

* remove chroma from UI

* try removing custom pysqlite3 build

* hard code limit

* optimize queries

* revert explore query

* fix query

* keep building pysqlite3

* single pass fetch and process

* remove unnecessary re-embed

* update deps

* move SqliteVecQueueDatabase to db directory

* make search thumbnail take up full size of results box

* improve typing

* improve model downloading and add status screen

* daemon downloading thread

* catch case when semantic search is disabled

* fix typing

* build sqlite_vec from source

* resolve conflict

* file permissions

* try build deps

* remove sources

* sources

* fix thread start

* include git in build

* reorder embeddings after detectors are started

* build with sqlite amalgamation

* non-platform specific

* use wget instead of curl

* remove unzip -d

* remove sqlite_vec from requirements and load the compiled version

* fix build

* avoid race in db connection

* add scale_factor and bias to description zscore normalization
2024-10-07 14:30:45 -06:00

54 lines
1.4 KiB
Python

"""Z-score normalization for search distance."""
import math
class ZScoreNormalization:
def __init__(self, scale_factor: float = 1.0, bias: float = 0.0):
"""Initialize with optional scaling and bias adjustments."""
"""scale_factor adjusts the magnitude of each score"""
"""bias will artificially shift the entire distribution upwards"""
self.n = 0
self.mean = 0
self.m2 = 0
self.scale_factor = scale_factor
self.bias = bias
@property
def variance(self):
return self.m2 / (self.n - 1) if self.n > 1 else 0.0
@property
def stddev(self):
return math.sqrt(self.variance)
def normalize(self, distances: list[float]):
self._update(distances)
if self.stddev == 0:
return distances
return [
(x - self.mean) / self.stddev * self.scale_factor + self.bias
for x in distances
]
def _update(self, distances: list[float]):
for x in distances:
self.n += 1
delta = x - self.mean
self.mean += delta / self.n
delta2 = x - self.mean
self.m2 += delta * delta2
def to_dict(self):
return {
"n": self.n,
"mean": self.mean,
"m2": self.m2,
}
def from_dict(self, data: dict):
self.n = data["n"]
self.mean = data["mean"]
self.m2 = data["m2"]
return self