Reindex events in batches to reduce memory and CPU load (#12124)

This commit is contained in:
Jason Hunter 2024-06-23 15:27:21 -04:00 committed by Nicolas Mowen
parent 9e825811f2
commit 0d7a148897

View File

@ -12,6 +12,9 @@ from playhouse.shortcuts import model_to_dict
from frigate.models import Event from frigate.models import Event
# Squelch posthog logging
logging.getLogger("chromadb.telemetry.product.posthog").setLevel(logging.CRITICAL)
# Hotswap the sqlite3 module for Chroma compatibility # Hotswap the sqlite3 module for Chroma compatibility
try: try:
from chromadb import Collection from chromadb import Collection
# NOTE(review): reconstructed from a two-column diff rendering; the `def` line
# sits above the visible hunk — name taken from the commit title. Confirm
# against the enclosing `class Embeddings`.
def reindex(self) -> None:
    """Rebuild the thumbnail and description collections from the event DB.

    Events are fetched in fixed-size pages (newest first) so only one batch
    of decoded thumbnails is held in memory at a time, bounding memory and
    CPU load while the whole table is re-embedded.
    """
    # Drop all existing embeddings before rebuilding.
    self.client.reset()

    st = time.time()
    # Running counts across all pages, reported at the end.
    totals = {
        "thumb": 0,
        "desc": 0,
    }

    batch_size = 100
    current_page = 1

    def _fetch_page(page: int):
        # One page of events that have a thumbnail and either a clip or a
        # snapshot. peewee overloads bitwise operators for query composition,
        # and `|` binds tighter than `==`, so each equality MUST be
        # parenthesized — the original `(Event.has_clip == True |
        # Event.has_snapshot == True)` did not express "clip OR snapshot".
        return (
            Event.select()
            .where(
                ((Event.has_clip == True) | (Event.has_snapshot == True))
                & Event.thumbnail.is_null(False)
            )
            .order_by(Event.start_time.desc())
            .paginate(page, batch_size)
        )

    events = _fetch_page(current_page)
    while len(events) > 0:
        # Per-batch accumulators, released when the batch is upserted.
        thumbnails = {"ids": [], "images": [], "metadatas": []}
        descriptions = {"ids": [], "documents": [], "metadatas": []}

        event: Event
        for event in events:
            metadata = get_metadata(event)
            # Thumbnails are stored base64-encoded; decode to an RGB array
            # for the image embedding model.
            thumbnail = base64.b64decode(event.thumbnail)
            img = np.array(Image.open(io.BytesIO(thumbnail)).convert("RGB"))
            thumbnails["ids"].append(event.id)
            thumbnails["images"].append(img)
            thumbnails["metadatas"].append(metadata)
            # Descriptions are optional; only index events that have one.
            if event.data.get("description") is not None:
                descriptions["ids"].append(event.id)
                descriptions["documents"].append(event.data["description"])
                descriptions["metadatas"].append(metadata)

        if len(thumbnails["ids"]) > 0:
            totals["thumb"] += len(thumbnails["ids"])
            self.thumbnail.upsert(
                images=thumbnails["images"],
                metadatas=thumbnails["metadatas"],
                ids=thumbnails["ids"],
            )

        if len(descriptions["ids"]) > 0:
            totals["desc"] += len(descriptions["ids"])
            self.description.upsert(
                documents=descriptions["documents"],
                metadatas=descriptions["metadatas"],
                ids=descriptions["ids"],
            )

        current_page += 1
        events = _fetch_page(current_page)

    logger.info(
        "Embedded %d thumbnails and %d descriptions in %s seconds",
        totals["thumb"],
        totals["desc"],
        time.time() - st,
    )