* fix embeddings reindex

- always increment the processed objects counter for every event to prevent division by zero
- ensure an event's description still gets processed even if the event has no thumbnail

* clean up

* Add newer labels to default attribute map

---------

Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>
This commit is contained in:
Josh Hawkins 2025-07-17 08:29:50 -05:00 committed by GitHub
parent 3eb3797bc5
commit ca1c98eab8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 9 additions and 10 deletions

View File

@@ -26,6 +26,7 @@ DEFAULT_ATTRIBUTE_LABEL_MAP = {
"car": [
"amazon",
"an_post",
"canada_post",
"dhl",
"dpd",
"fedex",
@@ -35,6 +36,7 @@ DEFAULT_ATTRIBUTE_LABEL_MAP = {
"postnl",
"postnord",
"purolator",
"royal_mail",
"ups",
"usps",
],

View File

@@ -334,26 +334,23 @@ class Embeddings:
.paginate(current_page, batch_size)
)
while len(events) > 0:
while events:
event: Event
batch_thumbs = {}
batch_descs = {}
for event in events:
thumbnail = get_event_thumbnail_bytes(event)
if thumbnail is None:
continue
batch_thumbs[event.id] = thumbnail
totals["thumbnails"] += 1
totals["processed_objects"] += 1
if description := event.data.get("description", "").strip():
batch_descs[event.id] = description
totals["descriptions"] += 1
totals["processed_objects"] += 1
if thumbnail := get_event_thumbnail_bytes(event):
batch_thumbs[event.id] = thumbnail
totals["thumbnails"] += 1
# run batch embedding
if batch_thumbs:
self.batch_embed_thumbnail(batch_thumbs)
if batch_descs: