Always run embedding descs one by one (#14365)

This commit is contained in:
Nicolas Mowen 2024-10-15 07:40:45 -06:00 committed by GitHub
parent 644069fb23
commit 25043278ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -175,15 +175,11 @@ class Embeddings:
return embedding
def batch_upsert_description(self, event_descriptions: dict[str, str]) -> ndarray:
descs = list(event_descriptions.values())
# upsert embeddings one by one to avoid token limit
embeddings = []
try:
embeddings = self.text_embedding(descs)
except ort.RuntimeException:
half_size = len(descs) / 2
embeddings = []
embeddings.extend(self.text_embedding(descs[0:half_size]))
embeddings.extend(self.text_embedding(descs[half_size:]))
for desc in event_descriptions.values():
embeddings.append(self.text_embedding([desc]))
ids = list(event_descriptions.keys())