Always run embedding descs one by one (#14365)

This commit is contained in:
Nicolas Mowen 2024-10-15 07:40:45 -06:00 committed by GitHub
parent 644069fb23
commit 25043278ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -175,15 +175,11 @@ class Embeddings:
return embedding return embedding
def batch_upsert_description(self, event_descriptions: dict[str, str]) -> ndarray: def batch_upsert_description(self, event_descriptions: dict[str, str]) -> ndarray:
descs = list(event_descriptions.values()) # upsert embeddings one by one to avoid token limit
embeddings = []
try: for desc in event_descriptions.values():
embeddings = self.text_embedding(descs) embeddings.append(self.text_embedding([desc]))
except ort.RuntimeException:
half_size = len(descs) / 2
embeddings = []
embeddings.extend(self.text_embedding(descs[0:half_size]))
embeddings.extend(self.text_embedding(descs[half_size:]))
ids = list(event_descriptions.keys()) ids = list(event_descriptions.keys())