Selaa lähdekoodia

[Bug fix]: fix Cannot add documents to chromadb with inconsistent sizes. (#1314)

berwin joule 1 vuosi sitten
vanhempi
commit
ef69c91b60
1 muutettua tiedostoa jossa 5 lisäystä ja 3 poistoa
  1. 5 3
      embedchain/embedchain.py

+ 5 - 3
embedchain/embedchain.py

@@ -378,11 +378,13 @@ class EmbedChain(JSONSerializable):
         # Chunk documents into batches of 2048 and handle each batch
         # helps with large loads of embeddings that hit OpenAI limits
         document_batches = [documents[i : i + 2048] for i in range(0, len(documents), 2048)]
-        for batch in document_batches:
+        metadata_batches = [metadatas[i : i + 2048] for i in range(0, len(metadatas), 2048)]
+        id_batches = [ids[i : i + 2048] for i in range(0, len(ids), 2048)]
+        for batch_docs, batch_meta, batch_ids in zip(document_batches, metadata_batches, id_batches):
             try:
                 # Add only valid batches
-                if batch:
-                    self.db.add(documents=batch, metadatas=metadatas, ids=ids, **kwargs)
+                if batch_docs:
+                    self.db.add(documents=batch_docs, metadatas=batch_meta, ids=batch_ids, **kwargs)
             except Exception as e:
                 print(f"Failed to add batch due to a bad request: {e}")
                 # Handle the error, e.g., by logging, retrying, or skipping