|
@@ -97,9 +97,11 @@ class EmbedChain:
|
|
|
metadatas = embeddings_data["metadatas"]
|
|
|
ids = embeddings_data["ids"]
|
|
|
# get existing ids, and discard doc if any common id exist.
|
|
|
+ where={"app_id": self.config.id} if self.config.id is not None else {}
|
|
|
+ # where={"url": src}
|
|
|
existing_docs = self.collection.get(
|
|
|
ids=ids,
|
|
|
- # where={"url": src}
|
|
|
+ where=where, # optional filter
|
|
|
)
|
|
|
existing_ids = set(existing_docs["ids"])
|
|
|
|
|
@@ -113,6 +115,10 @@ class EmbedChain:
|
|
|
|
|
|
ids = list(data_dict.keys())
|
|
|
documents, metadatas = zip(*data_dict.values())
|
|
|
+
|
|
|
+ # Add the app id to each metadata entry so documents can be filtered by app later
|
|
|
+ if (self.config.id is not None):
|
|
|
+ metadatas = [{**m, "app_id": self.config.id} for m in metadatas]
|
|
|
|
|
|
chunks_before_addition = self.count()
|
|
|
|
|
@@ -144,11 +150,11 @@ class EmbedChain:
|
|
|
:param config: The query configuration.
|
|
|
:return: The content of the document that matched your query.
|
|
|
"""
|
|
|
+ where = {"app_id": self.config.id} if self.config.id is not None else {} # optional filter
|
|
|
result = self.collection.query(
|
|
|
- query_texts=[
|
|
|
- input_query,
|
|
|
- ],
|
|
|
+ query_texts=[input_query,],
|
|
|
n_results=config.number_documents,
|
|
|
+ where=where,
|
|
|
)
|
|
|
results_formatted = self._format_result(result)
|
|
|
contents = [result[0].page_content for result in results_formatted]
|