Kaynağa Gözat

[Bugfix]: Fix issue of context spilling over into other apps (#835)

Deshraj Yadav 1 yıl önce
ebeveyn
işleme
a5c86a2f5c

+ 1 - 0
embedchain/chunkers/base_chunker.py

@@ -44,6 +44,7 @@ class BaseChunker(JSONSerializable):
 
             for chunk in chunks:
                 chunk_id = hashlib.sha256((chunk + url).encode()).hexdigest()
+                chunk_id = f"{app_id}--{chunk_id}" if app_id is not None else chunk_id
                 if idMap.get(chunk_id) is None:
                     idMap[chunk_id] = True
                     chunk_ids.append(chunk_id)

+ 2 - 2
tests/chunkers/test_base_chunker.py

@@ -44,8 +44,8 @@ def test_create_chunks(chunker, text_splitter_mock, loader_mock, app_id, data_ty
 
     result = chunker.create_chunks(loader_mock, "test_src", app_id)
     expected_ids = [
-        hashlib.sha256(("Chunk 1" + "URL 1").encode()).hexdigest(),
-        hashlib.sha256(("Chunk 2" + "URL 1").encode()).hexdigest(),
+        f"{app_id}--" + hashlib.sha256(("Chunk 1" + "URL 1").encode()).hexdigest(),
+        f"{app_id}--" + hashlib.sha256(("Chunk 2" + "URL 1").encode()).hexdigest(),
     ]
 
     assert result["documents"] == ["Chunk 1", "Chunk 2"]