Browse Source

[improvement] update web page default chunk size to 2000 (#1005)

Co-authored-by: Deven Patel <deven298@yahoo.com>
Deven Patel, 1 năm trước
mục cha
commit
ae6f866901
2 tập tin đã thay đổi với 2 bổ sung và 2 xóa
  1. 1 1
      embedchain/chunkers/web_page.py
  2. 1 1
      tests/chunkers/test_chunkers.py

+ 1 - 1
embedchain/chunkers/web_page.py

@@ -13,7 +13,7 @@ class WebPageChunker(BaseChunker):
 
     def __init__(self, config: Optional[ChunkerConfig] = None):
         if config is None:
-            config = ChunkerConfig(chunk_size=500, chunk_overlap=0, length_function=len)
+            config = ChunkerConfig(chunk_size=2000, chunk_overlap=0, length_function=len)
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=config.chunk_size,
             chunk_overlap=config.chunk_overlap,

+ 1 - 1
tests/chunkers/test_chunkers.py

@@ -31,7 +31,7 @@ chunker_common_config = {
     QnaPairChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
     TableChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
     SitemapChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
-    WebPageChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
+    WebPageChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
     XmlChunker: {"chunk_size": 500, "chunk_overlap": 50, "length_function": len},
     YoutubeVideoChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
     JSONChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},