浏览代码

fix: `docs_site` use chunker config implementation (#326)

cachho 2 年之前
父节点
当前提交
a681d47bce
共有 2 个文件被更改,包括 7 次插入和 8 次删除
  1. 1 0
      docs/advanced/query_configuration.mdx
  2. 6 8
      embedchain/chunkers/docs_site.py

+ 1 - 0
docs/advanced/query_configuration.mdx

@@ -36,6 +36,7 @@ Default values of chunker config parameters for different `data_type`:
 |web_page|500|0|len|
 |pdf_file|1000|0|len|
 |youtube_video|2000|0|len|
+|docs_site|500|50|len|
 
 ### LoaderConfig
 

+ 6 - 8
embedchain/chunkers/docs_site.py

@@ -5,18 +5,16 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from embedchain.chunkers.base_chunker import BaseChunker
 from embedchain.config.AddConfig import ChunkerConfig
 
-TEXT_SPLITTER_CHUNK_PARAMS = {
-    "chunk_size": 500,
-    "chunk_overlap": 50,
-    "length_function": len,
-}
-
 
 class DocsSiteChunker(BaseChunker):
     """Chunker for code docs site."""
 
     def __init__(self, config: Optional[ChunkerConfig] = None):
         if config is None:
-            config = TEXT_SPLITTER_CHUNK_PARAMS
-        text_splitter = RecursiveCharacterTextSplitter(**config)
+            config = ChunkerConfig(chunk_size=500, chunk_overlap=50, length_function=len)
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=config.chunk_size,
+            chunk_overlap=config.chunk_overlap,
+            length_function=config.length_function,
+        )
         super().__init__(text_splitter)