Procházet zdrojové kódy

[Bugfix] fix chunker config bug (#1024)

Co-authored-by: Deven Patel <deven298@yahoo.com>
Deven Patel před 1 rokem
rodič
revize
db37b2ac15

+ 5 - 2
docs/api-reference/advanced/configuration.mdx

@@ -53,6 +53,7 @@ chunker:
   chunk_size: 2000
   chunk_overlap: 100
   length_function: 'len'
+  min_chunk_size: 0
 ```
 
 ```json config.json
@@ -91,7 +92,8 @@ chunker:
   "chunker": {
     "chunk_size": 2000,
     "chunk_overlap": 100,
-    "length_function": "len"
+    "length_function": "len",
+    "min_chunk_size": 0
   }
 }
 ```
@@ -138,7 +140,8 @@ config = {
     'chunker': {
         'chunk_size': 2000,
         'chunk_overlap': 100,
-        'length_function': 'len'
+        'length_function': 'len',
+        'min_chunk_size': 0
     }
 }
 ```

+ 1 - 0
embedchain/utils.py

@@ -428,6 +428,7 @@ def validate_config(config_data):
                 Optional("chunk_size"): int,
                 Optional("chunk_overlap"): int,
                 Optional("length_function"): str,
+                Optional("min_chunk_size"): int,
             },
         }
     )

+ 14 - 14
examples/chainlit/app.py

@@ -1,24 +1,24 @@
+import os
+
 import chainlit as cl
-from embedchain import Pipeline as App
 
-import os
+from embedchain import Pipeline as App
 
 os.environ["OPENAI_API_KEY"] = "sk-xxx"
 
+
 @cl.on_chat_start
 async def on_chat_start():
-    app = App.from_config(config={
-        'app': {
-            'config': {
-                'name': 'chainlit-app'
-            }
-        },
-        'llm': {
-            'config': {
-                'stream': True,
-            }
+    app = App.from_config(
+        config={
+            "app": {"config": {"name": "chainlit-app"}},
+            "llm": {
+                "config": {
+                    "stream": True,
+                }
+            },
         }
-    })
+    )
     # import your data here
     app.add("https://www.forbes.com/profile/elon-musk/")
     app.collect_metrics = False
@@ -31,5 +31,5 @@ async def on_message(message: cl.Message):
     msg = cl.Message(content="")
     for chunk in await cl.make_async(app.chat)(message.content):
         await msg.stream_token(chunk)
-    
+
     await msg.send()