2 jaren geleden · f5f5e7edd1
--- a/README.md
+++ b/README.md
@@ -116,6 +116,15 @@ To add any web page, use the data_type as `web_page`. Eg:
 
				 app.add('web_page', 'a_valid_web_page_url')
			
 
				 ```
			
 
				 
			
 
				+### Text
			
 
				+
			
 
				+To supply your own text, use the data_type as `text` and enter a string. The text is not processed, which makes this option very versatile. Eg:
			
 
				+
			
 
				+```python
			
 
				+app.add_local('text', 'Seek wealth, not money or status. Wealth is having assets that earn while you sleep. Money is how we transfer time and wealth. Status is your place in the social hierarchy.')
			
 
				+```
			
 
				+Note: This data type is not used in the examples because in most cases you will supply a whole paragraph or file, which would not fit there.
			
 
				+
			
 
				 ### QnA Pair
			
 
				 
			
 
				 To supply your own QnA pair, use the data_type as `qna_pair` and enter a tuple. Eg:
			
--- a/embedchain/chunkers/text.py
+++ b/embedchain/chunkers/text.py
@@ -0,0 +1,16 @@
 
				+from embedchain.chunkers.base_chunker import BaseChunker
			
 
				+
			
 
				+from langchain.text_splitter import RecursiveCharacterTextSplitter
			
 
				+
			
 
				+
			
 
				+TEXT_SPLITTER_CHUNK_PARAMS = {  # keyword arguments unpacked into RecursiveCharacterTextSplitter by TextChunker
			
 
				+    "chunk_size": 300,  # forwarded as chunk_size; presumably the max length of one chunk -- confirm in LangChain docs
			
 
				+    "chunk_overlap": 0,  # forwarded as chunk_overlap; presumably means consecutive chunks share no text -- confirm
			
 
				+    "length_function": len,  # built-in len is used to measure length
			
 
				+}
			
 
				+
			
 
				+
			
 
				+class TextChunker(BaseChunker):  # chunker registered for the 'text' data type in embedchain.py
			
 
				+    def __init__(self):  # takes no arguments; the splitter configuration is fixed at module level
			
 
				+        text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)  # build the splitter from the module-level parameters
			
 
				+        super().__init__(text_splitter)  # hand the configured splitter to the BaseChunker constructor
			
--- a/embedchain/embedchain.py
+++ b/embedchain/embedchain.py
@@ -9,10 +9,12 @@ from embedchain.loaders.youtube_video import YoutubeVideoLoader
 
				 from embedchain.loaders.pdf_file import PdfFileLoader
			
 
				 from embedchain.loaders.web_page import WebPageLoader
			
 
				 from embedchain.loaders.local_qna_pair import LocalQnaPairLoader
			
 
				+from embedchain.loaders.local_text import LocalTextLoader
			
 
				 from embedchain.chunkers.youtube_video import YoutubeVideoChunker
			
 
				 from embedchain.chunkers.pdf_file import PdfFileChunker
			
 
				 from embedchain.chunkers.web_page import WebPageChunker
			
 
				 from embedchain.chunkers.qna_pair import QnaPairChunker
			
 
				+from embedchain.chunkers.text import TextChunker
			
 
				 from embedchain.vectordb.chroma_db import ChromaDB
			
 
				 
			
 
				 load_dotenv()
			
@@ -49,7 +51,8 @@ class EmbedChain:
 
				             'youtube_video': YoutubeVideoLoader(),
			
 
				             'pdf_file': PdfFileLoader(),
			
 
				             'web_page': WebPageLoader(),
			
 
				-            'qna_pair': LocalQnaPairLoader()
			
 
				+            'qna_pair': LocalQnaPairLoader(),
			
 
				+            'text': LocalTextLoader(),
			
 
				         }
			
 
				         if data_type in loaders:
			
 
				             return loaders[data_type]
			
@@ -69,6 +72,7 @@ class EmbedChain:
 
				             'pdf_file': PdfFileChunker(),
			
 
				             'web_page': WebPageChunker(),
			
 
				             'qna_pair': QnaPairChunker(),
			
 
				+            'text': TextChunker(),
			
 
				         }
			
 
				         if data_type in chunkers:
			
 
				             return chunkers[data_type]
			
--- a/embedchain/loaders/local_text.py
+++ b/embedchain/loaders/local_text.py
@@ -0,0 +1,10 @@
 
				+class LocalTextLoader:  # loader registered for the 'text' data type in embedchain.py
			
 
				+
			
 
				+    def load_data(self, content):  # wraps `content` in a one-element list of {"content", "meta_data"} dicts
			
 
				+        meta_data = {  # metadata attached to the single returned record
			
 
				+            "url": "local",  # fixed placeholder value; the content is passed in directly, not fetched from a URL
			
 
				+        }
			
 
				+        return [{  # always exactly one record
			
 
				+            "content": content,  # returned unchanged; no processing is applied here
			
 
				+            "meta_data": meta_data,
			
 
				+        }]