2 vuotta sitten · 6936d6983d
--- a/embedchain/chunkers/base_chunker.py
+++ b/embedchain/chunkers/base_chunker.py
@@ -3,6 +3,7 @@ import hashlib
 
				 
			
 
				 class BaseChunker:
			
 
				     def __init__(self, text_splitter):
			
 
				+        ''' Initialize the chunker. '''
			
 
				         self.text_splitter = text_splitter
			
 
				 
			
 
				     def create_chunks(self, loader, src):
			
--- a/embedchain/chunkers/docx_file.py
+++ b/embedchain/chunkers/docx_file.py
@@ -14,6 +14,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
				 
			
 
				 
			
 
				 class DocxFileChunker(BaseChunker):
			
 
				+    ''' Chunker for .docx file. '''
			
 
				     def __init__(self, config: Optional[ChunkerConfig] = None):
			
 
				         if config is None:
			
 
				             config = TEXT_SPLITTER_CHUNK_PARAMS
			
--- a/embedchain/chunkers/pdf_file.py
+++ b/embedchain/chunkers/pdf_file.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
				 
			
 
				 
			
 
				 class PdfFileChunker(BaseChunker):
			
 
				+    ''' Chunker for PDF file. '''
			
 
				     def __init__(self, config: Optional[ChunkerConfig] = None):
			
 
				         if config is None:
			
 
				             config = TEXT_SPLITTER_CHUNK_PARAMS
			
--- a/embedchain/chunkers/qna_pair.py
+++ b/embedchain/chunkers/qna_pair.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
				 
			
 
				 
			
 
				 class QnaPairChunker(BaseChunker):
			
 
				+    ''' Chunker for QnA pair. '''
			
 
				     def __init__(self, config: Optional[ChunkerConfig] = None):
			
 
				         if config is None:
			
 
				             config = TEXT_SPLITTER_CHUNK_PARAMS
			
--- a/embedchain/chunkers/text.py
+++ b/embedchain/chunkers/text.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
				 
			
 
				 
			
 
				 class TextChunker(BaseChunker):
			
 
				+    ''' Chunker for text. '''
			
 
				     def __init__(self, config: Optional[ChunkerConfig] = None):
			
 
				         if config is None:
			
 
				             config = TEXT_SPLITTER_CHUNK_PARAMS
			
--- a/embedchain/chunkers/web_page.py
+++ b/embedchain/chunkers/web_page.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
				 
			
 
				 
			
 
				 class WebPageChunker(BaseChunker):
			
 
				+    ''' Chunker for web page. '''
			
 
				     def __init__(self, config: Optional[ChunkerConfig] = None):
			
 
				         if config is None:
			
 
				             config = TEXT_SPLITTER_CHUNK_PARAMS
			
--- a/embedchain/chunkers/youtube_video.py
+++ b/embedchain/chunkers/youtube_video.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
				 
			
 
				 
			
 
				 class YoutubeVideoChunker(BaseChunker):
			
 
				+    ''' Chunker for YouTube video. '''
			
 
				     def __init__(self, config: Optional[ChunkerConfig] = None):
			
 
				         if config is None:
			
 
				             config = TEXT_SPLITTER_CHUNK_PARAMS
			
--- a/embedchain/loaders/docx_file.py
+++ b/embedchain/loaders/docx_file.py
@@ -2,6 +2,7 @@ from langchain.document_loaders import Docx2txtLoader
 
				 
			
 
				 class DocxFileLoader:
			
 
				     def load_data(self, url):
			
 
				+        ''' Load data from a .docx file. '''
			
 
				         loader = Docx2txtLoader(url)
			
 
				         output = []
			
 
				         data = loader.load()
			
--- a/embedchain/loaders/local_qna_pair.py
+++ b/embedchain/loaders/local_qna_pair.py
@@ -1,6 +1,7 @@
 
				 class LocalQnaPairLoader:
			
 
				 
			
 
				     def load_data(self, content):
			
 
				+        ''' Load data from a local QnA pair. '''
			
 
				         question, answer = content
			
 
				         content = f"Q: {question}\nA: {answer}"
			
 
				         meta_data = {
			
--- a/embedchain/loaders/local_text.py
+++ b/embedchain/loaders/local_text.py
@@ -1,6 +1,7 @@
 
				 class LocalTextLoader:
			
 
				 
			
 
				     def load_data(self, content):
			
 
				+        ''' Load data from local text content. '''
			
 
				         meta_data = {
			
 
				             "url": "local",
			
 
				         }
			
--- a/embedchain/loaders/pdf_file.py
+++ b/embedchain/loaders/pdf_file.py
@@ -6,6 +6,7 @@ from embedchain.utils import clean_string
 
				 class PdfFileLoader:
			
 
				     
			
 
				     def load_data(self, url):
			
 
				+        ''' Load data from a PDF file. '''
			
 
				         loader = PyPDFLoader(url)
			
 
				         output = []
			
 
				         pages = loader.load_and_split()
			
--- a/embedchain/loaders/web_page.py
+++ b/embedchain/loaders/web_page.py
@@ -8,6 +8,7 @@ from embedchain.utils import clean_string
 
				 class WebPageLoader:
			
 
				 
			
 
				     def load_data(self, url):
			
 
				+        ''' Load data from a web page. '''
			
 
				         response = requests.get(url)
			
 
				         data = response.content
			
 
				         soup = BeautifulSoup(data, 'html.parser')
			
--- a/embedchain/loaders/youtube_video.py
+++ b/embedchain/loaders/youtube_video.py
@@ -6,6 +6,7 @@ from embedchain.utils import clean_string
 
				 class YoutubeVideoLoader:
			
 
				 
			
 
				     def load_data(self, url):
			
 
				+        ''' Load data from a YouTube video. '''
			
 
				         loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
			
 
				         doc = loader.load()
			
 
				         output = []
			
--- a/embedchain/vectordb/base_vector_db.py
+++ b/embedchain/vectordb/base_vector_db.py
@@ -1,10 +1,13 @@
 
				 class BaseVectorDB:
			
 
				+    ''' Base class for vector database. '''
			
 
				+
			
 
				     def __init__(self):
			
 
				         self.client = self._get_or_create_db()
			
 
				         self.collection = self._get_or_create_collection()
			
 
				 
			
 
				     def _get_or_create_db(self):
			
 
				+        ''' Get or create the database. '''
			
 
				         raise NotImplementedError
			
 
				 
			
 
				     def _get_or_create_collection(self):
			
 
				-        raise NotImplementedError
			
 
				+        raise NotImplementedError
			
--- a/embedchain/vectordb/chroma_db.py
+++ b/embedchain/vectordb/chroma_db.py
@@ -7,6 +7,8 @@ from embedchain.vectordb.base_vector_db import BaseVectorDB
 
				 
			
 
				 
			
 
				 class ChromaDB(BaseVectorDB):
			
 
				+    ''' Vector database using ChromaDB. '''
			
 
				+    
			
 
				     def __init__(self, db_dir=None, ef=None):
			
 
				         if ef:
			
 
				             self.ef = ef
			
@@ -26,9 +28,11 @@ class ChromaDB(BaseVectorDB):
 
				         super().__init__()
			
 
				 
			
 
				     def _get_or_create_db(self):
			
 
				+        ''' Get or create the database. '''
			
 
				         return chromadb.Client(self.client_settings)
			
 
				 
			
 
				     def _get_or_create_collection(self):
			
 
				+        ''' Get or create the collection. '''
			
 
				         return self.client.get_or_create_collection(
			
 
				             'embedchain_store', embedding_function=self.ef,
			
 
				         )