Selaa lähdekoodia

Added documentation (#219)

aaishikdutta 2 vuotta sitten
vanhempi
commit
6936d6983d

+ 1 - 0
embedchain/chunkers/base_chunker.py

@@ -3,6 +3,7 @@ import hashlib
 
 class BaseChunker:
     def __init__(self, text_splitter):
+        ''' Initialize the chunker. '''
         self.text_splitter = text_splitter
 
     def create_chunks(self, loader, src):

+ 1 - 0
embedchain/chunkers/docx_file.py

@@ -14,6 +14,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
 
 class DocxFileChunker(BaseChunker):
+    ''' Chunker for .docx file. '''
     def __init__(self, config: Optional[ChunkerConfig] = None):
         if config is None:
             config = TEXT_SPLITTER_CHUNK_PARAMS

+ 1 - 0
embedchain/chunkers/pdf_file.py

@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
 
 class PdfFileChunker(BaseChunker):
+    ''' Chunker for PDF file. '''
     def __init__(self, config: Optional[ChunkerConfig] = None):
         if config is None:
             config = TEXT_SPLITTER_CHUNK_PARAMS

+ 1 - 0
embedchain/chunkers/qna_pair.py

@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
 
 class QnaPairChunker(BaseChunker):
+    ''' Chunker for QnA pair. '''
     def __init__(self, config: Optional[ChunkerConfig] = None):
         if config is None:
             config = TEXT_SPLITTER_CHUNK_PARAMS

+ 1 - 0
embedchain/chunkers/text.py

@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
 
 class TextChunker(BaseChunker):
+    ''' Chunker for text. '''
     def __init__(self, config: Optional[ChunkerConfig] = None):
         if config is None:
             config = TEXT_SPLITTER_CHUNK_PARAMS

+ 1 - 0
embedchain/chunkers/web_page.py

@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
 
 class WebPageChunker(BaseChunker):
+    ''' Chunker for web page. '''
     def __init__(self, config: Optional[ChunkerConfig] = None):
         if config is None:
             config = TEXT_SPLITTER_CHUNK_PARAMS

+ 1 - 0
embedchain/chunkers/youtube_video.py

@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 
 
 class YoutubeVideoChunker(BaseChunker):
+    ''' Chunker for YouTube video. '''
     def __init__(self, config: Optional[ChunkerConfig] = None):
         if config is None:
             config = TEXT_SPLITTER_CHUNK_PARAMS

+ 1 - 0
embedchain/loaders/docx_file.py

@@ -2,6 +2,7 @@ from langchain.document_loaders import Docx2txtLoader
 
 class DocxFileLoader:
     def load_data(self, url):
+        ''' Load data from a .docx file. '''
         loader = Docx2txtLoader(url)
         output = []
         data = loader.load()

+ 1 - 0
embedchain/loaders/local_qna_pair.py

@@ -1,6 +1,7 @@
 class LocalQnaPairLoader:
 
     def load_data(self, content):
+        ''' Load data from a local QnA pair. '''
         question, answer = content
         content = f"Q: {question}\nA: {answer}"
         meta_data = {

+ 1 - 0
embedchain/loaders/local_text.py

@@ -1,6 +1,7 @@
 class LocalTextLoader:
 
     def load_data(self, content):
+        ''' Load data from a local text file. '''
         meta_data = {
             "url": "local",
         }

+ 1 - 0
embedchain/loaders/pdf_file.py

@@ -6,6 +6,7 @@ from embedchain.utils import clean_string
 class PdfFileLoader:
     
     def load_data(self, url):
+        ''' Load data from a PDF file. '''
         loader = PyPDFLoader(url)
         output = []
         pages = loader.load_and_split()

+ 1 - 0
embedchain/loaders/web_page.py

@@ -8,6 +8,7 @@ from embedchain.utils import clean_string
 class WebPageLoader:
 
     def load_data(self, url):
+        ''' Load data from a web page. '''
         response = requests.get(url)
         data = response.content
         soup = BeautifulSoup(data, 'html.parser')

+ 1 - 0
embedchain/loaders/youtube_video.py

@@ -6,6 +6,7 @@ from embedchain.utils import clean_string
 class YoutubeVideoLoader:
 
     def load_data(self, url):
+        ''' Load data from a YouTube video. '''
         loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
         doc = loader.load()
         output = []

+ 4 - 1
embedchain/vectordb/base_vector_db.py

@@ -1,10 +1,13 @@
 class BaseVectorDB:
+    ''' Base class for vector database. '''
+
     def __init__(self):
         self.client = self._get_or_create_db()
         self.collection = self._get_or_create_collection()
 
     def _get_or_create_db(self):
+        ''' Get or create the database. '''
         raise NotImplementedError
 
     def _get_or_create_collection(self):
-        raise NotImplementedError
+        raise NotImplementedError

+ 4 - 0
embedchain/vectordb/chroma_db.py

@@ -7,6 +7,8 @@ from embedchain.vectordb.base_vector_db import BaseVectorDB
 
 
 class ChromaDB(BaseVectorDB):
+    ''' Vector database using ChromaDB. '''
+    
     def __init__(self, db_dir=None, ef=None):
         if ef:
             self.ef = ef
@@ -26,9 +28,11 @@ class ChromaDB(BaseVectorDB):
         super().__init__()
 
     def _get_or_create_db(self):
+        ''' Get or create the database. '''
         return chromadb.Client(self.client_settings)
 
     def _get_or_create_collection(self):
+        ''' Get or create the collection. '''
         return self.client.get_or_create_collection(
             'embedchain_store', embedding_function=self.ef,
         )