|
@@ -1,3 +1,4 @@
|
|
|
+from embedchain.config import AddConfig
|
|
|
from embedchain.loaders.youtube_video import YoutubeVideoLoader
|
|
|
from embedchain.loaders.pdf_file import PdfFileLoader
|
|
|
from embedchain.loaders.web_page import WebPageLoader
|
|
@@ -18,11 +19,11 @@ class DataFormatter:
|
|
|
loaders and chunkers to the data_type entered by the user in their
|
|
|
.add or .add_local method call
|
|
|
"""
|
|
|
- def __init__(self, data_type):
|
|
|
- self.loader = self._get_loader(data_type)
|
|
|
- self.chunker = self._get_chunker(data_type)
|
|
|
-
|
|
|
- def _get_loader(self, data_type):
|
|
|
+ def __init__(self, data_type: str, config: AddConfig):
|
|
|
+ self.loader = self._get_loader(data_type, config.loader)
|
|
|
+ self.chunker = self._get_chunker(data_type, config.chunker)
|
|
|
+
|
|
|
+ def _get_loader(self, data_type, config):
|
|
|
"""
|
|
|
Returns the appropriate data loader for the given data type.
|
|
|
|
|
@@ -43,7 +44,7 @@ class DataFormatter:
|
|
|
else:
|
|
|
raise ValueError(f"Unsupported data type: {data_type}")
|
|
|
|
|
|
- def _get_chunker(self, data_type):
|
|
|
+ def _get_chunker(self, data_type, config):
|
|
|
"""
|
|
|
Returns the appropriate chunker for the given data type.
|
|
|
|
|
@@ -52,15 +53,14 @@ class DataFormatter:
|
|
|
:raises ValueError: If an unsupported data type is provided.
|
|
|
"""
|
|
|
chunkers = {
|
|
|
- 'youtube_video': YoutubeVideoChunker(),
|
|
|
- 'pdf_file': PdfFileChunker(),
|
|
|
- 'web_page': WebPageChunker(),
|
|
|
- 'qna_pair': QnaPairChunker(),
|
|
|
- 'text': TextChunker(),
|
|
|
- 'docx': DocxFileChunker(),
|
|
|
+ 'youtube_video': YoutubeVideoChunker(config),
|
|
|
+ 'pdf_file': PdfFileChunker(config),
|
|
|
+ 'web_page': WebPageChunker(config),
|
|
|
+ 'qna_pair': QnaPairChunker(config),
|
|
|
+ 'text': TextChunker(config),
|
|
|
+ 'docx': DocxFileChunker(config),
|
|
|
}
|
|
|
if data_type in chunkers:
|
|
|
return chunkers[data_type]
|
|
|
else:
|
|
|
raise ValueError(f"Unsupported data type: {data_type}")
|
|
|
-
|