소스 검색

Update dependencies for fast installation (#811)

Sidharth Mohanty 1 년 전
부모
커밋
413b107b9a

+ 6 - 2
embedchain/llm/base.py

@@ -129,8 +129,12 @@ class BaseLlm(JSONSerializable):
         :return: Search results
         :rtype: Unknown
         """
-        from langchain.tools import DuckDuckGoSearchRun
-
+        try:
+            from langchain.tools import DuckDuckGoSearchRun
+        except ImportError:
+            raise ImportError(
+                'Searching requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+            ) from None
         search = DuckDuckGoSearchRun()
         logging.info(f"Access search to get answers for {input_query}")
         return search.run(input_query)

+ 8 - 1
embedchain/loaders/docs_site_loader.py

@@ -3,7 +3,14 @@ import logging
 from urllib.parse import urljoin, urlparse
 
 import requests
-from bs4 import BeautifulSoup
+
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    raise ImportError(
+        'DocsSite requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+    ) from None
+
 
 from embedchain.helper.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader

+ 6 - 2
embedchain/loaders/docx_file.py

@@ -1,7 +1,11 @@
 import hashlib
 
-from langchain.document_loaders import Docx2txtLoader
-
+try:
+    from langchain.document_loaders import Docx2txtLoader
+except ImportError:
+    raise ImportError(
+        'Docx file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+    ) from None
 from embedchain.helper.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 

+ 6 - 2
embedchain/loaders/pdf_file.py

@@ -1,7 +1,11 @@
 import hashlib
 
-from langchain.document_loaders import PyPDFLoader
-
+try:
+    from langchain.document_loaders import PyPDFLoader
+except ImportError:
+    raise ImportError(
+        'PDF File requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+    ) from None
 from embedchain.helper.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils import clean_string

+ 8 - 2
embedchain/loaders/sitemap.py

@@ -2,8 +2,14 @@ import hashlib
 import logging
 
 import requests
-from bs4 import BeautifulSoup
-from bs4.builder import ParserRejectedMarkup
+
+try:
+    from bs4 import BeautifulSoup
+    from bs4.builder import ParserRejectedMarkup
+except ImportError:
+    raise ImportError(
+        'Sitemap requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+    ) from None
 
 from embedchain.helper.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader

+ 7 - 1
embedchain/loaders/web_page.py

@@ -2,7 +2,13 @@ import hashlib
 import logging
 
 import requests
-from bs4 import BeautifulSoup
+
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    raise ImportError(
+        'Webpage requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+    ) from None
 
 from embedchain.helper.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader

+ 6 - 2
embedchain/loaders/xml.py

@@ -1,7 +1,11 @@
 import hashlib
 
-from langchain.document_loaders import UnstructuredXMLLoader
-
+try:
+    from langchain.document_loaders import UnstructuredXMLLoader
+except ImportError:
+    raise ImportError(
+        'XML file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+    ) from None
 from embedchain.helper.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils import clean_string

+ 6 - 2
embedchain/loaders/youtube_video.py

@@ -1,7 +1,11 @@
 import hashlib
 
-from langchain.document_loaders import YoutubeLoader
-
+try:
+    from langchain.document_loaders import YoutubeLoader
+except ImportError:
+    raise ImportError(
+        'YouTube video requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+    ) from None
 from embedchain.helper.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils import clean_string

+ 20 - 10
pyproject.toml

@@ -90,13 +90,13 @@ python-dotenv = "^1.0.0"
 langchain = "^0.0.279"
 requests = "^2.31.0"
 openai = "^0.27.5"
-tiktoken = "^0.4.0"
+tiktoken = { version="^0.4.0", optional=true }
 chromadb ="^0.4.8"
-youtube-transcript-api = "^0.6.1"
-beautifulsoup4 = "^4.12.2"
-pypdf = "^3.11.0"
-pytube = "^15.0.0"
-duckduckgo-search = "^3.8.5"
+youtube-transcript-api = { version="^0.6.1", optional=true }
+beautifulsoup4 = { version="^4.12.2", optional=true }
+pypdf = { version="^3.11.0", optional=true }
+pytube = { version="^15.0.0", optional=true }
+duckduckgo-search = { version="^3.8.5", optional=true }
 llama-hub = { version = "^0.0.29", optional = true }
 sentence-transformers = { version = "^2.2.2", optional = true }
 torch = { version = "2.0.0", optional = true }
@@ -111,15 +111,15 @@ fastapi-poe = { version = "0.0.16", optional = true }
 discord = { version = "^2.3.2", optional = true }
 slack-sdk = { version = "3.21.3", optional = true }
 cohere = { version = "^4.27", optional= true }
-docx2txt = "^0.8"
-pinecone-client = "^2.2.4"
-unstructured = {extras = ["local-inference"], version = "^0.10.18"}
+docx2txt = { version="^0.8", optional=true }
+pinecone-client = { version = "^2.2.4", optional = true }
+unstructured = {extras = ["local-inference"], version = "^0.10.18", optional=true}
 pillow = { version = "10.0.1", optional = true }
 torchvision = { version = ">=0.15.1, !=0.15.2", optional = true }
 ftfy = { version = "6.1.1", optional = true }
 regex = { version = "2023.8.8", optional = true }
 huggingface_hub = { version = "^0.17.3", optional = true }
-pymilvus = "2.3.1"
+pymilvus = { version="2.3.1", optional = true }
 
 [tool.poetry.group.dev.dependencies]
 black = "^23.3.0"
@@ -149,6 +149,16 @@ images = ["torch", "ftfy", "regex", "pillow", "torchvision"]
 huggingface_hub=["huggingface_hub"]
 cohere = ["cohere"]
 milvus = ["pymilvus"]
+dataloaders=[
+    "youtube-transcript-api",
+    "beautifulsoup4",
+    "pypdf",
+    "pytube",
+    "duckduckgo-search",
+    "docx2txt",
+    "unstructured",
+    "sentence-transformers",
+]
 
 [tool.poetry.group.docs.dependencies]