Prechádzať zdrojové kódy

Remove unwanted libraries and lighten package (#1391)

Dev Khant 1 rok pred
rodič
commit
1a5d0d236a

+ 5 - 1
Makefile

@@ -9,9 +9,13 @@ PROJECT_NAME := embedchain
 install:
 	poetry install
 
+# TODO: use a more efficient way to install these packages
 install_all:
 	poetry install --all-extras
-	poetry run pip install pinecone-text pinecone-client langchain-anthropic "unstructured[local-inference, all-docs]" ollama langchain_together==0.1.3 langchain_cohere==0.1.5 deepgram-sdk==3.2.7 langchain-huggingface psutil
+	poetry run pip install pinecone-text pinecone-client langchain-anthropic "unstructured[local-inference, all-docs]" ollama langchain_together==0.1.3 \
+		langchain_cohere==0.1.5 deepgram-sdk==3.2.7 langchain-huggingface psutil clarifai==10.0.1 flask==2.3.3 twilio==8.5.0 fastapi-poe==0.0.16 discord==2.3.2 \
+	 	slack-sdk==3.21.3 huggingface_hub==0.23.0 gitpython==3.1.38 yt_dlp==2023.11.14 PyGithub==1.59.1 feedparser==6.0.10 newspaper3k==0.2.8 listparser==0.19 \
+	 	modal==0.56.4329 dropbox==11.36.2 boto3==1.34.20 youtube-transcript-api==0.6.1 pytube==15.0.0 beautifulsoup4==4.12.3
 
 install_es:
 	poetry install --extras elasticsearch

+ 1 - 1
docs/examples/poe_bot.mdx

@@ -7,7 +7,7 @@ title: '🔮 Poe Bot'
 1. Install embedchain python package:
 
 ```bash
-pip install --upgrade "embedchain[poe]"
+pip install fastapi-poe==0.0.16 
 ```
 
 2. Create a free account on [Poe](https://www.poe.com?utm_source=embedchain).

+ 1 - 2
embedchain/bots/discord.py

@@ -12,8 +12,7 @@ try:
     from discord.ext import commands
 except ModuleNotFoundError:
     raise ModuleNotFoundError(
-        "The required dependencies for Discord are not installed."
-        'Please install with `pip install "embedchain[discord]"`'
+        "The required dependencies for Discord are not installed. Please install with `pip install discord==2.3.2`"
     ) from None
 
 

+ 1 - 1
embedchain/bots/poe.py

@@ -11,7 +11,7 @@ try:
     from fastapi_poe import PoeBot, run
 except ModuleNotFoundError:
     raise ModuleNotFoundError(
-        "The required dependencies for Poe are not installed." 'Please install with `pip install "embedchain[poe]"`'
+        "The required dependencies for Poe are not installed. Please install with `pip install fastapi-poe==0.0.16`"
     ) from None
 
 

+ 1 - 1
embedchain/bots/slack.py

@@ -15,7 +15,7 @@ try:
 except ModuleNotFoundError:
     raise ModuleNotFoundError(
         "The required dependencies for Slack are not installed."
-        'Please install with `pip install --upgrade "embedchain[slack]"`'
+        " Please install with `pip install slack-sdk==3.21.3 flask==2.3.3`"
     ) from None
 
 

+ 1 - 1
embedchain/bots/whatsapp.py

@@ -20,7 +20,7 @@ class WhatsAppBot(BaseBot):
         except ModuleNotFoundError:
             raise ModuleNotFoundError(
                 "The required dependencies for WhatsApp are not installed. "
-                'Please install with `pip install --upgrade "embedchain[whatsapp]"`'
+                "Please install with `pip install twilio==8.5.0 flask==2.3.3`"
             ) from None
         super().__init__()
 

+ 12 - 1
embedchain/deployment/modal.com/app.py

@@ -8,7 +8,18 @@ load_dotenv(".env")
 
 image = Image.debian_slim().pip_install(
     "embedchain",
-    "embedchain[dataloaders]",
+    "langchain_community==0.2.6",
+    "youtube-transcript-api==0.6.1",
+    "pytube==15.0.0",
+    "beautifulsoup4==4.12.3",
+    "slack-sdk==3.21.3",
+    "huggingface_hub==0.23.0",
+    "gitpython==3.1.38",
+    "yt_dlp==2023.11.14",
+    "PyGithub==1.59.1",
+    "feedparser==6.0.10",
+    "newspaper3k==0.2.8",
+    "listparser==0.19",
 )
 
 stub = Stub(

+ 2 - 3
embedchain/llm/aws_bedrock.py

@@ -23,7 +23,7 @@ class AWSBedrockLlm(BaseLlm):
         except ModuleNotFoundError:
             raise ModuleNotFoundError(
                 "The required dependencies for AWSBedrock are not installed."
-                'Please install with `pip install --upgrade "embedchain[aws-bedrock]"`'
+                " Please install with `pip install boto3==1.34.20`"
             ) from None
 
         self.boto_client = boto3.client("bedrock-runtime", os.environ.get("AWS_REGION") or "us-west-2")
@@ -38,8 +38,7 @@ class AWSBedrockLlm(BaseLlm):
         }
 
         if config.stream:
-            from langchain.callbacks.streaming_stdout import \
-                StreamingStdOutCallbackHandler
+            from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
             callbacks = [StreamingStdOutCallbackHandler()]
             llm = Bedrock(**kwargs, streaming=config.stream, callbacks=callbacks)

+ 1 - 1
embedchain/llm/base.py

@@ -180,7 +180,7 @@ class BaseLlm(JSONSerializable):
             from langchain.tools import DuckDuckGoSearchRun
         except ImportError:
             raise ImportError(
-                'Searching requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+                "Searching requires extra dependencies. Install with `pip install duckduckgo-search==6.1.5`"
             ) from None
         search = DuckDuckGoSearchRun()
         logger.info(f"Access search to get answers for {input_query}")

+ 1 - 1
embedchain/llm/clarifai.py

@@ -24,7 +24,7 @@ class ClarifaiLlm(BaseLlm):
         except ModuleNotFoundError:
             raise ModuleNotFoundError(
                 "The required dependencies for Clarifai are not installed."
-                'Please install with `pip install --upgrade "embedchain[clarifai]"`'
+                " Please install with `pip install clarifai==10.0.1`"
             ) from None
 
         model_name = config.model

+ 1 - 1
embedchain/llm/huggingface.py

@@ -22,7 +22,7 @@ class HuggingFaceLlm(BaseLlm):
         except ModuleNotFoundError:
             raise ModuleNotFoundError(
                 "The required dependencies for HuggingFaceHub are not installed."
-                'Please install with `pip install --upgrade "embedchain[huggingface-hub]"`'
+                " Please install with `pip install huggingface-hub==0.23.0`"
             ) from None
 
         super().__init__(config=config)

+ 1 - 1
embedchain/loaders/beehiiv.py

@@ -24,7 +24,7 @@ class BeehiivLoader(BaseLoader):
             from bs4.builder import ParserRejectedMarkup
         except ImportError:
             raise ImportError(
-                'Beehiiv requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+                "Beehiiv requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
             ) from None
 
         if not url.endswith("sitemap.xml"):

+ 1 - 1
embedchain/loaders/docs_site_loader.py

@@ -8,7 +8,7 @@ try:
     from bs4 import BeautifulSoup
 except ImportError:
     raise ImportError(
-        'DocsSite requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+        "DocsSite requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
     ) from None
 
 

+ 1 - 3
embedchain/loaders/docx_file.py

@@ -3,9 +3,7 @@ import hashlib
 try:
     from langchain_community.document_loaders import Docx2txtLoader
 except ImportError:
-    raise ImportError(
-        'Docx file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
-    ) from None
+    raise ImportError("Docx file requires extra dependencies. Install with `pip install docx2txt==0.8`") from None
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 

+ 1 - 3
embedchain/loaders/dropbox.py

@@ -17,9 +17,7 @@ class DropboxLoader(BaseLoader):
         try:
             from dropbox import Dropbox, exceptions
         except ImportError:
-            raise ImportError(
-                'Dropbox requires extra dependencies. Install with `pip install --upgrade "embedchain[dropbox]"`'
-            )
+            raise ImportError("Dropbox requires extra dependencies. Install with `pip install dropbox==11.36.2`")
 
         try:
             dbx = Dropbox(access_token)

+ 2 - 1
embedchain/loaders/github.py

@@ -30,7 +30,8 @@ class GithubLoader(BaseLoader):
             from github import Github
         except ImportError as e:
             raise ValueError(
-                "GithubLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[github]'`"
+                "GithubLoader requires extra dependencies. "
+                "Install with `pip install gitpython==3.1.38 PyGithub==1.59.1`"
             ) from e
 
         self.config = config

+ 1 - 6
embedchain/loaders/pdf_file.py

@@ -1,11 +1,6 @@
 import hashlib
 
-try:
-    from langchain_community.document_loaders import PyPDFLoader
-except ImportError:
-    raise ImportError(
-        'PDF File requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
-    ) from None
+from langchain_community.document_loaders import PyPDFLoader
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import clean_string

+ 2 - 3
embedchain/loaders/rss_feed.py

@@ -28,12 +28,11 @@ class RSSFeedLoader(BaseLoader):
     @staticmethod
     def get_rss_content(url: str):
         try:
-            from langchain_community.document_loaders import \
-                RSSFeedLoader as LangchainRSSFeedLoader
+            from langchain_community.document_loaders import RSSFeedLoader as LangchainRSSFeedLoader
         except ImportError:
             raise ImportError(
                 """RSSFeedLoader file requires extra dependencies.
-                Install with `pip install --upgrade "embedchain[rss_feed]"`"""
+                Install with `pip install feedparser==6.0.10 newspaper3k==0.2.8 listparser==0.19`"""
             ) from None
 
         output = []

+ 1 - 1
embedchain/loaders/sitemap.py

@@ -12,7 +12,7 @@ try:
     from bs4.builder import ParserRejectedMarkup
 except ImportError:
     raise ImportError(
-        'Sitemap requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+        "Sitemap requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
     ) from None
 
 from embedchain.helpers.json_serializable import register_deserializable

+ 1 - 1
embedchain/loaders/substack.py

@@ -24,7 +24,7 @@ class SubstackLoader(BaseLoader):
             from bs4.builder import ParserRejectedMarkup
         except ImportError:
             raise ImportError(
-                'Substack requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+                "Substack requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
             ) from None
 
         if not url.endswith("sitemap.xml"):

+ 1 - 1
embedchain/loaders/web_page.py

@@ -7,7 +7,7 @@ try:
     from bs4 import BeautifulSoup
 except ImportError:
     raise ImportError(
-        'Webpage requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+        "Webpage requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
     ) from None
 
 from embedchain.helpers.json_serializable import register_deserializable

+ 1 - 1
embedchain/loaders/youtube_channel.py

@@ -18,7 +18,7 @@ class YoutubeChannelLoader(BaseLoader):
             import yt_dlp
         except ImportError as e:
             raise ValueError(
-                "YoutubeLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[youtube_channel]'`"  # noqa: E501
+                "YoutubeChannelLoader requires extra dependencies. Install with `pip install yt_dlp==2023.11.14 youtube-transcript-api==0.6.1`"  # noqa: E501
             ) from e
 
         data = []

+ 2 - 4
embedchain/loaders/youtube_video.py

@@ -5,14 +5,12 @@ import logging
 try:
     from youtube_transcript_api import YouTubeTranscriptApi
 except ImportError:
-    raise ImportError('YouTube video requires extra dependencies. Install with `pip install youtube-transcript-api "`')
+    raise ImportError("YouTube video requires extra dependencies. Install with `pip install youtube-transcript-api`")
 try:
     from langchain_community.document_loaders import YoutubeLoader
     from langchain_community.document_loaders.youtube import _parse_video_id
 except ImportError:
-    raise ImportError(
-        'YouTube video requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
-    ) from None
+    raise ImportError("YouTube video requires extra dependencies. Install with `pip install pytube==15.0.0`") from None
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import clean_string

+ 11 - 1
examples/api_server/requirements.txt

@@ -1,2 +1,12 @@
 flask==2.3.2
-embedchain[dataloaders]==0.0.78
+youtube-transcript-api==0.6.1 
+pytube==15.0.0 
+beautifulsoup4==4.12.3
+slack-sdk==3.21.3
+huggingface_hub==0.23.0
+gitpython==3.1.38
+yt_dlp==2023.11.14
+PyGithub==1.59.1
+feedparser==6.0.10
+newspaper3k==0.2.8
+listparser==0.19

+ 19 - 1
examples/rest-api/requirements.txt

@@ -1,6 +1,24 @@
 fastapi==0.104.0
 uvicorn==0.23.2
+streamlit==1.29.0
 embedchain==0.1.3
-embedchain[streamlit, community, opensource, elasticsearch, opensearch, poe, discord, slack, whatsapp, weaviate, pinecone, qdrant, images, huggingface_hub, cohere, together, milvus, dataloaders, vertexai, llama2, gmail, json]==0.1.3
+slack-sdk==3.21.3 
+flask==2.3.3
+fastapi-poe==0.0.16
+discord==2.3.2
+twilio==8.5.0
+huggingface-hub==0.23.0
+embedchain[community, opensource, elasticsearch, opensearch, weaviate, pinecone, qdrant, images, cohere, together, milvus, vertexai, llama2, gmail, json]==0.1.3
 sqlalchemy==2.0.22
 python-multipart==0.0.6
+youtube-transcript-api==0.6.1 
+pytube==15.0.0 
+beautifulsoup4==4.12.3
+slack-sdk==3.21.3
+huggingface_hub==0.23.0
+gitpython==3.1.38
+yt_dlp==2023.11.14
+PyGithub==1.59.1
+feedparser==6.0.10
+newspaper3k==0.2.8
+listparser==0.19

+ 3 - 1
examples/slack_bot/requirements.txt

@@ -1 +1,3 @@
-embedchain[slack, poe]==0.1.7
+slack-sdk==3.21.3 
+flask==2.3.3
+fastapi-poe==0.0.16

Rozdielové dáta súboru neboli zobrazené, pretože súbor je príliš veľký
+ 20 - 777
poetry.lock


+ 0 - 46
pyproject.toml

@@ -105,9 +105,6 @@ gptcache = "^0.1.43"
 pysbd = "^0.3.4"
 memzero = "^0.0.7"
 tiktoken = { version = "^0.7.0", optional = true }
-youtube-transcript-api = { version = "^0.6.1", optional = true }
-pytube = { version = "^15.0.0", optional = true }
-duckduckgo-search = { version = "^6.1.5", optional = true }
 sentence-transformers = { version = "^2.2.2", optional = true }
 torch = { version = "2.3.0", optional = true }
 # Torch 2.0.1 is not compatible with poetry (https://github.com/pytorch/pytorch/issues/100974)
@@ -115,19 +112,11 @@ gpt4all = { version = "2.0.2", optional = true }
 # 1.0.9 is not working for some users (https://github.com/nomic-ai/gpt4all/issues/1394)
 opensearch-py = { version = "2.3.1", optional = true }
 elasticsearch = { version = "^8.9.0", optional = true }
-flask = { version = "^2.3.3", optional = true }
-twilio = { version = "^8.5.0", optional = true }
-fastapi-poe = { version = "0.0.16", optional = true }
-discord = { version = "^2.3.2", optional = true }
-slack-sdk = { version = "3.21.3", optional = true }
-clarifai = { version = "^10.0.1", optional = true }
 cohere = { version = "^5.3", optional = true }
 together = { version = "^0.2.8", optional = true }
 lancedb = { version = "^0.6.2", optional = true }
 weaviate-client = { version = "^3.24.1", optional = true }
-docx2txt = { version = "^0.8", optional = true }
 qdrant-client = { version = "^1.6.3", optional = true }
-huggingface_hub = { version = "^0.17.3", optional = true }
 pymilvus = { version = "2.4.3", optional = true }
 google-cloud-aiplatform = { version = "^1.26.1", optional = true }
 replicate = { version = "^0.15.4", optional = true }
@@ -136,21 +125,12 @@ psycopg = { version = "^3.1.12", optional = true }
 psycopg-binary = { version = "^3.1.12", optional = true }
 psycopg-pool = { version = "^3.1.8", optional = true }
 mysql-connector-python = { version = "^8.1.0", optional = true }
-gitpython = { version = "^3.1.38", optional = true }
-yt_dlp = { version = "^2023.11.14", optional = true }
-PyGithub = { version = "^1.59.1", optional = true }
-feedparser = { version = "^6.0.10", optional = true }
-newspaper3k = { version = "^0.2.8", optional = true }
-listparser = { version = "^0.19", optional = true }
 google-generativeai = { version = "^0.3.0", optional = true }
-modal = { version = "^0.56.4329", optional = true }
-dropbox = { version = "^11.36.2", optional = true }
 google-api-python-client = { version = "^2.111.0", optional = true }
 google-auth-oauthlib = { version = "^1.2.0", optional = true }
 google-auth = { version = "^2.25.2", optional = true }
 google-auth-httplib2 = { version = "^0.2.0", optional = true }
 google-api-core = { version = "^2.15.0", optional = true }
-boto3 = { version = "^1.34.20", optional = true }
 langchain-mistralai = { version = "^0.1.9", optional = true }
 langchain-openai = "^0.1.7"
 langchain-google-vertexai = { version = "^1.0.6", optional = true }
@@ -174,27 +154,14 @@ mock = "^5.1.0"
 pytest-asyncio = "^0.21.1"
 
 [tool.poetry.extras]
-streamlit = ["streamlit"]
 opensource = ["sentence-transformers", "torch", "gpt4all"]
 lancedb = ["lancedb"]
 elasticsearch = ["elasticsearch"]
 opensearch = ["opensearch-py"]
-poe = ["fastapi-poe"]
-discord = ["discord"]
-slack = ["slack-sdk", "flask"]
-whatsapp = ["twilio", "flask"]
 weaviate = ["weaviate-client"]
 qdrant = ["qdrant-client"]
 together = ["together"]
-huggingface_hub=["huggingface_hub"]
 milvus = ["pymilvus"]
-dataloaders=[
-    "youtube-transcript-api",
-    "docx2txt",
-    "duckduckgo-search",
-    "pytube",
-    "sentence-transformers"
-]
 vertexai = ["langchain-google-vertexai"]
 llama2 = ["replicate"]
 gmail = [
@@ -208,20 +175,7 @@ gmail = [
 googledrive = ["google-api-python-client", "google-auth-oauthlib", "google-auth-httplib2"]
 postgres = ["psycopg", "psycopg-binary", "psycopg-pool"]
 mysql = ["mysql-connector-python"]
-github = ["PyGithub", "gitpython"]
-youtube = [
-    "yt_dlp",
-    "youtube-transcript-api",
-]
-rss_feed = [
-    "feedparser",
-    "listparser",
-    "newspaper3k"
-]
 google = ["google-generativeai"]
-modal = ["modal"]
-dropbox = ["dropbox"]
-aws_bedrock = ["boto3"]
 mistralai = ["langchain-mistralai"]
 
 [tool.poetry.group.docs.dependencies]

Niektoré súbory nie sú zobrazené, pretože je v týchto rozdielových dátach zmenené mnoho súborov