Dev Khant il y a 1 an
Parent
commit
b0e436d9c4

+ 1 - 1
.pre-commit-config.yaml

@@ -4,7 +4,7 @@ repos:
     hooks:
       - id: black
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: 'v0.0.220'
+    rev: 'v0.0.252'
     hooks:
       - id: ruff
         name: ruff

+ 1 - 1
Makefile

@@ -11,7 +11,7 @@ install:
 
 install_all:
 	poetry install --all-extras
-	poetry run pip install pinecone-text pinecone-client langchain-anthropic
+	poetry run pip install pinecone-text pinecone-client langchain-anthropic "unstructured[local-inference, all-docs]"
 
 install_es:
 	poetry install --extras elasticsearch

+ 2 - 1
embedchain/loaders/excel_file.py

@@ -2,10 +2,11 @@ import hashlib
 import importlib.util
 
 try:
+    import unstructured  # noqa: F401
     from langchain_community.document_loaders import UnstructuredExcelLoader
 except ImportError:
     raise ImportError(
-        'Excel file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+        'Excel file requires extra dependencies. Install with `pip install "unstructured[local-inference, all-docs]"`'
     ) from None
 
 if importlib.util.find_spec("openpyxl") is None and importlib.util.find_spec("xlrd") is None:

+ 8 - 1
embedchain/loaders/google_drive.py

@@ -9,7 +9,14 @@ except ImportError:
     ) from None
 
 from langchain_community.document_loaders import GoogleDriveLoader as Loader
-from langchain_community.document_loaders import UnstructuredFileIOLoader
+
+try:
+    import unstructured  # noqa: F401
+    from langchain_community.document_loaders import UnstructuredFileIOLoader
+except ImportError:
+    raise ImportError(
+        'Unstructured file requires extra dependencies. Install with `pip install "unstructured[local-inference, all-docs]"`'  # noqa: E501
+    ) from None
 
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader

+ 3 - 3
embedchain/loaders/unstructured_file.py

@@ -10,11 +10,11 @@ class UnstructuredLoader(BaseLoader):
     def load_data(self, url):
         """Load data from an Unstructured file."""
         try:
-            from langchain_community.document_loaders import \
-                UnstructuredFileLoader
+            import unstructured  # noqa: F401
+            from langchain_community.document_loaders import UnstructuredFileLoader
         except ImportError:
             raise ImportError(
-                'Unstructured file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'  # noqa: E501
+                'Unstructured file requires extra dependencies. Install with `pip install "unstructured[local-inference, all-docs]"`'  # noqa: E501
             ) from None
 
         loader = UnstructuredFileLoader(url)

+ 2 - 1
embedchain/loaders/xml.py

@@ -1,10 +1,11 @@
 import hashlib
 
 try:
+    import unstructured  # noqa: F401
     from langchain_community.document_loaders import UnstructuredXMLLoader
 except ImportError:
     raise ImportError(
-        'XML file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
+        'XML file requires extra dependencies. Install with `pip install "unstructured[local-inference, all-docs]"`'
     ) from None
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader

Fichier diff supprimé car celui-ci est trop grand
+ 346 - 457
poetry.lock


+ 3 - 5
pyproject.toml

@@ -108,7 +108,7 @@ youtube-transcript-api = { version = "^0.6.1", optional = true }
 pytube = { version = "^15.0.0", optional = true }
 duckduckgo-search = { version = "^3.8.5", optional = true }
 sentence-transformers = { version = "^2.2.2", optional = true }
-torch = { version = "2.0.0", optional = true }
+torch = { version = "2.3.0", optional = true }
 # Torch 2.0.1 is not compatible with poetry (https://github.com/pytorch/pytorch/issues/100974)
 gpt4all = { version = "2.0.2", optional = true }
 # 1.0.9 is not working for some users (https://github.com/nomic-ai/gpt4all/issues/1394)
@@ -124,9 +124,8 @@ together = { version = "^0.2.8", optional = true }
 weaviate-client = { version = "^3.24.1", optional = true }
 docx2txt = { version = "^0.8", optional = true }
 qdrant-client = { version = "^1.6.3", optional = true }
-unstructured = {extras = ["local-inference", "all-docs"], version = "^0.10.18", optional = true}
 huggingface_hub = { version = "^0.17.3", optional = true }
-pymilvus = { version = "2.3.1", optional = true }
+pymilvus = { version = "2.4.3", optional = true }
 google-cloud-aiplatform = { version = "^1.26.1", optional = true }
 replicate = { version = "^0.15.4", optional = true }
 schema = "^0.7.5"
@@ -190,8 +189,7 @@ dataloaders=[
     "docx2txt",
     "duckduckgo-search",
     "pytube",
-    "sentence-transformers",
-    "unstructured"
+    "sentence-transformers"
 ]
 vertexai = ["langchain-google-vertexai"]
 llama2 = ["replicate"]

Certains fichiers n'ont pas été affichés car il y a eu trop de fichiers modifiés dans ce diff