@@ -451,7 +451,15 @@ pip install --upgrade 'embedchain[huggingface-hub]'

First, set the `HUGGINGFACE_ACCESS_TOKEN` environment variable; you can obtain a token from [their platform](https://huggingface.co/settings/tokens).

-Once you have the token, load the app using the config yaml file:
+You can load LLMs from Hugging Face in three ways:
+
+- [Hugging Face Hub](#hugging-face-hub)
+- [Hugging Face Local Pipelines](#hugging-face-local-pipelines)
+- [Hugging Face Inference Endpoint](#hugging-face-inference-endpoint)
+
+### Hugging Face Hub
+
+To load a model from the Hugging Face Hub, use the following code:

<CodeGroup>
@@ -461,24 +469,49 @@ from embedchain import App

os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "xxx"

-# load llm configuration from config.yaml file
-app = App.from_config(config_path="config.yaml")
-```
+config = {
+    "app": {"config": {"id": "my-app"}},
+    "llm": {
+        "provider": "huggingface",
+        "config": {
+            "model": "bigscience/bloom-1b7",
+            "top_p": 0.5,
+            "max_length": 200,
+            "temperature": 0.1,
+        },
+    },
+}

-```yaml config.yaml
-llm:
-  provider: huggingface
-  config:
-    model: 'google/flan-t5-xxl'
-    temperature: 0.5
-    max_tokens: 1000
-    top_p: 0.5
-    stream: false
+app = App.from_config(config=config)
```
</CodeGroup>
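+
+Once the app is loaded, usage follows the standard embedchain flow. A minimal sketch (the source URL and question are placeholders):
+
+```python
+# Ingest a web page, then ask a question answered by the configured Hugging Face model
+app.add("https://www.forbes.com/profile/elon-musk")
+answer = app.query("What is the net worth of Elon Musk?")
+print(answer)
+```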

-### Custom Endpoints
+### Hugging Face Local Pipelines
+
+To run a locally downloaded Hugging Face model, use the following code:
+
+<CodeGroup>
+```python main.py
+from embedchain import App
+
+config = {
+    "app": {"config": {"id": "my-app"}},
+    "llm": {
+        "provider": "huggingface",
+        "config": {
+            "model": "Trendyol/Trendyol-LLM-7b-chat-v0.1",
+            "local": True,  # Required to run the model locally
+            "top_p": 0.5,
+            "max_tokens": 1000,
+            "temperature": 0.1,
+        },
+    },
+}
+app = App.from_config(config=config)
+```
+</CodeGroup>
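+
+Local pipelines execute the model on your own machine, so the weights must be present locally. As an optional sketch (this calls `huggingface_hub` directly and is not part of embedchain's API), you can pre-fetch the weights so the first query doesn't block on a download:
+
+```python
+# Pre-download the model weights into the local Hugging Face cache
+from huggingface_hub import snapshot_download
+
+snapshot_download(repo_id="Trendyol/Trendyol-LLM-7b-chat-v0.1")
+```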
+
+### Hugging Face Inference Endpoint

You can also use [Hugging Face Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index#-inference-endpoints) to access custom endpoints. First, set the `HUGGINGFACE_ACCESS_TOKEN` as above.

@@ -487,35 +520,23 @@ Then, load the app using the config yaml file:

<CodeGroup>

```python main.py
-import os
from embedchain import App

-os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "xxx"
-
-# load llm configuration from config.yaml file
-app = App.from_config(config_path="config.yaml")
-```
+config = {
+    "app": {"config": {"id": "my-app"}},
+    "llm": {
+        "provider": "huggingface",
+        "config": {
+            "endpoint": "https://api-inference.huggingface.co/models/gpt2",
+            "model_params": {"temperature": 0.1, "max_new_tokens": 100},
+        },
+    },
+}
+app = App.from_config(config=config)

-```yaml config.yaml
-llm:
-  provider: huggingface
-  config:
-    endpoint: https://api-inference.huggingface.co/models/gpt2 # replace with your personal endpoint
```
</CodeGroup>
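+
+Querying then works the same as with the other loaders; embedchain forwards generation to the configured endpoint. A minimal sketch using the conversational `chat` API (the question is a placeholder):
+
+```python
+# chat() keeps conversation history across calls, unlike query()
+response = app.chat("Summarize what you know about Elon Musk.")
+print(response)
+```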

-If your endpoint requires additional parameters, you can pass them in the `model_kwargs` field:
-
-```
-llm:
-  provider: huggingface
-  config:
-    endpoint: <YOUR_ENDPOINT_URL_HERE>
-    model_kwargs:
-      max_new_tokens: 100
-      temperature: 0.5
-```
-
Currently, only the `text-generation` and `text2text-generation` tasks are supported [[ref](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html?highlight=huggingfaceendpoint#)].

See LangChain's [Hugging Face Endpoint](https://python.langchain.com/docs/integrations/chat/huggingface#huggingfaceendpoint) documentation for more information.