|
@@ -5,6 +5,7 @@ from typing import Optional
|
|
|
|
|
|
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
|
|
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
|
|
from langchain_community.llms.huggingface_hub import HuggingFaceHub
|
|
from langchain_community.llms.huggingface_hub import HuggingFaceHub
|
|
|
|
+from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
|
|
|
|
|
|
from embedchain.config import BaseLlmConfig
|
|
from embedchain.config import BaseLlmConfig
|
|
from embedchain.helpers.json_serializable import register_deserializable
|
|
from embedchain.helpers.json_serializable import register_deserializable
|
|
@@ -34,12 +35,15 @@ class HuggingFaceLlm(BaseLlm):
|
|
|
|
|
|
@staticmethod
def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
    """Route ``prompt`` to the backend selected by ``config``.

    Selection order:
      1. ``config.model`` and ``config.local`` both truthy -> ``_from_pipeline``
         (the user asked to run the model locally).
      2. ``config.model`` truthy -> ``_from_model``.
      3. ``config.endpoint`` truthy -> ``_from_endpoint``.

    Args:
        prompt: Text passed to the selected backend.
        config: LLM configuration; ``model``, ``local`` and ``endpoint`` are read here.

    Returns:
        Whatever the selected backend method returns.

    Raises:
        ValueError: If neither ``config.model`` nor ``config.endpoint`` is set.
    """
    if not config.model:
        # Without a model name, only an explicit endpoint can serve the request.
        if config.endpoint:
            return HuggingFaceLlm._from_endpoint(prompt=prompt, config=config)
        raise ValueError("Either `model` or `endpoint` must be set in config")

    # A model is named: the `local` flag decides between the local pipeline
    # and the HuggingFaceHub-backed path.
    if config.local:
        return HuggingFaceLlm._from_pipeline(prompt=prompt, config=config)
    return HuggingFaceLlm._from_model(prompt=prompt, config=config)
|
|
|
|
|
|
@staticmethod
|
|
@staticmethod
|
|
def _from_model(prompt: str, config: BaseLlmConfig) -> str:
|
|
def _from_model(prompt: str, config: BaseLlmConfig) -> str:
|
|
@@ -53,15 +57,14 @@ class HuggingFaceLlm(BaseLlm):
|
|
else:
|
|
else:
|
|
raise ValueError("`top_p` must be > 0.0 and < 1.0")
|
|
raise ValueError("`top_p` must be > 0.0 and < 1.0")
|
|
|
|
|
|
- model = config.model or "google/flan-t5-xxl"
|
|
|
|
|
|
+ model = config.model
|
|
logging.info(f"Using HuggingFaceHub with model {model}")
|
|
logging.info(f"Using HuggingFaceHub with model {model}")
|
|
llm = HuggingFaceHub(
|
|
llm = HuggingFaceHub(
|
|
huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
|
|
huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
|
|
repo_id=model,
|
|
repo_id=model,
|
|
model_kwargs=model_kwargs,
|
|
model_kwargs=model_kwargs,
|
|
)
|
|
)
|
|
-
|
|
|
|
- return llm(prompt)
|
|
|
|
|
|
+ return llm.invoke(prompt)
|
|
|
|
|
|
@staticmethod
|
|
@staticmethod
|
|
def _from_endpoint(prompt: str, config: BaseLlmConfig) -> str:
|
|
def _from_endpoint(prompt: str, config: BaseLlmConfig) -> str:
|
|
@@ -71,4 +74,23 @@ class HuggingFaceLlm(BaseLlm):
|
|
task="text-generation",
|
|
task="text-generation",
|
|
model_kwargs=config.model_kwargs,
|
|
model_kwargs=config.model_kwargs,
|
|
)
|
|
)
|
|
- return llm(prompt)
|
|
|
|
|
|
+ return llm.invoke(prompt)
|
|
|
|
+
|
|
|
|
@staticmethod
def _from_pipeline(prompt: str, config: BaseLlmConfig) -> str:
    """Answer ``prompt`` by running ``config.model`` through ``HuggingFacePipeline``.

    Args:
        prompt: Text passed to the pipeline's ``invoke``.
        config: LLM configuration; ``model``, ``temperature``, ``max_tokens``
            and ``top_p`` are read here.

    Returns:
        The result of ``llm.invoke(prompt)``.

    Raises:
        ValueError: If ``config.top_p`` is not strictly between 0.0 and 1.0.
    """
    # Validate first so an invalid `top_p` fails before any other work is done.
    if not 0.0 < config.top_p < 1.0:
        raise ValueError("`top_p` must be > 0.0 and < 1.0")

    pipeline_kwargs = {
        # A falsy temperature (None or 0) falls back to 0.1.
        "temperature": config.temperature or 0.1,
        "max_new_tokens": config.max_tokens,
        "top_p": config.top_p,
    }

    # Mirror the backend log line emitted by `_from_model`.
    logging.info(f"Using HuggingFacePipeline with model {config.model}")
    llm = HuggingFacePipeline.from_model_id(
        model_id=config.model,
        task="text-generation",
        pipeline_kwargs=pipeline_kwargs,
    )
    return llm.invoke(prompt)
|