huggingface.py 1.4 KB

123456789101112131415161718192021222324252627282930313233
  1. import os
  2. from typing import Optional
  3. from langchain_community.embeddings import HuggingFaceEmbeddings
  4. from langchain_community.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
  5. from embedchain.config import BaseEmbedderConfig
  6. from embedchain.embedder.base import BaseEmbedder
  7. from embedchain.models import VectorDimensions
  8. class HuggingFaceEmbedder(BaseEmbedder):
  9. def __init__(self, config: Optional[BaseEmbedderConfig] = None):
  10. super().__init__(config=config)
  11. if self.config.endpoint:
  12. if not self.config.api_key and "HUGGINGFACE_ACCESS_TOKEN" not in os.environ:
  13. raise ValueError(
  14. "Please set the HUGGINGFACE_ACCESS_TOKEN environment variable or pass API Key in the config."
  15. )
  16. embeddings = HuggingFaceInferenceAPIEmbeddings(
  17. model_name=self.config.model,
  18. api_url=self.config.endpoint,
  19. api_key=self.config.api_key or os.getenv("HUGGINGFACE_ACCESS_TOKEN"),
  20. )
  21. else:
  22. embeddings = HuggingFaceEmbeddings(model_name=self.config.model)
  23. embedding_fn = BaseEmbedder._langchain_default_concept(embeddings)
  24. self.set_embedding_fn(embedding_fn=embedding_fn)
  25. vector_dimension = self.config.vector_dimension or VectorDimensions.HUGGING_FACE.value
  26. self.set_vector_dimension(vector_dimension=vector_dimension)