123456789101112131415161718192021222324252627282930313233 |
- import os
- from typing import Optional
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain_community.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
- from embedchain.config import BaseEmbedderConfig
- from embedchain.embedder.base import BaseEmbedder
- from embedchain.models import VectorDimensions
class HuggingFaceEmbedder(BaseEmbedder):
    """Embedder built on LangChain's Hugging Face embedding classes.

    When ``config.endpoint`` is set, embeddings go through
    ``HuggingFaceInferenceAPIEmbeddings`` with an access token; otherwise a
    ``HuggingFaceEmbeddings`` instance is created from ``config.model`` alone.
    """

    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
        """Create the embedding function and register it on the base embedder.

        Args:
            config: Embedder settings, forwarded to ``BaseEmbedder.__init__``.
                After that call this constructor reads ``endpoint``,
                ``api_key``, ``model`` and ``vector_dimension`` from
                ``self.config``.

        Raises:
            ValueError: If an endpoint is configured but neither
                ``config.api_key`` nor the ``HUGGINGFACE_ACCESS_TOKEN``
                environment variable provides a non-empty token.
        """
        super().__init__(config=config)

        if self.config.endpoint:
            # Resolve the token once and validate the resolved value. Checking
            # only for the variable's presence would let an exported-but-empty
            # HUGGINGFACE_ACCESS_TOKEN through as an empty API key.
            api_key = self.config.api_key or os.getenv("HUGGINGFACE_ACCESS_TOKEN")
            if not api_key:
                raise ValueError(
                    "Please set the HUGGINGFACE_ACCESS_TOKEN environment variable or pass API Key in the config."
                )

            embeddings = HuggingFaceInferenceAPIEmbeddings(
                model_name=self.config.model,
                api_url=self.config.endpoint,
                api_key=api_key,
            )
        else:
            embeddings = HuggingFaceEmbeddings(model_name=self.config.model)

        embedding_fn = BaseEmbedder._langchain_default_concept(embeddings)
        self.set_embedding_fn(embedding_fn=embedding_fn)

        # A falsy configured dimension falls back to the Hugging Face default.
        vector_dimension = self.config.vector_dimension or VectorDimensions.HUGGING_FACE.value
        self.set_vector_dimension(vector_dimension=vector_dimension)
|