openai_embedder.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. import os
  2. from typing import Optional
  3. from langchain.embeddings import OpenAIEmbeddings
  4. from embedchain.config import BaseEmbedderConfig
  5. from embedchain.embedder.base_embedder import BaseEmbedder
  6. from embedchain.models import EmbeddingFunctions
  7. try:
  8. from chromadb.utils import embedding_functions
  9. except RuntimeError:
  10. from embedchain.utils import use_pysqlite3
  11. use_pysqlite3()
  12. from chromadb.utils import embedding_functions
  13. class OpenAiEmbedder(BaseEmbedder):
  14. def __init__(self, config: Optional[BaseEmbedderConfig] = None):
  15. super().__init__(config=config)
  16. if self.config.model is None:
  17. self.config.model = "text-embedding-ada-002"
  18. if self.config.deployment_name:
  19. embeddings = OpenAIEmbeddings(deployment=self.config.deployment_name)
  20. embedding_fn = BaseEmbedder._langchain_default_concept(embeddings)
  21. else:
  22. if os.getenv("OPENAI_API_KEY") is None and os.getenv("OPENAI_ORGANIZATION") is None:
  23. raise ValueError(
  24. "OPENAI_API_KEY or OPENAI_ORGANIZATION environment variables not provided"
  25. ) # noqa:E501
  26. embedding_fn = embedding_functions.OpenAIEmbeddingFunction(
  27. api_key=os.getenv("OPENAI_API_KEY"),
  28. organization_id=os.getenv("OPENAI_ORGANIZATION"),
  29. model_name=self.config.model,
  30. )
  31. self.set_embedding_fn(embedding_fn=embedding_fn)
  32. self.set_vector_dimension(vector_dimension=EmbeddingFunctions.OPENAI.value)