# CustomAppConfig.py
  1. from typing import Any, Optional
  2. from chromadb.api.types import Documents, Embeddings
  3. from dotenv import load_dotenv
  4. from embedchain.config.vectordbs import ElasticsearchDBConfig
  5. from embedchain.helper_classes.json_serializable import register_deserializable
  6. from embedchain.models import (EmbeddingFunctions, Providers, VectorDatabases,
  7. VectorDimensions)
  8. from .BaseAppConfig import BaseAppConfig
  9. load_dotenv()
  10. @register_deserializable
  11. class CustomAppConfig(BaseAppConfig):
  12. """
  13. Config to initialize an embedchain custom `App` instance, with extra config options.
  14. """
  15. def __init__(
  16. self,
  17. log_level=None,
  18. embedding_fn: EmbeddingFunctions = None,
  19. embedding_fn_model=None,
  20. db=None,
  21. host=None,
  22. port=None,
  23. id=None,
  24. collection_name=None,
  25. provider: Providers = None,
  26. open_source_app_config=None,
  27. deployment_name=None,
  28. collect_metrics: Optional[bool] = None,
  29. db_type: VectorDatabases = None,
  30. es_config: ElasticsearchDBConfig = None,
  31. chroma_settings: dict = {},
  32. ):
  33. """
  34. :param log_level: Optional. (String) Debug level
  35. ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
  36. :param embedding_fn: Optional. Embedding function to use.
  37. :param embedding_fn_model: Optional. Model name to use for embedding function.
  38. :param db: Optional. (Vector) database to use for embeddings.
  39. :param host: Optional. Hostname for the database server.
  40. :param port: Optional. Port for the database server.
  41. :param id: Optional. ID of the app. Document metadata will have this id.
  42. :param collection_name: Optional. Collection name for the database.
  43. :param provider: Optional. (Providers): LLM Provider to use.
  44. :param open_source_app_config: Optional. Config instance needed for open source apps.
  45. :param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
  46. :param db_type: Optional. type of Vector database to use.
  47. :param es_config: Optional. elasticsearch database config to be used for connection
  48. :param chroma_settings: Optional. Chroma settings for connection.
  49. """
  50. if provider:
  51. self.provider = provider
  52. else:
  53. raise ValueError("CustomApp must have a provider assigned.")
  54. self.open_source_app_config = open_source_app_config
  55. super().__init__(
  56. log_level=log_level,
  57. embedding_fn=CustomAppConfig.embedding_function(
  58. embedding_function=embedding_fn, model=embedding_fn_model, deployment_name=deployment_name
  59. ),
  60. db=db,
  61. host=host,
  62. port=port,
  63. id=id,
  64. collection_name=collection_name,
  65. collect_metrics=collect_metrics,
  66. db_type=db_type,
  67. vector_dim=CustomAppConfig.get_vector_dimension(embedding_function=embedding_fn),
  68. es_config=es_config,
  69. chroma_settings=chroma_settings,
  70. )
  71. @staticmethod
  72. def langchain_default_concept(embeddings: Any):
  73. """
  74. Langchains default function layout for embeddings.
  75. """
  76. def embed_function(texts: Documents) -> Embeddings:
  77. return embeddings.embed_documents(texts)
  78. return embed_function
  79. @staticmethod
  80. def embedding_function(embedding_function: EmbeddingFunctions, model: str = None, deployment_name: str = None):
  81. if not isinstance(embedding_function, EmbeddingFunctions):
  82. raise ValueError(
  83. f"Invalid option: '{embedding_function}'. Expecting one of the following options: {list(map(lambda x: x.value, EmbeddingFunctions))}" # noqa: E501
  84. )
  85. if embedding_function == EmbeddingFunctions.OPENAI:
  86. from langchain.embeddings import OpenAIEmbeddings
  87. if model:
  88. embeddings = OpenAIEmbeddings(model=model)
  89. else:
  90. if deployment_name:
  91. embeddings = OpenAIEmbeddings(deployment=deployment_name)
  92. else:
  93. embeddings = OpenAIEmbeddings()
  94. return CustomAppConfig.langchain_default_concept(embeddings)
  95. elif embedding_function == EmbeddingFunctions.HUGGING_FACE:
  96. from langchain.embeddings import HuggingFaceEmbeddings
  97. embeddings = HuggingFaceEmbeddings(model_name=model)
  98. return CustomAppConfig.langchain_default_concept(embeddings)
  99. elif embedding_function == EmbeddingFunctions.VERTEX_AI:
  100. from langchain.embeddings import VertexAIEmbeddings
  101. embeddings = VertexAIEmbeddings(model_name=model)
  102. return CustomAppConfig.langchain_default_concept(embeddings)
  103. elif embedding_function == EmbeddingFunctions.GPT4ALL:
  104. # Note: We could use langchains GPT4ALL embedding, but it's not available in all versions.
  105. from chromadb.utils import embedding_functions
  106. return embedding_functions.SentenceTransformerEmbeddingFunction(model_name=model)
  107. @staticmethod
  108. def get_vector_dimension(embedding_function: EmbeddingFunctions):
  109. if not isinstance(embedding_function, EmbeddingFunctions):
  110. raise ValueError(f"Invalid option: '{embedding_function}'.")
  111. if embedding_function == EmbeddingFunctions.OPENAI:
  112. return VectorDimensions.OPENAI.value
  113. elif embedding_function == EmbeddingFunctions.HUGGING_FACE:
  114. return VectorDimensions.HUGGING_FACE.value
  115. elif embedding_function == EmbeddingFunctions.VERTEX_AI:
  116. return VectorDimensions.VERTEX_AI.value
  117. elif embedding_function == EmbeddingFunctions.GPT4ALL:
  118. return VectorDimensions.GPT4ALL.value