CustomAppConfig.py

from typing import Any

from chromadb.api.types import Documents, Embeddings
from dotenv import load_dotenv

from embedchain.models import EmbeddingFunctions, Providers

from .BaseAppConfig import BaseAppConfig

load_dotenv()


class CustomAppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain custom `App` instance, with extra config options.
    """
    def __init__(
        self,
        log_level=None,
        embedding_fn: EmbeddingFunctions = None,
        embedding_fn_model=None,
        db=None,
        host=None,
        port=None,
        id=None,
        provider: Providers = None,
        model=None,
        open_source_app_config=None,
        deployment_name=None,
    ):
        """
        :param log_level: Optional. (String) Debug level
        ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
        :param embedding_fn: Optional. Embedding function to use.
        :param embedding_fn_model: Optional. Model name to use for the embedding function.
        :param db: Optional. (Vector) database to use for embeddings.
        :param host: Optional. Hostname for the database server.
        :param port: Optional. Port for the database server.
        :param id: Optional. ID of the app. Document metadata will have this id.
        :param provider: Optional. (Providers) LLM provider to use.
        :param model: Optional. Model to use for the chosen LLM provider.
        :param open_source_app_config: Optional. Config instance needed for open source apps.
        :param deployment_name: Optional. Deployment name for the embedding model,
        passed through to the embedding function.
        """
        if provider:
            self.provider = provider
        else:
            raise ValueError("CustomApp must have a provider assigned.")

        self.open_source_app_config = open_source_app_config

        super().__init__(
            log_level=log_level,
            embedding_fn=CustomAppConfig.embedding_function(
                embedding_function=embedding_fn, model=embedding_fn_model, deployment_name=deployment_name
            ),
            db=db,
            host=host,
            port=port,
            id=id,
        )
    @staticmethod
    def langchain_default_concept(embeddings: Any):
        """
        LangChain's default function layout for embeddings: wraps an embeddings
        object in a callable that maps a list of documents to their embeddings.
        """

        def embed_function(texts: Documents) -> Embeddings:
            return embeddings.embed_documents(texts)

        return embed_function
    @staticmethod
    def embedding_function(embedding_function: EmbeddingFunctions, model: str = None, deployment_name: str = None):
        if not isinstance(embedding_function, EmbeddingFunctions):
            raise ValueError(
                f"Invalid option: '{embedding_function}'. Expecting one of the following options: {list(map(lambda x: x.value, EmbeddingFunctions))}"  # noqa: E501
            )

        if embedding_function == EmbeddingFunctions.OPENAI:
            from langchain.embeddings import OpenAIEmbeddings

            if model:
                embeddings = OpenAIEmbeddings(model=model)
            else:
                if deployment_name:
                    # Azure-style deployments are configured via `deployment` instead of `model`.
                    embeddings = OpenAIEmbeddings(deployment=deployment_name)
                else:
                    embeddings = OpenAIEmbeddings()
            return CustomAppConfig.langchain_default_concept(embeddings)

        elif embedding_function == EmbeddingFunctions.HUGGING_FACE:
            from langchain.embeddings import HuggingFaceEmbeddings

            embeddings = HuggingFaceEmbeddings(model_name=model)
            return CustomAppConfig.langchain_default_concept(embeddings)

        elif embedding_function == EmbeddingFunctions.VERTEX_AI:
            from langchain.embeddings import VertexAIEmbeddings

            embeddings = VertexAIEmbeddings(model_name=model)
            return CustomAppConfig.langchain_default_concept(embeddings)

        elif embedding_function == EmbeddingFunctions.GPT4ALL:
            # Note: We could use LangChain's GPT4All embedding, but it's not available in all versions.
            from chromadb.utils import embedding_functions

            return embedding_functions.SentenceTransformerEmbeddingFunction(model_name=model)
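
Example usage (an illustrative sketch, not part of the file above). It assumes the embedchain package exposes a `CustomApp` class and a `Providers.OPENAI` member alongside the `EmbeddingFunctions.OPENAI` value referenced in the code; the import paths and the embedding model name below are assumptions chosen for illustration.

# Sketch: constructing a CustomAppConfig and passing it to a CustomApp.
# Assumes `CustomApp` and `Providers.OPENAI` exist as described above, and that
# OPENAI_API_KEY is available in the environment (load_dotenv() reads a .env file).
from embedchain import CustomApp
from embedchain.config import CustomAppConfig
from embedchain.models import EmbeddingFunctions, Providers

config = CustomAppConfig(
    log_level="INFO",
    provider=Providers.OPENAI,                    # required: an LLM provider must be assigned
    embedding_fn=EmbeddingFunctions.OPENAI,       # resolved via CustomAppConfig.embedding_function
    embedding_fn_model="text-embedding-ada-002",  # placeholder embedding model name
    id="my-custom-app",                           # stored in document metadata
)

app = CustomApp(config)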