app.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. from typing import Optional
  2. import yaml
  3. from embedchain.config import AppConfig, BaseEmbedderConfig, BaseLlmConfig
  4. from embedchain.config.vectordb.base import BaseVectorDbConfig
  5. from embedchain.embedchain import EmbedChain
  6. from embedchain.embedder.base import BaseEmbedder
  7. from embedchain.embedder.openai import OpenAIEmbedder
  8. from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory
  9. from embedchain.helper.json_serializable import register_deserializable
  10. from embedchain.llm.base import BaseLlm
  11. from embedchain.llm.openai import OpenAILlm
  12. from embedchain.utils import validate_yaml_config
  13. from embedchain.vectordb.base import BaseVectorDB
  14. from embedchain.vectordb.chroma import ChromaDB
  15. @register_deserializable
  16. class App(EmbedChain):
  17. """
  18. The EmbedChain app in it's simplest and most straightforward form.
  19. An opinionated choice of LLM, vector database and embedding model.
  20. Methods:
  21. add(source, data_type): adds the data from the given URL to the vector db.
  22. query(query): finds answer to the given query using vector database and LLM.
  23. chat(query): finds answer to the given query using vector database and LLM, with conversation history.
  24. """
  25. def __init__(
  26. self,
  27. config: Optional[AppConfig] = None,
  28. llm: BaseLlm = None,
  29. llm_config: Optional[BaseLlmConfig] = None,
  30. db: BaseVectorDB = None,
  31. db_config: Optional[BaseVectorDbConfig] = None,
  32. embedder: BaseEmbedder = None,
  33. embedder_config: Optional[BaseEmbedderConfig] = None,
  34. system_prompt: Optional[str] = None,
  35. ):
  36. """
  37. Initialize a new `App` instance.
  38. :param config: Config for the app instance., defaults to None
  39. :type config: Optional[AppConfig], optional
  40. :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to OpenAiLlm
  41. :type llm: BaseLlm, optional
  42. :param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
  43. example: `from embedchain.config import BaseLlmConfig`, defaults to None
  44. :type llm_config: Optional[BaseLlmConfig], optional
  45. :param db: The database to use for storing and retrieving embeddings,
  46. example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to ChromaDb
  47. :type db: BaseVectorDB, optional
  48. :param db_config: Allows you to configure the vector database,
  49. example: `from embedchain.config import ChromaDbConfig`, defaults to None
  50. :type db_config: Optional[BaseVectorDbConfig], optional
  51. :param embedder: The embedder (embedding model and function) use to calculate embeddings.
  52. example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to OpenAIEmbedder
  53. :type embedder: BaseEmbedder, optional
  54. :param embedder_config: Allows you to configure the Embedder.
  55. example: `from embedchain.config import BaseEmbedderConfig`, defaults to None
  56. :type embedder_config: Optional[BaseEmbedderConfig], optional
  57. :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None
  58. :type system_prompt: Optional[str], optional
  59. :raises TypeError: LLM, database or embedder or their config is not a valid class instance.
  60. """
  61. # Type check configs
  62. if config and not isinstance(config, AppConfig):
  63. raise TypeError(
  64. "Config is not a `AppConfig` instance. "
  65. "Please make sure the type is right and that you are passing an instance."
  66. )
  67. if llm_config and not isinstance(llm_config, BaseLlmConfig):
  68. raise TypeError(
  69. "`llm_config` is not a `BaseLlmConfig` instance. "
  70. "Please make sure the type is right and that you are passing an instance."
  71. )
  72. if db_config and not isinstance(db_config, BaseVectorDbConfig):
  73. raise TypeError(
  74. "`db_config` is not a `BaseVectorDbConfig` instance. "
  75. "Please make sure the type is right and that you are passing an instance."
  76. )
  77. if embedder_config and not isinstance(embedder_config, BaseEmbedderConfig):
  78. raise TypeError(
  79. "`embedder_config` is not a `BaseEmbedderConfig` instance. "
  80. "Please make sure the type is right and that you are passing an instance."
  81. )
  82. # Assign defaults
  83. if config is None:
  84. config = AppConfig()
  85. if llm is None:
  86. llm = OpenAILlm(config=llm_config)
  87. if db is None:
  88. db = ChromaDB(config=db_config)
  89. if embedder is None:
  90. embedder = OpenAIEmbedder(config=embedder_config)
  91. # Type check assignments
  92. if not isinstance(llm, BaseLlm):
  93. raise TypeError(
  94. "LLM is not a `BaseLlm` instance. "
  95. "Please make sure the type is right and that you are passing an instance."
  96. )
  97. if not isinstance(db, BaseVectorDB):
  98. raise TypeError(
  99. "Database is not a `BaseVectorDB` instance. "
  100. "Please make sure the type is right and that you are passing an instance."
  101. )
  102. if not isinstance(embedder, BaseEmbedder):
  103. raise TypeError(
  104. "Embedder is not a `BaseEmbedder` instance. "
  105. "Please make sure the type is right and that you are passing an instance."
  106. )
  107. super().__init__(config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)
  108. @classmethod
  109. def from_config(cls, yaml_path: str):
  110. """
  111. Instantiate an App object from a YAML configuration file.
  112. :param yaml_path: Path to the YAML configuration file.
  113. :type yaml_path: str
  114. :return: An instance of the App class.
  115. :rtype: App
  116. """
  117. with open(yaml_path, "r") as file:
  118. config_data = yaml.safe_load(file)
  119. try:
  120. validate_yaml_config(config_data)
  121. except Exception as e:
  122. raise Exception(f"❌ Error occurred while validating the YAML config. Error: {str(e)}")
  123. app_config_data = config_data.get("app", {})
  124. llm_config_data = config_data.get("llm", {})
  125. db_config_data = config_data.get("vectordb", {})
  126. embedding_model_config_data = config_data.get("embedding_model", config_data.get("embedder", {}))
  127. app_config = AppConfig(**app_config_data.get("config", {}))
  128. llm_provider = llm_config_data.get("provider", "openai")
  129. llm = LlmFactory.create(llm_provider, llm_config_data.get("config", {}))
  130. db_provider = db_config_data.get("provider", "chroma")
  131. db = VectorDBFactory.create(db_provider, db_config_data.get("config", {}))
  132. embedder_provider = embedding_model_config_data.get("provider", "openai")
  133. embedder = EmbedderFactory.create(embedder_provider, embedding_model_config_data.get("config", {}))
  134. return cls(config=app_config, llm=llm, db=db, embedder=embedder)