from typing import Optional, Union

import yaml

from embedchain.client import Client
from embedchain.config import (AppConfig, BaseEmbedderConfig, BaseLlmConfig,
                               ChunkerConfig)
from embedchain.config.vectordb.base import BaseVectorDbConfig
from embedchain.embedchain import EmbedChain
from embedchain.embedder.base import BaseEmbedder
from embedchain.embedder.openai import OpenAIEmbedder
from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm
from embedchain.llm.openai import OpenAILlm
from embedchain.utils import validate_yaml_config
from embedchain.vectordb.base import BaseVectorDB
from embedchain.vectordb.chroma import ChromaDB
  17. @register_deserializable
  18. class App(EmbedChain):
  19. """
  20. The EmbedChain app in it's simplest and most straightforward form.
  21. An opinionated choice of LLM, vector database and embedding model.
  22. Methods:
  23. add(source, data_type): adds the data from the given URL to the vector db.
  24. query(query): finds answer to the given query using vector database and LLM.
  25. chat(query): finds answer to the given query using vector database and LLM, with conversation history.
  26. """
  27. def __init__(
  28. self,
  29. config: Optional[AppConfig] = None,
  30. llm: BaseLlm = None,
  31. llm_config: Optional[BaseLlmConfig] = None,
  32. db: BaseVectorDB = None,
  33. db_config: Optional[BaseVectorDbConfig] = None,
  34. embedder: BaseEmbedder = None,
  35. embedder_config: Optional[BaseEmbedderConfig] = None,
  36. system_prompt: Optional[str] = None,
  37. chunker: Optional[ChunkerConfig] = None,
  38. ):
  39. """
  40. Initialize a new `App` instance.
  41. :param config: Config for the app instance., defaults to None
  42. :type config: Optional[AppConfig], optional
  43. :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to OpenAiLlm
  44. :type llm: BaseLlm, optional
  45. :param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
  46. example: `from embedchain.config import BaseLlmConfig`, defaults to None
  47. :type llm_config: Optional[BaseLlmConfig], optional
  48. :param db: The database to use for storing and retrieving embeddings,
  49. example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to ChromaDb
  50. :type db: BaseVectorDB, optional
  51. :param db_config: Allows you to configure the vector database,
  52. example: `from embedchain.config import ChromaDbConfig`, defaults to None
  53. :type db_config: Optional[BaseVectorDbConfig], optional
  54. :param embedder: The embedder (embedding model and function) use to calculate embeddings.
  55. example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to OpenAIEmbedder
  56. :type embedder: BaseEmbedder, optional
  57. :param embedder_config: Allows you to configure the Embedder.
  58. example: `from embedchain.config import BaseEmbedderConfig`, defaults to None
  59. :type embedder_config: Optional[BaseEmbedderConfig], optional
  60. :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None
  61. :type system_prompt: Optional[str], optional
  62. :raises TypeError: LLM, database or embedder or their config is not a valid class instance.
  63. """
  64. # Setup user directory if it doesn't exist already
  65. Client.setup_dir()
  66. # Type check configs
  67. if config and not isinstance(config, AppConfig):
  68. raise TypeError(
  69. "Config is not a `AppConfig` instance. "
  70. "Please make sure the type is right and that you are passing an instance."
  71. )
  72. if llm_config and not isinstance(llm_config, BaseLlmConfig):
  73. raise TypeError(
  74. "`llm_config` is not a `BaseLlmConfig` instance. "
  75. "Please make sure the type is right and that you are passing an instance."
  76. )
  77. if db_config and not isinstance(db_config, BaseVectorDbConfig):
  78. raise TypeError(
  79. "`db_config` is not a `BaseVectorDbConfig` instance. "
  80. "Please make sure the type is right and that you are passing an instance."
  81. )
  82. if embedder_config and not isinstance(embedder_config, BaseEmbedderConfig):
  83. raise TypeError(
  84. "`embedder_config` is not a `BaseEmbedderConfig` instance. "
  85. "Please make sure the type is right and that you are passing an instance."
  86. )
  87. # Assign defaults
  88. if config is None:
  89. config = AppConfig()
  90. if llm is None:
  91. llm = OpenAILlm(config=llm_config)
  92. if db is None:
  93. db = ChromaDB(config=db_config)
  94. if embedder is None:
  95. embedder = OpenAIEmbedder(config=embedder_config)
  96. self.chunker = None
  97. if chunker:
  98. self.chunker = ChunkerConfig(**chunker)
  99. # Type check assignments
  100. if not isinstance(llm, BaseLlm):
  101. raise TypeError(
  102. "LLM is not a `BaseLlm` instance. "
  103. "Please make sure the type is right and that you are passing an instance."
  104. )
  105. if not isinstance(db, BaseVectorDB):
  106. raise TypeError(
  107. "Database is not a `BaseVectorDB` instance. "
  108. "Please make sure the type is right and that you are passing an instance."
  109. )
  110. if not isinstance(embedder, BaseEmbedder):
  111. raise TypeError(
  112. "Embedder is not a `BaseEmbedder` instance. "
  113. "Please make sure the type is right and that you are passing an instance."
  114. )
  115. super().__init__(config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)
  116. @classmethod
  117. def from_config(cls, yaml_path: str):
  118. """
  119. Instantiate an App object from a YAML configuration file.
  120. :param yaml_path: Path to the YAML configuration file.
  121. :type yaml_path: str
  122. :return: An instance of the App class.
  123. :rtype: App
  124. """
  125. # Setup user directory if it doesn't exist already
  126. Client.setup_dir()
  127. with open(yaml_path, "r") as file:
  128. config_data = yaml.safe_load(file)
  129. try:
  130. validate_yaml_config(config_data)
  131. except Exception as e:
  132. raise Exception(f"❌ Error occurred while validating the YAML config. Error: {str(e)}")
  133. app_config_data = config_data.get("app", {})
  134. llm_config_data = config_data.get("llm", {})
  135. db_config_data = config_data.get("vectordb", {})
  136. embedding_model_config_data = config_data.get("embedding_model", config_data.get("embedder", {}))
  137. chunker_config_data = config_data.get("chunker", {})
  138. app_config = AppConfig(**app_config_data.get("config", {}))
  139. llm_provider = llm_config_data.get("provider", "openai")
  140. llm = LlmFactory.create(llm_provider, llm_config_data.get("config", {}))
  141. db_provider = db_config_data.get("provider", "chroma")
  142. db = VectorDBFactory.create(db_provider, db_config_data.get("config", {}))
  143. embedder_provider = embedding_model_config_data.get("provider", "openai")
  144. embedder = EmbedderFactory.create(embedder_provider, embedding_model_config_data.get("config", {}))
  145. return cls(config=app_config, llm=llm, db=db, embedder=embedder, chunker=chunker_config_data)