app.py

from typing import Optional

import yaml

from embedchain.config import (AppConfig, BaseEmbedderConfig, BaseLlmConfig,
                               ChunkerConfig)
from embedchain.config.vectordb.base import BaseVectorDbConfig
from embedchain.embedchain import EmbedChain
from embedchain.embedder.base import BaseEmbedder
from embedchain.embedder.openai import OpenAIEmbedder
from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm
from embedchain.llm.openai import OpenAILlm
from embedchain.utils import validate_yaml_config
from embedchain.vectordb.base import BaseVectorDB
from embedchain.vectordb.chroma import ChromaDB

@register_deserializable
class App(EmbedChain):
    """
    The EmbedChain app in its simplest and most straightforward form.
    An opinionated choice of LLM, vector database and embedding model.

    Methods:
    add(source, data_type): adds the data from the given URL to the vector db.
    query(query): finds answer to the given query using vector database and LLM.
    chat(query): finds answer to the given query using vector database and LLM, with conversation history.
    """

    def __init__(
        self,
        config: Optional[AppConfig] = None,
        llm: BaseLlm = None,
        llm_config: Optional[BaseLlmConfig] = None,
        db: BaseVectorDB = None,
        db_config: Optional[BaseVectorDbConfig] = None,
        embedder: BaseEmbedder = None,
        embedder_config: Optional[BaseEmbedderConfig] = None,
        system_prompt: Optional[str] = None,
        chunker: Optional[ChunkerConfig] = None,
    ):
  38. """
  39. Initialize a new `App` instance.
  40. :param config: Config for the app instance., defaults to None
  41. :type config: Optional[AppConfig], optional
  42. :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to OpenAiLlm
  43. :type llm: BaseLlm, optional
  44. :param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
  45. example: `from embedchain.config import BaseLlmConfig`, defaults to None
  46. :type llm_config: Optional[BaseLlmConfig], optional
  47. :param db: The database to use for storing and retrieving embeddings,
  48. example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to ChromaDb
  49. :type db: BaseVectorDB, optional
  50. :param db_config: Allows you to configure the vector database,
  51. example: `from embedchain.config import ChromaDbConfig`, defaults to None
  52. :type db_config: Optional[BaseVectorDbConfig], optional
  53. :param embedder: The embedder (embedding model and function) use to calculate embeddings.
  54. example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to OpenAIEmbedder
  55. :type embedder: BaseEmbedder, optional
  56. :param embedder_config: Allows you to configure the Embedder.
  57. example: `from embedchain.config import BaseEmbedderConfig`, defaults to None
  58. :type embedder_config: Optional[BaseEmbedderConfig], optional
  59. :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None
  60. :type system_prompt: Optional[str], optional
  61. :raises TypeError: LLM, database or embedder or their config is not a valid class instance.
  62. """
        # Type check configs
        if config and not isinstance(config, AppConfig):
            raise TypeError(
                "Config is not an `AppConfig` instance. "
                "Please make sure the type is right and that you are passing an instance."
            )
        if llm_config and not isinstance(llm_config, BaseLlmConfig):
            raise TypeError(
                "`llm_config` is not a `BaseLlmConfig` instance. "
                "Please make sure the type is right and that you are passing an instance."
            )
        if db_config and not isinstance(db_config, BaseVectorDbConfig):
            raise TypeError(
                "`db_config` is not a `BaseVectorDbConfig` instance. "
                "Please make sure the type is right and that you are passing an instance."
            )
        if embedder_config and not isinstance(embedder_config, BaseEmbedderConfig):
            raise TypeError(
                "`embedder_config` is not a `BaseEmbedderConfig` instance. "
                "Please make sure the type is right and that you are passing an instance."
            )

        # Assign defaults
        if config is None:
            config = AppConfig()
        if llm is None:
            llm = OpenAILlm(config=llm_config)
        if db is None:
            db = ChromaDB(config=db_config)
        if embedder is None:
            embedder = OpenAIEmbedder(config=embedder_config)

        self.chunker = None
        if chunker:
            self.chunker = ChunkerConfig(**chunker)

        # Type check assignments
        if not isinstance(llm, BaseLlm):
            raise TypeError(
                "LLM is not a `BaseLlm` instance. "
                "Please make sure the type is right and that you are passing an instance."
            )
        if not isinstance(db, BaseVectorDB):
            raise TypeError(
                "Database is not a `BaseVectorDB` instance. "
                "Please make sure the type is right and that you are passing an instance."
            )
        if not isinstance(embedder, BaseEmbedder):
            raise TypeError(
                "Embedder is not a `BaseEmbedder` instance. "
                "Please make sure the type is right and that you are passing an instance."
            )

        super().__init__(config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)
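
    # Illustrative constructor usage (a minimal sketch, not part of the original
    # module): with no arguments, `App()` falls back to `OpenAILlm`, `ChromaDB` and
    # `OpenAIEmbedder`, so an OpenAI API key is expected in the environment.
    #
    #   from embedchain.config import BaseLlmConfig
    #
    #   app = App(llm_config=BaseLlmConfig(temperature=0.2))
    #   app.add("https://example.com/article", data_type="web_page")
    #   print(app.query("What is the article about?"))
    #   print(app.chat("Who wrote it?"))  # chat() keeps conversation history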

    @classmethod
    def from_config(cls, yaml_path: str):
        """
        Instantiate an App object from a YAML configuration file.

        :param yaml_path: Path to the YAML configuration file.
        :type yaml_path: str
        :return: An instance of the App class.
        :rtype: App
        """
        with open(yaml_path, "r") as file:
            config_data = yaml.safe_load(file)

        try:
            validate_yaml_config(config_data)
        except Exception as e:
            raise Exception(f"❌ Error occurred while validating the YAML config. Error: {str(e)}")

        app_config_data = config_data.get("app", {})
        llm_config_data = config_data.get("llm", {})
        db_config_data = config_data.get("vectordb", {})
        embedding_model_config_data = config_data.get("embedding_model", config_data.get("embedder", {}))
        chunker_config_data = config_data.get("chunker", {})

        app_config = AppConfig(**app_config_data.get("config", {}))

        llm_provider = llm_config_data.get("provider", "openai")
        llm = LlmFactory.create(llm_provider, llm_config_data.get("config", {}))

        db_provider = db_config_data.get("provider", "chroma")
        db = VectorDBFactory.create(db_provider, db_config_data.get("config", {}))

        embedder_provider = embedding_model_config_data.get("provider", "openai")
        embedder = EmbedderFactory.create(embedder_provider, embedding_model_config_data.get("config", {}))

        return cls(config=app_config, llm=llm, db=db, embedder=embedder, chunker=chunker_config_data)
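

# A minimal `from_config` sketch (not part of the original module). The top-level
# YAML sections mirror the keys read above ("app", "llm", "vectordb",
# "embedding_model"/"embedder", "chunker"); the nested option names shown here
# (model, temperature, collection_name, chunk_size, ...) are assumptions about the
# provider configs and may differ across embedchain versions.
#
#   # config.yaml
#   app:
#     config:
#       id: my-app
#   llm:
#     provider: openai
#     config:
#       model: gpt-3.5-turbo
#       temperature: 0.5
#   vectordb:
#     provider: chroma
#     config:
#       collection_name: my-collection
#   embedding_model:
#     provider: openai
#     config:
#       model: text-embedding-ada-002
#   chunker:
#     chunk_size: 2000
#     chunk_overlap: 100
#
if __name__ == "__main__":
    # Assumes a config.yaml like the one sketched above and, for the openai
    # providers, an OPENAI_API_KEY in the environment.
    app = App.from_config("config.yaml")
    app.add("https://example.com/article", data_type="web_page")
    print(app.query("Summarize the article."))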