open_source_app.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import logging
  2. from typing import Optional
  3. from embedchain.config import (BaseEmbedderConfig, BaseLlmConfig,
  4. ChromaDbConfig, OpenSourceAppConfig)
  5. from embedchain.embedchain import EmbedChain
  6. from embedchain.embedder.gpt4all import GPT4AllEmbedder
  7. from embedchain.helper.json_serializable import register_deserializable
  8. from embedchain.llm.gpt4all import GPT4ALLLlm
  9. from embedchain.vectordb.chroma import ChromaDB
# Module-level placeholder initialised to None. Nothing in the visible part of
# this module reads or reassigns it -- presumably a leftover slot for a loaded
# GPT4All model; TODO confirm there are no external users before removing.
gpt4all_model = None
  11. @register_deserializable
  12. class OpenSourceApp(EmbedChain):
  13. """
  14. The embedchain Open Source App.
  15. Comes preconfigured with the best open source LLM, embedding model, database.
  16. Methods:
  17. add(source, data_type): adds the data from the given URL to the vector db.
  18. query(query): finds answer to the given query using vector database and LLM.
  19. chat(query): finds answer to the given query using vector database and LLM, with conversation history.
  20. """
  21. def __init__(
  22. self,
  23. config: OpenSourceAppConfig = None,
  24. llm_config: BaseLlmConfig = None,
  25. chromadb_config: Optional[ChromaDbConfig] = None,
  26. system_prompt: Optional[str] = None,
  27. ):
  28. """
  29. Initialize a new `CustomApp` instance.
  30. Since it's opinionated you don't have to choose a LLM, database and embedder.
  31. However, you can configure those.
  32. :param config: Config for the app instance. This is the most basic configuration,
  33. that does not fall into the LLM, database or embedder category, defaults to None
  34. :type config: OpenSourceAppConfig, optional
  35. :param llm_config: Allows you to configure the LLM, e.g. how many documents to return.
  36. example: `from embedchain.config import LlmConfig`, defaults to None
  37. :type llm_config: BaseLlmConfig, optional
  38. :param chromadb_config: Allows you to configure the open source database,
  39. example: `from embedchain.config import ChromaDbConfig`, defaults to None
  40. :type chromadb_config: Optional[ChromaDbConfig], optional
  41. :param system_prompt: System prompt that will be provided to the LLM as such.
  42. Please don't use for the time being, as it's not supported., defaults to None
  43. :type system_prompt: Optional[str], optional
  44. :raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid.
  45. """
  46. logging.info("Loading open source embedding model. This may take some time...") # noqa:E501
  47. if not config:
  48. config = OpenSourceAppConfig()
  49. if not isinstance(config, OpenSourceAppConfig):
  50. raise TypeError(
  51. "OpenSourceApp needs a OpenSourceAppConfig passed to it. "
  52. "You can import it with `from embedchain.config import OpenSourceAppConfig`"
  53. )
  54. if not llm_config:
  55. llm_config = BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin")
  56. elif not isinstance(llm_config, BaseLlmConfig):
  57. raise TypeError(
  58. "The LlmConfig passed to OpenSourceApp is invalid. "
  59. "You can import it with `from embedchain.config import LlmConfig`"
  60. )
  61. elif not llm_config.model:
  62. llm_config.model = "orca-mini-3b.ggmlv3.q4_0.bin"
  63. llm = GPT4ALLLlm(config=llm_config)
  64. embedder = GPT4AllEmbedder(config=BaseEmbedderConfig(model="all-MiniLM-L6-v2"))
  65. logging.error("Successfully loaded open source embedding model.")
  66. database = ChromaDB(config=chromadb_config)
  67. super().__init__(config, llm=llm, db=database, embedder=embedder, system_prompt=system_prompt)