OpenSourceApp.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import logging
  2. from typing import Optional
  3. from embedchain.config import (BaseEmbedderConfig, BaseLlmConfig,
  4. ChromaDbConfig, OpenSourceAppConfig)
  5. from embedchain.embedchain import EmbedChain
  6. from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder
  7. from embedchain.helper_classes.json_serializable import register_deserializable
  8. from embedchain.llm.gpt4all_llm import GPT4ALLLlm
  9. from embedchain.vectordb.chroma_db import ChromaDB
# Module-level placeholder, initialised to None at import time.
# NOTE(review): nothing in the visible part of this file reads or assigns it;
# presumably a leftover slot for a shared GPT4All model instance. Confirm
# against the rest of the package before relying on it or removing it.
gpt4all_model = None
  11. @register_deserializable
  12. class OpenSourceApp(EmbedChain):
  13. """
  14. The embedchain Open Source App.
  15. Comes preconfigured with the best open source LLM, embedding model, database.
  16. Methods:
  17. add(source, data_type): adds the data from the given URL to the vector db.
  18. query(query): finds answer to the given query using vector database and LLM.
  19. chat(query): finds answer to the given query using vector database and LLM, with conversation history.
  20. """
  21. def __init__(
  22. self,
  23. config: OpenSourceAppConfig = None,
  24. llm_config: BaseLlmConfig = None,
  25. chromadb_config: Optional[ChromaDbConfig] = None,
  26. system_prompt: Optional[str] = None,
  27. ):
  28. """
  29. Initialize a new `CustomApp` instance.
  30. Since it's opinionated you don't have to choose a LLM, database and embedder.
  31. However, you can configure those.
  32. :param config: Config for the app instance. This is the most basic configuration,
  33. that does not fall into the LLM, database or embedder category, defaults to None
  34. :type config: OpenSourceAppConfig, optional
  35. :param llm_config: Allows you to configure the LLM, e.g. how many documents to return.
  36. example: `from embedchain.config import LlmConfig`, defaults to None
  37. :type llm_config: BaseLlmConfig, optional
  38. :param chromadb_config: Allows you to configure the open source database,
  39. example: `from embedchain.config import ChromaDbConfig`, defaults to None
  40. :type chromadb_config: Optional[ChromaDbConfig], optional
  41. :param system_prompt: System prompt that will be provided to the LLM as such.
  42. Please don't use for the time being, as it's not supported., defaults to None
  43. :type system_prompt: Optional[str], optional
  44. :raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid.
  45. """
  46. logging.info("Loading open source embedding model. This may take some time...") # noqa:E501
  47. if not config:
  48. config = OpenSourceAppConfig()
  49. if not isinstance(config, OpenSourceAppConfig):
  50. raise TypeError(
  51. "OpenSourceApp needs a OpenSourceAppConfig passed to it. "
  52. "You can import it with `from embedchain.config import OpenSourceAppConfig`"
  53. )
  54. if not llm_config:
  55. llm_config = BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin")
  56. elif not isinstance(llm_config, BaseLlmConfig):
  57. raise TypeError(
  58. "The LlmConfig passed to OpenSourceApp is invalid. "
  59. "You can import it with `from embedchain.config import LlmConfig`"
  60. )
  61. elif not llm_config.model:
  62. llm_config.model = "orca-mini-3b.ggmlv3.q4_0.bin"
  63. llm = GPT4ALLLlm(config=llm_config)
  64. embedder = GPT4AllEmbedder(config=BaseEmbedderConfig(model="all-MiniLM-L6-v2"))
  65. logging.error("Successfully loaded open source embedding model.")
  66. database = ChromaDB(config=chromadb_config)
  67. super().__init__(config, llm=llm, db=database, embedder=embedder, system_prompt=system_prompt)