OpenSourceApp.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. import logging
  2. from typing import Optional
  3. from embedchain.config import (BaseEmbedderConfig, BaseLlmConfig,
  4. ChromaDbConfig, OpenSourceAppConfig)
  5. from embedchain.embedchain import EmbedChain
  6. from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder
  7. from embedchain.helper_classes.json_serializable import register_deserializable
  8. from embedchain.llm.gpt4all_llm import GPT4ALLLlm
  9. from embedchain.vectordb.chroma_db import ChromaDB
# Module-level name initialised to None. Nothing in the visible part of this
# module reads or reassigns it.
# NOTE(review): presumably a leftover slot for a lazily loaded GPT4All model;
# confirm no external code imports it before removing.
gpt4all_model = None
  11. @register_deserializable
  12. class OpenSourceApp(EmbedChain):
  13. """
  14. The OpenSource app.
  15. Same as App, but uses an open source embedding model and LLM.
  16. Has two function: add and query.
  17. adds(data_type, url): adds the data from the given URL to the vector db.
  18. query(query): finds answer to the given query using vector database and LLM.
  19. """
  20. def __init__(
  21. self,
  22. config: OpenSourceAppConfig = None,
  23. chromadb_config: Optional[ChromaDbConfig] = None,
  24. system_prompt: Optional[str] = None,
  25. ):
  26. """
  27. :param config: OpenSourceAppConfig instance to load as configuration. Optional.
  28. `ef` defaults to open source.
  29. :param system_prompt: System prompt string. Optional.
  30. """
  31. logging.info("Loading open source embedding model. This may take some time...") # noqa:E501
  32. if not config:
  33. config = OpenSourceAppConfig()
  34. if not isinstance(config, OpenSourceAppConfig):
  35. raise ValueError(
  36. "OpenSourceApp needs a OpenSourceAppConfig passed to it. "
  37. "You can import it with `from embedchain.config import OpenSourceAppConfig`"
  38. )
  39. if not config.model:
  40. raise ValueError("OpenSourceApp needs a model to be instantiated. Maybe you passed the wrong config type?")
  41. logging.info("Successfully loaded open source embedding model.")
  42. llm = GPT4ALLLlm(config=BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin"))
  43. embedder = GPT4AllEmbedder(config=BaseEmbedderConfig(model="all-MiniLM-L6-v2"))
  44. database = ChromaDB(config=chromadb_config)
  45. super().__init__(config, llm=llm, db=database, embedder=embedder, system_prompt=system_prompt)