import logging
from typing import Optional

from embedchain.config import (BaseEmbedderConfig, BaseLlmConfig,
                               ChromaDbConfig, OpenSourceAppConfig)
from embedchain.embedchain import EmbedChain
from embedchain.embedder.gpt4all import GPT4AllEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.gpt4all import GPT4ALLLlm
from embedchain.vectordb.chroma import ChromaDB

gpt4all_model = None


@register_deserializable
class OpenSourceApp(EmbedChain):
- """
- The embedchain Open Source App.
- Comes preconfigured with the best open source LLM, embedding model, database.
- Methods:
- add(source, data_type): adds the data from the given URL to the vector db.
- query(query): finds answer to the given query using vector database and LLM.
- chat(query): finds answer to the given query using vector database and LLM, with conversation history.
- """

    def __init__(
        self,
        config: OpenSourceAppConfig = None,
        llm_config: BaseLlmConfig = None,
        chromadb_config: Optional[ChromaDbConfig] = None,
        system_prompt: Optional[str] = None,
    ):
- """
- Initialize a new `CustomApp` instance.
- Since it's opinionated you don't have to choose a LLM, database and embedder.
- However, you can configure those.
- :param config: Config for the app instance. This is the most basic configuration,
- that does not fall into the LLM, database or embedder category, defaults to None
- :type config: OpenSourceAppConfig, optional
- :param llm_config: Allows you to configure the LLM, e.g. how many documents to return.
- example: `from embedchain.config import LlmConfig`, defaults to None
- :type llm_config: BaseLlmConfig, optional
- :param chromadb_config: Allows you to configure the open source database,
- example: `from embedchain.config import ChromaDbConfig`, defaults to None
- :type chromadb_config: Optional[ChromaDbConfig], optional
- :param system_prompt: System prompt that will be provided to the LLM as such.
- Please don't use for the time being, as it's not supported., defaults to None
- :type system_prompt: Optional[str], optional
- :raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid.
- """
- logging.info("Loading open source embedding model. This may take some time...") # noqa:E501
        if not config:
            config = OpenSourceAppConfig()

        if not isinstance(config, OpenSourceAppConfig):
            raise TypeError(
                "OpenSourceApp needs an OpenSourceAppConfig passed to it. "
                "You can import it with `from embedchain.config import OpenSourceAppConfig`"
            )
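
        # Resolve the LLM config: default to the bundled GPT4All orca-mini model,
        # validate the type, and fill in a missing model name.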
        if not llm_config:
            llm_config = BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin")
        elif not isinstance(llm_config, BaseLlmConfig):
            raise TypeError(
                "The LlmConfig passed to OpenSourceApp is invalid. "
                "You can import it with `from embedchain.config import LlmConfig`"
            )
        elif not llm_config.model:
            llm_config.model = "orca-mini-3b.ggmlv3.q4_0.bin"
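
        # Wire up the open source LLM, embedder, and vector database and hand them
        # to the parent EmbedChain class.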
        llm = GPT4ALLLlm(config=llm_config)
        embedder = GPT4AllEmbedder(config=BaseEmbedderConfig(model="all-MiniLM-L6-v2"))
        logging.info("Successfully loaded open source embedding model.")
        database = ChromaDB(config=chromadb_config)

        super().__init__(config, llm=llm, db=database, embedder=embedder, system_prompt=system_prompt)
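

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It assumes the `add`
# and `query` methods inherited from EmbedChain behave as described in the
# class docstring above; the URL and the question are placeholders.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    app = OpenSourceApp()  # loads the local GPT4All LLM and embedding model
    # To customize the LLM instead, pass a config, e.g.:
    # app = OpenSourceApp(llm_config=BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin"))
    app.add("https://example.com/article", "web_page")  # index a web page into ChromaDB
    print(app.query("What is the article about?"))  # answer from retrieved context + LLM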