from typing import Optional

from chromadb.utils import embedding_functions

from embedchain.helper_classes.json_serializable import register_deserializable

from .BaseAppConfig import BaseAppConfig


@register_deserializable
class OpenSourceAppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain custom `OpenSourceApp` instance, with extra config options.
    """

    def __init__(
        self,
        log_level=None,
        host=None,
        port=None,
        id=None,
        collection_name=None,
        collect_metrics: Optional[bool] = None,
        model=None,
    ):
        """
        :param log_level: Optional. (String) Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
        :param host: Optional. Hostname for the database server.
        :param port: Optional. Port for the database server.
        :param id: Optional. ID of the app. Document metadata will have this id.
        :param collection_name: Optional. Collection name for the database.
        :param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
        :param model: Optional. GPT4All instantiates the class with this model,
        so unlike `App`, it has to be provided before querying.
        """
        self.model = model or "orca-mini-3b.ggmlv3.q4_0.bin"

        super().__init__(
            log_level=log_level,
            embedding_fn=OpenSourceAppConfig.default_embedding_function(),
            host=host,
            port=port,
            id=id,
            collection_name=collection_name,
            collect_metrics=collect_metrics,
        )

    @staticmethod
    def default_embedding_function():
        """
        Sets the embedding function to the default (`all-MiniLM-L6-v2`).

        :returns: The default embedding function
        """
        try:
            return embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
        except ValueError as e:
            print(e)
            raise ModuleNotFoundError(
                "The open source app requires extra dependencies. Install with `pip install embedchain[opensource]`"
            ) from None
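

# --- Usage sketch (illustrative only, not part of this module) ---
# A minimal example of how this config might be passed to an `OpenSourceApp`.
# The import path and the `config=` keyword below are assumptions based on
# this config class, not verified against the app's constructor.
#
#   from embedchain import OpenSourceApp
#
#   config = OpenSourceAppConfig(
#       log_level="INFO",
#       collection_name="my-docs",
#       model="orca-mini-3b.ggmlv3.q4_0.bin",  # GPT4All model file, loaded locally
#   )
#   app = OpenSourceApp(config=config)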