BaseAppConfig.py 3.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. import logging
  2. from embedchain.config.BaseConfig import BaseConfig
  3. from embedchain.config.vectordbs import ElasticsearchDBConfig
  4. from embedchain.models import VectorDatabases, VectorDimensions
  class BaseAppConfig(BaseConfig):
      """
      Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`.

      Constructing it configures logging, resolves the collection name, obtains the
      vector database instance (stored on ``self.db``) and records the app id.
      """

      def __init__(
          self,
          log_level=None,
          embedding_fn=None,
          db=None,
          host=None,
          port=None,
          id=None,
          collection_name=None,
          db_type: VectorDatabases = None,
          vector_dim: VectorDimensions = None,
          es_config: ElasticsearchDBConfig = None,
      ):
          """
          :param log_level: Optional. (String) Debug level
          ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
          :param embedding_fn: Embedding function to use.
          :param db: Optional. (Vector) database instance to use for embeddings.
          :param host: Optional. Hostname for the database server.
          :param port: Optional. Port for the database server.
          :param id: Optional. ID of the app. Document metadata will have this id.
          :param collection_name: Optional. Collection name for the database.
          Falls back to "embedchain_store" when omitted or empty.
          :param db_type: Optional. type of Vector database to use
          :param vector_dim: Vector dimension generated by embedding fn
          :param es_config: Optional. elasticsearch database config to be used for connection
          :raises ValueError: If `log_level` is not a known logging level name, or if
          neither `db` nor `embedding_fn` is provided.
          """
          # Logging is configured first so that `self.logger` exists before the database is built.
          self._setup_logging(log_level)

          self.collection_name = collection_name or "embedchain_store"
          self.db = BaseAppConfig.get_db(
              db=db,
              embedding_fn=embedding_fn,
              host=host,
              port=port,
              db_type=db_type,
              vector_dim=vector_dim,
              collection_name=self.collection_name,
              es_config=es_config,
          )
          self.id = id

      @staticmethod
      def get_db(db, embedding_fn, host, port, db_type, vector_dim, collection_name, es_config):
          """
          Get db based on db_type, db with default database (`ChromaDb`)

          :param db: Optional. (Vector) database to use for embeddings. When truthy it is
          returned unchanged and every other argument is ignored.
          :param embedding_fn: Embedding function to use in database.
          :param host: Optional. Hostname for the database server. Only passed to `ChromaDB`.
          :param port: Optional. Port for the database server. Only passed to `ChromaDB`.
          :param db_type: Optional. db type to use. Supported values (`es`, `chroma`)
          :param vector_dim: Vector dimension generated by embedding fn. Only passed to `ElasticsearchDB`.
          :param collection_name: Optional. Collection name for the database. Only passed to `ElasticsearchDB`.
          :param es_config: Optional. elasticsearch database config to be used for connection
          :raises ValueError: If no `db` is given and `embedding_fn` is None
          (BaseAppConfig knows no default embedding function).
          :returns: database instance
          """
          if db:
              return db

          use_elasticsearch = db_type == VectorDatabases.ELASTICSEARCH

          if embedding_fn is None:
              # Name the backend that was actually selected; the default (Chroma) message is unchanged.
              backend = "ElasticsearchDB" if use_elasticsearch else "ChromaDb"
              raise ValueError(f"{backend} cannot be instantiated without an embedding function")

          if use_elasticsearch:
              # Imported inside the branch, so this module is only loaded when Elasticsearch is selected.
              from embedchain.vectordb.elasticsearch_db import ElasticsearchDB

              return ElasticsearchDB(
                  embedding_fn=embedding_fn,
                  vector_dim=vector_dim,
                  collection_name=collection_name,
                  es_config=es_config,
              )

          # Any other db_type (including None) falls through to the default ChromaDB backend.
          from embedchain.vectordb.chroma_db import ChromaDB

          return ChromaDB(embedding_fn=embedding_fn, host=host, port=port)

      def _setup_logging(self, debug_level):
          """
          Configure logging via `logging.basicConfig` and store a module logger on `self.logger`.

          :param debug_level: Optional. (String) Name of a logging level, case-insensitive.
          When None, `logging.WARNING` is used.
          :raises ValueError: If `debug_level` does not name an integer level on the `logging` module.
          """
          if debug_level is None:
              level = logging.WARNING  # Default level
          else:
              # Resolve the level name to its numeric constant, e.g. "debug" -> logging.DEBUG.
              level = getattr(logging, debug_level.upper(), None)
              if not isinstance(level, int):
                  raise ValueError(f"Invalid log level: {debug_level}")

          # NOTE: per the stdlib docs, basicConfig does nothing if the root logger already has handlers.
          logging.basicConfig(format="%(asctime)s [%(name)s] [%(levelname)s] %(message)s", level=level)
          self.logger = logging.getLogger(__name__)