# BaseAppConfig.py
import logging
from typing import Optional

from embedchain.config.BaseConfig import BaseConfig
from embedchain.config.vectordbs import ElasticsearchDBConfig
from embedchain.helper_classes.json_serializable import JSONSerializable
from embedchain.models import VectorDatabases, VectorDimensions


  6. class BaseAppConfig(BaseConfig, JSONSerializable):
  7. """
  8. Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`.
  9. """
  10. def __init__(
  11. self,
  12. log_level=None,
  13. embedding_fn=None,
  14. db=None,
  15. host=None,
  16. port=None,
  17. id=None,
  18. collection_name=None,
  19. collect_metrics: bool = True,
  20. db_type: VectorDatabases = None,
  21. vector_dim: VectorDimensions = None,
  22. es_config: ElasticsearchDBConfig = None,
  23. chroma_settings: dict = {},
  24. ):
  25. """
  26. :param log_level: Optional. (String) Debug level
  27. ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
  28. :param embedding_fn: Embedding function to use.
  29. :param db: Optional. (Vector) database instance to use for embeddings.
  30. :param host: Optional. Hostname for the database server.
  31. :param port: Optional. Port for the database server.
  32. :param id: Optional. ID of the app. Document metadata will have this id.
  33. :param collection_name: Optional. Collection name for the database.
  34. :param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
  35. :param db_type: Optional. type of Vector database to use
  36. :param vector_dim: Vector dimension generated by embedding fn
  37. :param es_config: Optional. elasticsearch database config to be used for connection
  38. :param chroma_settings: Optional. Chroma settings for connection.
  39. """
  40. self._setup_logging(log_level)
  41. self.collection_name = collection_name if collection_name else "embedchain_store"
  42. self.db = BaseAppConfig.get_db(
  43. db=db,
  44. embedding_fn=embedding_fn,
  45. host=host,
  46. port=port,
  47. db_type=db_type,
  48. vector_dim=vector_dim,
  49. collection_name=self.collection_name,
  50. es_config=es_config,
  51. chroma_settings=chroma_settings,
  52. )
  53. self.id = id
  54. self.collect_metrics = True if (collect_metrics is True or collect_metrics is None) else False
  55. return
  56. @staticmethod
  57. def get_db(db, embedding_fn, host, port, db_type, vector_dim, collection_name, es_config, chroma_settings):
  58. """
  59. Get db based on db_type, db with default database (`ChromaDb`)
  60. :param Optional. (Vector) database to use for embeddings.
  61. :param embedding_fn: Embedding function to use in database.
  62. :param host: Optional. Hostname for the database server.
  63. :param port: Optional. Port for the database server.
  64. :param db_type: Optional. db type to use. Supported values (`es`, `chroma`)
  65. :param vector_dim: Vector dimension generated by embedding fn
  66. :param collection_name: Optional. Collection name for the database.
  67. :param es_config: Optional. elasticsearch database config to be used for connection
  68. :raises ValueError: BaseAppConfig knows no default embedding function.
  69. :returns: database instance
  70. """
  71. if db:
  72. return db
  73. if embedding_fn is None:
  74. raise ValueError("ChromaDb cannot be instantiated without an embedding function")
  75. if db_type == VectorDatabases.ELASTICSEARCH:
  76. from embedchain.vectordb.elasticsearch_db import ElasticsearchDB
  77. return ElasticsearchDB(
  78. embedding_fn=embedding_fn, vector_dim=vector_dim, collection_name=collection_name, es_config=es_config
  79. )
  80. from embedchain.vectordb.chroma_db import ChromaDB
  81. return ChromaDB(embedding_fn=embedding_fn, host=host, port=port, chroma_settings=chroma_settings)
  82. def _setup_logging(self, debug_level):
  83. level = logging.WARNING # Default level
  84. if debug_level is not None:
  85. level = getattr(logging, debug_level.upper(), None)
  86. if not isinstance(level, int):
  87. raise ValueError(f"Invalid log level: {debug_level}")
  88. logging.basicConfig(format="%(asctime)s [%(name)s] [%(levelname)s] %(message)s", level=level)
  89. self.logger = logging.getLogger(__name__)
  90. return