BaseAppConfig.py 4.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. import logging
  2. from embedchain.config.BaseConfig import BaseConfig
  3. from embedchain.config.vectordbs import ElasticsearchDBConfig
  4. from embedchain.models import VectorDatabases, VectorDimensions
  5. class BaseAppConfig(BaseConfig):
  6. """
  7. Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`.
  8. """
  9. def __init__(
  10. self,
  11. log_level=None,
  12. embedding_fn=None,
  13. db=None,
  14. host=None,
  15. port=None,
  16. id=None,
  17. collection_name=None,
  18. collect_metrics: bool = True,
  19. db_type: VectorDatabases = None,
  20. vector_dim: VectorDimensions = None,
  21. es_config: ElasticsearchDBConfig = None,
  22. ):
  23. """
  24. :param log_level: Optional. (String) Debug level
  25. ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
  26. :param embedding_fn: Embedding function to use.
  27. :param db: Optional. (Vector) database instance to use for embeddings.
  28. :param host: Optional. Hostname for the database server.
  29. :param port: Optional. Port for the database server.
  30. :param id: Optional. ID of the app. Document metadata will have this id.
  31. :param collection_name: Optional. Collection name for the database.
  32. :param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
  33. :param db_type: Optional. type of Vector database to use
  34. :param vector_dim: Vector dimension generated by embedding fn
  35. :param es_config: Optional. elasticsearch database config to be used for connection
  36. """
  37. self._setup_logging(log_level)
  38. self.collection_name = collection_name if collection_name else "embedchain_store"
  39. self.db = BaseAppConfig.get_db(
  40. db=db,
  41. embedding_fn=embedding_fn,
  42. host=host,
  43. port=port,
  44. db_type=db_type,
  45. vector_dim=vector_dim,
  46. collection_name=self.collection_name,
  47. es_config=es_config,
  48. )
  49. self.id = id
  50. self.collect_metrics = True if (collect_metrics is True or collect_metrics is None) else False
  51. return
  52. @staticmethod
  53. def get_db(db, embedding_fn, host, port, db_type, vector_dim, collection_name, es_config):
  54. """
  55. Get db based on db_type, db with default database (`ChromaDb`)
  56. :param Optional. (Vector) database to use for embeddings.
  57. :param embedding_fn: Embedding function to use in database.
  58. :param host: Optional. Hostname for the database server.
  59. :param port: Optional. Port for the database server.
  60. :param db_type: Optional. db type to use. Supported values (`es`, `chroma`)
  61. :param vector_dim: Vector dimension generated by embedding fn
  62. :param collection_name: Optional. Collection name for the database.
  63. :param es_config: Optional. elasticsearch database config to be used for connection
  64. :raises ValueError: BaseAppConfig knows no default embedding function.
  65. :returns: database instance
  66. """
  67. if db:
  68. return db
  69. if embedding_fn is None:
  70. raise ValueError("ChromaDb cannot be instantiated without an embedding function")
  71. if db_type == VectorDatabases.ELASTICSEARCH:
  72. from embedchain.vectordb.elasticsearch_db import ElasticsearchDB
  73. return ElasticsearchDB(
  74. embedding_fn=embedding_fn, vector_dim=vector_dim, collection_name=collection_name, es_config=es_config
  75. )
  76. from embedchain.vectordb.chroma_db import ChromaDB
  77. return ChromaDB(embedding_fn=embedding_fn, host=host, port=port)
  78. def _setup_logging(self, debug_level):
  79. level = logging.WARNING # Default level
  80. if debug_level is not None:
  81. level = getattr(logging, debug_level.upper(), None)
  82. if not isinstance(level, int):
  83. raise ValueError(f"Invalid log level: {debug_level}")
  84. logging.basicConfig(format="%(asctime)s [%(name)s] [%(levelname)s] %(message)s", level=level)
  85. self.logger = logging.getLogger(__name__)
  86. return