# BaseAppConfig.py
import logging
from typing import Optional, Union

from embedchain.config.BaseConfig import BaseConfig
from embedchain.config.vectordbs import ElasticsearchDBConfig
from embedchain.helper_classes.json_serializable import JSONSerializable
from embedchain.models import VectorDatabases, VectorDimensions


  6. class BaseAppConfig(BaseConfig, JSONSerializable):
  7. """
  8. Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`.
  9. """
  10. def __init__(
  11. self,
  12. log_level=None,
  13. embedding_fn=None,
  14. db=None,
  15. host=None,
  16. port=None,
  17. id=None,
  18. collection_name=None,
  19. collect_metrics: bool = True,
  20. db_type: VectorDatabases = None,
  21. vector_dim: VectorDimensions = None,
  22. es_config: ElasticsearchDBConfig = None,
  23. ):
  24. """
  25. :param log_level: Optional. (String) Debug level
  26. ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
  27. :param embedding_fn: Embedding function to use.
  28. :param db: Optional. (Vector) database instance to use for embeddings.
  29. :param host: Optional. Hostname for the database server.
  30. :param port: Optional. Port for the database server.
  31. :param id: Optional. ID of the app. Document metadata will have this id.
  32. :param collection_name: Optional. Collection name for the database.
  33. :param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
  34. :param db_type: Optional. type of Vector database to use
  35. :param vector_dim: Vector dimension generated by embedding fn
  36. :param es_config: Optional. elasticsearch database config to be used for connection
  37. """
  38. self._setup_logging(log_level)
  39. self.collection_name = collection_name if collection_name else "embedchain_store"
  40. self.db = BaseAppConfig.get_db(
  41. db=db,
  42. embedding_fn=embedding_fn,
  43. host=host,
  44. port=port,
  45. db_type=db_type,
  46. vector_dim=vector_dim,
  47. collection_name=self.collection_name,
  48. es_config=es_config,
  49. )
  50. self.id = id
  51. self.collect_metrics = True if (collect_metrics is True or collect_metrics is None) else False
  52. return
  53. @staticmethod
  54. def get_db(db, embedding_fn, host, port, db_type, vector_dim, collection_name, es_config):
  55. """
  56. Get db based on db_type, db with default database (`ChromaDb`)
  57. :param Optional. (Vector) database to use for embeddings.
  58. :param embedding_fn: Embedding function to use in database.
  59. :param host: Optional. Hostname for the database server.
  60. :param port: Optional. Port for the database server.
  61. :param db_type: Optional. db type to use. Supported values (`es`, `chroma`)
  62. :param vector_dim: Vector dimension generated by embedding fn
  63. :param collection_name: Optional. Collection name for the database.
  64. :param es_config: Optional. elasticsearch database config to be used for connection
  65. :raises ValueError: BaseAppConfig knows no default embedding function.
  66. :returns: database instance
  67. """
  68. if db:
  69. return db
  70. if embedding_fn is None:
  71. raise ValueError("ChromaDb cannot be instantiated without an embedding function")
  72. if db_type == VectorDatabases.ELASTICSEARCH:
  73. from embedchain.vectordb.elasticsearch_db import ElasticsearchDB
  74. return ElasticsearchDB(
  75. embedding_fn=embedding_fn, vector_dim=vector_dim, collection_name=collection_name, es_config=es_config
  76. )
  77. from embedchain.vectordb.chroma_db import ChromaDB
  78. return ChromaDB(embedding_fn=embedding_fn, host=host, port=port)
  79. def _setup_logging(self, debug_level):
  80. level = logging.WARNING # Default level
  81. if debug_level is not None:
  82. level = getattr(logging, debug_level.upper(), None)
  83. if not isinstance(level, int):
  84. raise ValueError(f"Invalid log level: {debug_level}")
  85. logging.basicConfig(format="%(asctime)s [%(name)s] [%(levelname)s] %(message)s", level=level)
  86. self.logger = logging.getLogger(__name__)
  87. return