elasticsearch.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
import os
from typing import Any, Optional, Union

from embedchain.config.vectordb.base import BaseVectorDbConfig
from embedchain.helpers.json_serializable import register_deserializable
  5. @register_deserializable
  6. class ElasticsearchDBConfig(BaseVectorDbConfig):
  7. def __init__(
  8. self,
  9. collection_name: Optional[str] = None,
  10. dir: Optional[str] = None,
  11. es_url: Union[str, list[str]] = None,
  12. cloud_id: Optional[str] = None,
  13. batch_size: Optional[int] = 100,
  14. **ES_EXTRA_PARAMS: dict[str, any],
  15. ):
  16. """
  17. Initializes a configuration class instance for an Elasticsearch client.
  18. :param collection_name: Default name for the collection, defaults to None
  19. :type collection_name: Optional[str], optional
  20. :param dir: Path to the database directory, where the database is stored, defaults to None
  21. :type dir: Optional[str], optional
  22. :param es_url: elasticsearch url or list of nodes url to be used for connection, defaults to None
  23. :type es_url: Union[str, list[str]], optional
  24. :param cloud_id: cloud id of the elasticsearch cluster, defaults to None
  25. :type cloud_id: Optional[str], optional
  26. :param batch_size: Number of items to insert in one batch, defaults to 100
  27. :type batch_size: Optional[int], optional
  28. :param ES_EXTRA_PARAMS: extra params dict that can be passed to elasticsearch.
  29. :type ES_EXTRA_PARAMS: dict[str, Any], optional
  30. """
  31. if es_url and cloud_id:
  32. raise ValueError("Only one of `es_url` and `cloud_id` can be set.")
  33. # self, es_url: Union[str, list[str]] = None, **ES_EXTRA_PARAMS: dict[str, any]):
  34. self.ES_URL = es_url or os.environ.get("ELASTICSEARCH_URL")
  35. self.CLOUD_ID = cloud_id or os.environ.get("ELASTICSEARCH_CLOUD_ID")
  36. if not self.ES_URL and not self.CLOUD_ID:
  37. raise AttributeError(
  38. "Elasticsearch needs a URL or CLOUD_ID attribute, "
  39. "this can either be passed to `ElasticsearchDBConfig` or as `ELASTICSEARCH_URL` or `ELASTICSEARCH_CLOUD_ID` in `.env`" # noqa: E501
  40. )
  41. self.ES_EXTRA_PARAMS = ES_EXTRA_PARAMS
  42. # Load API key from .env if it's not explicitly passed.
  43. # Can only set one of 'api_key', 'basic_auth', and 'bearer_auth'
  44. if (
  45. not self.ES_EXTRA_PARAMS.get("api_key")
  46. and not self.ES_EXTRA_PARAMS.get("basic_auth")
  47. and not self.ES_EXTRA_PARAMS.get("bearer_auth")
  48. ):
  49. self.ES_EXTRA_PARAMS["api_key"] = os.environ.get("ELASTICSEARCH_API_KEY")
  50. self.batch_size = batch_size
  51. super().__init__(collection_name=collection_name, dir=dir)