add_config.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import builtins
  2. import logging
  3. from importlib import import_module
  4. from typing import Callable, Optional
  5. from embedchain.config.base_config import BaseConfig
  6. from embedchain.helpers.json_serializable import register_deserializable
  @register_deserializable
  class ChunkerConfig(BaseConfig):
      """
      Config for the chunker used in `add` method.

      Stores the chunk sizing values and the callable used to measure text
      length. The sizing values are validated against each other on construction.
      """

      def __init__(
          self,
          chunk_size: Optional[int] = 2000,
          chunk_overlap: Optional[int] = 0,
          length_function: Optional[Callable[[str], int]] = None,
          min_chunk_size: Optional[int] = 0,
      ) -> None:
          """
          Initializes a chunker configuration.

          :param chunk_size: Chunk size, defaults to 2000; None means "use the default"
          :type chunk_size: Optional[int], optional
          :param chunk_overlap: Chunk overlap, defaults to 0; None means "use the default"
          :type chunk_overlap: Optional[int], optional
          :param length_function: Callable measuring the length of a string, or a
              dotted path / builtin name resolved through `load_func`. Defaults to `len`.
          :type length_function: Optional[Callable[[str], int]], optional
          :param min_chunk_size: Minimum chunk size, defaults to 0; None means "use the default"
          :type min_chunk_size: Optional[int], optional
          :raises ValueError: If `min_chunk_size` is not strictly less than `chunk_size`
          """
          # The signature advertises Optional[int], so an explicit None must not
          # blow up in the comparisons below. Fall back to the signature defaults,
          # mirroring how `length_function=None` falls back to `len`.
          self.chunk_size = 2000 if chunk_size is None else chunk_size
          self.chunk_overlap = 0 if chunk_overlap is None else chunk_overlap
          self.min_chunk_size = 0 if min_chunk_size is None else min_chunk_size

          if self.min_chunk_size >= self.chunk_size:
              raise ValueError(
                  f"min_chunk_size {self.min_chunk_size} should be less than chunk_size {self.chunk_size}"
              )
          if self.min_chunk_size < self.chunk_overlap:
              # Not an error: the config is still usable, the caller is only told
              # that this combination of values is questionable.
              logging.warning(
                  f"min_chunk_size {self.min_chunk_size} should be greater than "
                  f"chunk_overlap {self.chunk_overlap}, otherwise it is redundant."
              )

          if isinstance(length_function, str):
              # A string is treated as the name of a callable to look up.
              self.length_function = self.load_func(length_function)
          else:
              self.length_function = length_function if length_function else len

      @staticmethod
      def load_func(dotpath: str):
          """
          Resolve an object from its name.

          A name without a dot is looked up on the `builtins` module. Otherwise the
          text before the last dot is imported as a module and the text after it is
          fetched from that module as an attribute.

          :param dotpath: Builtin name (e.g. ``"len"``) or dotted path (e.g. ``"os.path.basename"``)
          :type dotpath: str
          :raises AttributeError: If the final name does not exist on the module
          :raises ImportError: If the module part cannot be imported
          :return: The attribute found under that name
          """
          # NOTE(review): this imports and returns whatever the string names. If
          # config values can originate from untrusted input, confirm that is acceptable.
          if "." not in dotpath:
              return getattr(builtins, dotpath)
          module_, func = dotpath.rsplit(".", maxsplit=1)
          m = import_module(module_)
          return getattr(m, func)
  @register_deserializable
  class LoaderConfig(BaseConfig):
      """
      Config for the loader used in `add` method.

      Takes no options and sets no attributes of its own.
      """

      def __init__(self) -> None:
          """Accept no arguments and leave the instance without attributes."""
  @register_deserializable
  class AddConfig(BaseConfig):
      """
      Config for the `add` method.

      Holds the optional chunker and loader configs side by side.
      """

      def __init__(
          self,
          chunker: Optional[ChunkerConfig] = None,
          loader: Optional[LoaderConfig] = None,
      ) -> None:
          """
          Build the configuration object handed to the `add` method.

          :param chunker: Chunker config, defaults to None
          :type chunker: Optional[ChunkerConfig], optional
          :param loader: Loader config, defaults to None
          :type loader: Optional[LoaderConfig], optional
          """
          # Targets are bound left to right, so `loader` is still set before `chunker`.
          self.loader, self.chunker = loader, chunker
  65. self.chunker = chunker