add_config.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. import builtins
  2. import logging
  3. from importlib import import_module
  4. from typing import Callable, Optional
  5. from embedchain.config.base_config import BaseConfig
  6. from embedchain.helpers.json_serializable import register_deserializable
  7. @register_deserializable
  8. class ChunkerConfig(BaseConfig):
  9. """
  10. Config for the chunker used in `add` method
  11. """
  12. def __init__(
  13. self,
  14. chunk_size: Optional[int] = 2000,
  15. chunk_overlap: Optional[int] = 0,
  16. length_function: Optional[Callable[[str], int]] = None,
  17. min_chunk_size: Optional[int] = 0,
  18. ):
  19. self.chunk_size = chunk_size
  20. self.chunk_overlap = chunk_overlap
  21. self.min_chunk_size = min_chunk_size
  22. if self.min_chunk_size >= self.chunk_size:
  23. raise ValueError(f"min_chunk_size {min_chunk_size} should be less than chunk_size {chunk_size}")
  24. if self.min_chunk_size < self.chunk_overlap:
  25. logging.warn(
  26. f"min_chunk_size {min_chunk_size} should be greater than chunk_overlap {chunk_overlap}, otherwise it is redundant." # noqa:E501
  27. )
  28. if isinstance(length_function, str):
  29. self.length_function = self.load_func(length_function)
  30. else:
  31. self.length_function = length_function if length_function else len
  32. def load_func(self, dotpath: str):
  33. if "." not in dotpath:
  34. return getattr(builtins, dotpath)
  35. else:
  36. module_, func = dotpath.rsplit(".", maxsplit=1)
  37. m = import_module(module_)
  38. return getattr(m, func)
  39. @register_deserializable
  40. class LoaderConfig(BaseConfig):
  41. """
  42. Config for the loader used in `add` method
  43. """
  44. def __init__(self):
  45. pass
  46. @register_deserializable
  47. class AddConfig(BaseConfig):
  48. """
  49. Config for the `add` method.
  50. """
  51. def __init__(
  52. self,
  53. chunker: Optional[ChunkerConfig] = None,
  54. loader: Optional[LoaderConfig] = None,
  55. ):
  56. """
  57. Initializes a configuration class instance for the `add` method.
  58. :param chunker: Chunker config, defaults to None
  59. :type chunker: Optional[ChunkerConfig], optional
  60. :param loader: Loader config, defaults to None
  61. :type loader: Optional[LoaderConfig], optional
  62. """
  63. self.loader = loader
  64. self.chunker = chunker