table.py 749 B

1234567891011121314151617181920
  1. from typing import Optional
  2. from langchain.text_splitter import RecursiveCharacterTextSplitter
  3. from embedchain.chunkers.base_chunker import BaseChunker
  4. from embedchain.config.add_config import ChunkerConfig
  class TableChunker(BaseChunker):
      """Chunker for tabular sources such as CSV files, Google Sheets or databases."""

      def __init__(self, config: Optional[ChunkerConfig] = None):
          """Build the text splitter and pass it to the base chunker.

          Args:
              config: Chunking settings. When omitted (``None``), a
                  ``ChunkerConfig`` with ``chunk_size=300``,
                  ``chunk_overlap=0`` and ``length_function=len`` is used.
          """
          # Only an explicit ``None`` selects the defaults; any config object
          # that is passed in is used as given.
          settings = (
              config
              if config is not None
              else ChunkerConfig(chunk_size=300, chunk_overlap=0, length_function=len)
          )
          splitter_options = {
              "chunk_size": settings.chunk_size,
              "chunk_overlap": settings.chunk_overlap,
              "length_function": settings.length_function,
          }
          super().__init__(RecursiveCharacterTextSplitter(**splitter_options))