# test_text.py
# ruff: noqa: E501
import unittest

from embedchain.chunkers.text import TextChunker
from embedchain.config import ChunkerConfig
  5. class TestTextChunker(unittest.TestCase):
  6. def test_chunks(self):
  7. """
  8. Test the chunks generated by TextChunker.
  9. # TODO: Not a very precise test.
  10. """
  11. chunker_config = ChunkerConfig(chunk_size=10, chunk_overlap=0, length_function=len)
  12. chunker = TextChunker(config=chunker_config)
  13. text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
  14. result = chunker.create_chunks(MockLoader(), text)
  15. documents = result["documents"]
  16. self.assertGreaterEqual(len(documents), 5)
  17. # Additional test cases can be added to cover different scenarios
  18. class MockLoader:
  19. def load_data(self, src):
  20. """
  21. Mock loader that returns a list of data dictionaries.
  22. Adjust this method to return different data for testing.
  23. """
  24. return [
  25. {
  26. "content": src,
  27. "meta_data": {"url": "none"},
  28. }
  29. ]