test_text.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. # ruff: noqa: E501
  2. import unittest
  3. from embedchain.chunkers.text import TextChunker
  4. class TestTextChunker(unittest.TestCase):
  5. def test_chunks(self):
  6. """
  7. Test the chunks generated by TextChunker.
  8. # TODO: Not a very precise test.
  9. """
  10. chunker_config = {
  11. "chunk_size": 10,
  12. "chunk_overlap": 0,
  13. "length_function": len,
  14. }
  15. chunker = TextChunker(config=chunker_config)
  16. text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
  17. result = chunker.create_chunks(MockLoader(), text)
  18. documents = result["documents"]
  19. self.assertGreaterEqual(len(documents), 5)
  20. # Additional test cases can be added to cover different scenarios
  21. class MockLoader:
  22. def load_data(self, src):
  23. """
  24. Mock loader that returns a list of data dictionaries.
  25. Adjust this method to return different data for testing.
  26. """
  27. return [
  28. {
  29. "content": src,
  30. "meta_data": {"url": "none"},
  31. }
  32. ]