test_add.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import os
  2. import unittest
  3. from unittest.mock import MagicMock, patch
  4. from embedchain import App
  5. from embedchain.config import AddConfig, AppConfig, ChunkerConfig
  6. from embedchain.models.data_type import DataType
  7. class TestApp(unittest.TestCase):
  8. os.environ["OPENAI_API_KEY"] = "test_key"
  9. def setUp(self):
  10. self.app = App(config=AppConfig(collect_metrics=False))
  11. @patch("chromadb.api.models.Collection.Collection.add", MagicMock)
  12. def test_add(self):
  13. """
  14. This test checks the functionality of the 'add' method in the App class.
  15. It begins by simulating the addition of a web page with a specific URL to the application instance.
  16. The 'add' method is expected to append the input type and URL to the 'user_asks' attribute of the App instance.
  17. By asserting that 'user_asks' is updated correctly after the 'add' method is called, we can confirm that the
  18. method is working as intended.
  19. The Collection.add method from the chromadb library is mocked during this test to isolate the behavior of the
  20. 'add' method.
  21. """
  22. self.app.add("https://example.com", metadata={"meta": "meta-data"})
  23. self.assertEqual(self.app.user_asks, [["https://example.com", "web_page", {"meta": "meta-data"}]])
  24. @patch("chromadb.api.models.Collection.Collection.add", MagicMock)
  25. def test_add_forced_type(self):
  26. """
  27. Test that you can also force a data_type with `add`.
  28. """
  29. data_type = "text"
  30. self.app.add("https://example.com", data_type=data_type, metadata={"meta": "meta-data"})
  31. self.assertEqual(self.app.user_asks, [["https://example.com", data_type, {"meta": "meta-data"}]])
  32. @patch("chromadb.api.models.Collection.Collection.add", MagicMock)
  33. def test_dry_run(self):
  34. """
  35. Test that if dry_run == True then data chunks are returned.
  36. """
  37. chunker_config = ChunkerConfig(chunk_size=1, chunk_overlap=0)
  38. # We can't test with lorem ipsum because chunks are deduped, so would be recurring characters.
  39. text = """0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"""
  40. result = self.app.add(source=text, config=AddConfig(chunker=chunker_config), dry_run=True)
  41. chunks = result["chunks"]
  42. metadata = result["metadata"]
  43. count = result["count"]
  44. data_type = result["type"]
  45. self.assertEqual(len(chunks), len(text))
  46. self.assertEqual(count, len(text))
  47. self.assertEqual(data_type, DataType.TEXT)
  48. for item in metadata:
  49. self.assertIsInstance(item, dict)
  50. self.assertIn(item["url"], "local")
  51. self.assertIn(item["data_type"], "text")