test_pdf_file.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536
  1. import pytest
  2. from langchain.schema import Document
  3. def test_load_data(loader, mocker):
  4. mocked_pypdfloader = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
  5. mocked_pypdfloader.return_value.load_and_split.return_value = [
  6. Document(page_content="Page 0 Content", metadata={"source": "example.pdf", "page": 0}),
  7. Document(page_content="Page 1 Content", metadata={"source": "example.pdf", "page": 1}),
  8. ]
  9. mock_sha256 = mocker.patch("embedchain.loaders.docs_site_loader.hashlib.sha256")
  10. doc_id = "mocked_hash"
  11. mock_sha256.return_value.hexdigest.return_value = doc_id
  12. result = loader.load_data("dummy_url")
  13. assert result["doc_id"] is doc_id
  14. assert result["data"] == [
  15. {"content": "Page 0 Content", "meta_data": {"source": "example.pdf", "page": 0, "url": "dummy_url"}},
  16. {"content": "Page 1 Content", "meta_data": {"source": "example.pdf", "page": 1, "url": "dummy_url"}},
  17. ]
  18. def test_load_data_fails_to_find_data(loader, mocker):
  19. mocked_pypdfloader = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
  20. mocked_pypdfloader.return_value.load_and_split.return_value = []
  21. with pytest.raises(ValueError):
  22. loader.load_data("dummy_url")
  23. @pytest.fixture
  24. def loader():
  25. from embedchain.loaders.pdf_file import PdfFileLoader
  26. return PdfFileLoader()