Przeglądaj źródła

[tests]: add more tests for pdf_file loader (#769)

SerSamgy 1 rok temu
rodzic
commit
d2fd3ce434
1 zmienionych plików z 36 dodań i 0 usunięć
  1. 36 0
      tests/loaders/test_pdf_file.py

+ 36 - 0
tests/loaders/test_pdf_file.py

@@ -0,0 +1,36 @@
+import pytest
+from langchain.schema import Document
+
+
+def test_load_data(loader, mocker):
+    mocked_pypdfloader = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
+    mocked_pypdfloader.return_value.load_and_split.return_value = [
+        Document(page_content="Page 0 Content", metadata={"source": "example.pdf", "page": 0}),
+        Document(page_content="Page 1 Content", metadata={"source": "example.pdf", "page": 1}),
+    ]
+
+    mock_sha256 = mocker.patch("embedchain.loaders.docs_site_loader.hashlib.sha256")
+    doc_id = "mocked_hash"
+    mock_sha256.return_value.hexdigest.return_value = doc_id
+
+    result = loader.load_data("dummy_url")
+    assert result["doc_id"] is doc_id
+    assert result["data"] == [
+        {"content": "Page 0 Content", "meta_data": {"source": "example.pdf", "page": 0, "url": "dummy_url"}},
+        {"content": "Page 1 Content", "meta_data": {"source": "example.pdf", "page": 1, "url": "dummy_url"}},
+    ]
+
+
+def test_load_data_fails_to_find_data(loader, mocker):
+    mocked_pypdfloader = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
+    mocked_pypdfloader.return_value.load_and_split.return_value = []
+
+    with pytest.raises(ValueError):
+        loader.load_data("dummy_url")
+
+
+@pytest.fixture
+def loader():
+    from embedchain.loaders.pdf_file import PdfFileLoader
+
+    return PdfFileLoader()