|
@@ -0,0 +1,36 @@
|
|
|
+import pytest
|
|
|
+from langchain.schema import Document
|
|
|
+
|
|
|
+
|
|
|
+def test_load_data(loader, mocker):
|
|
|
+ mocked_pypdfloader = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
|
|
|
+ mocked_pypdfloader.return_value.load_and_split.return_value = [
|
|
|
+ Document(page_content="Page 0 Content", metadata={"source": "example.pdf", "page": 0}),
|
|
|
+ Document(page_content="Page 1 Content", metadata={"source": "example.pdf", "page": 1}),
|
|
|
+ ]
|
|
|
+
|
|
|
+ mock_sha256 = mocker.patch("embedchain.loaders.docs_site_loader.hashlib.sha256")
|
|
|
+ doc_id = "mocked_hash"
|
|
|
+ mock_sha256.return_value.hexdigest.return_value = doc_id
|
|
|
+
|
|
|
+ result = loader.load_data("dummy_url")
|
|
|
+ assert result["doc_id"] is doc_id
|
|
|
+ assert result["data"] == [
|
|
|
+ {"content": "Page 0 Content", "meta_data": {"source": "example.pdf", "page": 0, "url": "dummy_url"}},
|
|
|
+ {"content": "Page 1 Content", "meta_data": {"source": "example.pdf", "page": 1, "url": "dummy_url"}},
|
|
|
+ ]
|
|
|
+
|
|
|
+
|
|
|
+def test_load_data_fails_to_find_data(loader, mocker):
|
|
|
+ mocked_pypdfloader = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
|
|
|
+ mocked_pypdfloader.return_value.load_and_split.return_value = []
|
|
|
+
|
|
|
+ with pytest.raises(ValueError):
|
|
|
+ loader.load_data("dummy_url")
|
|
|
+
|
|
|
+
|
|
|
+@pytest.fixture
|
|
|
+def loader():
|
|
|
+ from embedchain.loaders.pdf_file import PdfFileLoader
|
|
|
+
|
|
|
+ return PdfFileLoader()
|