123456789101112131415161718192021222324252627282930313233343536 |
- import pytest
- from langchain.schema import Document
def test_load_data(loader, mocker):
    """Check that ``load_data`` returns the mocked pages and the mocked doc id.

    ``PyPDFLoader`` is patched so no real PDF is read, and ``hashlib.sha256``
    is patched so the resulting ``doc_id`` is a known value.
    """
    mocked_pypdfloader = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
    mocked_pypdfloader.return_value.load_and_split.return_value = [
        Document(page_content="Page 0 Content", metadata={"source": "example.pdf", "page": 0}),
        Document(page_content="Page 1 Content", metadata={"source": "example.pdf", "page": 1}),
    ]

    # NOTE(review): the patch target goes through ``docs_site_loader`` rather
    # than ``pdf_file``. Presumably this works because both modules reference
    # the same ``hashlib`` module object -- confirm, and consider patching via
    # the module under test instead.
    mock_sha256 = mocker.patch("embedchain.loaders.docs_site_loader.hashlib.sha256")
    doc_id = "mocked_hash"
    mock_sha256.return_value.hexdigest.return_value = doc_id

    result = loader.load_data("dummy_url")

    # Compare by value: identity (``is``) on strings only passes by accident
    # of object reuse and is not what this test means to check.
    assert result["doc_id"] == doc_id
    assert result["data"] == [
        {"content": "Page 0 Content", "meta_data": {"source": "example.pdf", "page": 0, "url": "dummy_url"}},
        {"content": "Page 1 Content", "meta_data": {"source": "example.pdf", "page": 1, "url": "dummy_url"}},
    ]
def test_load_data_fails_to_find_data(loader, mocker):
    """Check that ``load_data`` raises ``ValueError`` when no pages are returned."""
    # Patch the PDF reader so that splitting the document yields nothing.
    pdf_reader_cls = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
    pdf_reader = pdf_reader_cls.return_value
    pdf_reader.load_and_split.return_value = []

    with pytest.raises(ValueError):
        loader.load_data("dummy_url")
@pytest.fixture
def loader():
    """Provide a ``PdfFileLoader`` instance to the tests in this module."""
    # The import is kept inside the fixture body, as in the original.
    from embedchain.loaders.pdf_file import PdfFileLoader

    pdf_loader = PdfFileLoader()
    return pdf_loader
|