test_docx_file.py 1.0 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. import hashlib
  2. from unittest.mock import MagicMock, patch
  3. import pytest
  4. from embedchain.loaders.docx_file import DocxFileLoader
  5. @pytest.fixture
  6. def mock_docx2txt_loader():
  7. with patch("embedchain.loaders.docx_file.Docx2txtLoader") as mock_loader:
  8. yield mock_loader
  9. @pytest.fixture
  10. def docx_file_loader():
  11. return DocxFileLoader()
  12. def test_load_data(mock_docx2txt_loader, docx_file_loader):
  13. mock_url = "mock_docx_file.docx"
  14. mock_loader = MagicMock()
  15. mock_loader.load.return_value = [MagicMock(page_content="Sample Docx Content", metadata={"url": "local"})]
  16. mock_docx2txt_loader.return_value = mock_loader
  17. result = docx_file_loader.load_data(mock_url)
  18. assert "doc_id" in result
  19. assert "data" in result
  20. expected_content = "Sample Docx Content"
  21. assert result["data"][0]["content"] == expected_content
  22. assert result["data"][0]["meta_data"]["url"] == "local"
  23. expected_doc_id = hashlib.sha256((expected_content + mock_url).encode()).hexdigest()
  24. assert result["doc_id"] == expected_doc_id