test_csv.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. import csv
  2. import os
  3. import pathlib
  4. import tempfile
  5. from unittest.mock import MagicMock, patch
  6. import pytest
  7. from embedchain.loaders.csv import CsvLoader
  8. @pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
  9. def test_load_data(delimiter):
  10. """
  11. Test csv loader
  12. Tests that file is loaded, metadata is correct and content is correct
  13. """
  14. # Creating temporary CSV file
  15. with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
  16. writer = csv.writer(tmpfile, delimiter=delimiter)
  17. writer.writerow(["Name", "Age", "Occupation"])
  18. writer.writerow(["Alice", "28", "Engineer"])
  19. writer.writerow(["Bob", "35", "Doctor"])
  20. writer.writerow(["Charlie", "22", "Student"])
  21. tmpfile.seek(0)
  22. filename = tmpfile.name
  23. # Loading CSV using CsvLoader
  24. loader = CsvLoader()
  25. result = loader.load_data(filename)
  26. data = result["data"]
  27. # Assertions
  28. assert len(data) == 3
  29. assert data[0]["content"] == "Name: Alice, Age: 28, Occupation: Engineer"
  30. assert data[0]["meta_data"]["url"] == filename
  31. assert data[0]["meta_data"]["row"] == 1
  32. assert data[1]["content"] == "Name: Bob, Age: 35, Occupation: Doctor"
  33. assert data[1]["meta_data"]["url"] == filename
  34. assert data[1]["meta_data"]["row"] == 2
  35. assert data[2]["content"] == "Name: Charlie, Age: 22, Occupation: Student"
  36. assert data[2]["meta_data"]["url"] == filename
  37. assert data[2]["meta_data"]["row"] == 3
  38. # Cleaning up the temporary file
  39. os.unlink(filename)
  40. @pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
  41. def test_load_data_with_file_uri(delimiter):
  42. """
  43. Test csv loader with file URI
  44. Tests that file is loaded, metadata is correct and content is correct
  45. """
  46. # Creating temporary CSV file
  47. with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
  48. writer = csv.writer(tmpfile, delimiter=delimiter)
  49. writer.writerow(["Name", "Age", "Occupation"])
  50. writer.writerow(["Alice", "28", "Engineer"])
  51. writer.writerow(["Bob", "35", "Doctor"])
  52. writer.writerow(["Charlie", "22", "Student"])
  53. tmpfile.seek(0)
  54. filename = pathlib.Path(tmpfile.name).as_uri() # Convert path to file URI
  55. # Loading CSV using CsvLoader
  56. loader = CsvLoader()
  57. result = loader.load_data(filename)
  58. data = result["data"]
  59. # Assertions
  60. assert len(data) == 3
  61. assert data[0]["content"] == "Name: Alice, Age: 28, Occupation: Engineer"
  62. assert data[0]["meta_data"]["url"] == filename
  63. assert data[0]["meta_data"]["row"] == 1
  64. assert data[1]["content"] == "Name: Bob, Age: 35, Occupation: Doctor"
  65. assert data[1]["meta_data"]["url"] == filename
  66. assert data[1]["meta_data"]["row"] == 2
  67. assert data[2]["content"] == "Name: Charlie, Age: 22, Occupation: Student"
  68. assert data[2]["meta_data"]["url"] == filename
  69. assert data[2]["meta_data"]["row"] == 3
  70. # Cleaning up the temporary file
  71. os.unlink(tmpfile.name)
  72. @pytest.mark.parametrize("content", ["ftp://example.com", "sftp://example.com", "mailto://example.com"])
  73. def test_get_file_content(content):
  74. with pytest.raises(ValueError):
  75. loader = CsvLoader()
  76. loader._get_file_content(content)
  77. @pytest.mark.parametrize("content", ["http://example.com", "https://example.com"])
  78. def test_get_file_content_http(content):
  79. """
  80. Test _get_file_content method of CsvLoader for http and https URLs
  81. """
  82. with patch("requests.get") as mock_get:
  83. mock_response = MagicMock()
  84. mock_response.text = "Name,Age,Occupation\nAlice,28,Engineer\nBob,35,Doctor\nCharlie,22,Student"
  85. mock_get.return_value = mock_response
  86. loader = CsvLoader()
  87. file_content = loader._get_file_content(content)
  88. mock_get.assert_called_once_with(content)
  89. mock_response.raise_for_status.assert_called_once()
  90. assert file_content.read() == mock_response.text