test_csv.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. import csv
  2. import os
  3. import pathlib
  4. import tempfile
  5. import pytest
  6. from embedchain.loaders.csv import CsvLoader
  7. @pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
  8. def test_load_data(delimiter):
  9. """
  10. Test csv loader
  11. Tests that file is loaded, metadata is correct and content is correct
  12. """
  13. # Creating temporary CSV file
  14. with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
  15. writer = csv.writer(tmpfile, delimiter=delimiter)
  16. writer.writerow(["Name", "Age", "Occupation"])
  17. writer.writerow(["Alice", "28", "Engineer"])
  18. writer.writerow(["Bob", "35", "Doctor"])
  19. writer.writerow(["Charlie", "22", "Student"])
  20. tmpfile.seek(0)
  21. filename = tmpfile.name
  22. # Loading CSV using CsvLoader
  23. loader = CsvLoader()
  24. result = loader.load_data(filename)
  25. # Assertions
  26. assert len(result) == 3
  27. assert result[0]["content"] == "Name: Alice, Age: 28, Occupation: Engineer"
  28. assert result[0]["meta_data"]["url"] == filename
  29. assert result[0]["meta_data"]["row"] == 1
  30. assert result[1]["content"] == "Name: Bob, Age: 35, Occupation: Doctor"
  31. assert result[1]["meta_data"]["url"] == filename
  32. assert result[1]["meta_data"]["row"] == 2
  33. assert result[2]["content"] == "Name: Charlie, Age: 22, Occupation: Student"
  34. assert result[2]["meta_data"]["url"] == filename
  35. assert result[2]["meta_data"]["row"] == 3
  36. # Cleaning up the temporary file
  37. os.unlink(filename)
  38. @pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
  39. def test_load_data_with_file_uri(delimiter):
  40. """
  41. Test csv loader with file URI
  42. Tests that file is loaded, metadata is correct and content is correct
  43. """
  44. # Creating temporary CSV file
  45. with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
  46. writer = csv.writer(tmpfile, delimiter=delimiter)
  47. writer.writerow(["Name", "Age", "Occupation"])
  48. writer.writerow(["Alice", "28", "Engineer"])
  49. writer.writerow(["Bob", "35", "Doctor"])
  50. writer.writerow(["Charlie", "22", "Student"])
  51. tmpfile.seek(0)
  52. filename = pathlib.Path(tmpfile.name).as_uri() # Convert path to file URI
  53. # Loading CSV using CsvLoader
  54. loader = CsvLoader()
  55. result = loader.load_data(filename)
  56. # Assertions
  57. assert len(result) == 3
  58. assert result[0]["content"] == "Name: Alice, Age: 28, Occupation: Engineer"
  59. assert result[0]["meta_data"]["url"] == filename
  60. assert result[0]["meta_data"]["row"] == 1
  61. assert result[1]["content"] == "Name: Bob, Age: 35, Occupation: Doctor"
  62. assert result[1]["meta_data"]["url"] == filename
  63. assert result[1]["meta_data"]["row"] == 2
  64. assert result[2]["content"] == "Name: Charlie, Age: 22, Occupation: Student"
  65. assert result[2]["meta_data"]["url"] == filename
  66. assert result[2]["meta_data"]["row"] == 3
  67. # Cleaning up the temporary file
  68. os.unlink(tmpfile.name)