# test_youtube_video.py (1.8 KB)
  1. import hashlib
  2. import json
  3. from unittest.mock import MagicMock, Mock, patch
  4. import pytest
  5. from embedchain.loaders.youtube_video import YoutubeVideoLoader
  6. @pytest.fixture
  7. def youtube_video_loader():
  8. return YoutubeVideoLoader()
  9. def test_load_data(youtube_video_loader):
  10. video_url = "https://www.youtube.com/watch?v=VIDEO_ID"
  11. mock_loader = Mock()
  12. mock_page_content = "This is a YouTube video content."
  13. mock_loader.load.return_value = [
  14. MagicMock(
  15. page_content=mock_page_content,
  16. metadata={"url": video_url, "title": "Test Video"},
  17. )
  18. ]
  19. mock_transcript = [{"text": "sample text", "start": 0.0, "duration": 5.0}]
  20. with patch("embedchain.loaders.youtube_video.YoutubeLoader.from_youtube_url", return_value=mock_loader), patch(
  21. "embedchain.loaders.youtube_video.YouTubeTranscriptApi.get_transcript", return_value=mock_transcript
  22. ):
  23. result = youtube_video_loader.load_data(video_url)
  24. expected_doc_id = hashlib.sha256((mock_page_content + video_url).encode()).hexdigest()
  25. assert result["doc_id"] == expected_doc_id
  26. expected_data = [
  27. {
  28. "content": "This is a YouTube video content.",
  29. "meta_data": {
  30. "url": video_url,
  31. "title": "Test Video",
  32. "transcript": json.dumps(mock_transcript, ensure_ascii=True),
  33. },
  34. }
  35. ]
  36. assert result["data"] == expected_data
  37. def test_load_data_with_empty_doc(youtube_video_loader):
  38. video_url = "https://www.youtube.com/watch?v=VIDEO_ID"
  39. mock_loader = Mock()
  40. mock_loader.load.return_value = []
  41. with patch("embedchain.loaders.youtube_video.YoutubeLoader.from_youtube_url", return_value=mock_loader):
  42. with pytest.raises(ValueError):
  43. youtube_video_loader.load_data(video_url)