test_youtube_video.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. import hashlib
  2. from unittest.mock import MagicMock, Mock, patch
  3. import pytest
  4. from embedchain.loaders.youtube_video import YoutubeVideoLoader
  5. @pytest.fixture
  6. def youtube_video_loader():
  7. return YoutubeVideoLoader()
  8. def test_load_data(youtube_video_loader):
  9. video_url = "https://www.youtube.com/watch?v=VIDEO_ID"
  10. mock_loader = Mock()
  11. mock_page_content = "This is a YouTube video content."
  12. mock_loader.load.return_value = [
  13. MagicMock(
  14. page_content=mock_page_content,
  15. metadata={"url": video_url, "title": "Test Video"},
  16. )
  17. ]
  18. mock_transcript = [{"text": "sample text", "start": 0.0, "duration": 5.0}]
  19. with patch("embedchain.loaders.youtube_video.YoutubeLoader.from_youtube_url", return_value=mock_loader), patch(
  20. "embedchain.loaders.youtube_video.YouTubeTranscriptApi.get_transcript", return_value=mock_transcript
  21. ):
  22. result = youtube_video_loader.load_data(video_url)
  23. expected_doc_id = hashlib.sha256((mock_page_content + video_url).encode()).hexdigest()
  24. assert result["doc_id"] == expected_doc_id
  25. expected_data = [
  26. {
  27. "content": "This is a YouTube video content.",
  28. "meta_data": {"url": video_url, "title": "Test Video", "transcript": "Unavailable"},
  29. }
  30. ]
  31. assert result["data"] == expected_data
  32. def test_load_data_with_empty_doc(youtube_video_loader):
  33. video_url = "https://www.youtube.com/watch?v=VIDEO_ID"
  34. mock_loader = Mock()
  35. mock_loader.load.return_value = []
  36. with patch("embedchain.loaders.youtube_video.YoutubeLoader.from_youtube_url", return_value=mock_loader):
  37. with pytest.raises(ValueError):
  38. youtube_video_loader.load_data(video_url)