youtube_video.py 763 B

12345678910111213141516171819202122232425
  1. from langchain.document_loaders import YoutubeLoader
  2. from embedchain.loaders.base_loader import BaseLoader
  3. from embedchain.utils import clean_string
  4. class YoutubeVideoLoader(BaseLoader):
  5. def load_data(self, url):
  6. """Load data from a Youtube video."""
  7. loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
  8. doc = loader.load()
  9. output = []
  10. if not len(doc):
  11. raise ValueError("No data found")
  12. content = doc[0].page_content
  13. content = clean_string(content)
  14. meta_data = doc[0].metadata
  15. meta_data["url"] = url
  16. output.append(
  17. {
  18. "content": content,
  19. "meta_data": meta_data,
  20. }
  21. )
  22. return output