youtube_video.py 868 B

123456789101112131415161718192021222324252627
  1. from langchain.document_loaders import YoutubeLoader
  2. from embedchain.helper_classes.json_serializable import register_deserializable
  3. from embedchain.loaders.base_loader import BaseLoader
  4. from embedchain.utils import clean_string
  @register_deserializable
  class YoutubeVideoLoader(BaseLoader):
      """Loader that fetches a YouTube video's text via LangChain's ``YoutubeLoader``."""

      def load_data(self, url):
          """Load data from a Youtube video.

          Args:
              url: Video URL; handed unchanged to
                  ``YoutubeLoader.from_youtube_url`` and also stored in the
                  returned metadata under the ``"url"`` key.

          Returns:
              A one-element list holding a dict with two keys:
              ``"content"`` (the first loaded document's ``page_content``
              after ``clean_string``) and ``"meta_data"`` (that document's
              ``metadata`` with ``"url"`` added).

          Raises:
              ValueError: If the loader returns no documents.
          """
          # add_video_info=True is passed so the loader supplies video details
          # in ``metadata`` -- NOTE(review): exact fields depend on the
          # installed LangChain version; confirm against its docs.
          loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
          documents = loader.load()

          # Guard first: everything below indexes documents[0].
          if not documents:
              raise ValueError("No data found")

          # Only the first document is used; any further ones are ignored.
          first = documents[0]
          content = clean_string(first.page_content)

          # Record the source URL alongside whatever metadata the loader gave.
          meta_data = first.metadata
          meta_data["url"] = url

          return [
              {
                  "content": content,
                  "meta_data": meta_data,
              }
          ]