audio.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. import hashlib
  2. import os
  3. import validators
  4. from embedchain.helpers.json_serializable import register_deserializable
  5. from embedchain.loaders.base_loader import BaseLoader
  6. try:
  7. from deepgram import DeepgramClient, PrerecordedOptions
  8. except ImportError:
  9. raise ImportError(
  10. "Audio file requires extra dependencies. Install with `pip install deepgram-sdk==3.2.7`"
  11. ) from None
  @register_deserializable
  class AudioLoader(BaseLoader):
      """Loader that turns an audio file or audio URL into a text transcript.

      Transcription is delegated to a ``DeepgramClient`` that is created when
      the loader is constructed, using the key found in the
      ``DEEPGRAM_API_KEY`` environment variable.
      """

      def __init__(self):
          """Create the Deepgram client from the environment.

          Raises:
              ValueError: If ``DEEPGRAM_API_KEY`` is unset or empty.
          """
          # Read the variable once; an empty value is rejected just like a
          # missing one.
          api_key = os.environ.get("DEEPGRAM_API_KEY")
          if not api_key:
              raise ValueError("DEEPGRAM_API_KEY is not set")
          self.client = DeepgramClient(api_key)

      def load_data(self, url: str) -> dict:
          """Load data from an audio file or URL.

          Args:
              url: Either a remote URL (anything ``validators.url`` accepts) or
                  a path to a local audio file.

          Returns:
              A dict with two keys: ``"doc_id"``, the SHA-256 hex digest of the
              transcript concatenated with ``url``; and ``"data"``, a
              one-element list holding the transcript under ``"content"`` and
              ``{"url": url}`` under ``"meta_data"``.

          Raises:
              OSError: If ``url`` is treated as a local path and cannot be
                  opened.
          """
          options = PrerecordedOptions(
              model="nova-2",
              smart_format=True,
          )

          if validators.url(url):
              # Remote audio: Deepgram is handed the URL itself.
              source = {"url": url}
              response = self.client.listen.prerecorded.v("1").transcribe_url(source, options)
          else:
              # Anything that is not a valid URL is treated as a local path.
              # The request is made inside the ``with`` block so the handle is
              # still open while it is being consumed.
              with open(url, "rb") as audio:
                  source = {"buffer": audio}
                  response = self.client.listen.prerecorded.v("1").transcribe_file(source, options)

          # Only the first alternative of the first channel is used.
          # NOTE(review): assumes the response always contains at least one
          # channel and one alternative - confirm against the Deepgram SDK.
          content = response["results"]["channels"][0]["alternatives"][0]["transcript"]

          # The id depends on both the transcript and its source.
          doc_id = hashlib.sha256((content + url).encode()).hexdigest()
          metadata = {"url": url}

          return {
              "doc_id": doc_id,
              "data": [
                  {
                      "content": content,
                      "meta_data": metadata,
                  }
              ],
          }