discord.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. import hashlib
  2. import logging
  3. import os
  4. from embedchain.helpers.json_serializable import register_deserializable
  5. from embedchain.loaders.base_loader import BaseLoader
  6. @register_deserializable
  7. class DiscordLoader(BaseLoader):
  8. """
  9. Load data from a Discord Channel ID.
  10. """
  11. def __init__(self):
  12. if not os.environ.get("DISCORD_TOKEN"):
  13. raise ValueError("DISCORD_TOKEN is not set")
  14. self.token = os.environ.get("DISCORD_TOKEN")
  15. @staticmethod
  16. def _format_message(message):
  17. return {
  18. "message_id": message.id,
  19. "content": message.content,
  20. "author": {
  21. "id": message.author.id,
  22. "name": message.author.name,
  23. "discriminator": message.author.discriminator,
  24. },
  25. "created_at": message.created_at.isoformat(),
  26. "attachments": [
  27. {
  28. "id": attachment.id,
  29. "filename": attachment.filename,
  30. "size": attachment.size,
  31. "url": attachment.url,
  32. "proxy_url": attachment.proxy_url,
  33. "height": attachment.height,
  34. "width": attachment.width,
  35. }
  36. for attachment in message.attachments
  37. ],
  38. "embeds": [
  39. {
  40. "title": embed.title,
  41. "type": embed.type,
  42. "description": embed.description,
  43. "url": embed.url,
  44. "timestamp": embed.timestamp.isoformat(),
  45. "color": embed.color,
  46. "footer": {
  47. "text": embed.footer.text,
  48. "icon_url": embed.footer.icon_url,
  49. "proxy_icon_url": embed.footer.proxy_icon_url,
  50. },
  51. "image": {
  52. "url": embed.image.url,
  53. "proxy_url": embed.image.proxy_url,
  54. "height": embed.image.height,
  55. "width": embed.image.width,
  56. },
  57. "thumbnail": {
  58. "url": embed.thumbnail.url,
  59. "proxy_url": embed.thumbnail.proxy_url,
  60. "height": embed.thumbnail.height,
  61. "width": embed.thumbnail.width,
  62. },
  63. "video": {
  64. "url": embed.video.url,
  65. "height": embed.video.height,
  66. "width": embed.video.width,
  67. },
  68. "provider": {
  69. "name": embed.provider.name,
  70. "url": embed.provider.url,
  71. },
  72. "author": {
  73. "name": embed.author.name,
  74. "url": embed.author.url,
  75. "icon_url": embed.author.icon_url,
  76. "proxy_icon_url": embed.author.proxy_icon_url,
  77. },
  78. "fields": [
  79. {
  80. "name": field.name,
  81. "value": field.value,
  82. "inline": field.inline,
  83. }
  84. for field in embed.fields
  85. ],
  86. }
  87. for embed in message.embeds
  88. ],
  89. }
  90. def load_data(self, channel_id: str):
  91. """Load data from a Discord Channel ID."""
  92. import discord
  93. messages = []
  94. class DiscordClient(discord.Client):
  95. async def on_ready(self) -> None:
  96. logging.info("Logged on as {0}!".format(self.user))
  97. try:
  98. channel = self.get_channel(int(channel_id))
  99. if not isinstance(channel, discord.TextChannel):
  100. raise ValueError(
  101. f"Channel {channel_id} is not a text channel. " "Only text channels are supported for now."
  102. )
  103. threads = {}
  104. for thread in channel.threads:
  105. threads[thread.id] = thread
  106. async for message in channel.history(limit=None):
  107. messages.append(DiscordLoader._format_message(message))
  108. if message.id in threads:
  109. async for thread_message in threads[message.id].history(limit=None):
  110. messages.append(DiscordLoader._format_message(thread_message))
  111. except Exception as e:
  112. logging.error(e)
  113. await self.close()
  114. finally:
  115. await self.close()
  116. intents = discord.Intents.default()
  117. intents.message_content = True
  118. client = DiscordClient(intents=intents)
  119. client.run(self.token)
  120. meta_data = {
  121. "url": channel_id,
  122. }
  123. messages = str(messages)
  124. doc_id = hashlib.sha256((messages + channel_id).encode()).hexdigest()
  125. return {
  126. "doc_id": doc_id,
  127. "data": [
  128. {
  129. "content": messages,
  130. "meta_data": meta_data,
  131. }
  132. ],
  133. }