discord.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. import hashlib
  2. import logging
  3. import os
  4. from embedchain.helpers.json_serializable import register_deserializable
  5. from embedchain.loaders.base_loader import BaseLoader
  6. logger = logging.getLogger(__name__)
  @register_deserializable
  class DiscordLoader(BaseLoader):
      """
      Load data from a Discord Channel ID.

      The bot token is read from the ``DISCORD_TOKEN`` environment variable when
      the loader is constructed. ``load_data`` logs in with that token, collects
      every message of the given text channel (plus the messages of threads
      matched to those messages) and returns them as a single stringified
      document.
      """

      def __init__(self):
          """Store the bot token taken from the ``DISCORD_TOKEN`` environment variable.

          Raises:
              ValueError: If ``DISCORD_TOKEN`` is unset or empty.
          """
          # Read the variable once so the check and the stored value cannot differ.
          token = os.environ.get("DISCORD_TOKEN")
          if not token:
              raise ValueError("DISCORD_TOKEN is not set")
          self.token = token

      @staticmethod
      def _format_attachment(attachment):
          """Return the dictionary representation of one message attachment."""
          return {
              "id": attachment.id,
              "filename": attachment.filename,
              "size": attachment.size,
              "url": attachment.url,
              "proxy_url": attachment.proxy_url,
              "height": attachment.height,
              "width": attachment.width,
          }

      @staticmethod
      def _format_embed(embed):
          """Return the dictionary representation of one message embed."""
          timestamp = embed.timestamp
          return {
              "title": embed.title,
              "type": embed.type,
              "description": embed.description,
              "url": embed.url,
              # An embed is not required to carry a timestamp; calling
              # ``isoformat()`` on a missing one would raise AttributeError and
              # abort the whole load, so a falsy timestamp is emitted as None.
              "timestamp": timestamp.isoformat() if timestamp else None,
              "color": embed.color,
              "footer": {
                  "text": embed.footer.text,
                  "icon_url": embed.footer.icon_url,
                  "proxy_icon_url": embed.footer.proxy_icon_url,
              },
              "image": {
                  "url": embed.image.url,
                  "proxy_url": embed.image.proxy_url,
                  "height": embed.image.height,
                  "width": embed.image.width,
              },
              "thumbnail": {
                  "url": embed.thumbnail.url,
                  "proxy_url": embed.thumbnail.proxy_url,
                  "height": embed.thumbnail.height,
                  "width": embed.thumbnail.width,
              },
              "video": {
                  "url": embed.video.url,
                  "height": embed.video.height,
                  "width": embed.video.width,
              },
              "provider": {
                  "name": embed.provider.name,
                  "url": embed.provider.url,
              },
              "author": {
                  "name": embed.author.name,
                  "url": embed.author.url,
                  "icon_url": embed.author.icon_url,
                  "proxy_icon_url": embed.author.proxy_icon_url,
              },
              "fields": [
                  {
                      "name": field.name,
                      "value": field.value,
                      "inline": field.inline,
                  }
                  for field in embed.fields
              ],
          }

      @staticmethod
      def _format_message(message):
          """Convert a Discord message object into a plain dictionary.

          Args:
              message: Object exposing ``id``, ``content``, ``author``,
                  ``created_at``, ``attachments`` and ``embeds``.

          Returns:
              dict: The message ID, content, author, ISO-formatted creation
              time, attachments and embeds.
          """
          return {
              "message_id": message.id,
              "content": message.content,
              "author": {
                  "id": message.author.id,
                  "name": message.author.name,
                  "discriminator": message.author.discriminator,
              },
              "created_at": message.created_at.isoformat(),
              "attachments": [
                  DiscordLoader._format_attachment(attachment) for attachment in message.attachments
              ],
              "embeds": [DiscordLoader._format_embed(embed) for embed in message.embeds],
          }

      def load_data(self, channel_id: str):
          """Load data from a Discord Channel ID.

          Args:
              channel_id: ID of the text channel to read. It is converted with
                  ``int()`` for the lookup and with ``str()`` for hashing, so
                  an integer ID is accepted as well.

          Returns:
              dict: ``{"doc_id": <sha256 hex digest>, "data": [{"content": ...,
              "meta_data": {"url": channel_id}}]}`` where ``content`` is the
              ``str()`` of the list of formatted messages.

          Note:
              Errors raised while reading the channel (including the
              ``ValueError`` for a non-text channel) are logged, not propagated;
              the result then contains whatever was collected before the error.
          """
          # Imported here so the ``discord`` package is only needed when used.
          import discord

          messages = []

          class DiscordClient(discord.Client):
              async def on_ready(self) -> None:
                  logger.info("Logged on as %s!", self.user)
                  try:
                      channel = self.get_channel(int(channel_id))
                      if not isinstance(channel, discord.TextChannel):
                          raise ValueError(
                              f"Channel {channel_id} is not a text channel. "
                              "Only text channels are supported for now."
                          )
                      threads = {thread.id: thread for thread in channel.threads}

                      async for message in channel.history(limit=None):
                          messages.append(DiscordLoader._format_message(message))
                          # NOTE(review): threads are matched by the ID of the
                          # current message - presumably a thread started from a
                          # message shares that message's ID; confirm.
                          thread = threads.get(message.id)
                          if thread is not None:
                              async for thread_message in thread.history(limit=None):
                                  messages.append(DiscordLoader._format_message(thread_message))
                  except Exception as e:
                      # Best-effort: log (with traceback) and fall through so the
                      # caller still receives the messages gathered so far.
                      logger.exception(e)
                  finally:
                      # Single close on every path; the client is always shut
                      # down once on_ready finishes.
                      await self.close()

          intents = discord.Intents.default()
          intents.message_content = True
          client = DiscordClient(intents=intents)
          client.run(self.token)

          metadata = {
              "url": channel_id,
          }
          content = str(messages)
          # ``str(channel_id)`` keeps hashing working when an integer ID is given.
          doc_id = hashlib.sha256((content + str(channel_id)).encode()).hexdigest()
          return {
              "doc_id": doc_id,
              "data": [
                  {
                      "content": content,
                      "meta_data": metadata,
                  }
              ],
          }