|
@@ -0,0 +1,150 @@
|
|
|
+import logging
|
|
|
+import os
|
|
|
+import hashlib
|
|
|
+
|
|
|
+from embedchain.helpers.json_serializable import register_deserializable
|
|
|
+from embedchain.loaders.base_loader import BaseLoader
|
|
|
+
|
|
|
+
|
|
|
+@register_deserializable
|
|
|
+class DiscordLoader(BaseLoader):
|
|
|
+ """
|
|
|
+ Load data from a Discord Channel ID.
|
|
|
+ """
|
|
|
+
|
|
|
+ def __init__(self):
|
|
|
+ if not os.environ.get("DISCORD_TOKEN"):
|
|
|
+ raise ValueError("DISCORD_TOKEN is not set")
|
|
|
+
|
|
|
+ self.token = os.environ.get("DISCORD_TOKEN")
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def _format_message(message):
|
|
|
+ return {
|
|
|
+ "message_id": message.id,
|
|
|
+ "content": message.content,
|
|
|
+ "author": {
|
|
|
+ "id": message.author.id,
|
|
|
+ "name": message.author.name,
|
|
|
+ "discriminator": message.author.discriminator,
|
|
|
+ },
|
|
|
+ "created_at": message.created_at.isoformat(),
|
|
|
+ "attachments": [
|
|
|
+ {
|
|
|
+ "id": attachment.id,
|
|
|
+ "filename": attachment.filename,
|
|
|
+ "size": attachment.size,
|
|
|
+ "url": attachment.url,
|
|
|
+ "proxy_url": attachment.proxy_url,
|
|
|
+ "height": attachment.height,
|
|
|
+ "width": attachment.width,
|
|
|
+ }
|
|
|
+ for attachment in message.attachments
|
|
|
+ ],
|
|
|
+ "embeds": [
|
|
|
+ {
|
|
|
+ "title": embed.title,
|
|
|
+ "type": embed.type,
|
|
|
+ "description": embed.description,
|
|
|
+ "url": embed.url,
|
|
|
+ "timestamp": embed.timestamp.isoformat(),
|
|
|
+ "color": embed.color,
|
|
|
+ "footer": {
|
|
|
+ "text": embed.footer.text,
|
|
|
+ "icon_url": embed.footer.icon_url,
|
|
|
+ "proxy_icon_url": embed.footer.proxy_icon_url,
|
|
|
+ },
|
|
|
+ "image": {
|
|
|
+ "url": embed.image.url,
|
|
|
+ "proxy_url": embed.image.proxy_url,
|
|
|
+ "height": embed.image.height,
|
|
|
+ "width": embed.image.width,
|
|
|
+ },
|
|
|
+ "thumbnail": {
|
|
|
+ "url": embed.thumbnail.url,
|
|
|
+ "proxy_url": embed.thumbnail.proxy_url,
|
|
|
+ "height": embed.thumbnail.height,
|
|
|
+ "width": embed.thumbnail.width,
|
|
|
+ },
|
|
|
+ "video": {
|
|
|
+ "url": embed.video.url,
|
|
|
+ "height": embed.video.height,
|
|
|
+ "width": embed.video.width,
|
|
|
+ },
|
|
|
+ "provider": {
|
|
|
+ "name": embed.provider.name,
|
|
|
+ "url": embed.provider.url,
|
|
|
+ },
|
|
|
+ "author": {
|
|
|
+ "name": embed.author.name,
|
|
|
+ "url": embed.author.url,
|
|
|
+ "icon_url": embed.author.icon_url,
|
|
|
+ "proxy_icon_url": embed.author.proxy_icon_url,
|
|
|
+ },
|
|
|
+ "fields": [
|
|
|
+ {
|
|
|
+ "name": field.name,
|
|
|
+ "value": field.value,
|
|
|
+ "inline": field.inline,
|
|
|
+ }
|
|
|
+ for field in embed.fields
|
|
|
+ ],
|
|
|
+ }
|
|
|
+ for embed in message.embeds
|
|
|
+ ],
|
|
|
+ }
|
|
|
+
|
|
|
+ def load_data(self, channel_id: str):
|
|
|
+ """Load data from a Discord Channel ID."""
|
|
|
+ import discord
|
|
|
+
|
|
|
+ messages = []
|
|
|
+
|
|
|
+ class DiscordClient(discord.Client):
|
|
|
+ async def on_ready(self) -> None:
|
|
|
+ logging.info("Logged on as {0}!".format(self.user))
|
|
|
+ try:
|
|
|
+ channel = self.get_channel(int(channel_id))
|
|
|
+ if not isinstance(channel, discord.TextChannel):
|
|
|
+ raise ValueError(
|
|
|
+ f"Channel {channel_id} is not a text channel. " "Only text channels are supported for now."
|
|
|
+ )
|
|
|
+ threads = {}
|
|
|
+
|
|
|
+ for thread in channel.threads:
|
|
|
+ threads[thread.id] = thread
|
|
|
+
|
|
|
+ async for message in channel.history(limit=None):
|
|
|
+ messages.append(DiscordLoader._format_message(message))
|
|
|
+ if message.id in threads:
|
|
|
+ async for thread_message in threads[message.id].history(limit=None):
|
|
|
+ messages.append(DiscordLoader._format_message(thread_message))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logging.error(e)
|
|
|
+ await self.close()
|
|
|
+ finally:
|
|
|
+ await self.close()
|
|
|
+
|
|
|
+ intents = discord.Intents.default()
|
|
|
+ intents.message_content = True
|
|
|
+ client = DiscordClient(intents=intents)
|
|
|
+ client.run(self.token)
|
|
|
+
|
|
|
+ meta_data = {
|
|
|
+ "url": channel_id,
|
|
|
+ }
|
|
|
+
|
|
|
+ messages = str(messages)
|
|
|
+
|
|
|
+ doc_id = hashlib.sha256((messages + channel_id).encode()).hexdigest()
|
|
|
+
|
|
|
+ return {
|
|
|
+ "doc_id": doc_id,
|
|
|
+ "data": [
|
|
|
+ {
|
|
|
+ "content": messages,
|
|
|
+ "meta_data": meta_data,
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ }
|