slack.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. import hashlib
  2. import logging
  3. import os
  4. import ssl
  5. from typing import Any, Optional
  6. import certifi
  7. from embedchain.loaders.base_loader import BaseLoader
  8. from embedchain.utils.misc import clean_string
# Default base URL for Slack Web API calls; SlackLoader falls back to this
# value whenever the loader config does not provide its own "base_url".
SLACK_API_BASE_URL = "https://www.slack.com/api/"
  10. class SlackLoader(BaseLoader):
  11. def __init__(self, config: Optional[dict[str, Any]] = None):
  12. super().__init__()
  13. self.config = config if config else {}
  14. if "base_url" not in self.config:
  15. self.config["base_url"] = SLACK_API_BASE_URL
  16. self.client = None
  17. self._setup_loader(self.config)
  18. def _setup_loader(self, config: dict[str, Any]):
  19. try:
  20. from slack_sdk import WebClient
  21. except ImportError as e:
  22. raise ImportError(
  23. "Slack loader requires extra dependencies. \
  24. Install with `pip install --upgrade embedchain[slack]`"
  25. ) from e
  26. if os.getenv("SLACK_USER_TOKEN") is None:
  27. raise ValueError(
  28. "SLACK_USER_TOKEN environment variables not provided. Check `https://docs.embedchain.ai/data-sources/slack` to learn more." # noqa:E501
  29. )
  30. logging.info(f"Creating Slack Loader with config: {config}")
  31. # get slack client config params
  32. slack_bot_token = os.getenv("SLACK_USER_TOKEN")
  33. ssl_cert = ssl.create_default_context(cafile=certifi.where())
  34. base_url = config.get("base_url", SLACK_API_BASE_URL)
  35. headers = config.get("headers")
  36. # for Org-Wide App
  37. team_id = config.get("team_id")
  38. self.client = WebClient(
  39. token=slack_bot_token,
  40. base_url=base_url,
  41. ssl=ssl_cert,
  42. headers=headers,
  43. team_id=team_id,
  44. )
  45. logging.info("Slack Loader setup successful!")
  46. @staticmethod
  47. def _check_query(query):
  48. if not isinstance(query, str):
  49. raise ValueError(
  50. f"Invalid query passed to Slack loader, found: {query}. Check `https://docs.embedchain.ai/data-sources/slack` to learn more." # noqa:E501
  51. )
  52. def load_data(self, query):
  53. self._check_query(query)
  54. try:
  55. data = []
  56. data_content = []
  57. logging.info(f"Searching slack conversations for query: {query}")
  58. results = self.client.search_messages(
  59. query=query,
  60. sort="timestamp",
  61. sort_dir="desc",
  62. count=self.config.get("count", 100),
  63. )
  64. messages = results.get("messages")
  65. num_message = len(messages)
  66. logging.info(f"Found {num_message} messages for query: {query}")
  67. matches = messages.get("matches", [])
  68. for message in matches:
  69. url = message.get("permalink")
  70. text = message.get("text")
  71. content = clean_string(text)
  72. message_meta_data_keys = ["iid", "team", "ts", "type", "user", "username"]
  73. meta_data = {}
  74. for key in message.keys():
  75. if key in message_meta_data_keys:
  76. meta_data[key] = message.get(key)
  77. meta_data.update({"url": url})
  78. data.append(
  79. {
  80. "content": content,
  81. "meta_data": meta_data,
  82. }
  83. )
  84. data_content.append(content)
  85. doc_id = hashlib.md5((query + ", ".join(data_content)).encode()).hexdigest()
  86. return {
  87. "doc_id": doc_id,
  88. "data": data,
  89. }
  90. except Exception as e:
  91. logging.warning(f"Error in loading slack data: {e}")
  92. raise ValueError(
  93. f"Error in loading slack data: {e}. Check `https://docs.embedchain.ai/data-sources/slack` to learn more." # noqa:E501
  94. ) from e