slack.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. import hashlib
  2. import logging
  3. import os
  4. import ssl
  5. from typing import Any, Dict, Optional
  6. import certifi
  7. from embedchain.loaders.base_loader import BaseLoader
  8. from embedchain.utils.misc import clean_string
  9. SLACK_API_BASE_URL = "https://www.slack.com/api/"
  10. class SlackLoader(BaseLoader):
  11. def __init__(self, config: Optional[Dict[str, Any]] = None):
  12. super().__init__()
  13. self.config = config if config else {}
  14. if "base_url" not in self.config:
  15. self.config["base_url"] = SLACK_API_BASE_URL
  16. self.client = None
  17. self._setup_loader(self.config)
  18. def _setup_loader(self, config: Dict[str, Any]):
  19. try:
  20. from slack_sdk import WebClient
  21. except ImportError as e:
  22. raise ImportError(
  23. "Slack loader requires extra dependencies. \
  24. Install with `pip install --upgrade embedchain[slack]`"
  25. ) from e
  26. if os.getenv("SLACK_USER_TOKEN") is None:
  27. raise ValueError(
  28. "SLACK_USER_TOKEN environment variables not provided. Check `https://docs.embedchain.ai/data-sources/slack` to learn more." # noqa:E501
  29. )
  30. logging.info(f"Creating Slack Loader with config: {config}")
  31. # get slack client config params
  32. slack_bot_token = os.getenv("SLACK_USER_TOKEN")
  33. ssl_cert = ssl.create_default_context(cafile=certifi.where())
  34. base_url = config.get("base_url", SLACK_API_BASE_URL)
  35. headers = config.get("headers")
  36. # for Org-Wide App
  37. team_id = config.get("team_id")
  38. self.client = WebClient(
  39. token=slack_bot_token,
  40. base_url=base_url,
  41. ssl=ssl_cert,
  42. headers=headers,
  43. team_id=team_id,
  44. )
  45. logging.info("Slack Loader setup successful!")
  46. def _check_query(self, query):
  47. if not isinstance(query, str):
  48. raise ValueError(
  49. f"Invalid query passed to Slack loader, found: {query}. Check `https://docs.embedchain.ai/data-sources/slack` to learn more." # noqa:E501
  50. )
  51. def load_data(self, query):
  52. self._check_query(query)
  53. try:
  54. data = []
  55. data_content = []
  56. logging.info(f"Searching slack conversations for query: {query}")
  57. results = self.client.search_messages(
  58. query=query,
  59. sort="timestamp",
  60. sort_dir="desc",
  61. count=self.config.get("count", 100),
  62. )
  63. messages = results.get("messages")
  64. num_message = results.get("total")
  65. logging.info(f"Found {num_message} messages for query: {query}")
  66. matches = messages.get("matches", [])
  67. for message in matches:
  68. url = message.get("permalink")
  69. text = message.get("text")
  70. content = clean_string(text)
  71. message_meta_data_keys = ["channel", "iid", "team", "ts", "type", "user", "username"]
  72. meta_data = message.fromkeys(message_meta_data_keys, "")
  73. meta_data.update({"url": url})
  74. data.append(
  75. {
  76. "content": content,
  77. "meta_data": meta_data,
  78. }
  79. )
  80. data_content.append(content)
  81. doc_id = hashlib.md5((query + ", ".join(data_content)).encode()).hexdigest()
  82. return {
  83. "doc_id": doc_id,
  84. "data": data,
  85. }
  86. except Exception as e:
  87. logging.warning(f"Error in loading slack data: {e}")
  88. raise ValueError(
  89. f"Error in loading slack data: {e}. Check `https://docs.embedchain.ai/data-sources/slack` to learn more." # noqa:E501
  90. ) from e