openapi.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. import hashlib
  2. from io import StringIO
  3. from urllib.parse import urlparse
  4. import requests
  5. import yaml
  6. from embedchain.loaders.base_loader import BaseLoader
  class OpenAPILoader(BaseLoader):
      """Loader that splits an OpenAPI YAML document into one text chunk per top-level key."""

      # Seconds to wait for a remote spec. Without an explicit timeout
      # ``requests.get`` may block forever on an unresponsive server.
      _REQUEST_TIMEOUT = 30

      @staticmethod
      def _get_file_content(content):
          """Open the YAML source named by *content* as a text stream.

          Args:
              content: An ``http``/``https`` URL, a ``file`` URL, or a plain
                  filesystem path.

          Returns:
              A text file-like object usable as a context manager: a
              ``StringIO`` holding the response body for remote URLs, or an
              open file handle for local sources.

          Raises:
              ValueError: If *content* has both a scheme and a network location
                  and the scheme is not ``file``, ``http`` or ``https``.
              requests.exceptions.RequestException: If the download fails, times
                  out, or the server answers with an error status.
              OSError: If a local file cannot be opened.
          """
          # Imported locally so this block does not depend on the file's import header.
          from urllib.request import url2pathname

          url = urlparse(content)
          if url.scheme and url.netloc and url.scheme not in ("file", "http", "https"):
              raise ValueError("Not a valid URL.")

          if url.scheme in ("http", "https"):
              response = requests.get(content, timeout=OpenAPILoader._REQUEST_TIMEOUT)
              response.raise_for_status()
              return StringIO(response.text)

          if url.scheme == "file":
              # The path component of a URL is percent-encoded (and carries a
              # leading slash before Windows drive letters); convert it to a
              # real filesystem path before opening.
              return open(url2pathname(url.path), encoding="utf-8")

          # No recognised scheme: treat the string as a filesystem path. The
          # encoding is pinned so the result does not depend on the locale.
          return open(content, encoding="utf-8")

      @staticmethod
      def load_data(content):
          """Load yaml file of openapi. Each pair is a document.

          Args:
              content: URL or filesystem path of the YAML document; accepted
                  forms are those handled by ``_get_file_content``.

          Returns:
              A dict with two keys: ``"doc_id"``, the SHA-256 hex digest of
              *content* followed by all chunk texts joined with ``", "``; and
              ``"data"``, a list with one entry per top-level key, each of the
              form ``{"content": "<key>: <value>", "meta_data": {"url": content,
              "row": <1-based position>}}``.

          Raises:
              ValueError: If the source is not a supported URL, or the parsed
                  YAML is not a mapping at the top level (e.g. empty or a list).
          """
          with OpenAPILoader._get_file_content(content=content) as file:
              # SafeLoader only builds plain Python objects, so parsing a spec
              # from an untrusted location cannot execute arbitrary code.
              yaml_data = yaml.load(file, Loader=yaml.SafeLoader)

          # An empty document parses to None and a sequence to a list; neither
          # has key/value pairs to turn into chunks.
          if not isinstance(yaml_data, dict):
              raise ValueError("OpenAPI document must be a YAML mapping at the top level.")

          data = []
          data_content = []
          for row, (key, value) in enumerate(yaml_data.items(), start=1):
              string_data = f"{key}: {value}"
              metadata = {"url": content, "row": row}
              data.append({"content": string_data, "meta_data": metadata})
              data_content.append(string_data)

          doc_id = hashlib.sha256((content + ", ".join(data_content)).encode()).hexdigest()
          return {"doc_id": doc_id, "data": data}