json.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637
  1. import hashlib
  2. import json
  3. import os
  4. from embedchain.loaders.base_loader import BaseLoader
  class JSONLoader(BaseLoader):
      """Loader that reads a local JSON file via llama_hub's JSONDataReader."""

      @staticmethod
      def load_data(content):
          """Load a local JSON file and return its documents with a stable id.

          The file is parsed with ``json.load`` and the parsed object is handed
          to llama_hub's ``JSONDataReader``; the ``text`` of every document it
          returns becomes one entry of the result.

          Args:
              content: Path of a JSON file on the local filesystem, given as a
                  ``str`` or an ``os.PathLike`` object (e.g. ``pathlib.Path``).

          Returns:
              A dict with two keys:
                  ``"doc_id"``: SHA-256 hex digest of the path concatenated
                      with the document texts joined by ``", "``.
                  ``"data"``: list of
                      ``{"content": <text>, "meta_data": {"url": <path>}}``.

          Raises:
              ImportError: If the optional llama_hub dependency is missing.
              TypeError: If ``content`` is not a string or path-like object.
              FileNotFoundError: If ``content`` is not an existing file.
              json.JSONDecodeError: If the file does not contain valid JSON.
          """
          # Optional dependency: imported lazily so the module stays importable
          # without the ``json`` extra installed.
          try:
              from llama_hub.jsondata.base import JSONDataReader
          except ImportError as err:
              raise ImportError(
                  f"Couldn't import the required packages to load {content}. "
                  "Do `pip install --upgrade 'embedchain[json]'`"
              ) from err

          # Normalise path-like input to ``str`` so it can be concatenated into
          # the hash input and stored as the "url" metadata below.
          if isinstance(content, os.PathLike):
              content = os.fspath(content)
          if not isinstance(content, str):
              raise TypeError(
                  "Invalid content input. Expected the path to a locally saved "
                  f"json file, got {type(content).__name__}"
              )
          if not os.path.isfile(content):
              raise FileNotFoundError(
                  "Invalid content input. Provide the correct path to the json "
                  f"file saved locally, got {content}"
              )

          # Decode explicitly as UTF-8 instead of the locale default.
          with open(content, "r", encoding="utf-8") as json_file:
              json_data = json.load(json_file)

          docs = JSONDataReader().load_data(json_data)
          texts = [doc.text for doc in docs]
          data = [
              {"content": text, "meta_data": {"url": content}} for text in texts
          ]

          # The id depends on both the source path and every document text.
          doc_id = hashlib.sha256(
              (content + ", ".join(texts)).encode()
          ).hexdigest()
          return {"doc_id": doc_id, "data": data}