text_file.py 903 B

123456789101112131415161718192021222324252627282930
  1. import hashlib
  2. import os
  3. from embedchain.helpers.json_serializable import register_deserializable
  4. from embedchain.loaders.base_loader import BaseLoader
  5. @register_deserializable
  6. class TextFileLoader(BaseLoader):
  7. def load_data(self, url: str):
  8. """Load data from a text file located at a local path."""
  9. if not os.path.exists(url):
  10. raise FileNotFoundError(f"The file at {url} does not exist.")
  11. with open(url, "r", encoding="utf-8") as file:
  12. content = file.read()
  13. doc_id = hashlib.sha256((content + url).encode()).hexdigest()
  14. meta_data = {"url": url, "file_size": os.path.getsize(url), "file_type": url.split(".")[-1]}
  15. return {
  16. "doc_id": doc_id,
  17. "data": [
  18. {
  19. "content": content,
  20. "meta_data": meta_data,
  21. }
  22. ],
  23. }