123456789101112131415161718192021222324252627282930 |
- import hashlib
- import os
- from embedchain.helpers.json_serializable import register_deserializable
- from embedchain.loaders.base_loader import BaseLoader
@register_deserializable
class TextFileLoader(BaseLoader):
    """Loader that reads a local UTF-8 text file into a single document."""

    def load_data(self, url: str) -> dict:
        """Load data from a text file located at a local path.

        Args:
            url: Filesystem path of the text file to read.

        Returns:
            A dict with two keys:

            * ``doc_id`` -- SHA-256 hex digest of the file content
              concatenated with ``url``.
            * ``data`` -- a one-element list holding the file ``content`` and
              its ``meta_data`` (``url``, ``file_size`` in bytes, and
              ``file_type``: the extension without the leading dot, or ``""``
              when the file name has no extension).

        Raises:
            FileNotFoundError: If nothing exists at ``url``.
        """
        if not os.path.exists(url):
            raise FileNotFoundError(f"The file at {url} does not exist.")

        with open(url, "r", encoding="utf-8") as file:
            content = file.read()

        doc_id = hashlib.sha256((content + url).encode()).hexdigest()

        # splitext looks only at the final path component, so a dot in a
        # directory name (e.g. "/data/v1.2/notes") is never mistaken for an
        # extension, and an extensionless file yields "" rather than its name.
        file_type = os.path.splitext(url)[1][1:]

        meta_data = {
            "url": url,
            "file_size": os.path.getsize(url),
            "file_type": file_type,
        }
        return {
            "doc_id": doc_id,
            "data": [
                {
                    "content": content,
                    "meta_data": meta_data,
                }
            ],
        }
|