docx_file.py 505 B

12345678910111213141516
  1. from langchain.document_loaders import Docx2txtLoader
  2. from embedchain.loaders.base_loader import BaseLoader
  class DocxFileLoader(BaseLoader):
      """Loader that reads a .docx file through ``Docx2txtLoader``."""

      def load_data(self, url):
          """Load the text and metadata of a .docx file.

          ``url`` is handed unchanged to ``Docx2txtLoader``. Only the first
          document returned by the loader's ``load()`` call is used.

          Returns a one-element list holding a dict with two keys:
          ``"content"`` (the document's ``page_content``) and
          ``"meta_data"`` (the document's ``metadata`` with its ``"url"``
          entry set to ``"local"``).
          """
          first_doc = Docx2txtLoader(url).load()[0]
          text = first_doc.page_content
          info = first_doc.metadata
          # The document's own metadata object is updated in place (no copy).
          info["url"] = "local"
          return [{"content": text, "meta_data": info}]