docx_file.py 438 B

1234567891011121314
  1. from langchain.document_loaders import Docx2txtLoader
  2. class DocxFileLoader:
  3. def load_data(self, url):
  4. """Load data from a .docx file."""
  5. loader = Docx2txtLoader(url)
  6. output = []
  7. data = loader.load()
  8. content = data[0].page_content
  9. meta_data = data[0].metadata
  10. meta_data["url"] = "local"
  11. output.append({"content": content, "meta_data": meta_data})
  12. return output