doc_file.py 426 B

12345678910111213
  1. from langchain.document_loaders import UnstructuredWordDocumentLoader
  2. class DocFileLoader:
  3. def load_data(self, url):
  4. loader = UnstructuredWordDocumentLoader(url)
  5. output = []
  6. data = loader.load()
  7. content = data[0].page_content
  8. meta_data = data[0].metadata
  9. meta_data["url"] = "local"
  10. output.append({"content": content, "meta_data": meta_data})
  11. return output