12345678910111213 |
- from langchain.document_loaders import UnstructuredWordDocumentLoader
- class DocFileLoader:
- def load_data(self, url):
- loader = UnstructuredWordDocumentLoader(url)
- output = []
- data = loader.load()
- content = data[0].page_content
- meta_data = data[0].metadata
- meta_data["url"] = "local"
- output.append({"content": content, "meta_data": meta_data})
- return output
|