# -*- coding: utf-8 -*-
# @Author: privacy
# @Date:   2024-06-11 13:43:14
# @Last Modified by:   privacy
# @Last Modified time: 2024-09-19 17:37:49
"""Example client for the parser-task endpoints hosted on aip.baidubce.com.

The script uploads a local file to create a parsing task, polls the query
endpoint until a ``parse_result_url`` is reported, then downloads that URL
and stores its JSON payload on disk.
"""
import base64
import json
import os
import time

import requests

# Seconds `requests` waits on connect/read before raising; without it a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 60

# Polling policy used by the example script below.
POLL_INTERVAL_SECONDS = 10
MAX_POLL_ATTEMPTS = 30

TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token"


def main(client_id, client_secret, timeout=REQUEST_TIMEOUT):
    """Request an OAuth token using the client-credentials grant.

    Args:
        client_id: string, application client id
        client_secret: string, application client secret
        timeout: seconds to wait for the server before giving up

    Returns:
        The decoded JSON body of the token response.
    """
    # Let requests build the query string so that special characters in the
    # credentials are URL-encoded instead of being spliced in verbatim.
    params = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
    }
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }

    response = requests.request(
        "POST", TOKEN_URL, params=params, headers=headers, data="",
        timeout=timeout,
    )

    # NOTE(review): this echoes the raw token response (credentials) to
    # stdout; kept because existing callers may rely on the output.
    print(response.text)
    return response.json()


def create_task(url, file_path, file_url, timeout=REQUEST_TIMEOUT):
    """Create a parsing task by uploading a local file.

    Args:
        url: string, service request URL
        file_path: path of the local file to upload
        file_url: link to the file; currently unused, only the local-file
            upload is implemented
        timeout: seconds to wait for the server before giving up

    Returns:
        The decoded JSON body of the response.
    """
    file_name = os.path.basename(file_path)

    # Read inside a context manager so the handle is closed deterministically
    # (the bare open(...).read() it replaces leaked the descriptor).
    with open(file_path, 'rb') as fh:
        file_bytes = fh.read()

    # Upload by file content.
    body = {
        "file": (file_name, file_bytes, "multipart/form-data"),
    }

    # Alternative kept from the original author: request by file link.
    # body = {
    #     "file_url": (file_url, "multipart/form-data")
    # }

    data = {
        "file_name": file_name,
        "return_para_nodes": True
    }

    response = requests.post(url, data=data, files=body, timeout=timeout)
    return response.json()


def query_task(url, task_id, timeout=REQUEST_TIMEOUT):
    """Query the state of a previously created task.

    Args:
        url: string, request URL
        task_id: string, task id
        timeout: seconds to wait for the server before giving up

    Returns:
        The decoded JSON body of the response.
    """
    data = {
        "task_id": task_id
    }

    # Passing the dict as `files` as well makes requests encode the body as
    # multipart/form-data; presumably the endpoint requires that encoding --
    # TODO confirm against the service documentation.
    response = requests.post(url, data=data, files=data, timeout=timeout)
    return response.json()


if __name__ == '__main__':
    # Prefer credentials supplied through the environment:
    #   BAIDU_ACCESS_TOKEN                      -- a ready-made access token, or
    #   BAIDU_CLIENT_ID / BAIDU_CLIENT_SECRET   -- exchanged for a token.
    token = os.environ.get("BAIDU_ACCESS_TOKEN")
    if not token:
        client_id = os.environ.get("BAIDU_CLIENT_ID")
        client_secret = os.environ.get("BAIDU_CLIENT_SECRET")
        if client_id and client_secret:
            token = main(client_id, client_secret)['access_token']
        else:
            # NOTE(review): hard-coded fallback kept only for backward
            # compatibility; secrets do not belong in source control --
            # rotate this token and remove the literal.
            token = "24.0ab90c2e2b750b61995052ab6b94f62c.2592000.1728805729.282335-86574608"

    request_host = f"https://aip.baidubce.com/file/2.0/brain/online/v1/parser/task?access_token={token}"
    file_path = "D:/desktop/三峡水利/celery.pdf"
    response = create_task(request_host, file_path, "")
    print(response)
    task_id = response['result']['task_id']

    request_host = f"https://aip.baidubce.com/file/2.0/brain/online/v1/parser/task/query?access_token={token}"

    # Poll instead of sleeping once: a single query after a fixed delay fails
    # whenever the task needs longer than that delay to finish.
    for _ in range(MAX_POLL_ATTEMPTS):
        time.sleep(POLL_INTERVAL_SECONDS)
        resp = query_task(request_host, task_id)
        print(resp)
        url = (resp.get('result') or {}).get('parse_result_url')
        if url:
            break
    else:
        raise RuntimeError(
            "task {} reported no parse_result_url after {} queries; "
            "last response: {}".format(task_id, MAX_POLL_ATTEMPTS, resp)
        )

    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.encoding = 'utf-8'
    # Decode before opening the output file so a malformed payload does not
    # leave an empty file behind.
    parse_result = response.json()

    with open('浙江国迈建设集团有限公司技术文件.json', 'w', encoding='utf-8') as fp:
        json.dump(parse_result, fp, indent=4, ensure_ascii=False)