1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- # -*- coding: utf-8 -*-
- # @Author: privacy
- # @Date: 2024-06-11 13:43:14
- # @Last Modified by: privacy
- # @Last Modified time: 2024-09-19 17:37:49
- import os
- import base64
- import requests
def main(client_id, client_secret):
    """Request an OAuth access token from the Baidu AIP token endpoint.

    Args:
        client_id: string, the application's client id (API key).
        client_secret: string, the application's client secret.

    Returns:
        The decoded JSON body of the token response. The script section of
        this file reads its ``access_token`` entry.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    # Pass the credentials through ``params`` so requests percent-encodes
    # them; formatting them straight into the URL breaks the query string
    # when a value contains characters such as '&', '#', '=' or '+'.
    params = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
    }
    payload = ""
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    response = requests.request(
        "POST", url, headers=headers, params=params, data=payload
    )
    # NOTE(review): this echoes the raw response body (which carries the
    # access token) to stdout; kept because existing callers may rely on it.
    print(response.text)
    return response.json()
def create_task(url, file_path, file_url):
    """Upload a local file to the parser service and create a parsing task.

    Args:
        url: string, service request URL.
        file_path: string, path of the local file to upload.
        file_url: string, link to the file. Accepted for interface
            compatibility but not used: only the local-file upload is
            implemented here.

    Returns:
        The decoded JSON body of the service response.
    """
    file_name = os.path.basename(file_path)

    # Read the file inside a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(file_path, 'rb') as fh:
        content = fh.read()

    # Multipart file part: (filename, content, content type).
    body = {
        "file": (file_name, content, "multipart/form-data"),
    }
    # Plain form fields sent alongside the file part.
    data = {
        "file_name": file_name,
        "return_para_nodes": True
    }
    response = requests.post(url, data=data, files=body)
    return response.json()
def query_task(url, task_id):
    """Query the parser service for the state of a previously created task.

    Args:
        url: string, request URL of the query endpoint.
        task_id: string, id of the task to look up.

    Returns:
        The decoded JSON body of the service response.
    """
    form = {"task_id": task_id}
    # The same mapping is supplied as both ``data`` and ``files``; passing
    # ``files`` makes requests send the body as multipart/form-data.
    return requests.post(url, data=form, files=form).json()
if __name__ == '__main__':
    import json
    import time

    # NOTE(review): the access token is hard-coded. It could instead be
    # fetched with main(client_id, client_secret)['access_token'], with the
    # credentials supplied from outside the source file.
    token = "24.0ab90c2e2b750b61995052ab6b94f62c.2592000.1728805729.282335-86574608"

    # Step 1: upload the local PDF and create the parsing task.
    submit_url = f"https://aip.baidubce.com/file/2.0/brain/online/v1/parser/task?access_token={token}"
    pdf_path = "D:/desktop/三峡水利/celery.pdf"
    created = create_task(submit_url, pdf_path, "")
    print(created)

    # Step 2: wait a fixed 10 seconds, then query the task once.
    # NOTE(review): there is no polling; this presumably assumes the task
    # has finished within the wait -- confirm against the service.
    time.sleep(10)
    task_id = created['result']['task_id']
    query_url = f"https://aip.baidubce.com/file/2.0/brain/online/v1/parser/task/query?access_token={token}"
    status = query_task(query_url, task_id)
    print(status)

    # Step 3: download the parse result and store it as pretty-printed JSON.
    result_url = status['result']['parse_result_url']
    download = requests.get(result_url)
    download.encoding = 'utf-8'
    # NOTE(review): the output file name is fixed and unrelated to the
    # input file name above.
    with open('浙江国迈建设集团有限公司技术文件.json', 'w', encoding='utf-8') as out:
        json.dump(download.json(), out, indent=4, ensure_ascii=False)
|