123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- # -*- coding: utf-8 -*-
- # @Author: privacy
- # @Date: 2024-06-11 13:43:14
- # @Last Modified by: privacy
- # @Last Modified time: 2024-09-05 10:37:12
- from pprint import pprint
- from celery_tasks.text_extractor import get_instance
- from celery_tasks.tools import match_price_zhs, match_price_num, match_duration, match_quality
if __name__ == '__main__':
    import json

    # Sample bid PDF. The title/text JSON files are located by dropping the
    # last four characters of this path (the ".pdf" suffix) and appending
    # "-title.json" / "-text.json".
    pdf_path = r'./data/projects/三峡左岸及地下电站地坪整治/投标/湖北建新建设工程有限公司_T221100130348%2F01整本文件/投标文件-修改版9-5-1-1.pdf'
    base_path = pdf_path[:-4]

    with open(base_path + '-title.json', 'r', encoding='utf-8') as title_file:
        titles = json.load(title_file)
    with open(base_path + '-text.json', 'r', encoding='utf-8') as text_file:
        texts = json.load(text_file)

    # One row per field to look up: (result key, content keyword, extractor).
    # Every lookup uses the same title filters and the same loaded
    # titles/texts; only the keyword and the extractor callable differ.
    field_specs = (
        ('price_zhs', '人民币投标总报价', match_price_zhs),
        ('price_num', '人民币投标总报价', match_price_num),
        ('duration', '工期日历天', match_duration),
        ('quality', '工程质量', match_quality),
    )

    results = {}
    for result_key, keyword, extractor in field_specs:
        results[result_key] = get_instance(
            title_instances=['投标函', '开标一览表'],
            content_instances=[keyword],
            titles_list=titles,
            texts_list=texts,
            extractor=extractor,
        )

    # NOTE(review): a cross-check between the two price fields is disabled
    # here; rmb_to_digit is not imported in the visible part of this file.
    #   valid = rmb_to_digit(price_zhs[0][0][0]) == price_num[0][0][0][1:]
    # If re-enabled, it would be reported under a "valid" key.
    pprint(results)
|