|
@@ -0,0 +1,209 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+# @Author: privacy
|
|
|
+# @Date: 2024-08-30 11:17:21
|
|
|
+# @Last Modified by: privacy
|
|
|
+# @Last Modified time: 2024-09-26 14:32:39
|
|
|
+
|
|
|
+"""
|
|
|
+商务部分
|
|
|
+"""
|
|
|
+import re
|
|
|
+from typing import List, Optional
|
|
|
+
|
|
|
+from celery_tasks.LLMAgent import get_proj
|
|
|
+from celery_tasks.project_loc import extract_project
|
|
|
+from celery_tasks.text_extractor import similar_match
|
|
|
+from celery_tasks.extract_financial_report import extract_financial_report
|
|
|
+
|
|
|
+
|
|
|
def get_score(comment: str, standard: str) -> Optional[str]:
    """Extract the grade or score from ``comment`` on the scale of ``standard``.

    The scoring standard decides which scale applies:

    * If it mentions a letter scale (``A~D`` or ``A、B、C``), the last
      ``<letter>级`` occurrence in ``comment`` is returned, or ``'B'`` when
      the comment contains none.
    * Otherwise, if it mentions points (``<digits>分``), the last
      ``<digits>分`` occurrence in ``comment`` is returned as a string of
      digits, or ``"60"`` when the comment contains none.
    * If the standard matches neither scale, ``None`` is returned.

    Args:
        comment: Free-text evaluation to read the grade/score from. A
            non-string value (e.g. ``None``) is treated as containing no
            grade, so the scale's default is returned.
        standard: Scoring-standard text that determines the scale. Must be a
            string; anything else raises ``TypeError`` from ``re``.

    Returns:
        The grade letter, the score digits, or ``None`` when ``standard``
        uses neither recognised scale.
    """
    # The letter scale is checked first, so a standard mentioning both a
    # letter scale and points is treated as a letter scale.
    # NOTE(review): the pattern previously listed ``A~D`` twice; one of them
    # may have been meant as a different tilde variant -- TODO confirm.
    if re.search(r'A~D|A、B、C', standard):
        pattern, default = '([A-D])级', 'B'
    elif re.search(r'\d+\s?分', standard):
        pattern, default = r'(\d+)\s?分', "60"
    else:
        return None

    # A missing or non-text comment falls back to the default instead of
    # relying on a blanket ``except Exception``.
    if not isinstance(comment, str):
        return default

    matches = re.findall(pattern, comment)
    # The last occurrence wins when the comment mentions several grades.
    return matches[-1] if matches else default
|
|
|
+
|
|
|
+
|
|
|
def _criterion_entry(item: dict, supplier: str, grade: Optional[str], pages: List[dict], extra: Optional[dict] = None) -> dict:
    """Build one ``scoringCriteria`` entry for a single supplier.

    Args:
        item: Review item providing the ``评分因素``, ``评分标准`` and ``权重`` keys.
        supplier: Supplier name stored under ``name``.
        grade: Grade or score stored under ``grade``.
        pages: Page locations stored under ``pages``.
        extra: Optional additional fields for the supplier record; they are
            placed between ``grade`` and ``pages``.

    Returns:
        The criterion dict with a one-element ``suppliers`` list.
    """
    supplier_entry = {'name': supplier, 'grade': grade}
    if extra:
        supplier_entry.update(extra)
    supplier_entry['pages'] = pages
    return {
        'scoringFactors': item['评分因素'],
        'scoringStandard': item['评分标准'],
        'percentage': item['权重'],
        'suppliers': [supplier_entry],
    }


def busi_loc(scrutinize_dict: dict, outline_dict: List[dict], title_list: List[dict], table_list: List[dict], image_list: List[dict], supplier: str, project: str = None, file_name: str = None) -> Optional[dict]:
    """Locate and grade the business (商务) part of a bid.

    The first key of ``scrutinize_dict`` containing ``商务`` selects the list
    of review items. Each item is dispatched on keywords found in its
    ``评分因素`` text and contributes one entry to the result.

    Args:
        scrutinize_dict: Detailed-review outline; maps a section name to a
            list of items with ``评分因素``, ``评分标准`` and ``权重`` keys.
        outline_dict: Bid outline, passed to ``similar_match`` for items
            that match none of the known keywords.
        title_list: Forwarded to ``extract_financial_report``.
        table_list: Forwarded to ``extract_project`` and
            ``extract_financial_report``.
        image_list: Forwarded to ``extract_financial_report``.
        supplier: Supplier name recorded in every entry.
        project: Currently unused; kept for interface compatibility.
        file_name: File name recorded in every page location.

    Returns:
        ``{'scoringCriteria': [...]}`` with one entry per review item, or
        ``None`` when no section name contains ``商务``.
    """
    # Pick the business scoring method from the detailed-review outline.
    part = next((key for key in scrutinize_dict if '商务' in key), None)

    # Nothing to do when the outline has no business review section.
    if part is None:
        return None

    criteria = []

    for item in scrutinize_dict[part]:
        factor = item['评分因素']
        standard = item['评分标准']
        extra = None

        if '信用' in factor:
            # Fixed placeholder comment; the grade is not derived from the bid.
            grade = get_score(comment="B级(70分)", standard=standard)
            pages = []
        elif '业绩' in factor:
            # Look up the project-performance tables.
            proj_list = extract_project(table_list, instances=['合同金额', '合同价格', '发包人名称', '项目规模', '合同时间'])
            # Each project is located by the last of its page numbers.
            pages = [
                {
                    'fileName': file_name,
                    'pageKey': '',
                    'pageStart': str(proj['page_numbers'][-1]),
                    'pageEnd': str(proj['page_numbers'][-1]),
                }
                for proj in proj_list
            ]
            # Score the extracted projects against the standard.
            comment = get_proj(input_json=proj_list, standard=standard)
            grade = get_score(comment=comment, standard=standard)
        elif '财务' in factor:
            # NOTE(review): the report year is hard-coded to 2022.
            financial_list = extract_financial_report(
                title_list=title_list,
                table_list=table_list,
                image_list=image_list,
                year=2022
            )
            starts = [page[0] for report in financial_list for page in report['pages']]
            # De-duplicate start pages in first-seen order so the output is
            # deterministic (a plain set() gives arbitrary order).
            # NOTE(review): unlike the other branches, page values are not
            # converted to str here -- confirm what the consumer expects.
            pages = [
                {
                    "fileName": file_name,
                    "pageKey": "",
                    "pageEnd": page,
                    "pageStart": page,
                }
                for page in dict.fromkeys(starts)
            ]
            grade = 'B'
            extra = {'supplier': '3个关键的财务指标'}
        elif '报价' in factor or '完整性' in factor:
            # Fixed grade, no page location.
            grade = 'B'
            pages = []
        elif '涉密' in factor:
            # Fixed placeholder comment; the grade is not derived from the bid.
            grade = get_score(comment="A级(100分)", standard=standard)
            pages = []
        else:
            # Unknown factor: locate it by matching against outline titles.
            title_sims = similar_match(outline_dict, [factor], key='title')
            pages = [
                {
                    'fileName': file_name,
                    'pageStart': str(sim['page_number']),
                    'pageEnd': str(sim['page_number']),
                    'pageKey': '',
                    'text': sim['title'],
                    'score': sim['相似度'],
                }
                for sim in title_sims
            ]
            grade = get_score(comment="B级(70分)", standard=standard)

        criteria.append(_criterion_entry(item, supplier, grade, pages, extra))

    return {'scoringCriteria': criteria}
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Manual smoke test: run busi_loc on locally stored sample data and
    # pretty-print the result.
    import os
    import json
    from glob import glob
    from pprint import pprint

    with open('bidding_dataset.json', 'r', encoding='utf-8') as fp:
        scrutinizes = json.load(fp)

    for project, scrutinize_dict in scrutinizes.items():
        for file in glob(f'./data/0预审查初审详审测试数据/{project}/*/*-outline.json'):
            with open(file, 'r', encoding='utf-8') as fp:
                outline_dict = json.load(fp)

            # Empty outlines are deleted from disk and skipped.
            if outline_dict == []:
                os.remove(file)
                continue

            # The sibling JSON files share the outline file's name prefix.
            with open(file.replace('outline.json', 'title.json'), 'r', encoding='utf-8') as fp:
                title_list = json.load(fp)
            with open(file.replace('outline.json', 'table.json'), 'r', encoding='utf-8') as fp:
                table_list = json.load(fp)
            with open(file.replace('outline.json', 'image.json'), 'r', encoding='utf-8') as fp:
                image_list = json.load(fp)

            # The supplier is the name of the directory holding the file.
            # os.path handles both '/' and '\\' separators, whereas splitting
            # on '\\' raises IndexError for POSIX-style paths.
            supplier = os.path.basename(os.path.dirname(file))

            pprint(
                busi_loc(
                    scrutinize_dict=scrutinize_dict,
                    outline_dict=outline_dict,
                    title_list=title_list,
                    table_list=table_list,
                    image_list=image_list,
                    supplier=supplier,
                    project=project,
                    file_name=file
                )
            )
            # NOTE(review): stops after the first processed file; assumed to
            # sit inside the loop as a debugging shortcut -- TODO confirm.
            raise SystemExit(0)
|