|
- # -*- coding: utf-8 -*-
- # @Author: privacy
- # @Date: 2024-08-30 11:17:21
- # @Last Modified by: privacy
- # @Last Modified time: 2024-12-04 11:17:17
- """
- 商务部分
- """
- import re
- from typing import List, Optional
- from celery_tasks.LLMAgent import get_proj
- from celery_tasks.project_loc import extract_project
- from celery_tasks.text_extractor import similar_match
- from celery_tasks.extract_financial_report import extract_financial_report
def get_score(comment: str, standard: str) -> Optional[str]:
    """Extract the grade or score stated in *comment*, in the form *standard* uses.

    The scoring scheme is detected from *standard*:

    * letter grades, when it mentions ``A~D`` / ``A～D`` / ``A、B、C``: the
      last ``<letter>级`` found in *comment* is returned, or ``'B'`` if none;
    * numeric points, when it mentions ``<digits>分``: the last ``<digits>分``
      found in *comment* is returned (digits only, as a string), or ``'60'``
      if none.

    Args:
        comment: Free-text review comment to read the grade/score from. A
            non-string value (e.g. ``None``) yields the scheme's fallback.
        standard: Text of the scoring standard; decides which scheme applies.

    Returns:
        The grade letter or the score digits as a string, or ``None`` when
        *standard* matches neither scheme.
    """
    # The original pattern listed the ASCII form ``A~D`` twice; the full-width
    # tilde is accepted here as well so both spellings select the grade scheme.
    if re.search(r'A[~～]D|A、B、C', standard):
        pattern, fallback = r'([A-D])级', 'B'
    elif re.search(r'\d+\s?分', standard):
        pattern, fallback = r'(\d+)\s?分', '60'
    else:
        return None

    # Explicit checks replace the former blanket ``except Exception``: both a
    # non-string comment and a comment without a match give the fallback.
    if not isinstance(comment, str):
        return fallback
    matches = re.findall(pattern, comment)
    # The last occurrence wins, matching the previous ``.pop()`` behaviour.
    return matches[-1] if matches else fallback
def _criterion_entry(item: dict, supplier: str, grade: Optional[str], pages: List[dict], extra: Optional[dict] = None) -> dict:
    """Build one ``scoringCriteria`` record for a review item and a single supplier."""
    supplier_entry = {
        'expertAdvice': '',
        'writeName': '',
        'name': supplier,
        'grade': grade,
    }
    if extra:
        # Branch-specific fields go between 'grade' and 'pages', as before.
        supplier_entry.update(extra)
    supplier_entry['pages'] = pages
    return {
        'scoringFactors': item['评分因素'],
        'scoringStandard': item['评分标准'],
        'percentage': item['权重'],
        'expertAdvice': '',
        'writeName': '',
        'suppliers': [supplier_entry],
    }


def _financial_pages(financial_list: List[dict], file_name: Optional[str]) -> List[dict]:
    """Convert financial-report hits into de-duplicated page-location entries."""
    # page[0] is used as the page to point at -- presumably the first page of
    # each hit; confirm against extract_financial_report's output format.
    # dict.fromkeys de-duplicates while keeping first-seen order, so the output
    # order is deterministic (the former set() iteration was not ordered).
    first_pages = dict.fromkeys(
        page[0] for report in financial_list for page in report['pages']
    )
    # Pages are emitted as strings, consistent with the other branches.
    return [
        {
            "fileName": file_name,
            "conformFlag": 1,
            "pageKey": "",
            "pageEnd": str(page),
            "pageStart": str(page),
        }
        for page in first_pages
    ]


def busi_loc(scrutinize_dict: dict, outline_dict: List[dict], title_list: List[dict], table_list: List[dict], image_list: List[dict], supplier: str, project: Optional[str] = None, file_name: Optional[str] = None, financial_year: int = 2022) -> Optional[dict]:
    """Build the detailed-review result for the commercial ('商务') part of a bid.

    The first key of *scrutinize_dict* containing '商务' selects the review
    section. Each item of that section produces one ``scoringCriteria``
    record, with the grade and page locations chosen by the keyword found in
    the item's '评分因素' (checked in this order: 信用, 业绩, 财务, 报价,
    完整性, 涉密, otherwise a title-similarity lookup).

    Args:
        scrutinize_dict: Maps review-section names to lists of item dicts.
            Each item is read through the keys '评分因素', '评分标准' and
            '权重'.
        outline_dict: Outline entries passed to ``similar_match`` for factors
            that have no dedicated branch.
        title_list: Forwarded to ``extract_financial_report``.
        table_list: Forwarded to ``extract_project`` and
            ``extract_financial_report``.
        image_list: Forwarded to ``extract_financial_report``.
        supplier: Supplier name written into every record.
        project: Not used by this function; kept for caller compatibility.
        file_name: Written as ``fileName`` into every page-location entry.
        financial_year: Year forwarded to ``extract_financial_report``;
            defaults to the previously hard-coded 2022.

    Returns:
        ``None`` when no section name contains '商务'; otherwise a dict with
        the keys ``'writeName'`` and ``'scoringCriteria'``.
    """
    # Pick the commercial scoring section from the detailed-review outline.
    part = next((key for key in scrutinize_dict if '商务' in key), None)
    if part is None:
        # No commercial review method found: nothing to evaluate.
        return None

    result = {
        'writeName': '',
        'scoringCriteria': []
    }

    # Index by `part` (not a leaked loop variable) so the lookup cannot drift.
    for item in scrutinize_dict[part]:
        factor = item['评分因素']
        standard = item['评分标准']
        extra = None

        if '信用' in factor:
            # Fixed placeholder comment, converted to the standard's scheme.
            grade = get_score(comment="B级(70分)", standard=standard)
            pages = []
        elif '业绩' in factor:
            # Look up the project-experience tables.
            proj_list = extract_project(table_list, instances=['合同金额', '合同价格', '发包人名称', '项目规模', '合同时间'])
            # One location entry per project, pointing at its last page number.
            pages = [
                {
                    'fileName': file_name,
                    'conformFlag': 1,
                    'pageKey': '',
                    'pageStart': str(proj['page_numbers'][-1]),
                    'pageEnd': str(proj['page_numbers'][-1]),
                }
                for proj in proj_list
            ]
            # Score the extracted projects against the standard.
            comment = get_proj(input_json=proj_list, standard=standard)
            grade = get_score(comment=comment, standard=standard)
        elif '财务' in factor:
            financial_list = extract_financial_report(
                title_list=title_list,
                table_list=table_list,
                image_list=image_list,
                year=financial_year
            )
            pages = _financial_pages(financial_list, file_name)
            grade = 'B'
            extra = {'supplier': '3个关键的财务指标'}
        elif '报价' in factor or '完整性' in factor:
            # Both factors get a fixed grade and no page locations.
            grade = 'B'
            pages = []
        elif '涉密' in factor:
            grade = get_score(comment="A级(100分)", standard=standard)
            pages = []
        else:
            # Fall back to locating outline titles similar to the factor name.
            title_sims = similar_match(outline_dict, [factor], key='title')
            pages = [
                {
                    'fileName': file_name,
                    'conformFlag': 1,
                    'pageStart': str(sim['page_number']),
                    'pageEnd': str(sim['page_number']),
                    'pageKey': '',
                    'text': sim['title'],
                    'score': sim['相似度'],
                }
                for sim in title_sims
            ]
            grade = get_score(comment="B级(70分)", standard=standard)

        result['scoringCriteria'].append(
            _criterion_entry(item, supplier, grade, pages, extra)
        )

    return result
if __name__ == '__main__':
    # Ad-hoc manual run: loads the review outlines from bidding_dataset.json,
    # walks the matching local test data and pretty-prints the result of
    # busi_loc for a bid.
    import os
    import json
    from glob import glob
    from pprint import pprint

    def _load_json(path):
        """Read a UTF-8 JSON file and return the decoded object."""
        with open(path, 'r', encoding='utf-8') as fp:
            return json.load(fp)

    scrutinizes = _load_json('bidding_dataset.json')

    for project, scrutinize_dict in scrutinizes.items():
        for file in glob(f'./data/0预审查初审详审测试数据/{project}/*/*-outline.json'):
            outline_dict = _load_json(file)
            if outline_dict == []:
                # WARNING: empty outline files are deleted from disk.
                os.remove(file)
                continue

            # Sibling files share the outline's prefix.
            title_list = _load_json(file.replace('outline.json', 'title.json'))
            table_list = _load_json(file.replace('outline.json', 'table.json'))
            image_list = _load_json(file.replace('outline.json', 'image.json'))

            # The supplier is the name of the directory holding the file.
            # os.path handles both '/' and '\\' separators where applicable,
            # unlike the former split('\\'), which failed on POSIX paths.
            supplier = os.path.basename(os.path.dirname(file))

            pprint(
                busi_loc(
                    scrutinize_dict=scrutinize_dict,
                    outline_dict=outline_dict,
                    title_list=title_list,
                    table_list=table_list,
                    image_list=image_list,
                    supplier=supplier,
                    project=project,
                    file_name=file
                )
            )
            # NOTE(review): the source's indentation was lost, so the position
            # of this exit is assumed -- stop after the first printed result.
            # Confirm against the original file.
            raise SystemExit(0)
|