# -*- coding: utf-8 -*-
# @Author: privacy
# @Date:   2024-06-11 13:43:14
# @Last Modified by:   privacy
# @Last Modified time: 2024-09-05 16:29:06
# Locate the technical part of a bid
from typing import List, Optional

from celery_tasks.text_extractor import similar_match


# Delimiters applied, in this order, to break a scoring standard into clauses.
_CLAUSE_DELIMITERS = ('。', ';', ',')


def _split_clauses(text: str) -> List[str]:
    """Split *text* on every delimiter in ``_CLAUSE_DELIMITERS``, keeping order.

    Empty fragments (e.g. the one after a trailing delimiter) are kept, so the
    result is the same as nesting ``str.split`` calls for each delimiter.
    """
    fragments = [text]
    for delimiter in _CLAUSE_DELIMITERS:
        fragments = [
            piece
            for fragment in fragments
            for piece in fragment.split(delimiter)
        ]
    return fragments


def tech_loc(scrutinize_dict: dict, outline_dict: List[dict], supplier: str) -> Optional[dict]:
    """
    Locate the technical part of a bid document.

    The first key of ``scrutinize_dict`` containing '技术' is taken as the
    technical section. Each item of that section is turned into one scoring
    criterion whose ``pages`` entry is whatever ``similar_match`` returns for
    the item's title against ``outline_dict``.

    Args:
        scrutinize_dict: detailed review data; maps section names to lists of
            items, each item providing '评分因素', '评分标准' and '权重'
        outline_dict: outline of the bid document, passed to ``similar_match``
        supplier: supplier name, copied into every result entry

    Returns:
        ``None`` when no key contains '技术'; otherwise a dict with
        'scoringCriteria' (one entry per item of the technical section) and
        '标准' (currently always an empty list).

    Raises:
        Exception: anything raised by ``similar_match`` is re-raised after the
            supplier name has been printed.
    """
    # Use the first section whose name mentions "技术" (technical).
    part = next((key for key in scrutinize_dict if '技术' in key), None)
    if part is None:
        return None

    targets = [
        {
            'title': item['评分因素'],
            'text': _split_clauses(item['评分标准']),
            'weight': item['权重'],
        }
        for item in scrutinize_dict[part]
    ]

    result = {
        'scoringCriteria': [],
        '标准': []
    }

    for target in targets:
        try:
            title_sims = similar_match(outline_dict, [target['title']], key='title')
        except Exception:
            # Report which supplier's outline failed, then propagate unchanged.
            print(supplier)
            raise
        result['scoringCriteria'].append({
            'scoringFactors': target['title'],
            'scoringStandard': target['text'],
            'percentage': target['weight'],
            'suppliers': [{
                'name': supplier,
                # NOTE(review): 'grade' and 'supplier' are fixed placeholder
                # values here — confirm whether a later stage overwrites them.
                'grade': 'B',
                'supplier': '总体服务方案的概括文字(200字以内)',
                'pages': title_sims
            }]
        })

        # NOTE: per-clause matching is disabled; '标准' therefore stays empty.
        # text_sims = similar_match(outline_dict, i['text'], key='title')
        # for j in range(len(text_sims)):
        #     result['标准'].append({
        #         '评分标准': i['text'][j],
        #         '定位结果': text_sims[j]
        #     })

    return result


def _main() -> None:
    """Run ``tech_loc`` over every outline file of the local test data set."""
    import json
    from glob import glob
    from pathlib import Path
    from pprint import pprint

    with open('bidding_dataset.json', 'r', encoding='utf-8') as fp:
        scrutinizes = json.load(fp)

    for project, scrutinize_dict in scrutinizes.items():
        for file in glob(f'./data/0预审查初审详审测试数据/{project}/*/*-outline.json'):
            with open(file, 'r', encoding='utf-8') as fp:
                outline_dict = json.load(fp)
            # The supplier is the directory holding the outline file; taking
            # it via pathlib works with both '\\' and '/' separators.
            supplier = Path(file).parent.name
            pprint(
                tech_loc(scrutinize_dict=scrutinize_dict, outline_dict=outline_dict, supplier=supplier)
            )


if __name__ == '__main__':
    _main()