busi_instance.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. # -*- coding: utf-8 -*-
  2. # @Author: privacy
  3. # @Date: 2024-08-30 11:17:21
  4. # @Last Modified by: privacy
  5. # @Last Modified time: 2024-12-04 11:17:17
  6. """
  7. 商务部分
  8. """
  9. import re
  10. from typing import List, Optional
  11. from celery_tasks.LLMAgent import get_proj
  12. from celery_tasks.project_loc import extract_project
  13. from celery_tasks.text_extractor import similar_match
  14. from celery_tasks.extract_financial_report import extract_financial_report
  15. def get_score(comment: str, standard: str):
  16. if re.findall(r'A~D|A~D|A、B、C', standard):
  17. try:
  18. return re.findall('([A-D])级', comment).pop()
  19. except Exception:
  20. return 'B'
  21. elif re.findall(r'\d+\s?分', standard):
  22. try:
  23. return re.findall(r'(\d+)\s?分', comment).pop()
  24. except Exception:
  25. return "60"
  26. def busi_loc(scrutinize_dict: dict, outline_dict: List[dict], title_list: List[dict], table_list: List[dict], image_list: List[dict], supplier: str, project: str = None, file_name: str = None) -> Optional[List[dict]]:
  27. """
  28. 投标商务部分定位
  29. Args:
  30. scrutinize_dict: 详细评审
  31. tender_file: 投标文件
  32. Returns:
  33. result: 商务部分详审结果
  34. """
  35. part = None
  36. # 从详审大纲中获取商务评分方法
  37. for key in scrutinize_dict.keys():
  38. if '商务' in key:
  39. part = key
  40. break
  41. # 没有找到商务评审方法则直接返回
  42. if not part:
  43. return None
  44. result = {
  45. 'writeName': '',
  46. 'scoringCriteria': []
  47. }
  48. for item in scrutinize_dict[key]:
  49. if '信用' in item['评分因素']:
  50. result['scoringCriteria'].append({
  51. 'scoringFactors': item['评分因素'],
  52. 'scoringStandard': item['评分标准'],
  53. 'percentage': item['权重'],
  54. 'expertAdvice': '',
  55. 'writeName': '',
  56. 'suppliers': [{
  57. 'expertAdvice': '',
  58. 'writeName': '',
  59. 'name': supplier,
  60. 'grade': get_score(comment="B级(70分)", standard=item['评分标准']),
  61. 'pages': []
  62. }]
  63. })
  64. elif '业绩' in item['评分因素']:
  65. # 项目业绩表查询
  66. proj_list = extract_project(table_list, instances=['合同金额', '合同价格', '发包人名称', '项目规模', '合同时间'])
  67. # 定位信息格式化
  68. title_sims = [{'fileName': file_name, 'conformFlag': 1, 'pageKey': '', 'pageStart': str(proj['page_numbers'][-1]), 'pageEnd': str(proj['page_numbers'][-1])} for proj in proj_list]
  69. # # 打分
  70. comment = get_proj(input_json=proj_list, standard=item['评分标准'])
  71. # 结果回传
  72. result['scoringCriteria'].append({
  73. 'scoringFactors': item['评分因素'],
  74. 'scoringStandard': item['评分标准'],
  75. 'percentage': item['权重'],
  76. 'expertAdvice': '',
  77. 'writeName': '',
  78. 'suppliers': [{
  79. 'expertAdvice': '',
  80. 'writeName': '',
  81. 'name': supplier,
  82. 'grade': get_score(comment=comment, standard=item['评分标准']),
  83. 'pages': title_sims
  84. }]
  85. })
  86. elif '财务' in item['评分因素']:
  87. financial_list = extract_financial_report(
  88. title_list=title_list,
  89. table_list=table_list,
  90. image_list=image_list,
  91. year=2022
  92. )
  93. title_sims = []
  94. starts = []
  95. for _ in financial_list:
  96. for page in _['pages']:
  97. starts.append(page[0])
  98. for page in set(starts):
  99. title_sims.append({
  100. "fileName": file_name,
  101. "conformFlag": 1,
  102. "pageKey": "",
  103. "pageEnd": page,
  104. "pageStart": page,
  105. })
  106. result['scoringCriteria'].append({
  107. 'scoringFactors': item['评分因素'],
  108. 'scoringStandard': item['评分标准'],
  109. 'percentage': item['权重'],
  110. 'expertAdvice': '',
  111. 'writeName': '',
  112. 'suppliers': [{
  113. 'expertAdvice': '',
  114. 'writeName': '',
  115. 'name': supplier,
  116. 'grade': 'B',
  117. 'supplier': '3个关键的财务指标',
  118. 'pages': title_sims
  119. }]
  120. })
  121. elif '报价' in item['评分因素']:
  122. result['scoringCriteria'].append({
  123. 'scoringFactors': item['评分因素'],
  124. 'scoringStandard': item['评分标准'],
  125. 'percentage': item['权重'],
  126. 'expertAdvice': '',
  127. 'writeName': '',
  128. 'suppliers': [{
  129. 'expertAdvice': '',
  130. 'writeName': '',
  131. 'name': supplier,
  132. 'grade': 'B',
  133. 'pages': []
  134. }]
  135. })
  136. elif '完整性' in item['评分因素']:
  137. result['scoringCriteria'].append({
  138. 'scoringFactors': item['评分因素'],
  139. 'scoringStandard': item['评分标准'],
  140. 'percentage': item['权重'],
  141. 'expertAdvice': '',
  142. 'writeName': '',
  143. 'suppliers': [{
  144. 'expertAdvice': '',
  145. 'writeName': '',
  146. 'name': supplier,
  147. 'grade': 'B',
  148. 'pages': []
  149. }]
  150. })
  151. elif '涉密' in item['评分因素']:
  152. result['scoringCriteria'].append({
  153. 'scoringFactors': item['评分因素'],
  154. 'scoringStandard': item['评分标准'],
  155. 'percentage': item['权重'],
  156. 'expertAdvice': '',
  157. 'writeName': '',
  158. 'suppliers': [{
  159. 'expertAdvice': '',
  160. 'writeName': '',
  161. 'name': supplier,
  162. 'grade': get_score(comment="A级(100分)", standard=item['评分标准']),
  163. 'pages': []
  164. }]
  165. })
  166. else:
  167. title_sims = similar_match(outline_dict, [item['评分因素']], key='title')
  168. pages = [{'fileName': file_name, 'conformFlag': 1, 'pageStart': str(sim['page_number']), 'pageEnd': str(sim['page_number']), 'pageKey': '', 'text': sim['title'], 'score': sim['相似度']} for sim in title_sims]
  169. result['scoringCriteria'].append({
  170. 'scoringFactors': item['评分因素'],
  171. 'scoringStandard': item['评分标准'],
  172. 'percentage': item['权重'],
  173. 'expertAdvice': '',
  174. 'writeName': '',
  175. 'suppliers': [{
  176. 'expertAdvice': '',
  177. 'writeName': '',
  178. 'name': supplier,
  179. 'grade': get_score(comment="B级(70分)", standard=item['评分标准']),
  180. 'pages': pages,
  181. }]
  182. })
  183. return result
  184. if __name__ == '__main__':
  185. import os
  186. import json
  187. from glob import glob
  188. from pprint import pprint
  189. with open('bidding_dataset.json', 'r', encoding='utf-8') as fp:
  190. scrutinizes = json.load(fp)
  191. for project in scrutinizes.keys():
  192. scrutinize_dict = scrutinizes[project]
  193. for file in glob(f'./data/0预审查初审详审测试数据/{project}/*/*-outline.json'):
  194. with open(file, 'r', encoding='utf-8') as fp:
  195. outline_dict = json.load(fp)
  196. if outline_dict == []:
  197. os.remove(file)
  198. continue
  199. with open(file.replace('outline.json', 'title.json'), 'r', encoding='utf-8') as fp:
  200. title_list = json.load(fp)
  201. with open(file.replace('outline.json', 'table.json'), 'r', encoding='utf-8') as fp:
  202. table_list = json.load(fp)
  203. with open(file.replace('outline.json', 'image.json'), 'r', encoding='utf-8') as fp:
  204. image_list = json.load(fp)
  205. supplier = file.split('\\')[-2]
  206. pprint(
  207. busi_loc(
  208. scrutinize_dict=scrutinize_dict,
  209. outline_dict=outline_dict,
  210. title_list=title_list,
  211. table_list=table_list,
  212. image_list=image_list,
  213. supplier=supplier,
  214. project=project,
  215. file_name=file
  216. )
  217. )
  218. exit(0)