# document_.py
'''
Pre-review of bidding (tender) documents.

1. Parse the all_tables.json result produced by Bidding_document_extract.
'''
import logging
import os
import re
from pprint import pprint

import requests

from tools import BaseMethods

# from bidding_document_extract.get_Bidding_info import PdfExtractAttr_
# from bidding_document_extract.get_bidding_info import PdfExtractAttr
  12. chinese_num_map = {
  13. '零': 0,
  14. '一': 1,
  15. '二': 2,
  16. '三': 3,
  17. '四': 4,
  18. '五': 5,
  19. '六': 6,
  20. '七': 7,
  21. '八': 8,
  22. '九': 9,
  23. '十': 10
  24. }
  25. def create_logger(log_path):
  26. """
  27. 将日志输出到日志文件和控制台
  28. """
  29. logger = logging.getLogger()
  30. logger.setLevel(logging.INFO)
  31. formatter = logging.Formatter(
  32. '%(asctime)s - %(levelname)s - %(message)s')
  33. # 创建一个handler,用于写入日志文件
  34. file_handler = logging.FileHandler(
  35. filename=log_path, mode='w')
  36. file_handler.setFormatter(formatter)
  37. file_handler.setLevel(logging.INFO)
  38. logger.addHandler(file_handler)
  39. # 创建一个handler,用于将日志输出到控制台
  40. console = logging.StreamHandler()
  41. console.setLevel(logging.DEBUG)
  42. console.setFormatter(formatter)
  43. logger.addHandler(console)
  44. return logger
  45. log_path = "code/logs/logs.log"
  46. logger = create_logger(log_path=log_path)
  47. class DocumentPreReview():
  48. def __init__(self) -> None:
  49. self.bm = BaseMethods()
  50. # self.agent_ = PdfExtractAttr_(file_path=self.file_path)
  51. # self.agent = PdfExtractAttr(file_path=self.file_path)
  52. self.Bidding_tables = self.get_Bidding_table()
  53. self.contexts = self.get_Bidding_contexts()
  54. self.announcement = self.get_announcement()
  55. self.Bidding_context = self.get_Bidding_json()
  56. self.tender_context = self.get_tender_context()
  57. self.chinese_num_map = chinese_num_map
  58. def get_Bidding_contexts(self, file_path:str = 'data/预审查数据/contexts.json'):
  59. ''' get contexts by page
  60. '''
  61. contexts = self.bm.json_read(file_path)
  62. return contexts
  63. def get_Bidding_table(self):
  64. ''' get table data
  65. '''
  66. file_path = "data/预审查数据/三峡左岸及电源电站中央空调系统管网及末端改造(发布稿)-table.json"
  67. # file_path = "data/预审查数据/2023年档案管理系统功能优化项目采购程序文件-table.json"
  68. all_tables = self.bm.json_read(file_path)
  69. return all_tables
  70. def get_Bidding_json(self):
  71. ''' read json to get context
  72. '''
  73. file_path = "data/预审查数据/三峡左岸及电源电站中央空调系统管网及末端改造(发布稿)-table.json"
  74. Bidding_context = self.bm.json_read(file_path)
  75. return Bidding_context
  76. def get_tender_context(self):
  77. ''' read the tender context '''
  78. file_path = "data/预审查数据/南方电网数字研究院有限公司_bidding_content.json"
  79. tender_context = self.bm.json_read(file_path)
  80. return tender_context
  81. def _scrutinize_judge(self, tag:str, threshold_value:int=3):
  82. ''' Clause number content judgment
  83. 商务 技术 报价 评审 评分 标准
  84. '''
  85. scrutinize_tuple = ("商务","技术","报价","评审","评分","标准")
  86. hit_num = 0
  87. for scru in scrutinize_tuple:
  88. if scru in tag: hit_num+= 1
  89. if hit_num>=threshold_value: return True
  90. else: return False
    def check_table(self, all_tables):
        """Rebuild the evaluation-method tables, merging continuation rows.

        Tables whose name contains both '办法' and '前附表', and tables whose
        last page is fewer than three pages after the first page of such a
        table, get a rebuilt row list in which rows recognised as
        continuations are concatenated into an earlier row.  Every other
        record is passed through unchanged.

        NOTE(review): the block nesting below was reconstructed from a copy
        of this file whose indentation had been lost — confirm it against the
        original source.

        Args:
            all_tables: iterable of dicts with the keys 'table_name',
                'page_numbers', 'title_len', 'col_len' and 'table'
                (presumably a list of rows, each row a list of cells —
                verify against the extractor output).

        Returns:
            list: one dict per input record; a rebuilt dict for the handled
            tables, the original dict otherwise.
        """
        # all_tables = self.Bidding_tables
        tables_list = []
        previous_page_number = 0
        criteria_sign = False
        for partial_form in all_tables:
            record_num = 1
            table_name = partial_form['table_name']
            page_number = partial_form['page_numbers']
            title_len = partial_form['title_len']
            col_len = partial_form['col_len']
            tables = partial_form["table"]
            # Copy of the record with an empty row list that is refilled below.
            form_ = {'table_name':table_name, 'page_numbers':page_number, 'table':[],
                    'col_len':col_len, 'title_len':title_len}
            # if '须知' in table_name and '前附表' in table_name:
            #     regulation_number_index,clause_name_index,clause_content_index = '','',''
            #     for table_index, table in enumerate(tables):
            #         if '条款号' in table and '编列内容' in table:
            #             regulation_number_index = table.index("条款号")
            #             clause_name_index = table.index("条款名称")
            #             clause_content_index = table.index("编列内容")
            #             form_['table'].append(table)
            #             continue
            #         elif not table[clause_name_index]:
            #             form_['table'][table_index-1][clause_content_index] += table[clause_content_index]
            #         else: form_['table'].append(table)
            #     tables_list.append(form_)
            if '办法' in table_name and '前附表' in table_name:
                # Remember the first page so following tables can be treated
                # as a continuation (see the elif below).
                previous_page_number = page_number[0]
                regulation_number_index,evaluation_factor_index,evaluation_criteria_index = 0,0,0
                regulation_number_index_,score_factor_index,score_criteria_index = 0,0,0
                for table_index, table in enumerate(tables):
                    # Header row of the review section: record column positions.
                    if '评审因素' in table and '评审标准' in table:
                        regulation_number_index = table.index("条款号")
                        evaluation_factor_index = table.index("评审因素")
                        evaluation_criteria_index = table.index("评审标准")
                        form_['table'].append(table)
                        continue
                    # NOTE(review): this appends table[evaluation_factor_index],
                    # which the condition has just established to be falsy;
                    # table[evaluation_criteria_index] may have been intended.
                    # It also indexes form_['table'] with a position taken
                    # from `tables` — confirm both.
                    elif not table[evaluation_factor_index] and table[evaluation_criteria_index]:
                        form_['table'][table_index-1][evaluation_criteria_index] += table[evaluation_factor_index]
                    else:
                        if table not in form_['table']: form_['table'].append(table)
                    # Header row of the scoring section: record column positions.
                    if '评分因素' in table and '评分标准' in table:
                        regulation_number_index_ = table.index("条款号")
                        score_factor_index = table.index("评分因素")
                        score_criteria_index = table.index("评分标准")
                        weights_index = table.index("权重")
                        form_['table'].append(table)
                        criteria_sign = True
                        continue
                    # Scoring continuation row: the cell after the clause-number
                    # column holds at least two heading keywords and the factor
                    # cell is empty; its cells are appended to an earlier row.
                    elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:
                        form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
                        form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
                        form_['table'][table_index-record_num][weights_index] += table[weights_index]
                        record_num += 1
                    else:
                        if table not in form_['table']: form_['table'].append(table)
                tables_list.append(form_)
            # Table ending fewer than three pages after the first page of the
            # last evaluation-method table: apply the scoring-section handling.
            elif previous_page_number and page_number[-1]<previous_page_number+3:
                for table_index, table in enumerate(tables):
                    if '评分因素' in table and '评分标准' in table:
                        regulation_number_index_ = table.index("条款号")
                        score_factor_index = table.index("评分因素")
                        score_criteria_index = table.index("评分标准")
                        weights_index = table.index("权重")
                        form_['table'].append(table)
                        criteria_sign = True
                        continue
                    elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:
                        form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
                        form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
                        form_['table'][table_index-record_num][weights_index] += table[weights_index]
                        record_num += 1
                    else: form_['table'].append(table)
                tables_list.append(form_)
            else:
                # Unrelated table: keep the original record untouched.
                tables_list.append(partial_form)
        return tables_list
    def get_table(self):
        """Extract review criteria, bidder-instruction clauses and scoring criteria.

        Runs ``check_table`` on ``self.Bidding_tables`` and walks the result,
        filling three structures:

        * ``tag_dict`` — for each of the headings 形式评审标准 / 资格评审标准 /
          响应性评审标准, a list of {'评审因素', '评审标准'} dicts taken from
          tables whose name matches the "评…法前附表" pattern.
        * ``bidder_know`` — first cell of a row mapped to a list of
          {'条款名称', '编列内容'} dicts, taken from tables that start fewer
          than three pages after the table named 投标人须知前附表.
        * ``scrutinize_dict`` — scoring-section heading mapped to a list of
          {'评分因素', '评分标准'[, '权重']} dicts, collected from the page on
          which the scoring header was found and the two following pages.

        ``scrutinize_dict`` is also pretty-printed to stdout.

        NOTE(review): the block nesting below was reconstructed from a copy
        of this file whose indentation had been lost — confirm it against the
        original source.

        Returns:
            tuple: ``(tag_dict, bidder_know, scrutinize_dict)``.
        """
        all_tables = self.check_table(self.Bidding_tables)
        # all_tables = self.Bidding_tables
        # Preliminary-review criteria found in the bidding document
        tag_sign = ''
        tag_list = ("形式评审标准", "资格评审标准", "响应性评审标准")
        tag_dict = dict([(tag,[]) for tag in tag_list])
        # Scoring (detailed-review) table data found in the bidding document
        # scrutinize_tuple = ("商务部分评分标准","技术部分评审标准","技术部分评分标准","投标报价评审标准","报价部分评审标准","报价评分标准","报价部分评分标准")
        scrutinize_dict = {}
        scrutinize_page = 0
        scrutinize_index = -1
        scrutinize_Initial_title_len = 0 # marker for the detailed-review position
        scrutinize_sign = False
        record_page = 0
        bidder_know = {} # rows of the bidder-instructions front table
        for partial_form in all_tables:
            table_name = partial_form['table_name']
            page_number = partial_form['page_numbers']
            title_len = partial_form['title_len']
            tables = partial_form["table"]
            if '投标人须知前附表' == table_name:
                record_page = page_number[0]
            # NOTE(review): record_page starts at 0, so before the front table
            # is seen this also matches any table starting on a page below 3.
            if page_number[0] < record_page + 3:
                for table in tables[1:]:
                    if '条' in table: continue # marked as a known BUG by the original author
                    try:
                        if table[0] and table[0] not in bidder_know: bidder_know[table[0]] = []
                        if table[0]: bidder_know[table[0]].append({"条款名称":table[1],"编列内容":table[2]})
                    # NOTE(review): bare except — any error in the two lines
                    # above is logged with the fixed message below.
                    except:
                        logger.error('该文件中的投标人须知前附表部分表格没有边框,只有中间部分表格存在边框,提取代码认为只有边框存在才被判定为表格内容')
            # NOTE(review): non-raw pattern containing \w; newer Python
            # versions warn about this escape — a raw string would avoid it.
            form_sign = re.findall('评\w+法前附表',table_name)
            if form_sign:
                table_page_num = page_number[-1]
                inital_data = tables[0]
                # confirm data location
                regulation_number_index = inital_data.index("条款号")
                evaluation_factor_index = inital_data.index("评审因素")
                evaluation_criteria_index = inital_data.index("评审标准")
                for table in tables[1:]:
                    # A non-empty cell after the clause-number column starts a
                    # new heading; rows without one stay under the previous one.
                    tag = table[regulation_number_index+1]
                    if tag: tag = tag.strip().replace("\n","")
                    if tag:
                        tag_sign = tag
                    evaluation_factor,evaluation_criteria = table[evaluation_factor_index],table[evaluation_criteria_index]
                    if tag_sign in tag_dict:
                        tag_dict[tag_sign].append({"评审因素":evaluation_factor.strip().replace("\n",""),
                                                   "评审标准":evaluation_criteria.strip().replace("\n","")})
                    # Scoring header inside this table: scoring starts on its last page.
                    if '评分因素' in table or '评分标准' in table:
                        scrutinize_page = table_page_num
                        scrutinize_Initial_title_len = title_len
                # No scoring header seen yet: expect it on the following page.
                if not scrutinize_page: scrutinize_page = table_page_num+1
            ''' scrutinize '''
            # NOTE(review): "(A and B) or A" reduces to A, i.e. this branch
            # runs whenever the table starts on scrutinize_page.
            if (scrutinize_page == page_number[0] and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
                regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
                scrutinize_sign = True
                if not scrutinize_Initial_title_len: scrutinize_Initial_title_len = title_len
                # Locate the scoring header row and its column positions.
                for table in tables:
                    if '评分因素' in table and '评分标准' in table:
                        regulation_number_index_ = table.index("条款号")
                        evaluation_factor_index = table.index("评分因素")
                        evaluation_criteria_index = table.index("评分标准")
                        weights_index = table.index("权重")
                        tag_sign_ = ''
                        scrutinize_index = tables.index(table)
                        break
                    # Header whose last cell holds several column titles at
                    # once: split that cell back into separate columns.
                    elif '评分因素' in table and '评分标准' not in table:
                        scrutinize_index = tables.index(table)
                        table_split = table[-1].replace(' ','').split()
                        if '评分标准' in table_split and '权重' in table_split:
                            table = table[:-1]
                            table.extend(table_split)
                            regulation_number_index_ = table.index("条款号")
                            evaluation_factor_index = table.index("评分因素")
                            evaluation_criteria_index = table.index("评分标准")
                            weights_index = table.index("权重")
                            tag_sign_ = ''
                        break
                if scrutinize_index != -1:
                    for table in tables[scrutinize_index+1:]:
                        # Choose the cell that may carry the section heading.
                        if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
                        elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
                        else: tag = table[regulation_number_index_]
                        if tag:
                            tag = tag.strip().replace("\n","")
                            # Keep only the CJK characters of the heading.
                            tag = ''.join(re.findall(r"[\u4e00-\u9fa5]+", tag))
                        if tag and self._scrutinize_judge(tag):
                            tag_sign_ = tag
                            if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
                        try:
                            evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
                        # NOTE(review): bare except that only prints an empty
                        # line; the code below then uses whatever values the
                        # three names held before (or fails if unbound).
                        except:
                            print()
                        if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
                        else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
                                       "评分标准":evaluation_criteria.strip().replace("\n",""),
                                       "权重":weights.strip().replace("\n","")}
                        scrutinize_dict[tag_sign_].append(value)
                        # A clause number starting with '3' ends the scoring
                        # section: drop empty headings and stop.
                        if table[regulation_number_index_]:
                            if table[regulation_number_index_][0] == '3':
                                scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
                                scrutinize_Initial_title_len = 0
                                break
            # Table starting one page after the scoring header page.
            elif scrutinize_page+1 == page_number[0] and scrutinize_sign:
                # Shift the column positions by the difference in title_len.
                difference_value = scrutinize_Initial_title_len - title_len
                if difference_value:
                    # NOTE(review): `table` here is the value left over from an
                    # earlier loop, not a row of the current table — confirm.
                    table_length = len(table)
                    evaluation_factor_index -= difference_value
                    evaluation_criteria_index -= difference_value
                    weights_index -= difference_value
                    if weights_index >= table_length:
                        evaluation_factor_index = table_length-3
                        evaluation_criteria_index = table_length-2
                        weights_index = table_length-1
                for table in tables:
                    # Row with an empty third cell: its fourth cell is appended
                    # to the criteria text of the last collected entry.
                    if not table[2]:
                        scrutinize_dict[tag_sign_][-1]['评分标准'] += table[3]
                        continue
                    if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
                    elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
                    else: tag = table[regulation_number_index_]
                    if tag:
                        tag = tag.strip().replace("\n","")
                        # NOTE(review): unlike the first branch this takes only
                        # the first CJK run and raises IndexError when the
                        # text contains none.
                        tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
                    if tag and self._scrutinize_judge(tag):
                        tag_sign_ = tag
                        if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
                    evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
                    if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
                    else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
                                   "评分标准":evaluation_criteria.strip().replace("\n",""),
                                   "权重":weights.strip().replace("\n","")}
                    scrutinize_dict[tag_sign_].append(value)
                    if table[regulation_number_index_]:
                        if table[regulation_number_index_][0] == '3':
                            scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
                            scrutinize_Initial_title_len = 0
                            break
            # Table starting two pages after the scoring header page.
            elif scrutinize_page+2 == page_number[0] and scrutinize_sign:
                difference_value = scrutinize_Initial_title_len - title_len
                if scrutinize_Initial_title_len:
                    evaluation_factor_index -= difference_value
                    evaluation_criteria_index -= difference_value
                    weights_index -= difference_value
                for table in tables:
                    if not table[2]:
                        scrutinize_dict[tag_sign_][-1]['评分标准'] += table[3]
                        continue
                    if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
                    elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
                    else: tag = table[regulation_number_index_]
                    if tag:
                        tag = tag.strip().replace("\n","")
                        tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
                    if tag and self._scrutinize_judge(tag):
                        tag_sign_ = tag
                        if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
                    evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
                    if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
                    else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
                                   "评分标准":evaluation_criteria.strip().replace("\n",""),
                                   "权重":weights.strip().replace("\n","")}
                    scrutinize_dict[tag_sign_].append(value)
                    if table[regulation_number_index_]:
                        if table[regulation_number_index_][0] == '3':
                            scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
                            scrutinize_Initial_title_len = 0
                            break
        # pprint(tag_dict)
        pprint(scrutinize_dict)
        # pprint(bidder_know)
        return tag_dict,bidder_know,scrutinize_dict
  344. def get_announcement(self)->str:
  345. ''' bidder announcement
  346. '''
  347. announcements = ''
  348. announcement_contexts = self.contexts[2:8]
  349. for index, announcement in enumerate(announcement_contexts):
  350. finder = re.findall("^第一章",announcement['text'])
  351. if finder:
  352. for text in announcement_contexts[index:]:
  353. if re.findall("^第二章", text["text"]): break
  354. announcements += text["text"]
  355. break
  356. return announcements
    def contexts_extract(self, evaluation_criteria:str):
        """Collect the body text of one chapter of the bidding document by page.

        The chapter title is built from ``evaluation_criteria`` by joining the
        first "第…章" match with the first text found between “ ” quotes.  The
        entries of ``self.Bidding_context`` are scanned to collect chapter
        titles from the table of contents and to find the entry whose
        space-stripped text equals the title; the entries after it are then
        grouped by page number until the next chapter title is met.

        NOTE(review): the block nesting below was reconstructed from a copy
        of this file whose indentation had been lost — confirm it against the
        original source.

        Args:
            evaluation_criteria: criterion text that names a chapter.

        Returns:
            dict: ``{title: {page_number: [text, ...]}}``.

        Raises:
            IndexError: ``evaluation_criteria`` has no "第…章" match or no
                quoted name.
            ValueError: the title is not among the collected titles.
        """
        comp1 = re.compile("(第.*?章)")
        comp2 = re.compile("“(.*?)”")
        title = comp1.findall(evaluation_criteria)[0]+comp2.findall(evaluation_criteria)[0]
        comp3 = re.compile("第(.*?)章")
        title_list = []
        format_index,sta_page = -1,-1
        sign = True
        title_next = ''
        for context in self.Bidding_context: # entries of the bidding document
            text = context['text'].strip().replace(" ","")
            # The entry reading exactly '目录' marks the table-of-contents page.
            if text == '目录':
                sta_page = context['page_number']
            # After the table of contents was seen, and only on pages below 4,
            # collect chapter titles.
            if sta_page != -1 and context['page_number'] < 4:
                finder = comp3.findall(context['text'])
                if finder and sign:
                    if title_list:
                        # Stop collecting once the chapter numeral decreases.
                        # NOTE(review): chinese_num is None for numerals that
                        # are missing from chinese_num_map, which makes the
                        # comparison below raise TypeError.
                        chinese_num = self.chinese_num_map.get(comp3.findall(title_list[-1])[0],None)
                        if chinese_num > self.chinese_num_map.get(finder[0],0):
                            sign = False
                        else:
                            title_list.append(context['text'].split(' ')[0])
                    else:
                        title_list.append(context['text'].split(' ')[0])
            # First entry whose text equals the title: the chapter starts here.
            if text == title and format_index == -1:
                format_index = self.Bidding_context.index(context)
                break
        title_index = title_list.index(title)
        if title_index != len(title_list)-1:
            title_next = title_list[title_index+1]
        file_format = {title:{}}
        # NOTE(review): when the title entry was never found, format_index is
        # still -1 and this slice starts at the first entry.
        for context in self.Bidding_context[format_index+1:]:
            text = context['text'].strip().replace(" ","").replace("\n","——>")
            # Stop at the next chapter title, if there is one.
            if title_next and title_next == text:
                break
            if context['page_number'] not in file_format[title]:
                file_format[title][context['page_number']] = []
            file_format[title][context['page_number']].append(context['text'])
        return file_format
  398. def formal_criteria(self, review_criteria_list:list):
  399. ''' Analysis of formal review criteria
  400. 形式评审标准
  401. [{'评审因素': '投标人名称', '评审标准': '与营业执照书一致'},
  402. {'评审因素': '投标文件封面、投标函签字盖章',
  403. '评审标准': '投标文件封面、投标函须有法定代表人(或其委托代理人)签字(或签章)并加盖单位章,由委托代理人签字的须具有有效的授权委托书'},
  404. {'评审因素': '投标文件格式', '评审标准': '符合第八章“投标文件格式”的要求'},
  405. {'评审因素': '联合体投标人(如有)', '评审标准': '不适用'},
  406. {'评审因素': '报价唯一', '评审标准': '只能有一个有效报价'}]
  407. '''
  408. formal_result = {}
  409. for review_criteria in review_criteria_list:
  410. evaluation_factor = review_criteria['评审因素']
  411. evaluation_criteria = review_criteria['评审标准']
  412. if '投标人名称' in evaluation_factor or '供应商名称' in evaluation_factor:
  413. ['营业执照','资质证书']
  414. '''
  415. 要求投标文件中 投标公司 与 其提供的营业执照或资质证书中的名称相同
  416. '''
  417. pass
  418. elif '报价函签字盖章' in evaluation_factor or '投标文件封面、投标函签字盖章' in evaluation_factor:
  419. '''
  420. 要求投标文件中 投标公司的 法人或委托人签字或是 存在单位盖章
  421. '''
  422. pass
  423. elif '投标文件格式' in evaluation_factor:
  424. file_format = self.contexts_extract(evaluation_criteria)
  425. pprint(file_format)
  426. '''
  427. 招标文件 file_format 与投标文件内容对比,投标文件中只要存在file_format内容即可
  428. '''
  429. chinese_map_list = list(self.chinese_num_map)
  430. catelogue_list = []
  431. tender_start = 0
  432. catelogue_value = ''
  433. add_index = 0
  434. hit_nums = 0
  435. numbers = 0
  436. for format_values in file_format.values():
  437. for format in format_values.values():
  438. numbers += 1
  439. catelogue_update_sign = False
  440. first_value = format[0].replace(" ","").replace("\n","")
  441. if '目录' == first_value:
  442. for i in format[1:]:
  443. for j in chinese_map_list:
  444. if j in i and i not in catelogue_list:
  445. catelogue_list.append(i)
  446. if catelogue_list and not tender_start:
  447. catelogue = catelogue_list[0]
  448. comp1 = re.compile(f'^{catelogue}')
  449. for tender_context in self.tender_context:
  450. context = tender_context['text']
  451. finder = comp1.findall(context)
  452. if finder:
  453. tender_start = self.tender_context.index(tender_context)
  454. break
  455. if first_value in catelogue_list:
  456. catelogue_update_sign = True
  457. catelogue_value = first_value
  458. catelogue_index = catelogue_list.index(catelogue_value)
  459. if catelogue_list[-1] != catelogue_value:
  460. catelogue_value_next = catelogue_list[catelogue_index+1]
  461. else:
  462. catelogue_value_next = catelogue_value
  463. if catelogue_value:
  464. hit_num = 0
  465. if catelogue_update_sign:
  466. tender_start += add_index
  467. add_index = 0
  468. for tender_index, tender_contents in enumerate(self.tender_context[tender_start:]):
  469. tender_context = tender_contents['text'].split("\n")
  470. if tender_context[0] == catelogue_value_next:
  471. add_index = tender_index
  472. break
  473. for value in format:
  474. if value in tender_context:
  475. hit_num += 1
  476. hit_nums += hit_num
  477. hit_rate = round(hit_nums/numbers,4)
  478. if hit_rate>0.70:
  479. formal_result[evaluation_factor] = (True, evaluation_criteria)
  480. else:
  481. formal_result[evaluation_factor] = (False, evaluation_criteria)
  482. elif '联合体投标人' in evaluation_factor:
  483. if '不适用' in evaluation_criteria: continue
  484. elif '报价唯一' in evaluation_factor:
  485. '''
  486. 需要在投标文件中比对三个位置的报价总和值抽取
  487. '''
  488. pass
  489. def qualification_criteria(self, review_criteria_list:list, bidder_know:dict):
  490. ''' Qualification assessment criteria
  491. 资格评审标准
  492. '''
  493. for review_criteria in review_criteria_list:
  494. evaluation_factor = review_criteria['评审因素']
  495. evaluation_criteria = review_criteria['评审标准']
  496. if '营业执照' in evaluation_factor:
  497. '''
  498. 在投标文件中 对营业执照识别营业期限;长期识别认为可以;只有开始时间没有结束时间给提示。
  499. '''
  500. pass
  501. elif '资质' in evaluation_factor:
  502. comp1 = re.compile('(第.*?章)')
  503. comp2 = re.compile('“(.*?)”')
  504. comp3 = re.compile('第([\d+\.]+)项规定')
  505. finder1 = comp1.findall(evaluation_criteria)[0]
  506. finder2 = comp2.findall(evaluation_criteria)[0]
  507. finder3 = comp3.findall(evaluation_criteria)[0]
  508. chapter_name = finder1+finder2
  509. stipulation = finder3
  510. if '投标人须知' in chapter_name:
  511. bidder_data = bidder_know.get(stipulation,None)
  512. if not bidder_data: continue ## 需要修改
  513. clause_name = bidder_data[0]['条款名称'].replace("\n","")
  514. list_content = bidder_data[0]['编列内容']
  515. if '招标公告' in list_content:
  516. cert_index = self.announcement.index('资质') ## 默认 资质条件 不变
  517. cert_required = re.findall(":(.*?)\\n",self.announcement[cert_index:cert_index+500])[0]
  518. print(cert_required)
  519. # 具备法人资格
  520. '''
  521. big model
  522. 需要设计prompt,可将内容及情况在线上glm4中使用,测出合适prompt
  523. '''
  524. def responsive_criteria(self, review_criteria_list:list, bidder_know:dict):
  525. ''' Responsive review criteria
  526. 响应性评审
  527. '''
  528. for review_criteria in review_criteria_list:
  529. evaluation_factor = review_criteria['评审因素']
  530. evaluation_criteria = review_criteria['评审标准']
  531. if evaluation_factor == '权利义务' or '合同' in evaluation_criteria:
  532. '''不对合同进行处理'''
  533. continue
  534. def content_parsing(self):
  535. ''' data analysis aggregate function
  536. '''
  537. tag_dict,bidder_know,scrutinize_dict = dpr.get_table()
  538. # {}
  539. # self.formal_criteria(tag_dict['形式评审标准'])
  540. self.qualification_criteria(tag_dict['资格评审标准'], bidder_know)
  541. from fastapi import FastAPI
  542. import uvicorn
  543. app = FastAPI()
  544. @app.post('get_pre_review')
  545. def get_pre_review():
  546. result = {
  547. "":""
  548. }
  549. return result
  550. if __name__ == '__main__':
  551. dpr = DocumentPreReview()
  552. # dpr.check_table(dpr.Bidding_tables)
  553. dpr.get_table()
  554. # dpr.content_parsing()
  555. # formal_review_criteria = [
  556. # {'评审因素': '投标文件格式', '评审标准': '符合第八章“投标文件格式”的要求'}
  557. # # {'评审因素': '投标文件格式', '评审标准': '符合第四章“合同条款及格式”规定'}
  558. # ]
  559. # dpr.formal_criteria(formal_review_criteria)