document_.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
"""
Pre-review of bidding / tender documents.

1. Parse the all_tables.json result produced by Bidding_document_extract.
"""
  5. import re
  6. import json
  7. from celery_tasks.tools import BaseMethods
  8. class DocumentPreReview:
  9. def __init__(self, table_path: str):
  10. with open(table_path, 'r', encoding='utf-8') as fp:
  11. self.Bidding_tables = json.load(fp)
  12. def _scrutinize_judge(self, tag: str, threshold_value: int = 3):
  13. '''
  14. Clause number content judgment
  15. 商务 技术 报价 评审 评分 标准
  16. '''
  17. scrutinize_tuple = ("商务", "技术", "报价", "评审", "评分", "标准", "部分")
  18. hit_num = 0
  19. for scru in scrutinize_tuple:
  20. if scru in tag: hit_num += 1
  21. if hit_num >= threshold_value: return True
  22. else: return False
  23. def check_table(self, all_tables):
  24. ''' check the form to assess quailty'''
  25. # all_tables = self.Bidding_tables
  26. tables_list = []
  27. previous_page_number = 0
  28. criteria_sign = False
  29. for partial_form in all_tables:
  30. record_num = 1
  31. table_name = partial_form['table_name']
  32. page_number = partial_form['page_numbers']
  33. title_len = partial_form['title_len']
  34. col_len = partial_form['col_len']
  35. tables = partial_form["table"]
  36. form_ = {'table_name':table_name, 'page_numbers':page_number, 'table':[],
  37. 'col_len':col_len, 'title_len':title_len}
  38. if '办法' in table_name and '前附表' in table_name:
  39. previous_page_number = page_number[0]
  40. regulation_number_index,evaluation_factor_index,evaluation_criteria_index = 0,0,0
  41. regulation_number_index_,score_factor_index,score_criteria_index = 0,0,0
  42. for table_index, table in enumerate(tables):
  43. if '评审因素' in table and '评审标准' in table:
  44. regulation_number_index = table.index("条款号")
  45. evaluation_factor_index = table.index("评审因素")
  46. evaluation_criteria_index = table.index("评审标准")
  47. form_['table'].append(table)
  48. continue
  49. elif not table[evaluation_factor_index] and table[evaluation_criteria_index]:
  50. form_['table'][table_index-1][evaluation_criteria_index] += table[evaluation_factor_index]
  51. else:
  52. if table not in form_['table'] and not criteria_sign:
  53. form_['table'].append(table)
  54. if '评分因素' in table and '评分标准' in table:
  55. regulation_number_index_ = table.index("条款号")
  56. score_factor_index = table.index("评分因素")
  57. score_criteria_index = table.index("评分标准")
  58. weights_index = table.index("权重")
  59. criteria_sign = True
  60. continue
  61. elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:
  62. form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
  63. form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
  64. form_['table'][table_index-record_num][weights_index] += table[weights_index]
  65. record_num += 1
  66. else:
  67. if table not in form_['table'] and criteria_sign:
  68. form_['table'].append(table)
  69. continue
  70. tables_list.append(form_)
  71. elif previous_page_number and page_number[-1]<previous_page_number+3:
  72. for table_index, table in enumerate(tables):
  73. if '评分因素' in table and '评分标准' in table:
  74. regulation_number_index_ = table.index("条款号")
  75. score_factor_index = table.index("评分因素")
  76. score_criteria_index = table.index("评分标准")
  77. weights_index = table.index("权重")
  78. form_['table'].append(table)
  79. criteria_sign = True
  80. continue
  81. elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:
  82. form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
  83. form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
  84. form_['table'][table_index-record_num][weights_index] += table[weights_index]
  85. record_num += 1
  86. else: form_['table'].append(table)
  87. tables_list.append(form_)
  88. else:
  89. tables_list.append(partial_form)
  90. return tables_list
  91. def get_table(self):
  92. ''' parse the Bidding_tables.json file to get the table data from it.
  93. '''
  94. all_tables = self.check_table(self.Bidding_tables)
  95. # all_tables = self.Bidding_tables
  96. # 招标文件内容中预审查
  97. tag_sign = ''
  98. tag_list = ("形式评审标准", "资格评审标准", "响应性评审标准")
  99. tag_dict = dict([(tag,[]) for tag in tag_list])
  100. scrutinize_dict = {}
  101. scrutinize_page = 0
  102. scrutinize_index = -1
  103. scrutinize_Initial_title_len = 0 # 详审表长度
  104. scrutinize_second_title_len = 0
  105. scrutinize_sign = False
  106. weight_comp = re.compile("(\d+%)")
  107. regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
  108. for partial_form in all_tables:
  109. table_name = partial_form['table_name']
  110. page_number = partial_form['page_numbers']
  111. title_len = partial_form['title_len']
  112. tables = partial_form["table"]
  113. form_sign = re.findall('评\w+法前附表',table_name)
  114. if form_sign:
  115. table_page_num = page_number[-1]
  116. for table in tables[1:]:
  117. if '评分因素' in table or '评分标准' in table:
  118. scrutinize_page = table_page_num
  119. scrutinize_Initial_title_len = title_len
  120. if not scrutinize_page: scrutinize_page = table_page_num+1
  121. ''' scrutinize '''
  122. if (scrutinize_page in page_number and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
  123. scrutinize_sign = True
  124. if not scrutinize_Initial_title_len: scrutinize_Initial_title_len = title_len
  125. for table in tables:
  126. if '评分因素' in table and '评分标准' in table:
  127. regulation_number_index_ = table.index("条款号")
  128. evaluation_factor_index = table.index("评分因素")
  129. evaluation_criteria_index = table.index("评分标准")
  130. weights_index = table.index("权重")
  131. tag_sign_ = ''
  132. scrutinize_index = tables.index(table)
  133. break
  134. elif '评分因素' in table and '评分标准' not in table:
  135. scrutinize_index = tables.index(table)
  136. table_split = table[-1].replace(' ','').split()
  137. if '评分标准' in table_split and '权重' in table_split:
  138. table = table[:-1]
  139. table.extend(table_split)
  140. regulation_number_index_ = table.index("条款号")
  141. evaluation_factor_index = table.index("评分因素")
  142. evaluation_criteria_index = table.index("评分标准")
  143. weights_index = table.index("权重")
  144. tag_sign_ = ''
  145. break
  146. if scrutinize_index != -1:
  147. for table in tables[scrutinize_index+1:]:
  148. if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
  149. elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
  150. else: tag = table[regulation_number_index_]
  151. if tag:
  152. tag = tag.strip().replace("\n","")
  153. tag = ''.join(re.findall(r"[\u4e00-\u9fa5]+", tag))
  154. if tag and self._scrutinize_judge(tag):
  155. tag_sign_ = tag
  156. if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
  157. if len(table) >= weights_index:
  158. weighr_finder = weight_comp.findall(table[-1])
  159. if weighr_finder: table.append(weighr_finder[0])
  160. else: table.append('3%')
  161. evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
  162. if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
  163. else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
  164. "评分标准":evaluation_criteria.strip().replace("\n",""),
  165. "权重":weights.strip().replace("\n","")}
  166. scrutinize_dict[tag_sign_].append(value)
  167. if table[regulation_number_index_]:
  168. if table[regulation_number_index_][0] == '3':
  169. scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
  170. scrutinize_Initial_title_len = 0
  171. break
  172. elif scrutinize_page+1 in page_number and scrutinize_sign:
  173. scrutinize_second_title_len = title_len
  174. difference_value = scrutinize_Initial_title_len - title_len
  175. if difference_value:
  176. table_length = len(table)
  177. evaluation_factor_index -= difference_value
  178. evaluation_criteria_index -= difference_value
  179. weights_index -= difference_value
  180. if weights_index >= table_length:
  181. evaluation_factor_index = table_length-3
  182. evaluation_criteria_index = table_length-2
  183. weights_index = table_length-1
  184. for table in tables:
  185. if not table[evaluation_criteria_index]:
  186. scrutinize_dict[tag_sign_][-1]['评分标准'] += table[-1] if table[-1] else table[-2]
  187. continue
  188. if '条款内容' in table and '编列内容' in table:
  189. break
  190. if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
  191. elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
  192. else: tag = table[regulation_number_index_]
  193. if tag:
  194. tag = tag.strip().replace("\n","")
  195. tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
  196. if tag and self._scrutinize_judge(tag):
  197. tag_sign_ = tag
  198. if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
  199. if len(table) >= weights_index:
  200. weighr_finder = weight_comp.findall(table[-1])
  201. if weighr_finder: table[weights_index] = weighr_finder[0]
  202. else: table[weights_index] = '3%'
  203. evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
  204. if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
  205. else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
  206. "评分标准":evaluation_criteria.strip().replace("\n",""),
  207. "权重":weights.strip().replace("\n","")}
  208. scrutinize_dict[tag_sign_].append(value)
  209. if table[regulation_number_index_]:
  210. if table[regulation_number_index_][0] == '3':
  211. scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
  212. scrutinize_Initial_title_len = 0
  213. break
  214. elif scrutinize_page+2 in page_number and scrutinize_sign:
  215. difference_value = scrutinize_second_title_len - title_len
  216. if difference_value:
  217. evaluation_factor_index -= difference_value
  218. evaluation_criteria_index -= difference_value
  219. weights_index -= difference_value
  220. for table in tables:
  221. if not table[evaluation_criteria_index]:
  222. scrutinize_dict[tag_sign_][-1]['评分标准'] += table[-1] if table[-1] else table[-2]
  223. continue
  224. if '条款内容' in table and '编列内容' in table:
  225. break
  226. if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
  227. elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
  228. else: tag = table[regulation_number_index_]
  229. if tag:
  230. tag = tag.strip().replace("\n","")
  231. tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
  232. if tag and self._scrutinize_judge(tag):
  233. tag_sign_ = tag
  234. if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
  235. if len(table) >= weights_index:
  236. weighr_finder = weight_comp.findall(table[-1])
  237. if weighr_finder: table[weights_index] = weighr_finder[0]
  238. else: table[weights_index] = '3%'
  239. evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
  240. if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
  241. else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
  242. "评分标准":evaluation_criteria.strip().replace("\n",""),
  243. "权重":weights.strip().replace("\n","")}
  244. scrutinize_dict[tag_sign_].append(value)
  245. if table[regulation_number_index_]:
  246. if table[regulation_number_index_][0] == '3':
  247. scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
  248. scrutinize_Initial_title_len = 0
  249. break
  250. return scrutinize_dict
  251. if __name__ == '__main__':
  252. path_list = []
  253. for path_ in path_list:
  254. dpr = DocumentPreReview(path_)
  255. scrutinize_dict = dpr.get_table() # TODO scrutinize_dict是需要的结果