document_.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
'''
Pre-review of bidding (tender) documents.
1. Parse the all_tables.json result produced by Bidding_document_extract.
'''
  5. import re
  6. from celery_tasks.tools import BaseMethods
  7. class DocumentPreReview:
  8. def _scrutinize_judge(self, tag:str, threshold_value:int=3):
  9. ''' Clause number content judgment
  10. 商务 技术 报价 评审 评分 标准
  11. '''
  12. scrutinize_tuple = ("商务","技术","报价","评审","评分","标准","部分")
  13. hit_num = 0
  14. for scru in scrutinize_tuple:
  15. if scru in tag: hit_num+= 1
  16. if hit_num>=threshold_value: return True
  17. else: return False
  18. def check_table(self, all_tables):
  19. ''' check the form to assess quailty'''
  20. # all_tables = self.Bidding_tables
  21. tables_list = []
  22. previous_page_number = 0
  23. criteria_sign = False
  24. for partial_form in all_tables:
  25. record_num = 1
  26. table_name = partial_form['table_name']
  27. page_number = partial_form['page_numbers']
  28. title_len = partial_form['title_len']
  29. col_len = partial_form['col_len']
  30. tables = partial_form["table"]
  31. form_ = {'table_name':table_name, 'page_numbers':page_number, 'table':[],
  32. 'col_len':col_len, 'title_len':title_len}
  33. if '办法' in table_name and '前附表' in table_name:
  34. previous_page_number = page_number[0]
  35. regulation_number_index,evaluation_factor_index,evaluation_criteria_index = 0,0,0
  36. regulation_number_index_,score_factor_index,score_criteria_index = 0,0,0
  37. for table_index, table in enumerate(tables):
  38. if '评审因素' in table and '评审标准' in table:
  39. regulation_number_index = table.index("条款号")
  40. evaluation_factor_index = table.index("评审因素")
  41. evaluation_criteria_index = table.index("评审标准")
  42. form_['table'].append(table)
  43. continue
  44. elif not table[evaluation_factor_index] and table[evaluation_criteria_index]:
  45. form_['table'][table_index-1][evaluation_criteria_index] += table[evaluation_factor_index]
  46. else:
  47. if table not in form_['table'] and not criteria_sign:
  48. form_['table'].append(table)
  49. if '评分因素' in table and '评分标准' in table:
  50. regulation_number_index_ = table.index("条款号")
  51. score_factor_index = table.index("评分因素")
  52. score_criteria_index = table.index("评分标准")
  53. weights_index = table.index("权重")
  54. criteria_sign = True
  55. continue
  56. elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:
  57. form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
  58. form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
  59. form_['table'][table_index-record_num][weights_index] += table[weights_index]
  60. record_num += 1
  61. else:
  62. if table not in form_['table'] and criteria_sign:
  63. form_['table'].append(table)
  64. continue
  65. tables_list.append(form_)
  66. elif previous_page_number and page_number[-1]<previous_page_number+3:
  67. for table_index, table in enumerate(tables):
  68. if '评分因素' in table and '评分标准' in table:
  69. regulation_number_index_ = table.index("条款号")
  70. score_factor_index = table.index("评分因素")
  71. score_criteria_index = table.index("评分标准")
  72. weights_index = table.index("权重")
  73. form_['table'].append(table)
  74. criteria_sign = True
  75. continue
  76. elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:
  77. form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
  78. form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
  79. form_['table'][table_index-record_num][weights_index] += table[weights_index]
  80. record_num += 1
  81. else: form_['table'].append(table)
  82. tables_list.append(form_)
  83. else:
  84. tables_list.append(partial_form)
  85. return tables_list
  86. def get_table(self):
  87. ''' parse the Bidding_tables.json file to get the table data from it.
  88. '''
  89. all_tables = self.check_table(self.Bidding_tables)
  90. # all_tables = self.Bidding_tables
  91. # 招标文件内容中预审查
  92. tag_sign = ''
  93. tag_list = ("形式评审标准", "资格评审标准", "响应性评审标准")
  94. tag_dict = dict([(tag,[]) for tag in tag_list])
  95. scrutinize_dict = {}
  96. scrutinize_page = 0
  97. scrutinize_index = -1
  98. scrutinize_Initial_title_len = 0 # 详审表长度
  99. scrutinize_second_title_len = 0
  100. scrutinize_sign = False
  101. weight_comp = re.compile("(\d+%)")
  102. regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
  103. for partial_form in all_tables:
  104. table_name = partial_form['table_name']
  105. page_number = partial_form['page_numbers']
  106. title_len = partial_form['title_len']
  107. tables = partial_form["table"]
  108. form_sign = re.findall('评\w+法前附表',table_name)
  109. if form_sign:
  110. table_page_num = page_number[-1]
  111. for table in tables[1:]:
  112. if '评分因素' in table or '评分标准' in table:
  113. scrutinize_page = table_page_num
  114. scrutinize_Initial_title_len = title_len
  115. if not scrutinize_page: scrutinize_page = table_page_num+1
  116. ''' scrutinize '''
  117. if (scrutinize_page in page_number and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
  118. scrutinize_sign = True
  119. if not scrutinize_Initial_title_len: scrutinize_Initial_title_len = title_len
  120. for table in tables:
  121. if '评分因素' in table and '评分标准' in table:
  122. regulation_number_index_ = table.index("条款号")
  123. evaluation_factor_index = table.index("评分因素")
  124. evaluation_criteria_index = table.index("评分标准")
  125. weights_index = table.index("权重")
  126. tag_sign_ = ''
  127. scrutinize_index = tables.index(table)
  128. break
  129. elif '评分因素' in table and '评分标准' not in table:
  130. scrutinize_index = tables.index(table)
  131. table_split = table[-1].replace(' ','').split()
  132. if '评分标准' in table_split and '权重' in table_split:
  133. table = table[:-1]
  134. table.extend(table_split)
  135. regulation_number_index_ = table.index("条款号")
  136. evaluation_factor_index = table.index("评分因素")
  137. evaluation_criteria_index = table.index("评分标准")
  138. weights_index = table.index("权重")
  139. tag_sign_ = ''
  140. break
  141. if scrutinize_index != -1:
  142. for table in tables[scrutinize_index+1:]:
  143. if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
  144. elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
  145. else: tag = table[regulation_number_index_]
  146. if tag:
  147. tag = tag.strip().replace("\n","")
  148. tag = ''.join(re.findall(r"[\u4e00-\u9fa5]+", tag))
  149. if tag and self._scrutinize_judge(tag):
  150. tag_sign_ = tag
  151. if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
  152. if len(table) >= weights_index:
  153. weighr_finder = weight_comp.findall(table[-1])
  154. if weighr_finder: table.append(weighr_finder[0])
  155. else: table.append('3%')
  156. evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
  157. if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
  158. else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
  159. "评分标准":evaluation_criteria.strip().replace("\n",""),
  160. "权重":weights.strip().replace("\n","")}
  161. scrutinize_dict[tag_sign_].append(value)
  162. if table[regulation_number_index_]:
  163. if table[regulation_number_index_][0] == '3':
  164. scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
  165. scrutinize_Initial_title_len = 0
  166. break
  167. elif scrutinize_page+1 in page_number and scrutinize_sign:
  168. scrutinize_second_title_len = title_len
  169. difference_value = scrutinize_Initial_title_len - title_len
  170. if difference_value:
  171. table_length = len(table)
  172. evaluation_factor_index -= difference_value
  173. evaluation_criteria_index -= difference_value
  174. weights_index -= difference_value
  175. if weights_index >= table_length:
  176. evaluation_factor_index = table_length-3
  177. evaluation_criteria_index = table_length-2
  178. weights_index = table_length-1
  179. for table in tables:
  180. if not table[evaluation_criteria_index]:
  181. scrutinize_dict[tag_sign_][-1]['评分标准'] += table[-1] if table[-1] else table[-2]
  182. continue
  183. if '条款内容' in table and '编列内容' in table:
  184. break
  185. if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
  186. elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
  187. else: tag = table[regulation_number_index_]
  188. if tag:
  189. tag = tag.strip().replace("\n","")
  190. tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
  191. if tag and self._scrutinize_judge(tag):
  192. tag_sign_ = tag
  193. if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
  194. if len(table) >= weights_index:
  195. weighr_finder = weight_comp.findall(table[-1])
  196. if weighr_finder: table[weights_index] = weighr_finder[0]
  197. else: table[weights_index] = '3%'
  198. evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
  199. if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
  200. else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
  201. "评分标准":evaluation_criteria.strip().replace("\n",""),
  202. "权重":weights.strip().replace("\n","")}
  203. scrutinize_dict[tag_sign_].append(value)
  204. if table[regulation_number_index_]:
  205. if table[regulation_number_index_][0] == '3':
  206. scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
  207. scrutinize_Initial_title_len = 0
  208. break
  209. elif scrutinize_page+2 in page_number and scrutinize_sign:
  210. difference_value = scrutinize_second_title_len - title_len
  211. if difference_value:
  212. evaluation_factor_index -= difference_value
  213. evaluation_criteria_index -= difference_value
  214. weights_index -= difference_value
  215. for table in tables:
  216. if not table[evaluation_criteria_index]:
  217. scrutinize_dict[tag_sign_][-1]['评分标准'] += table[-1] if table[-1] else table[-2]
  218. continue
  219. if '条款内容' in table and '编列内容' in table:
  220. break
  221. if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
  222. elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
  223. else: tag = table[regulation_number_index_]
  224. if tag:
  225. tag = tag.strip().replace("\n","")
  226. tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
  227. if tag and self._scrutinize_judge(tag):
  228. tag_sign_ = tag
  229. if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
  230. if len(table) >= weights_index:
  231. weighr_finder = weight_comp.findall(table[-1])
  232. if weighr_finder: table[weights_index] = weighr_finder[0]
  233. else: table[weights_index] = '3%'
  234. evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
  235. if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
  236. else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
  237. "评分标准":evaluation_criteria.strip().replace("\n",""),
  238. "权重":weights.strip().replace("\n","")}
  239. scrutinize_dict[tag_sign_].append(value)
  240. if table[regulation_number_index_]:
  241. if table[regulation_number_index_][0] == '3':
  242. scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
  243. scrutinize_Initial_title_len = 0
  244. break
  245. return scrutinize_dict
if __name__ == '__main__':
    # Script entry point: run the pre-review once per path in path_list.
    # path_list is empty here, so running the module as a script does nothing.
    path_list = []
    for path_ in path_list:
        # NOTE(review): the DocumentPreReview class in this file defines no
        # __init__ that accepts an argument -- confirm how path_ is consumed.
        dpr = DocumentPreReview(path_)
        scrutinize_dict = dpr.get_table() # TODO scrutinize_dict is the result we need