|
@@ -1,8 +1,7 @@
|
|
|
-from tools import BaseMethods
|
|
|
+from tools_1 import BaseMethods
|
|
|
from pprint import pprint
|
|
|
import re
|
|
|
import logging
|
|
|
-import requests
|
|
|
|
|
|
|
|
|
def create_logger(log_path):
|
|
@@ -94,14 +93,14 @@ class DocumentPreReview():
|
|
|
elif not table[evaluation_factor_index] and table[evaluation_criteria_index]:
|
|
|
form_['table'][table_index-1][evaluation_criteria_index] += table[evaluation_factor_index]
|
|
|
else:
|
|
|
- if table not in form_['table']: form_['table'].append(table)
|
|
|
+ if table not in form_['table'] and not criteria_sign:
|
|
|
+ form_['table'].append(table)
|
|
|
|
|
|
if '评分因素' in table and '评分标准' in table:
|
|
|
regulation_number_index_ = table.index("条款号")
|
|
|
score_factor_index = table.index("评分因素")
|
|
|
score_criteria_index = table.index("评分标准")
|
|
|
weights_index = table.index("权重")
|
|
|
- form_['table'].append(table)
|
|
|
criteria_sign = True
|
|
|
continue
|
|
|
elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:
|
|
@@ -110,7 +109,9 @@ class DocumentPreReview():
|
|
|
form_['table'][table_index-record_num][weights_index] += table[weights_index]
|
|
|
record_num += 1
|
|
|
else:
|
|
|
- if table not in form_['table']: form_['table'].append(table)
|
|
|
+ if table not in form_['table'] and criteria_sign:
|
|
|
+ form_['table'].append(table)
|
|
|
+ continue
|
|
|
tables_list.append(form_)
|
|
|
elif previous_page_number and page_number[-1]<previous_page_number+3:
|
|
|
for table_index, table in enumerate(tables):
|
|
@@ -140,6 +141,7 @@ class DocumentPreReview():
|
|
|
''' parse the Bidding_tables.json file to get the table data from it.
|
|
|
'''
|
|
|
all_tables = self.check_table(self.Bidding_tables)
|
|
|
+ # all_tables = self.Bidding_tables
|
|
|
|
|
|
# 招标文件内容中预审查
|
|
|
tag_sign = ''
|
|
@@ -152,51 +154,25 @@ class DocumentPreReview():
|
|
|
scrutinize_Initial_title_len = 0 # 详审位置标记
|
|
|
scrutinize_sign = False
|
|
|
|
|
|
- record_page = 0
|
|
|
- bidder_know = {} # 投标人须知前附表
|
|
|
+ regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
|
|
|
+
|
|
|
for partial_form in all_tables:
|
|
|
table_name = partial_form['table_name']
|
|
|
page_number = partial_form['page_numbers']
|
|
|
title_len = partial_form['title_len']
|
|
|
tables = partial_form["table"]
|
|
|
|
|
|
- if '投标人须知前附表' == table_name:
|
|
|
- record_page = page_number[0]
|
|
|
- if page_number[0] < record_page + 3:
|
|
|
- for table in tables[1:]:
|
|
|
- if '条' in table: continue # 存在BUG
|
|
|
- try:
|
|
|
- if table[0] and table[0] not in bidder_know: bidder_know[table[0]] = []
|
|
|
- if table[0]: bidder_know[table[0]].append({"条款名称":table[1],"编列内容":table[2]})
|
|
|
- except:
|
|
|
- logger.error('该文件中的投标人须知前附表部分表格没有边框,只有中间部分表格存在边框,提取代码认为只有边框存在才被判定为表格内容')
|
|
|
-
|
|
|
form_sign = re.findall('评\w+法前附表',table_name)
|
|
|
if form_sign:
|
|
|
table_page_num = page_number[-1]
|
|
|
- inital_data = tables[0]
|
|
|
- # confirm data location
|
|
|
- regulation_number_index = inital_data.index("条款号")
|
|
|
- evaluation_factor_index = inital_data.index("评审因素")
|
|
|
- evaluation_criteria_index = inital_data.index("评审标准")
|
|
|
-
|
|
|
for table in tables[1:]:
|
|
|
- tag = table[regulation_number_index+1]
|
|
|
- if tag: tag = tag.strip().replace("\n","")
|
|
|
- if tag:
|
|
|
- tag_sign = tag
|
|
|
- evaluation_factor,evaluation_criteria = table[evaluation_factor_index],table[evaluation_criteria_index]
|
|
|
- if tag_sign in tag_dict:
|
|
|
- tag_dict[tag_sign].append({"评审因素":evaluation_factor.strip().replace("\n",""),
|
|
|
- "评审标准":evaluation_criteria.strip().replace("\n","")})
|
|
|
if '评分因素' in table or '评分标准' in table:
|
|
|
scrutinize_page = table_page_num
|
|
|
scrutinize_Initial_title_len = title_len
|
|
|
if not scrutinize_page: scrutinize_page = table_page_num+1
|
|
|
|
|
|
''' scrutinize '''
|
|
|
- if (scrutinize_page == page_number[0] and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
|
|
|
- regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
|
|
|
+ if (scrutinize_page in page_number and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
|
|
|
scrutinize_sign = True
|
|
|
if not scrutinize_Initial_title_len: scrutinize_Initial_title_len = title_len
|
|
|
for table in tables:
|
|
@@ -231,10 +207,7 @@ class DocumentPreReview():
|
|
|
if tag and self._scrutinize_judge(tag):
|
|
|
tag_sign_ = tag
|
|
|
if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
|
|
|
- try:
|
|
|
- evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
|
|
|
- except:
|
|
|
- print()
|
|
|
+ evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
|
|
|
if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
|
|
|
else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
|
|
|
"评分标准":evaluation_criteria.strip().replace("\n",""),
|
|
@@ -245,7 +218,7 @@ class DocumentPreReview():
|
|
|
scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
|
|
|
scrutinize_Initial_title_len = 0
|
|
|
break
|
|
|
- elif scrutinize_page+1 == page_number[0] and scrutinize_sign:
|
|
|
+ elif scrutinize_page+1 in page_number and scrutinize_sign:
|
|
|
difference_value = scrutinize_Initial_title_len - title_len
|
|
|
if difference_value:
|
|
|
table_length = len(table)
|
|
@@ -280,9 +253,9 @@ class DocumentPreReview():
|
|
|
scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
|
|
|
scrutinize_Initial_title_len = 0
|
|
|
break
|
|
|
- elif scrutinize_page+2 == page_number[0] and scrutinize_sign:
|
|
|
+ elif scrutinize_page+2 in page_number and scrutinize_sign:
|
|
|
difference_value = scrutinize_Initial_title_len - title_len
|
|
|
- if scrutinize_Initial_title_len:
|
|
|
+ if difference_value:
|
|
|
evaluation_factor_index -= difference_value
|
|
|
evaluation_criteria_index -= difference_value
|
|
|
weights_index -= difference_value
|
|
@@ -315,14 +288,20 @@ class DocumentPreReview():
|
|
|
return scrutinize_dict
|
|
|
|
|
|
|
|
|
-
|
|
|
if __name__ == '__main__':
|
|
|
- path_list = []
|
|
|
- for path_ in path_list:
|
|
|
- dpr = DocumentPreReview(path_)
|
|
|
- scrutinize_dict = dpr.get_table() # TODO scrutinize_dict是需要的结果
|
|
|
-
|
|
|
+ # import os
|
|
|
+ # base_dir = 'data/清标详审数据'
|
|
|
+ # for path_ in os.listdir(base_dir):
|
|
|
+ # if 'table' in path_ and path_.endswith('.json'):
|
|
|
+ # file_path = os.path.join(base_dir, path_)
|
|
|
+ # dpr = DocumentPreReview(file_path)
|
|
|
+ # print(path_)
|
|
|
+ # scrutinize_dict = dpr.get_table() # TODO scrutinize_dict是需要的结果
|
|
|
+ # break
|
|
|
|
|
|
+ path_ = "data/清标详审数据/(未签章)白鹤滩电站辅助值班及接待机器人研究采购程序文件-table.json"
|
|
|
+ dpr = DocumentPreReview(path_)
|
|
|
+ scrutinize_dict = dpr.get_table()
|
|
|
|
|
|
|
|
|
|