|
@@ -25,50 +25,47 @@ chinese_num_map = {
|
|
|
'十': 10
|
|
|
}
|
|
|
|
|
|
-def create_logger(log_path):
|
|
|
- """
|
|
|
- 将日志输出到日志文件和控制台
|
|
|
- """
|
|
|
- logger = logging.getLogger()
|
|
|
- logger.setLevel(logging.INFO)
|
|
|
-
|
|
|
- formatter = logging.Formatter(
|
|
|
- '%(asctime)s - %(levelname)s - %(message)s')
|
|
|
-
|
|
|
- # 创建一个handler,用于写入日志文件
|
|
|
- file_handler = logging.FileHandler(
|
|
|
- filename=log_path, mode='w')
|
|
|
- file_handler.setFormatter(formatter)
|
|
|
- file_handler.setLevel(logging.INFO)
|
|
|
- logger.addHandler(file_handler)
|
|
|
-
|
|
|
- # 创建一个handler,用于将日志输出到控制台
|
|
|
- console = logging.StreamHandler()
|
|
|
- console.setLevel(logging.DEBUG)
|
|
|
- console.setFormatter(formatter)
|
|
|
- logger.addHandler(console)
|
|
|
-
|
|
|
- return logger
|
|
|
-
|
|
|
-log_path = "code/logs/logs.log"
|
|
|
-logger = create_logger(log_path=log_path)
|
|
|
-
|
|
|
-class DocumentPreReview():
|
|
|
- def __init__(self, file_path) -> None:
|
|
|
+# def create_logger(log_path):
|
|
|
+# """
|
|
|
+# 将日志输出到日志文件和控制台
|
|
|
+# """
|
|
|
+# logger = logging.getLogger()
|
|
|
+# logger.setLevel(logging.INFO)
|
|
|
+
|
|
|
+# formatter = logging.Formatter(
|
|
|
+# '%(asctime)s - %(levelname)s - %(message)s')
|
|
|
+
|
|
|
+# # 创建一个handler,用于写入日志文件
|
|
|
+# file_handler = logging.FileHandler(
|
|
|
+# filename=log_path, mode='w')
|
|
|
+# file_handler.setFormatter(formatter)
|
|
|
+# file_handler.setLevel(logging.INFO)
|
|
|
+# logger.addHandler(file_handler)
|
|
|
+
|
|
|
+# # 创建一个handler,用于将日志输出到控制台
|
|
|
+# console = logging.StreamHandler()
|
|
|
+# console.setLevel(logging.DEBUG)
|
|
|
+# console.setFormatter(formatter)
|
|
|
+# logger.addHandler(console)
|
|
|
+
|
|
|
+# return logger
|
|
|
+
|
|
|
+# log_path = "./logs.log"
|
|
|
+# logger = create_logger(log_path=log_path)
|
|
|
+
|
|
|
+class DocumentPreReview:
|
|
|
+ def __init__(self) -> None:
|
|
|
self.bm = BaseMethods()
|
|
|
- self.Bidding_tables = self.get_Bidding_table(file_path)
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
def get_Bidding_table(self, file_path:str):
|
|
|
''' get table data
|
|
|
'''
|
|
|
# file_path = "data/预审查数据/三峡左岸及电源电站中央空调系统管网及末端改造(发布稿)-table.json"
|
|
|
# file_path = "data/预审查数据/2023年档案管理系统功能优化项目采购程序文件-table.json"
|
|
|
all_tables = self.bm.json_read(file_path)
|
|
|
+ self.Bidding_tables = all_tables
|
|
|
return all_tables
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
def _scrutinize_judge(self, tag:str, threshold_value:int=3):
|
|
|
''' Clause number content judgment
|
|
|
商务 技术 报价 评审 评分 标准
|
|
@@ -153,9 +150,6 @@ class DocumentPreReview():
|
|
|
tables_list.append(partial_form)
|
|
|
return tables_list
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
def get_table(self):
|
|
|
''' parse the Bidding_tables.json file to get the table data from it.
|
|
|
'''
|
|
@@ -180,176 +174,150 @@ class DocumentPreReview():
|
|
|
title_len = partial_form['title_len']
|
|
|
tables = partial_form["table"]
|
|
|
|
|
|
- if '投标人须知前附表' == table_name:
|
|
|
- record_page = page_number[0]
|
|
|
- if page_number[0] < record_page + 3:
|
|
|
- for table in tables[1:]:
|
|
|
- if '条' in table: continue # 存在BUG
|
|
|
- try:
|
|
|
- if table[0] and table[0] not in bidder_know: bidder_know[table[0]] = []
|
|
|
- if table[0]: bidder_know[table[0]].append({"条款名称":table[1],"编列内容":table[2]})
|
|
|
- except:
|
|
|
- logger.error('该文件中的投标人须知前附表部分表格没有边框,只有中间部分表格存在边框,提取代码认为只有边框存在才被判定为表格内容')
|
|
|
-
|
|
|
- form_sign = re.findall('评\w+法前附表',table_name)
|
|
|
- if form_sign:
|
|
|
- table_page_num = page_number[-1]
|
|
|
- inital_data = tables[0]
|
|
|
- # confirm data location
|
|
|
- regulation_number_index = inital_data.index("条款号")
|
|
|
- evaluation_factor_index = inital_data.index("评审因素")
|
|
|
- evaluation_criteria_index = inital_data.index("评审标准")
|
|
|
-
|
|
|
- for table in tables[1:]:
|
|
|
- tag = table[regulation_number_index+1]
|
|
|
- if tag: tag = tag.strip().replace("\n","")
|
|
|
- if tag:
|
|
|
- tag_sign = tag
|
|
|
- evaluation_factor,evaluation_criteria = table[evaluation_factor_index],table[evaluation_criteria_index]
|
|
|
- if tag_sign in tag_dict:
|
|
|
- tag_dict[tag_sign].append({"评审因素":evaluation_factor.strip().replace("\n",""),
|
|
|
- "评审标准":evaluation_criteria.strip().replace("\n","")})
|
|
|
- if '评分因素' in table or '评分标准' in table:
|
|
|
- scrutinize_page = table_page_num
|
|
|
- scrutinize_Initial_title_len = title_len
|
|
|
- if not scrutinize_page: scrutinize_page = table_page_num+1
|
|
|
-
|
|
|
- ''' scrutinize '''
|
|
|
- if (scrutinize_page == page_number[0] and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
|
|
|
- regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
|
|
|
- scrutinize_sign = True
|
|
|
- if not scrutinize_Initial_title_len: scrutinize_Initial_title_len = title_len
|
|
|
- for table in tables:
|
|
|
- if '评分因素' in table and '评分标准' in table:
|
|
|
- regulation_number_index_ = table.index("条款号")
|
|
|
- evaluation_factor_index = table.index("评分因素")
|
|
|
- evaluation_criteria_index = table.index("评分标准")
|
|
|
- weights_index = table.index("权重")
|
|
|
- tag_sign_ = ''
|
|
|
- scrutinize_index = tables.index(table)
|
|
|
- break
|
|
|
- elif '评分因素' in table and '评分标准' not in table:
|
|
|
- scrutinize_index = tables.index(table)
|
|
|
- table_split = table[-1].replace(' ','').split()
|
|
|
- if '评分标准' in table_split and '权重' in table_split:
|
|
|
- table = table[:-1]
|
|
|
- table.extend(table_split)
|
|
|
- regulation_number_index_ = table.index("条款号")
|
|
|
- evaluation_factor_index = table.index("评分因素")
|
|
|
- evaluation_criteria_index = table.index("评分标准")
|
|
|
- weights_index = table.index("权重")
|
|
|
- tag_sign_ = ''
|
|
|
- break
|
|
|
- if scrutinize_index != -1:
|
|
|
- for table in tables[scrutinize_index+1:]:
|
|
|
+ if 30 < page_number[0] < 50:
|
|
|
+ form_sign = re.findall('评\w+法前附表',table_name)
|
|
|
+ if form_sign:
|
|
|
+ table_page_num = page_number[-1]
|
|
|
+ inital_data = tables[0]
|
|
|
+ # confirm data location
|
|
|
+ regulation_number_index = inital_data.index("条款号")
|
|
|
+ evaluation_factor_index = inital_data.index("评审因素")
|
|
|
+ evaluation_criteria_index = inital_data.index("评审标准")
|
|
|
+
|
|
|
+ for table in tables[1:]:
|
|
|
+ tag = table[regulation_number_index+1]
|
|
|
+ if tag: tag = tag.strip().replace("\n","")
|
|
|
+ if tag:
|
|
|
+ tag_sign = tag
|
|
|
+ evaluation_factor,evaluation_criteria = table[evaluation_factor_index],table[evaluation_criteria_index]
|
|
|
+ if tag_sign in tag_dict:
|
|
|
+ tag_dict[tag_sign].append({"评审因素":evaluation_factor.strip().replace("\n",""),
|
|
|
+ "评审标准":evaluation_criteria.strip().replace("\n","")})
|
|
|
+ if '评分因素' in table or '评分标准' in table:
|
|
|
+ scrutinize_page = table_page_num
|
|
|
+ scrutinize_Initial_title_len = title_len
|
|
|
+ if not scrutinize_page: scrutinize_page = table_page_num+1
|
|
|
+
|
|
|
+ ''' scrutinize '''
|
|
|
+ if (scrutinize_page == page_number[0] and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
|
|
|
+ regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
|
|
|
+ scrutinize_sign = True
|
|
|
+ if not scrutinize_Initial_title_len: scrutinize_Initial_title_len = title_len
|
|
|
+ for table in tables:
|
|
|
+ if '评分因素' in table and '评分标准' in table:
|
|
|
+ regulation_number_index_ = table.index("条款号")
|
|
|
+ evaluation_factor_index = table.index("评分因素")
|
|
|
+ evaluation_criteria_index = table.index("评分标准")
|
|
|
+ weights_index = table.index("权重")
|
|
|
+ tag_sign_ = ''
|
|
|
+ scrutinize_index = tables.index(table)
|
|
|
+ break
|
|
|
+ elif '评分因素' in table and '评分标准' not in table:
|
|
|
+ scrutinize_index = tables.index(table)
|
|
|
+ table_split = table[-1].replace(' ','').split()
|
|
|
+ if '评分标准' in table_split and '权重' in table_split:
|
|
|
+ table = table[:-1]
|
|
|
+ table.extend(table_split)
|
|
|
+ regulation_number_index_ = table.index("条款号")
|
|
|
+ evaluation_factor_index = table.index("评分因素")
|
|
|
+ evaluation_criteria_index = table.index("评分标准")
|
|
|
+ weights_index = table.index("权重")
|
|
|
+ tag_sign_ = ''
|
|
|
+ break
|
|
|
+ if scrutinize_index != -1:
|
|
|
+ for table in tables[scrutinize_index+1:]:
|
|
|
+ if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
|
|
|
+ elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
|
|
|
+ else: tag = table[regulation_number_index_]
|
|
|
+ if tag:
|
|
|
+ tag = tag.strip().replace("\n","")
|
|
|
+ tag = ''.join(re.findall(r"[\u4e00-\u9fa5]+", tag))
|
|
|
+ if tag and self._scrutinize_judge(tag):
|
|
|
+ tag_sign_ = tag
|
|
|
+ if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
|
|
|
+ try:
|
|
|
+ evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
|
|
|
+ except Exception as e:
|
|
|
+ print(e)
|
|
|
+ if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
|
|
|
+ else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
|
|
|
+ "评分标准":evaluation_criteria.strip().replace("\n",""),
|
|
|
+ "权重":weights.strip().replace("\n","")}
|
|
|
+ scrutinize_dict[tag_sign_].append(value)
|
|
|
+ if table[regulation_number_index_]:
|
|
|
+ if table[regulation_number_index_][0] == '3':
|
|
|
+ scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
|
|
|
+ scrutinize_Initial_title_len = 0
|
|
|
+ break
|
|
|
+ elif scrutinize_page+1 == page_number[0] and scrutinize_sign:
|
|
|
+ difference_value = scrutinize_Initial_title_len - title_len
|
|
|
+ if difference_value:
|
|
|
+ table_length = len(table)
|
|
|
+ evaluation_factor_index -= difference_value
|
|
|
+ evaluation_criteria_index -= difference_value
|
|
|
+ weights_index -= difference_value
|
|
|
+ if weights_index >= table_length:
|
|
|
+ evaluation_factor_index = table_length-3
|
|
|
+ evaluation_criteria_index = table_length-2
|
|
|
+ weights_index = table_length-1
|
|
|
+ for table in tables:
|
|
|
+ if not table[2]:
|
|
|
+ scrutinize_dict[tag_sign_][-1]['评分标准'] += table[3]
|
|
|
+ continue
|
|
|
if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
|
|
|
elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
|
|
|
else: tag = table[regulation_number_index_]
|
|
|
if tag:
|
|
|
tag = tag.strip().replace("\n","")
|
|
|
- tag = ''.join(re.findall(r"[\u4e00-\u9fa5]+", tag))
|
|
|
+ tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
|
|
|
if tag and self._scrutinize_judge(tag):
|
|
|
tag_sign_ = tag
|
|
|
if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
|
|
|
- try:
|
|
|
- evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
|
|
|
- except:
|
|
|
- print()
|
|
|
- if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
|
|
|
+ evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
|
|
|
+ if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
|
|
|
else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
|
|
|
"评分标准":evaluation_criteria.strip().replace("\n",""),
|
|
|
"权重":weights.strip().replace("\n","")}
|
|
|
scrutinize_dict[tag_sign_].append(value)
|
|
|
if table[regulation_number_index_]:
|
|
|
- if table[regulation_number_index_][0] == '3':
|
|
|
- scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
|
|
|
- scrutinize_Initial_title_len = 0
|
|
|
- break
|
|
|
- elif scrutinize_page+1 == page_number[0] and scrutinize_sign:
|
|
|
- difference_value = scrutinize_Initial_title_len - title_len
|
|
|
- if difference_value:
|
|
|
- table_length = len(table)
|
|
|
- evaluation_factor_index -= difference_value
|
|
|
- evaluation_criteria_index -= difference_value
|
|
|
- weights_index -= difference_value
|
|
|
- if weights_index >= table_length:
|
|
|
- evaluation_factor_index = table_length-3
|
|
|
- evaluation_criteria_index = table_length-2
|
|
|
- weights_index = table_length-1
|
|
|
- for table in tables:
|
|
|
- if not table[2]:
|
|
|
- scrutinize_dict[tag_sign_][-1]['评分标准'] += table[3]
|
|
|
- continue
|
|
|
- if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
|
|
|
- elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
|
|
|
- else: tag = table[regulation_number_index_]
|
|
|
- if tag:
|
|
|
- tag = tag.strip().replace("\n","")
|
|
|
- tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
|
|
|
- if tag and self._scrutinize_judge(tag):
|
|
|
- tag_sign_ = tag
|
|
|
- if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
|
|
|
- evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
|
|
|
- if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
|
|
|
- else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
|
|
|
+ if table[regulation_number_index_][0] == '3':
|
|
|
+ scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
|
|
|
+ scrutinize_Initial_title_len = 0
|
|
|
+ break
|
|
|
+ elif scrutinize_page+2 == page_number[0] and scrutinize_sign:
|
|
|
+ difference_value = scrutinize_Initial_title_len - title_len
|
|
|
+ if difference_value:
|
|
|
+ evaluation_factor_index -= difference_value
|
|
|
+ evaluation_criteria_index -= difference_value
|
|
|
+ weights_index -= difference_value
|
|
|
+ for table in tables:
|
|
|
+ if not table[2]:
|
|
|
+ scrutinize_dict[tag_sign_][-1]['评分标准'] += table[3]
|
|
|
+ continue
|
|
|
+ if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
|
|
|
+ elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
|
|
|
+ else: tag = table[regulation_number_index_]
|
|
|
+ if tag:
|
|
|
+ tag = tag.strip().replace("\n","")
|
|
|
+ tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
|
|
|
+ if tag and self._scrutinize_judge(tag):
|
|
|
+ tag_sign_ = tag
|
|
|
+ if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
|
|
|
+ evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
|
|
|
+ if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
|
|
|
+ else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
|
|
|
"评分标准":evaluation_criteria.strip().replace("\n",""),
|
|
|
"权重":weights.strip().replace("\n","")}
|
|
|
- scrutinize_dict[tag_sign_].append(value)
|
|
|
- if table[regulation_number_index_]:
|
|
|
- if table[regulation_number_index_][0] == '3':
|
|
|
- scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
|
|
|
- scrutinize_Initial_title_len = 0
|
|
|
- break
|
|
|
- elif scrutinize_page+2 == page_number[0] and scrutinize_sign:
|
|
|
- difference_value = scrutinize_Initial_title_len - title_len
|
|
|
- if scrutinize_Initial_title_len:
|
|
|
- evaluation_factor_index -= difference_value
|
|
|
- evaluation_criteria_index -= difference_value
|
|
|
- weights_index -= difference_value
|
|
|
- for table in tables:
|
|
|
- if not table[2]:
|
|
|
- scrutinize_dict[tag_sign_][-1]['评分标准'] += table[3]
|
|
|
- continue
|
|
|
- if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
|
|
|
- elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
|
|
|
- else: tag = table[regulation_number_index_]
|
|
|
- if tag:
|
|
|
- tag = tag.strip().replace("\n","")
|
|
|
- tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
|
|
|
- if tag and self._scrutinize_judge(tag):
|
|
|
- tag_sign_ = tag
|
|
|
- if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
|
|
|
- evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
|
|
|
- if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
|
|
|
- else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
|
|
|
- "评分标准":evaluation_criteria.strip().replace("\n",""),
|
|
|
- "权重":weights.strip().replace("\n","")}
|
|
|
- scrutinize_dict[tag_sign_].append(value)
|
|
|
- if table[regulation_number_index_]:
|
|
|
- if table[regulation_number_index_][0] == '3':
|
|
|
- scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
|
|
|
- scrutinize_Initial_title_len = 0
|
|
|
- break
|
|
|
+ scrutinize_dict[tag_sign_].append(value)
|
|
|
+ if table[regulation_number_index_]:
|
|
|
+ if table[regulation_number_index_][0] == '3':
|
|
|
+ scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
|
|
|
+ scrutinize_Initial_title_len = 0
|
|
|
+ break
|
|
|
|
|
|
- pprint(scrutinize_dict)
|
|
|
return scrutinize_dict
|
|
|
|
|
|
|
|
|
-
|
|
|
-from fastapi import FastAPI
|
|
|
-import uvicorn
|
|
|
-app = FastAPI()
|
|
|
-
|
|
|
-@app.post('get_pre_review')
|
|
|
-def get_pre_review():
|
|
|
-
|
|
|
- result = {
|
|
|
- "":""
|
|
|
- }
|
|
|
- return result
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
if __name__ == '__main__':
|
|
|
path_list = []
|
|
|
for path_ in path_list:
|