'''
Pre-review of bidding (tender) documents.
    1. Parse the all_tables.json result produced by Bidding_document_extract.
'''
# NOTE(review): this file was recovered from a copy in which line breaks and
# indentation had been collapsed. Statement nesting below was reconstructed
# from syntax alone -- confirm against the original file before relying on it.
from tools import BaseMethods
from pprint import pprint
import re
import logging
import requests
# from bidding_document_extract.get_Bidding_info import PdfExtractAttr_
# from bidding_document_extract.get_bidding_info import PdfExtractAttr

# Maps the Chinese numerals zero..ten to their integer values.
chinese_num_map = {
    '零': 0, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, '十': 10
}

# def create_logger(log_path):
#     """
#     Send log output to both a log file and the console.
#     """
#     logger = logging.getLogger()
#     logger.setLevel(logging.INFO)
#     formatter = logging.Formatter(
#         '%(asctime)s - %(levelname)s - %(message)s')
#     # Create a handler that writes to the log file
#     file_handler = logging.FileHandler(
#         filename=log_path, mode='w')
#     file_handler.setFormatter(formatter)
#     file_handler.setLevel(logging.INFO)
#     logger.addHandler(file_handler)
#     # Create a handler that echoes log output to the console
#     console = logging.StreamHandler()
#     console.setLevel(logging.DEBUG)
#     console.setFormatter(formatter)
#     logger.addHandler(console)
#     return logger
# log_path = "./logs.log"
# logger = create_logger(log_path=log_path)


class DocumentPreReview:
    # Extracts review/scoring criteria from the tables of a bidding document.

    def __init__(self) -> None:
        # Project helper object; only its json_read() method is used in the
        # visible code.
        self.bm = BaseMethods()

    def get_Bidding_table(self, file_path:str):
        '''
        Load the extracted tables from ``file_path``.

        The file is read with ``self.bm.json_read``; the result is cached on
        ``self.Bidding_tables`` and also returned.
        '''
        # file_path = "data/预审查数据/三峡左岸及电源电站中央空调系统管网及末端改造(发布稿)-table.json"
        # file_path = "data/预审查数据/2023年档案管理系统功能优化项目采购程序文件-table.json"
        all_tables = self.bm.json_read(file_path)
        self.Bidding_tables = all_tables
        return all_tables

    def _scrutinize_judge(self, tag:str, threshold_value:int=3):
        '''
        Decide whether ``tag`` looks like a review-section heading.

        Counts how many of the six keywords in ``scrutinize_tuple`` (business,
        technical, quotation, review, scoring, standard) occur as substrings
        of ``tag`` and returns True when that count is at least
        ``threshold_value``, otherwise False.
        '''
        scrutinize_tuple = ("商务","技术","报价","评审","评分","标准")
        hit_num = 0
        for scru in scrutinize_tuple:
            if scru in tag:
                hit_num+= 1
        if hit_num>=threshold_value:
            return True
        else:
            return False

    def check_table(self, all_tables):
        '''
        Check and normalise the extracted tables.

        Each element of ``all_tables`` is read through the keys
        'table_name', 'page_numbers', 'title_len', 'col_len' and 'table'.
        For a table whose name contains both keywords tested below, the rows
        are walked, header rows are located, continuation rows are folded
        into earlier rows, and the rebuilt form is appended to
        ``tables_list``.

        NOTE(review): the end of this method (including whatever it returns)
        is missing from the source as received -- see the marker further down.
        '''
        # all_tables = self.Bidding_tables
        tables_list = []
        previous_page_number = 0
        # Becomes True once a scoring-table header row has been seen; it is
        # never reset in the visible code, so it carries over between forms.
        criteria_sign = False
        for partial_form in all_tables:
            # Distance (in source rows) back to the row that continuation
            # rows are merged into; grows by one per merged row.
            record_num = 1
            table_name = partial_form['table_name']
            page_number = partial_form['page_numbers']
            title_len = partial_form['title_len']
            col_len = partial_form['col_len']
            tables = partial_form["table"]
            # Copy of the form metadata with an initially empty row list.
            form_ = {'table_name':table_name, 'page_numbers':page_number, 'table':[], 'col_len':col_len, 'title_len':title_len}
            if '办法' in table_name and '前附表' in table_name:
                previous_page_number = page_number[0]
                # Column positions default to 0 until a header row is found.
                regulation_number_index,evaluation_factor_index,evaluation_criteria_index = 0,0,0
                regulation_number_index_,score_factor_index,score_criteria_index = 0,0,0
                for table_index, table in enumerate(tables):
                    # --- review-table handling ---
                    if '评审因素' in table and '评审标准' in table:
                        # Header row: remember where each column lives.
                        # list.index raises ValueError if the clause-number
                        # header cell is absent from this row.
                        regulation_number_index = table.index("条款号")
                        evaluation_factor_index = table.index("评审因素")
                        evaluation_criteria_index = table.index("评审标准")
                        form_['table'].append(table)
                        continue
                    elif not table[evaluation_factor_index] and table[evaluation_criteria_index]:
                        # Continuation row (empty factor cell, non-empty
                        # criteria cell).
                        # NOTE(review): this appends the *factor* cell, which
                        # the condition just established is falsy; the
                        # criteria cell was probably intended -- confirm.
                        # NOTE(review): table_index counts source rows, not
                        # rows of form_['table']; verify they stay aligned.
                        form_['table'][table_index-1][evaluation_criteria_index] += table[evaluation_factor_index]
                    else:
                        if table not in form_['table']:
                            form_['table'].append(table)
                    # --- scoring-table handling ---
                    # NOTE(review): with indentation lost, this chain could
                    # also have been nested inside the preceding else branch;
                    # it is laid out at loop-body level here -- confirm.
                    if '评分因素' in table and '评分标准' in table:
                        # Header row of the scoring table.
                        regulation_number_index_ = table.index("条款号")
                        score_factor_index = table.index("评分因素")
                        score_criteria_index = table.index("评分标准")
                        weights_index = table.index("权重")
                        form_['table'].append(table)
                        criteria_sign = True
                        continue
                    elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:
                        # Continuation row of a scoring entry: fold three of
                        # its cells into the row record_num positions back.
                        form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
                        form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
                        form_['table'][table_index-record_num][weights_index] += table[weights_index]
                        record_num += 1
                    else:
                        if table not in form_['table']:
                            form_['table'].append(table)
                tables_list.append(form_)
            # NOTE(review): SOURCE IS CORRUPTED ON THE NEXT LINE. The text
            # between `page_number[-1]` and `= table_length:` is missing. The
            # lost span appears to hold the rest of this condition, the
            # remainder of check_table, and the header and opening statements
            # of a following method (presumably the `get_table` called from
            # the __main__ guard -- confirm). The names scrutinize_dict,
            # tag_sign_, scrutinize_page, scrutinize_sign,
            # scrutinize_Initial_title_len and table_length are used below but
            # never initialised in the visible code. The line is kept verbatim
            # and is not valid Python; restore it from the original file. The
            # layout that follows (assignments as the body of a lost inner
            # `if`, then a sibling `for`) is a best-effort reading.
            elif previous_page_number and page_number[-1]= table_length:
                    # Fall back to the last three columns of the row.
                    evaluation_factor_index = table_length-3
                    evaluation_criteria_index = table_length-2
                    weights_index = table_length-1
                for table in tables:
                    if not table[2]:
                        # Third cell empty: treat the row as a continuation
                        # and append its fourth cell to the criteria text of
                        # the most recent entry under the current tag.
                        scrutinize_dict[tag_sign_][-1]['评分标准'] += table[3]
                        continue
                    # Pick the cell that carries the section tag: the cell
                    # after the clause number, else the one after that if it
                    # passes the keyword test, else the clause-number cell.
                    if table[regulation_number_index_+1]:
                        tag = table[regulation_number_index_+1]
                    elif self._scrutinize_judge(table[regulation_number_index_+2]):
                        tag = table[regulation_number_index_+2]
                    else:
                        tag = table[regulation_number_index_]
                    if tag:
                        tag = tag.strip().replace("\n","")
                        # Keep only the first run of CJK characters
                        # (U+4E00..U+9FFF).
                        # NOTE(review): [0] raises IndexError when the tag
                        # contains no such character.
                        tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
                    if tag and self._scrutinize_judge(tag):
                        # The tag passes the keyword test: it becomes the
                        # current category key.
                        tag_sign_ = tag
                        if tag_sign_ not in scrutinize_dict:
                            scrutinize_dict[tag_sign_] = []
                    evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
                    # The weight key is only emitted when the weight cell is
                    # non-empty.
                    if not weights:
                        value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
                    else:
                        value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n",""), "权重":weights.strip().replace("\n","")}
                    scrutinize_dict[tag_sign_].append(value)
                    if table[regulation_number_index_]:
                        if table[regulation_number_index_][0] == '3':
                            # A clause number starting with '3' ends the
                            # scan: drop categories that collected nothing,
                            # reset the remembered title length, stop.
                            scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
                            scrutinize_Initial_title_len = 0
                            break
            elif scrutinize_page+2 == page_number[0] and scrutinize_sign:
                # Taken when this form's first page number equals
                # scrutinize_page + 2 and scrutinize_sign is truthy. The
                # column indexes are shifted by the difference between the
                # remembered title length and this form's title length.
                difference_value = scrutinize_Initial_title_len - title_len
                if difference_value:
                    evaluation_factor_index -= difference_value
                    evaluation_criteria_index -= difference_value
                    weights_index -= difference_value
                # Same per-row processing as in the branch above.
                for table in tables:
                    if not table[2]:
                        scrutinize_dict[tag_sign_][-1]['评分标准'] += table[3]
                        continue
                    if table[regulation_number_index_+1]:
                        tag = table[regulation_number_index_+1]
                    elif self._scrutinize_judge(table[regulation_number_index_+2]):
                        tag = table[regulation_number_index_+2]
                    else:
                        tag = table[regulation_number_index_]
                    if tag:
                        tag = tag.strip().replace("\n","")
                        # NOTE(review): [0] raises IndexError when the tag
                        # contains no CJK character.
                        tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
                    if tag and self._scrutinize_judge(tag):
                        tag_sign_ = tag
                        if tag_sign_ not in scrutinize_dict:
                            scrutinize_dict[tag_sign_] = []
                    evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
                    if not weights:
                        value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
                    else:
                        value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n",""), "权重":weights.strip().replace("\n","")}
                    scrutinize_dict[tag_sign_].append(value)
                    if table[regulation_number_index_]:
                        if table[regulation_number_index_][0] == '3':
                            scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
                            scrutinize_Initial_title_len = 0
                            break
        # Mapping of category tag -> list of entry dicts built above.
        return scrutinize_dict


if __name__ == '__main__':
    # path_list is empty, so the loop body below never executes as written.
    path_list = []
    for path_ in path_list:
        # NOTE(review): __init__ accepts no argument besides self, so this
        # call would raise TypeError; get_table is also not defined in the
        # visible part of the class -- confirm the intended entry point.
        dpr = DocumentPreReview(path_)
        scrutinize_dict = dpr.get_table()
        # TODO scrutinize_dict is the result we need