import logging
import os
import re
from pprint import pprint

import requests

from tools import BaseMethods


def create_logger(log_path):
    """Configure the root logger to write to a log file and to the console.

    Args:
        log_path: Path of the log file. It is opened in ``'w'`` mode, so an
            existing file is truncated.

    Returns:
        The root logger, with a file handler and a console handler attached.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s')

    # FileHandler does not create missing parent directories, so make sure
    # the target directory exists before opening the file.
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Handler that writes records to the log file.
    file_handler = logging.FileHandler(
        filename=log_path, mode='w')
    file_handler.setFormatter(formatter)
    file_handler.setLevel(logging.INFO)
    logger.addHandler(file_handler)

    # Handler that echoes records to the console. The logger itself is set
    # to INFO above, so DEBUG records are filtered out before they get here.
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    console.setFormatter(formatter)
    logger.addHandler(console)

    return logger


log_path = "code/logs/logs.log"
logger = create_logger(log_path=log_path)


class DocumentPreReview():
    """Extract review/scoring criteria from the table dump of a bidding document."""

    # Runs of CJK unified ideographs; used to strip numbering from a tag.
    _CJK_RUN = re.compile("[\u4e00-\u9fff]+")

    def __init__(self, file_path) -> None:
        """Load the table data found at *file_path* via ``BaseMethods.json_read``."""
        self.bm = BaseMethods()
        self.Bidding_tables = self.get_Bidding_table(file_path)

    def get_Bidding_table(self, file_path: str):
        """Read and return the table data stored in the JSON file *file_path*."""
        return self.bm.json_read(file_path)

    def _scrutinize_judge(self, tag: str, threshold_value: int = 3):
        """Tell whether *tag* looks like a review-section heading.

        Counts how many of the keywords 商务 / 技术 / 报价 / 评审 / 评分 / 标准
        occur in *tag*.

        Args:
            tag: Cell text to inspect. ``None`` is treated as an empty string.
            threshold_value: Minimum number of distinct keywords required.

        Returns:
            ``True`` when at least *threshold_value* keywords occur in *tag*.
        """
        if tag is None:
            tag = ""
        scrutinize_tuple = ("商务", "技术", "报价", "评审", "评分", "标准")
        hit_num = sum(1 for scru in scrutinize_tuple if scru in tag)
        return hit_num >= threshold_value

    def check_table(self, all_tables):
        """Merge continuation rows of the review-method front tables.

        Args:
            all_tables: Iterable of dicts with the keys ``table_name``,
                ``page_numbers``, ``title_len``, ``col_len`` and ``table``
                (a list of rows, each row a list of cells).

        Returns:
            A list of dicts with the same keys, whose ``table`` holds the
            processed rows.

        NOTE(review): the tail of this method is missing from the source this
        rewrite was made from (see the notes at the end of the body).
        """
        tables_list = []
        # NOTE(review): only consumed by the branch that was lost (see below).
        previous_page_number = 0
        criteria_sign = False
        for partial_form in all_tables:
            record_num = 1
            table_name = partial_form['table_name']
            page_number = partial_form['page_numbers']
            title_len = partial_form['title_len']
            col_len = partial_form['col_len']
            tables = partial_form["table"]
            form_ = {'table_name': table_name, 'page_numbers': page_number,
                     'table': [], 'col_len': col_len, 'title_len': title_len}
            if '办法' in table_name and '前附表' in table_name:
                previous_page_number = page_number[0]
                regulation_number_index, evaluation_factor_index, evaluation_criteria_index = 0, 0, 0
                regulation_number_index_, score_factor_index, score_criteria_index = 0, 0, 0
                # NOTE(review): the nesting of the two if/elif/else chains
                # below is inferred from statement order; the source had no
                # indentation left. Confirm against the original file.
                for table_index, table in enumerate(tables):
                    # Header row of the "review factor / review criteria" part.
                    if '评审因素' in table and '评审标准' in table:
                        regulation_number_index = table.index("条款号")
                        evaluation_factor_index = table.index("评审因素")
                        evaluation_criteria_index = table.index("评审标准")
                        form_['table'].append(table)
                        continue
                    elif not table[evaluation_factor_index] and table[evaluation_criteria_index]:
                        # NOTE(review): this appends the (empty) factor cell,
                        # which looks like it was meant to be
                        # table[evaluation_criteria_index]. Left unchanged.
                        form_['table'][table_index-1][evaluation_criteria_index] += table[evaluation_factor_index]
                    else:
                        if table not in form_['table']:
                            form_['table'].append(table)

                    # Header row of the "scoring factor / scoring criteria" part.
                    if '评分因素' in table and '评分标准' in table:
                        regulation_number_index_ = table.index("条款号")
                        score_factor_index = table.index("评分因素")
                        score_criteria_index = table.index("评分标准")
                        weights_index = table.index("权重")
                        form_['table'].append(table)
                        criteria_sign = True
                        continue
                    elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1], 2) and not table[score_factor_index]:
                        # Continuation row: fold its cells into an earlier
                        # row. record_num grows with each folded row so the
                        # target index stays put across consecutive rows.
                        form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
                        form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
                        form_['table'][table_index-record_num][weights_index] += table[weights_index]
                        record_num += 1
                    else:
                        if table not in form_['table']:
                            form_['table'].append(table)
                tables_list.append(form_)
            # NOTE(review): the source is truncated here. It continued with
            #     elif previous_page_number and page_number[-1]<...
            # and everything up to the middle of get_table() was lost. That
            # branch is deliberately not reconstructed; restore it from
            # version control.

        # NOTE(review): the original return statement was lost with the
        # truncated text; returning the accumulated list is an assumption.
        return tables_list

    def _fold_scoring_rows(self, tables, scrutinize_dict, tag_sign_,
                           regulation_number_index_, evaluation_factor_index,
                           evaluation_criteria_index, weights_index):
        """Fold the rows of one table into *scrutinize_dict*.

        This is the row loop that appeared twice, verbatim, in get_table().

        Args:
            tables: Rows of the table, each a list of cells.
            scrutinize_dict: Mapping of section tag to a list of entries;
                updated in place (and replaced when the end marker is hit).
            tag_sign_: Section tag the rows currently belong to.
            regulation_number_index_: Column of the clause number.
            evaluation_factor_index: Column of the scoring factor.
            evaluation_criteria_index: Column of the scoring criteria.
            weights_index: Column of the weight.

        Returns:
            ``(scrutinize_dict, tag_sign_, finished)``. ``finished`` is True
            when a row whose clause number starts with ``'3'`` was reached.
        """
        for table in tables:
            if not table[2]:
                # Continuation row: append its text to the previous entry's
                # criteria. Columns 2 and 3 are hard-coded in the original.
                entries = scrutinize_dict.get(tag_sign_)
                if entries:
                    entries[-1]['评分标准'] += table[3]
                else:
                    # The original raised KeyError/IndexError here.
                    logger.warning(
                        "continuation row without a preceding entry: %s", table)
                continue

            # Pick the cell that carries the section tag.
            if table[regulation_number_index_+1]:
                tag = table[regulation_number_index_+1]
            elif self._scrutinize_judge(table[regulation_number_index_+2]):
                tag = table[regulation_number_index_+2]
            else:
                tag = table[regulation_number_index_]

            if tag:
                tag = tag.strip().replace("\n", "")
                # Keep the first run of CJK characters. A tag without any
                # (e.g. a bare clause number) used to raise IndexError.
                cjk_runs = self._CJK_RUN.findall(tag)
                tag = cjk_runs[0] if cjk_runs else ""

            if tag and self._scrutinize_judge(tag):
                tag_sign_ = tag
                if tag_sign_ not in scrutinize_dict:
                    scrutinize_dict[tag_sign_] = []

            evaluation_factor = table[evaluation_factor_index]
            evaluation_criteria = table[evaluation_criteria_index]
            weights = table[weights_index]
            value = {"评分因素": evaluation_factor.strip().replace("\n", ""),
                     "评分标准": evaluation_criteria.strip().replace("\n", "")}
            if weights:
                value["权重"] = weights.strip().replace("\n", "")
            # setdefault: rows seen before any section heading used to raise
            # KeyError; they are kept under the current tag instead.
            scrutinize_dict.setdefault(tag_sign_, []).append(value)

            if table[regulation_number_index_]:
                if table[regulation_number_index_][0] == '3':
                    # A clause number starting with '3' ends the section:
                    # drop empty groups and stop reading this table.
                    scrutinize_dict = {key: value for key, value
                                       in scrutinize_dict.items() if value}
                    return scrutinize_dict, tag_sign_, True
        return scrutinize_dict, tag_sign_, False

    def get_table(self):
        """Collect scoring factors, criteria and weights grouped by section.

        Returns:
            A dict mapping a section tag to a list of dicts with the keys
            ``评分因素``, ``评分标准`` and, when a weight is present, ``权重``.
            The dict is also pretty-printed before it is returned.

        NOTE(review): the beginning of this method is missing from the source
        this rewrite was made from. Grounded in the surviving text: the
        fallback to the last three columns, the row folding, and the whole
        continuation branch. Reconstructed, to be confirmed against version
        control: the signature (taken from the ``dpr.get_table()`` call), the
        initial state, the iteration over ``self.Bidding_tables``, the first
        branch's condition and header lookup, and the left operand of the
        ``>= table_length`` comparison.
        """
        scrutinize_dict = {}
        tag_sign_ = ""
        scrutinize_sign = False
        scrutinize_page = 0
        scrutinize_Initial_title_len = 0
        regulation_number_index_ = 0
        evaluation_factor_index = 0
        evaluation_criteria_index = 0
        weights_index = 0

        for partial_form in self.Bidding_tables:
            table_name = partial_form['table_name']
            page_number = partial_form['page_numbers']
            title_len = partial_form['title_len']
            tables = partial_form["table"]

            # NOTE(review): reconstructed condition, mirrors check_table().
            if '办法' in table_name and '前附表' in table_name:
                # NOTE(review): reconstructed bookkeeping; the continuation
                # branch below reads all three values.
                scrutinize_sign = True
                scrutinize_page = page_number[0]
                scrutinize_Initial_title_len = title_len

                # NOTE(review): reconstructed header lookup, mirrors
                # check_table().
                for header in tables:
                    if '评分因素' in header and '评分标准' in header:
                        regulation_number_index_ = header.index("条款号")
                        evaluation_factor_index = header.index("评分因素")
                        evaluation_criteria_index = header.index("评分标准")
                        weights_index = header.index("权重")
                        break

                # NOTE(review): the definition of table_length was lost;
                # the width of the first row is an assumption.
                table_length = len(tables[0]) if tables else 0
                if weights_index >= table_length:
                    # Fall back to the last three columns.
                    evaluation_factor_index = table_length-3
                    evaluation_criteria_index = table_length-2
                    weights_index = table_length-1

                scrutinize_dict, tag_sign_, finished = self._fold_scoring_rows(
                    tables, scrutinize_dict, tag_sign_,
                    regulation_number_index_, evaluation_factor_index,
                    evaluation_criteria_index, weights_index)
                if finished:
                    scrutinize_Initial_title_len = 0
            elif scrutinize_page+2 == page_number[0] and scrutinize_sign:
                # The table continues two pages later; shift the column
                # indices by the difference in title length.
                difference_value = scrutinize_Initial_title_len - title_len
                if scrutinize_Initial_title_len:
                    evaluation_factor_index -= difference_value
                    evaluation_criteria_index -= difference_value
                    weights_index -= difference_value

                scrutinize_dict, tag_sign_, finished = self._fold_scoring_rows(
                    tables, scrutinize_dict, tag_sign_,
                    regulation_number_index_, evaluation_factor_index,
                    evaluation_criteria_index, weights_index)
                if finished:
                    scrutinize_Initial_title_len = 0

        pprint(scrutinize_dict)
        return scrutinize_dict


if __name__ == '__main__':
    path_list = []
    for path_ in path_list:
        dpr = DocumentPreReview(path_)
        scrutinize_dict = dpr.get_table()
        # TODO: scrutinize_dict is the result that is needed