|
- '''
- 招投标文件预审查
- 1. 解析Bidding_document_extract中all_tables.json结果
- '''
- import re
- import json
- from celery_tasks.tools import BaseMethods
- class DocumentPreReview:
- def __init__(self, table_path: str):
- with open(table_path, 'r', encoding='utf-8') as fp:
- self.Bidding_tables = json.load(fp)
- def _scrutinize_judge(self, tag: str, threshold_value: int = 3):
- '''
- Clause number content judgment
- 商务 技术 报价 评审 评分 标准
- '''
- scrutinize_tuple = ("商务", "技术", "报价", "评审", "评分", "标准", "部分")
- hit_num = 0
- for scru in scrutinize_tuple:
- if scru in tag: hit_num += 1
- if hit_num >= threshold_value: return True
- else: return False
- def check_table(self, all_tables):  # Rebuilds each table record in all_tables (merging rows it treats as continuations) and returns the records as a new list.
- ''' check the form to assess quailty'''
- # all_tables = self.Bidding_tables
- tables_list = []  # records to return
- previous_page_number = 0  # first page of the latest record whose name contains both '办法' and '前附表'; 0 until one is seen
- criteria_sign = False  # set to True when a row containing '评分因素' and '评分标准' is seen; never reset in this method
- for partial_form in all_tables:  # each record is read through the keys 'table_name', 'page_numbers', 'title_len', 'col_len', 'table'
- record_num = 1  # back-offset used when merging a continuation row into an earlier stored row; grows by 1 per merge
- table_name = partial_form['table_name']
- page_number = partial_form['page_numbers']  # indexed with [0] and [-1] below, i.e. a sequence of pages
- title_len = partial_form['title_len']
- col_len = partial_form['col_len']
- tables = partial_form["table"]  # rows; each row is searched and indexed like a list of cells — presumably cell strings, TODO confirm
- form_ = {'table_name':table_name, 'page_numbers':page_number, 'table':[],
- 'col_len':col_len, 'title_len':title_len}  # rebuilt copy of the record; its 'table' rows are filled in below
- if '办法' in table_name and '前附表' in table_name:  # case 1: the record name contains both markers
- previous_page_number = page_number[0]  # remembered so later records can be matched by page proximity
- regulation_number_index,evaluation_factor_index,evaluation_criteria_index = 0,0,0  # review-section column positions, 0 until a header row is found
- regulation_number_index_,score_factor_index,score_criteria_index = 0,0,0  # scoring-section column positions, 0 until a header row is found
- for table_index, table in enumerate(tables):
- if '评审因素' in table and '评审标准' in table:  # review-section header row
- regulation_number_index = table.index("条款号")  # list.index raises ValueError if this label is not a cell of the row
- evaluation_factor_index = table.index("评审因素")
- evaluation_criteria_index = table.index("评审标准")
- form_['table'].append(table)
- continue
- elif not table[evaluation_factor_index] and table[evaluation_criteria_index]:  # factor cell empty but criteria cell filled
- form_['table'][table_index-1][evaluation_criteria_index] += table[evaluation_factor_index]  # NOTE(review): this adds the factor cell, which the condition above just found empty; the criteria cell may have been intended. Also indexes form_['table'] by position in `tables` — confirm
- else:
- if table not in form_['table'] and not criteria_sign:  # keep the row unless already stored or the scoring header has been seen
- form_['table'].append(table)
- if '评分因素' in table and '评分标准' in table:  # scoring-section header row
- regulation_number_index_ = table.index("条款号")
- score_factor_index = table.index("评分因素")
- score_criteria_index = table.index("评分标准")
- weights_index = table.index("权重")  # only assigned on a scoring header row, together with criteria_sign
- criteria_sign = True
- continue
- elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:  # header seen, cell after the clause-number column hits >= 2 keywords, score-factor cell empty
- form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]  # merge into the stored row record_num positions back
- form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
- form_['table'][table_index-record_num][weights_index] += table[weights_index]
- record_num += 1
- else:
- if table not in form_['table'] and criteria_sign:  # keep the row if not stored yet and the scoring header has been seen
- form_['table'].append(table)
- continue
- tables_list.append(form_)  # store the rebuilt record
- elif previous_page_number and page_number[-1]<previous_page_number+3:  # case 2: a 前附表 page was recorded and this record's last page is fewer than 3 pages past it
- for table_index, table in enumerate(tables):
- if '评分因素' in table and '评分标准' in table:  # scoring-section header row
- regulation_number_index_ = table.index("条款号")
- score_factor_index = table.index("评分因素")
- score_criteria_index = table.index("评分标准")
- weights_index = table.index("权重")
- form_['table'].append(table)  # in this case the header row itself is stored here
- criteria_sign = True
- continue
- elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]:  # same continuation-row test as in case 1
- form_['table'][table_index-record_num][score_factor_index-1] += table[score_factor_index-1]
- form_['table'][table_index-record_num][score_criteria_index] += table[score_criteria_index]
- form_['table'][table_index-record_num][weights_index] += table[weights_index]
- record_num += 1
- else: form_['table'].append(table)  # any other row is kept as is
- tables_list.append(form_)
- else:
- tables_list.append(partial_form)  # case 3: the original record object is passed through (not a copy)
- return tables_list
- def get_table(self):  # Runs check_table on the loaded tables, collects scoring entries grouped by section tag, and returns them as scrutinize_dict.
- ''' parse the Bidding_tables.json file to get the table data from it.
- '''
- all_tables = self.check_table(self.Bidding_tables)  # table records after row merging
- # all_tables = self.Bidding_tables
- # Pre-review of the bidding document content
- tag_sign = ''  # NOTE(review): not referenced again in this method
- tag_list = ("形式评审标准", "资格评审标准", "响应性评审标准")
- tag_dict = dict([(tag,[]) for tag in tag_list])  # NOTE(review): built here but not referenced again in this method
-
- scrutinize_dict = {}  # result: section tag -> list of entry dicts
- scrutinize_page = 0  # page on which the scoring table is expected to start; 0 = not determined yet
- scrutinize_index = -1  # row index of the scoring header row; -1 = not found (never reset to -1 later in this method)
- scrutinize_Initial_title_len = 0 # title length of the detailed-review table
- scrutinize_second_title_len = 0  # title length recorded when the following page (scrutinize_page+1) is processed
- scrutinize_sign = False  # becomes True once the first scoring page has been processed
- weight_comp = re.compile("(\d+%)")  # captures digits followed by '%'; NOTE(review): pattern is a non-raw string containing \d
- regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0  # column positions, 0 until a header row is found
- for partial_form in all_tables:
- table_name = partial_form['table_name']
- page_number = partial_form['page_numbers']
- title_len = partial_form['title_len']
- tables = partial_form["table"]
-
- form_sign = re.findall('评\w+法前附表',table_name)  # non-empty when the name contains '评', one or more word characters, then '法前附表'
- if form_sign:
- table_page_num = page_number[-1]  # last page of this record
- for table in tables[1:]:  # skips the first row
- if '评分因素' in table or '评分标准' in table:  # a later row carries one of the scoring header labels
- scrutinize_page = table_page_num
- scrutinize_Initial_title_len = title_len
- if not scrutinize_page: scrutinize_page = table_page_num+1  # nothing determined yet: assume the page after this record's last page
- ''' scrutinize '''  # bare string literal used as a section marker
- if (scrutinize_page in page_number and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:  # first scoring page: expected page is among this record's pages with a known title length, or is its first page
- scrutinize_sign = True
- if not scrutinize_Initial_title_len: scrutinize_Initial_title_len = title_len  # adopt this record's title length if none was recorded
- for table in tables:
- if '评分因素' in table and '评分标准' in table:  # header row with both labels as separate cells
- regulation_number_index_ = table.index("条款号")  # list.index raises ValueError if the label is not a cell of the row
- evaluation_factor_index = table.index("评分因素")
- evaluation_criteria_index = table.index("评分标准")
- weights_index = table.index("权重")
- tag_sign_ = ''  # current section tag, cleared when a header row is found
- scrutinize_index = tables.index(table)  # index of the first row equal to this one
- break
- elif '评分因素' in table and '评分标准' not in table:  # header row where '评分标准' is not a cell of its own
- scrutinize_index = tables.index(table)
- table_split = table[-1].replace(' ','').split()  # drop plain spaces from the last cell, then split on remaining whitespace
- if '评分标准' in table_split and '权重' in table_split:
- table = table[:-1]  # the slice creates a new list, so the row stored in `tables` is not modified
- table.extend(table_split)  # last cell replaced by its parts
- regulation_number_index_ = table.index("条款号")
- evaluation_factor_index = table.index("评分因素")
- evaluation_criteria_index = table.index("评分标准")
- weights_index = table.index("权重")
- tag_sign_ = ''
- break
- if scrutinize_index != -1:  # a header row has been located
- for table in tables[scrutinize_index+1:]:  # rows after the header row
- if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]  # tag candidate: the cell after the clause-number column when non-empty
- elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]  # else the cell two columns after, if it hits the default 3 keywords
- else: tag = table[regulation_number_index_]  # else the clause-number cell itself
- if tag:
- tag = tag.strip().replace("\n","")
- tag = ''.join(re.findall(r"[\u4e00-\u9fa5]+", tag))  # keep only characters in the range \u4e00-\u9fa5
- if tag and self._scrutinize_judge(tag):  # accepted as a new section tag when it hits at least 3 keywords
- tag_sign_ = tag
- if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []  # NOTE(review): tag_sign_ is only assigned in the header/tag branches above; if none ran first this name would be unbound — confirm
- if len(table) >= weights_index:
- weighr_finder = weight_comp.findall(table[-1])  # percentages found in the last cell
- if weighr_finder: table.append(weighr_finder[0])  # on this page the weight is appended as an extra cell
- else: table.append('3%')  # literal fallback when no percentage is found
- evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
- if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}  # entry without '权重' when the weight cell is empty
- else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
- "评分标准":evaluation_criteria.strip().replace("\n",""),
- "权重":weights.strip().replace("\n","")}
- scrutinize_dict[tag_sign_].append(value)
- if table[regulation_number_index_]:
- if table[regulation_number_index_][0] == '3':  # clause-number cell whose first character is '3'
- scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}  # drop section tags that collected no entries
- scrutinize_Initial_title_len = 0
- break
- elif scrutinize_page+1 in page_number and scrutinize_sign:  # following page, only after the first scoring page was processed
- scrutinize_second_title_len = title_len
- difference_value = scrutinize_Initial_title_len - title_len  # difference between the first page's title length and this record's
- if difference_value:
- table_length = len(table)  # NOTE(review): `table` here is the variable left over from an earlier loop, not a row of this record — confirm
- evaluation_factor_index -= difference_value  # shift the column positions by the difference
- evaluation_criteria_index -= difference_value
- weights_index -= difference_value
- if weights_index >= table_length:  # shifted position falls outside the row: use the last three columns instead
- evaluation_factor_index = table_length-3
- evaluation_criteria_index = table_length-2
- weights_index = table_length-1
- for table in tables:
- if not table[evaluation_criteria_index]:  # empty criteria cell: row treated as a continuation of the previous entry
- scrutinize_dict[tag_sign_][-1]['评分标准'] += table[-1] if table[-1] else table[-2]  # appends the last cell, or the second-to-last when the last is empty; assumes an entry already exists under tag_sign_
- continue
- if '条款内容' in table and '编列内容' in table:  # a row with both labels ends the scan of this record
- break
- if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]  # same tag-candidate selection as on the first page
- elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
- else: tag = table[regulation_number_index_]
- if tag:
- tag = tag.strip().replace("\n","")
- tag = re.findall("[\u4e00-\u9fff]+", tag)[0]  # first run of characters in \u4e00-\u9fff (first page joins all runs and uses \u9fa5); [0] raises IndexError when there is no such run
- if tag and self._scrutinize_judge(tag):
- tag_sign_ = tag
- if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
- if len(table) >= weights_index:
- weighr_finder = weight_comp.findall(table[-1])
- if weighr_finder: table[weights_index] = weighr_finder[0]  # here the weight overwrites the cell at weights_index instead of being appended; NOTE(review): out of range when len(table) == weights_index — confirm
- else: table[weights_index] = '3%'
- evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
- if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
- else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
- "评分标准":evaluation_criteria.strip().replace("\n",""),
- "权重":weights.strip().replace("\n","")}
- scrutinize_dict[tag_sign_].append(value)
- if table[regulation_number_index_]:
- if table[regulation_number_index_][0] == '3':  # same stop condition as on the first page
- scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
- scrutinize_Initial_title_len = 0
- break
- elif scrutinize_page+2 in page_number and scrutinize_sign:  # page after that; same row handling as the previous branch
- difference_value = scrutinize_second_title_len - title_len  # compared with the title length stored by the scrutinize_page+1 branch (still 0 if that branch never ran)
- if difference_value:
- evaluation_factor_index -= difference_value
- evaluation_criteria_index -= difference_value
- weights_index -= difference_value
- for table in tables:
- if not table[evaluation_criteria_index]:  # continuation row, handled as in the previous branch
- scrutinize_dict[tag_sign_][-1]['评分标准'] += table[-1] if table[-1] else table[-2]
- continue
- if '条款内容' in table and '编列内容' in table:
- break
- if table[regulation_number_index_+1]: tag = table[regulation_number_index_+1]
- elif self._scrutinize_judge(table[regulation_number_index_+2]): tag = table[regulation_number_index_+2]
- else: tag = table[regulation_number_index_]
- if tag:
- tag = tag.strip().replace("\n","")
- tag = re.findall("[\u4e00-\u9fff]+", tag)[0]  # raises IndexError when the tag has no character in this range
- if tag and self._scrutinize_judge(tag):
- tag_sign_ = tag
- if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
- if len(table) >= weights_index:
- weighr_finder = weight_comp.findall(table[-1])
- if weighr_finder: table[weights_index] = weighr_finder[0]
- else: table[weights_index] = '3%'
- evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
- if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
- else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
- "评分标准":evaluation_criteria.strip().replace("\n",""),
- "权重":weights.strip().replace("\n","")}
- scrutinize_dict[tag_sign_].append(value)
- if table[regulation_number_index_]:
- if table[regulation_number_index_][0] == '3':
- scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
- scrutinize_Initial_title_len = 0
- break
- return scrutinize_dict
if __name__ == '__main__':
    # Script entry point: run the pre-review over every listed JSON path.
    # No paths are configured here, so the loop body does not run as written.
    path_list = []
    for path_ in path_list:
        dpr = DocumentPreReview(path_)
        # TODO: scrutinize_dict is the required result
        scrutinize_dict = dpr.get_table()
|