Przeglądaj źródła

document_.py update

lfygithub01 11 miesięcy temu
rodzic
commit
abe85f88db
1 zmieniony plik, 26 dodań i 47 usunięć
  1. 26 47
      document_.py

+ 26 - 47
document_.py

@@ -1,8 +1,7 @@
-from tools import BaseMethods
+from tools_1 import BaseMethods
 from pprint import pprint
 import re
 import logging
-import requests
 
 
 def create_logger(log_path):
@@ -94,14 +93,14 @@ class DocumentPreReview():
                     elif not table[evaluation_factor_index] and table[evaluation_criteria_index]:
                         form_['table'][table_index-1][evaluation_criteria_index] += table[evaluation_factor_index]
                     else: 
-                        if table not in form_['table']: form_['table'].append(table)
+                        if table not in form_['table'] and not criteria_sign: 
+                            form_['table'].append(table)
 
                     if '评分因素' in table and '评分标准' in table:
                         regulation_number_index_ = table.index("条款号")
                         score_factor_index = table.index("评分因素")
                         score_criteria_index = table.index("评分标准")
                         weights_index = table.index("权重")
-                        form_['table'].append(table)
                         criteria_sign = True
                         continue
                     elif criteria_sign and self._scrutinize_judge(table[regulation_number_index_+1],2) and not table[score_factor_index]: 
@@ -110,7 +109,9 @@ class DocumentPreReview():
                         form_['table'][table_index-record_num][weights_index] += table[weights_index]
                         record_num += 1
                     else: 
-                        if table not in form_['table']: form_['table'].append(table)
+                        if table not in form_['table'] and criteria_sign: 
+                            form_['table'].append(table)
+                            continue
                 tables_list.append(form_)
             elif previous_page_number and page_number[-1]<previous_page_number+3:
                 for table_index, table in enumerate(tables):
@@ -140,6 +141,7 @@ class DocumentPreReview():
         ''' parse the Bidding_tables.json file to get the table data from it.
         '''
         all_tables = self.check_table(self.Bidding_tables)
+        # all_tables = self.Bidding_tables
 
         # 招标文件内容中预审查
         tag_sign = ''
@@ -152,51 +154,25 @@ class DocumentPreReview():
         scrutinize_Initial_title_len = 0  # 详审位置标记
         scrutinize_sign = False
 
-        record_page = 0
-        bidder_know = {}   # 投标人须知前附表
+        regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
+
         for partial_form in all_tables:
             table_name = partial_form['table_name']
             page_number = partial_form['page_numbers']
             title_len = partial_form['title_len']
             tables = partial_form["table"]
             
-            if '投标人须知前附表' == table_name:  
-                record_page = page_number[0]
-            if page_number[0] < record_page + 3: 
-                for table in tables[1:]:
-                    if '条' in table: continue    # 存在BUG            
-                    try:
-                        if table[0] and table[0] not in bidder_know: bidder_know[table[0]] = []
-                        if table[0]: bidder_know[table[0]].append({"条款名称":table[1],"编列内容":table[2]})
-                    except:
-                        logger.error('该文件中的投标人须知前附表部分表格没有边框,只有中间部分表格存在边框,提取代码认为只有边框存在才被判定为表格内容')
-            
             form_sign = re.findall('评\w+法前附表',table_name)
             if form_sign:
                 table_page_num = page_number[-1]
-                inital_data = tables[0]
-                # confirm data location
-                regulation_number_index = inital_data.index("条款号")
-                evaluation_factor_index = inital_data.index("评审因素")
-                evaluation_criteria_index = inital_data.index("评审标准")
-
                 for table in tables[1:]:
-                    tag = table[regulation_number_index+1]
-                    if tag: tag = tag.strip().replace("\n","")
-                    if tag:
-                        tag_sign = tag
-                    evaluation_factor,evaluation_criteria = table[evaluation_factor_index],table[evaluation_criteria_index]
-                    if tag_sign in tag_dict: 
-                        tag_dict[tag_sign].append({"评审因素":evaluation_factor.strip().replace("\n",""),
-                                                "评审标准":evaluation_criteria.strip().replace("\n","")})
                     if '评分因素' in table or '评分标准' in table:
                         scrutinize_page = table_page_num
                         scrutinize_Initial_title_len = title_len
                 if not scrutinize_page: scrutinize_page = table_page_num+1
 
             ''' scrutinize '''
-            if (scrutinize_page == page_number[0] and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
-                regulation_number_index_,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
+            if (scrutinize_page in page_number and scrutinize_Initial_title_len) or scrutinize_page == page_number[0]:
                 scrutinize_sign = True
                 if not scrutinize_Initial_title_len: scrutinize_Initial_title_len = title_len
                 for table in tables:
@@ -231,10 +207,7 @@ class DocumentPreReview():
                         if tag and self._scrutinize_judge(tag):
                             tag_sign_ = tag
                             if tag_sign_ not in scrutinize_dict: scrutinize_dict[tag_sign_] = []
-                        try:
-                            evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
-                        except:
-                            print()
+                        evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
                         if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
                         else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
                                         "评分标准":evaluation_criteria.strip().replace("\n",""),
@@ -245,7 +218,7 @@ class DocumentPreReview():
                                 scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
                                 scrutinize_Initial_title_len = 0
                                 break
-            elif scrutinize_page+1 == page_number[0] and scrutinize_sign:
+            elif scrutinize_page+1 in page_number and scrutinize_sign:
                 difference_value = scrutinize_Initial_title_len - title_len
                 if difference_value:
                     table_length = len(table)
@@ -280,9 +253,9 @@ class DocumentPreReview():
                                 scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
                                 scrutinize_Initial_title_len = 0
                                 break
-            elif scrutinize_page+2 == page_number[0] and scrutinize_sign:
+            elif scrutinize_page+2 in page_number and scrutinize_sign:
                 difference_value = scrutinize_Initial_title_len - title_len
-                if scrutinize_Initial_title_len:
+                if difference_value:
                     evaluation_factor_index -= difference_value
                     evaluation_criteria_index -= difference_value
                     weights_index -= difference_value
@@ -315,14 +288,20 @@ class DocumentPreReview():
         return scrutinize_dict
 
 
-
 if __name__ == '__main__':
-    path_list = []
-    for path_ in path_list:
-        dpr = DocumentPreReview(path_)
-        scrutinize_dict = dpr.get_table()  # TODO scrutinize_dict是需要的结果
-
+    # import os
+    # base_dir = 'data/清标详审数据'
+    # for path_ in os.listdir(base_dir):
+    #     if 'table' in path_ and path_.endswith('.json'):
+    #         file_path = os.path.join(base_dir, path_)
+    #         dpr = DocumentPreReview(file_path)
+    #         print(path_)
+    #         scrutinize_dict = dpr.get_table()  # TODO scrutinize_dict是需要的结果
+    #         break
 
+    path_ = "data/清标详审数据/(未签章)白鹤滩电站辅助值班及接待机器人研究采购程序文件-table.json"
+    dpr = DocumentPreReview(path_)
+    scrutinize_dict = dpr.get_table()