Sfoglia il codice sorgente

document_.py更新

lfygithub01 1 anno fa
parent
commit
23c4bbacc3
1 file modificato con 76 aggiunte e 124 eliminazioni
  1. 76 124
      document_.py

+ 76 - 124
document_.py

@@ -41,8 +41,8 @@ class DocumentPreReview():
     def get_bidding_table(self):
         ''' get table data
         '''
-        # file_path = "code/bidding_document_extract/all_tables.json"
-        file_path = "code/bidding_document_extract/all_tables_三峡左右岸.json"
+        file_path = "code/bidding_document_extract/all_tables.json"
+        # file_path = "code/bidding_document_extract/all_tables_三峡左右岸.json"
         all_tables = self.bm.json_read(file_path)
         return all_tables
     
@@ -58,6 +58,7 @@ class DocumentPreReview():
         '''
         all_tables = self.bidding_tables
 
+        tag_sign = ''
         tag_list = ("形式评审标准", "资格评审标准", "响应性评审标准")
         tag_dict = dict([(tag,[]) for tag in tag_list])
         
@@ -65,25 +66,25 @@ class DocumentPreReview():
         scrutinize_dict = dict([(scrutinize,[]) for scrutinize in scrutinize_tuple])
         scrutinize_page = 0
         scrutinize_index = 0
+        scrutinize_Initial_position_marker = 0  # 详审位置标记
 
         record_page = 0
         bidder_know = {}   # 投标人须知前附表
         for partial_form in all_tables:
             table_name = partial_form['table_name']
-            confidence = partial_form['confidence']
             page_number = partial_form['page_numbers']
             title_len = partial_form['title_len']
             tables = partial_form["table"]
             
-            if '投标人须知前附表' == table_name:  record_page = page_number[0]
+            if '投标人须知前附表' == table_name:  
+                record_page = page_number[0]
             if page_number[0] < record_page + 3: 
-                
                 for table in tables[1:]:
                     if table[0] and table[0] not in bidder_know: bidder_know[table[0]] = []
                     if table[0]: bidder_know[table[0]].append({"条款名称":table[1],"编列内容":table[2]})
-
-            if ('评标方法' not in table_name) and (confidence != 1): continue
-            table_name = table_name.strip().replace("\n","")
+                
+            if '评标方法' in table_name:
+                table_name = table_name.strip().replace("\n","")
             if table_name == "评标办法前附表":
                 table_page_num = page_number[0]
                 inital_data = tables[0]
@@ -91,10 +92,8 @@ class DocumentPreReview():
                 regulation_number_index = inital_data.index("条款号")
                 evaluation_factor_index = inital_data.index("评审因素")
                 evaluation_criteria_index = inital_data.index("评审标准")
-                tag_sign = ''
+                
                 for table in tables[1:]:
-                    # regulation_number = table[regulation_number_index]
-                    # if "条款号" == regulation_number: break
                     tag = table[regulation_number_index+1]
                     if tag: tag = tag.strip().replace("\n","")
                     if tag and (tag in tag_list):
@@ -103,23 +102,25 @@ class DocumentPreReview():
                     if tag_sign in tag_dict: 
                         tag_dict[tag_sign].append({"评审因素":evaluation_factor.strip().replace("\n",""),
                                                 "评审标准":evaluation_criteria.strip().replace("\n","")})
+                    if '评分因素' in table or '评分标准' in table:
+                        scrutinize_page = table_page_num
+                        scrutinize_Initial_position_marker = 1
+                if not scrutinize_page: scrutinize_page = table_page_num+1
 
+            ''' scrutinize '''
+            if page_number[0] == 35:
+                print()
+            if (scrutinize_page == page_number[0] and scrutinize_Initial_position_marker) or scrutinize_page == page_number[0]:
+                regulation_number_index,evaluation_factor_index,evaluation_criteria_index,weights_index = 0,0,0,0
+                for table in tables:
                     if '评分因素' in table and '评分标准' in table:
-                        scrutinize_page = table_page_num+1
+                        regulation_number_index = table.index("条款号")
+                        evaluation_factor_index = table.index("评分因素")
+                        evaluation_criteria_index = table.index("评分标准")
+                        weights_index = table.index("权重")
+                        tag_sign = ''
                         scrutinize_index = tables.index(table)
-                if not scrutinize_page:
-                    scrutinize_page = table_page_num+1
-
-            ''' scrutinize '''
-            if scrutinize_index:
-                if scrutinize_page-1 == page_number[0]:
-                    table_ = tables[scrutinize_index]
-                    regulation_number_index = table_.index("条款号")
-                    evaluation_factor_index = table_.index("评分因素")
-                    evaluation_criteria_index = table_.index("评分标准")
-                    weights_index = table_.index("权重")
-                    tag_sign = ''
-
+                if scrutinize_index:
                     for table in tables[scrutinize_index+1:]:
                         if table[regulation_number_index+1]: tag = table[regulation_number_index+1]
                         else: tag = table[regulation_number_index+2]
@@ -134,111 +135,62 @@ class DocumentPreReview():
                                         "评分标准":evaluation_criteria.strip().replace("\n",""),
                                         "权重":weights.strip().replace("\n","")}
                         scrutinize_dict[tag_sign].append(value)
-
-                elif scrutinize_page == page_number[0] and title_len == 5:
-                    evaluation_factor_index -= 1
-                    evaluation_criteria_index -= 1
-                    weights_index -= 1
-                    for table in tables:
-                        if not table[2]:
-                            scrutinize_dict[tag_sign][-1]['评分标准'] += table[3]
-                            continue
-                        tag = table[regulation_number_index+1]
-                        if tag: 
-                            tag = tag.strip().replace("\n","")
-                            tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
-                        if tag and (tag in scrutinize_tuple):
-                            tag_sign = tag
-                        evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
-                        if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
-                        else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
-                                        "评分标准":evaluation_criteria.strip().replace("\n",""),
-                                        "权重":weights.strip().replace("\n","")}
-                        scrutinize_dict[tag_sign].append(value)
-                        if '报价' in tag_sign and '评审标准' in tag_sign:
-                            scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
-                            break
-                    print(scrutinize_dict)
-
-                elif scrutinize_page+1 == page_number[0] and title_len != 4:
-                    for table in tables:
-                        if not table[2]:
-                            scrutinize_dict[tag_sign][-1]['评分标准'] += table[3]
-                            continue
-                        tag = table[regulation_number_index+1]
-                        if tag: 
-                            tag = tag.strip().replace("\n","")
-                            tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
-                        if tag and (tag in scrutinize_tuple):
-                            tag_sign = tag
-                        evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
-                        if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
-                        else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
-                                        "评分标准":evaluation_criteria.strip().replace("\n",""),
-                                        "权重":weights.strip().replace("\n","")}
-                        scrutinize_dict[tag_sign].append(value)
                         if '报价' in tag_sign and '评审标准' in tag_sign:
                             scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
                             break
-
-                    print(scrutinize_dict)
-
-            else:
-                if scrutinize_page == page_number[0]:
-                    for table in tables:
-                        if '评分因素' in table and '评分标准' in table:
-                            regulation_number_index = table.index("条款号")
-                            evaluation_factor_index = table.index("评分因素")
-                            evaluation_criteria_index = table.index("评分标准")
-                            weights_index = table.index("权重")
-                            tag_sign = ''
-                            scrutinize_index = tables.index(table)
-                        if not scrutinize_index: continue
-                        for table in tables[scrutinize_index+1:]:
-                            if table[regulation_number_index+1]: tag = table[regulation_number_index+1]
-                            else: tag = table[regulation_number_index+2]
-                            if tag: 
-                                tag = tag.strip().replace("\n","")
-                                tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
-                            if tag and (tag in scrutinize_tuple):
-                                tag_sign = tag
-                            evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
-                            if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
-                            else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
-                                            "评分标准":evaluation_criteria.strip().replace("\n",""),
-                                            "权重":weights.strip().replace("\n","")}
-                            scrutinize_dict[tag_sign].append(value)
-                            if '报价' in tag_sign and '评审标准' in tag_sign:
-                                scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
-                                break
-                        pprint(scrutinize_dict)
-
-                elif scrutinize_page+1 == page_number[0] and title_len != 4:
-                    for table in tables:
-                        if not table[2]:
-                            scrutinize_dict[tag_sign][-1]['评分标准'] += table[3]
-                            continue
-                        tag = table[regulation_number_index+1]
-                        if tag: 
-                            tag = tag.strip().replace("\n","")
-                            tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
-                        if tag and (tag in scrutinize_tuple):
-                            tag_sign = tag
-                        evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
+            elif scrutinize_page+1 == page_number[0] and title_len == 5  and '报价' not in tag_sign:
+                if scrutinize_Initial_position_marker:
+                    evaluation_factor_index -= 1
+                    evaluation_criteria_index -= 1
+                    weights_index -= 1
+                for table in tables:
+                    if not table[2]:
+                        scrutinize_dict[tag_sign][-1]['评分标准'] += table[3]
+                        continue
+                    tag = table[regulation_number_index+1]
+                    if tag: 
+                        tag = tag.strip().replace("\n","")
+                        tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
+                    if tag and (tag in scrutinize_tuple):
+                        tag_sign = tag
+                    evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
+                    if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
+                    else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
+                                    "评分标准":evaluation_criteria.strip().replace("\n",""),
+                                    "权重":weights.strip().replace("\n","")}
+                    scrutinize_dict[tag_sign].append(value)
+                    if '报价' in tag_sign and '评审标准' in tag_sign:
+                        scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
+                        scrutinize_Initial_position_marker = 0
+                        break
+            elif scrutinize_page+2 == page_number[0] and title_len == 5 and '报价' not in tag_sign:
+                for table in tables:
+                    if not table[2]:
+                        scrutinize_dict[tag_sign][-1]['评分标准'] += table[3]
+                        continue
+                    tag = table[regulation_number_index+1]
+                    if tag: 
+                        tag = tag.strip().replace("\n","")
+                        tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
+                    if tag and (tag in scrutinize_tuple):
+                        tag_sign = tag
+                    evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
+                    try:
                         if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
                         else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
-                                        "评分标准":evaluation_criteria.strip().replace("\n",""),
-                                        "权重":weights.strip().replace("\n","")}
-                        scrutinize_dict[tag_sign].append(value)
-                        if '报价' in tag_sign and '评审标准' in tag_sign:
-                            scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
-                            break
-                    pprint(scrutinize_dict)
+                                    "评分标准":evaluation_criteria.strip().replace("\n",""),
+                                    "权重":weights.strip().replace("\n","")}
+                    except:
+                        print()
+                    scrutinize_dict[tag_sign].append(value)
+                    if '报价' in tag_sign and '评审标准' in tag_sign:
+                        scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
+                        break
 
-                    
-        pprint(tag_dict)
-        pprint(bidder_know)
-        return tag_dict,bidder_know
+        # pprint(tag_dict)
+        # pprint(scrutinize_dict)
+        # pprint(bidder_know)
+        return tag_dict,bidder_know,scrutinize_dict
     
     def get_announcement(self)->str:
         ''' bidder announcement