1 anno fa · 048c038cae
--- a/document_.py
+++ b/document_.py
@@ -1,7 +1,5 @@
 
				 '''
			
 
				 招投标文件预审查
			
 
				-
			
 
				-1. 解析bidding_document_extract中all_tables.json结果
			
 
				 '''
			
 
				 from tools import BaseMethods
			
 
				 from pprint import pprint
			
@@ -32,7 +30,7 @@ class DocumentPreReview():
 
				         self.bidding_context = self.get_bidding_context()
			
 
				         self.chinese_num_map = chinese_num_map
			
 
				 
			
 
				-    def get_contexts(self, file_path:str = 'code/bidding_document_extract/contexts.json'):
			
 
				+    def get_contexts(self, file_path:str = 'data/预审查数据/contexts.json'):
			
 
				         ''' get contexts by page
			
 
				         '''
			
 
				         contexts = self.bm.json_read(file_path)
			
@@ -41,27 +39,39 @@ class DocumentPreReview():
 
				     def get_bidding_table(self):
			
 
				         ''' get table data
			
 
				         '''
			
 
				-        file_path = "code/bidding_document_extract/all_tables.json"
			
 
				-        # file_path = "code/bidding_document_extract/all_tables_三峡左右岸.json"
			
 
				+        file_path = "data/预审查数据/Bidding_tables_2022-2025年度三峡电站9台机组检修密封加工制作重新招标招标文件印刷版.json"
			
 
				         all_tables = self.bm.json_read(file_path)
			
 
				         return all_tables
			
 
				     
			
 
				     def get_bidding_context(self):
			
 
				         ''' read json to get context
			
 
				         '''
			
 
				-        file_path = "code/bidding_document_extract/基于物联网技术的三峡坝区智慧仓储研究与建设招标文件-发出.json"
			
 
				+        file_path = "data/预审查数据/基于物联网技术的三峡坝区智慧仓储研究与建设招标文件-发出.json"
			
 
				         bidding_context = self.bm.json_read(file_path)
			
 
				         return bidding_context
			
 
				+    
			
 
				+    def _scrutinize_judge(self, tag:str):
			
 
				+        ''' Clause number content judgment 
			
 
				+            商务 技术 报价 评审 评分 标准
			
 
				+        '''
			
 
				+        scrutinize_tuple = ("商务","技术","报价","评审","评分","标准")
			
 
				+        hit_num = 0
			
 
				+        for scru in scrutinize_tuple:
			
 
				+            if scru in tag: hit_num+= 1
			
 
				+        if hit_num>=3: return True
			
 
				+        else: return False
			
 
				 
			
 
				     def get_table(self):
			
 
				-        ''' get table to json
			
 
				+        ''' parse the tables.json file to get the table data from it.
			
 
				         '''
			
 
				         all_tables = self.bidding_tables
			
 
				 
			
 
				+        # 招标文件内容中预审查
			
 
				         tag_sign = ''
			
 
				         tag_list = ("形式评审标准", "资格评审标准", "响应性评审标准")
			
 
				         tag_dict = dict([(tag,[]) for tag in tag_list])
			
 
				         
			
 
				+        # 招标文件内容中清标表格数据
			
 
				         scrutinize_tuple = ("商务部分评分标准","技术部分评审标准","投标报价评审标准","报价部分评审标准","报价评分标准")
			
 
				         scrutinize_dict = dict([(scrutinize,[]) for scrutinize in scrutinize_tuple])
			
 
				         scrutinize_page = 0
			
@@ -80,8 +90,11 @@ class DocumentPreReview():
 
				                 record_page = page_number[0]
			
 
				             if page_number[0] < record_page + 3: 
			
 
				                 for table in tables[1:]:
			
 
				-                    if table[0] and table[0] not in bidder_know: bidder_know[table[0]] = []
			
 
				-                    if table[0]: bidder_know[table[0]].append({"条款名称":table[1],"编列内容":table[2]})
			
 
				+                    try:
			
 
				+                        if table[0] and table[0] not in bidder_know: bidder_know[table[0]] = []
			
 
				+                        if table[0]: bidder_know[table[0]].append({"条款名称":table[1],"编列内容":table[2]})
			
 
				+                    except:
			
 
				+                        print()
			
 
				                 
			
 
				             if '评标方法' in table_name:
			
 
				                 table_name = table_name.strip().replace("\n","")
			
@@ -96,7 +109,7 @@ class DocumentPreReview():
 
				                 for table in tables[1:]:
			
 
				                     tag = table[regulation_number_index+1]
			
 
				                     if tag: tag = tag.strip().replace("\n","")
			
 
				-                    if tag and (tag in tag_list):
			
 
				+                    if tag and self._scrutinize_judge(tag):
			
 
				                         tag_sign = tag
			
 
				                     evaluation_factor,evaluation_criteria = table[evaluation_factor_index],table[evaluation_criteria_index]
			
 
				                     if tag_sign in tag_dict: 
			
@@ -125,8 +138,9 @@ class DocumentPreReview():
 
				                         if tag: 
			
 
				                             tag = tag.strip().replace("\n","")
			
 
				                             tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
			
 
				-                        if tag and (tag in scrutinize_tuple):
			
 
				+                        if tag and self._scrutinize_judge(tag):
			
 
				                             tag_sign = tag
			
 
				+                            if tag_sign not in scrutinize_dict: scrutinize_dict[tag_sign] = []
			
 
				                         evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
			
 
				                         if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""),"评分标准":evaluation_criteria.strip().replace("\n","")}
			
 
				                         else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
			
@@ -149,8 +163,9 @@ class DocumentPreReview():
 
				                     if tag: 
			
 
				                         tag = tag.strip().replace("\n","")
			
 
				                         tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
			
 
				-                    if tag and (tag in scrutinize_tuple):
			
 
				+                    if tag and self._scrutinize_judge(tag):
			
 
				                         tag_sign = tag
			
 
				+                        if tag_sign not in scrutinize_dict: scrutinize_dict[tag_sign] = []
			
 
				                     evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
			
 
				                     if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
			
 
				                     else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
			
@@ -170,23 +185,21 @@ class DocumentPreReview():
 
				                     if tag: 
			
 
				                         tag = tag.strip().replace("\n","")
			
 
				                         tag = re.findall("[\u4e00-\u9fff]+", tag)[0]
			
 
				-                    if tag and (tag in scrutinize_tuple):
			
 
				+                    if tag and self._scrutinize_judge(tag):
			
 
				                         tag_sign = tag
			
 
				+                        if tag_sign not in scrutinize_dict: scrutinize_dict[tag_sign] = []
			
 
				                     evaluation_factor,evaluation_criteria,weights = table[evaluation_factor_index],table[evaluation_criteria_index],table[weights_index]
			
 
				-                    try:
			
 
				-                        if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
			
 
				-                        else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
			
 
				-                                    "评分标准":evaluation_criteria.strip().replace("\n",""),
			
 
				-                                    "权重":weights.strip().replace("\n","")}
			
 
				-                    except:
			
 
				-                        print()
			
 
				+                    if not weights: value = {"评分因素":evaluation_factor.strip().replace("\n",""), "评分标准":evaluation_criteria.strip().replace("\n","")}
			
 
				+                    else: value = {"评分因素":evaluation_factor.strip().replace("\n",""),
			
 
				+                                "评分标准":evaluation_criteria.strip().replace("\n",""),
			
 
				+                                "权重":weights.strip().replace("\n","")}
			
 
				                     scrutinize_dict[tag_sign].append(value)
			
 
				                     if '报价' in tag_sign and '标准' in tag_sign:
			
 
				                         scrutinize_dict = {key: value for key, value in scrutinize_dict.items() if value}
			
 
				                         break
			
 
				 
			
 
				         # pprint(tag_dict)
			
 
				-        # pprint(scrutinize_dict)
			
 
				+        pprint(scrutinize_dict)
			
 
				         # pprint(bidder_know)
			
 
				         return tag_dict,bidder_know,scrutinize_dict
			
 
				     
			
@@ -260,10 +273,7 @@ class DocumentPreReview():
 
				                     if text == title and format_index == -1:
			
 
				                         format_index = self.bidding_context.index(context)
			
 
				                         break
			
 
				-                    '''
			
 
				-                    不对比目录，只对比内容，只要存在即认定符合要求
			
 
				-                    '''
			
 
				-
			
 
				+                    
			
 
				                 title_index = title_list.index(title)
			
 
				                 if title_index != len(title_list)-1:
			
 
				                     title_next = title_list[title_index+1]
			
@@ -273,8 +283,8 @@ class DocumentPreReview():
 
				                     text = context['text'].strip().replace(" ","")
			
 
				                     if title_next and title_next == text:
			
 
				                         break
			
 
				-                    file_format[title].append(context)
			
 
				-                file_format   # 需要优化提取的内容
			
 
				+                    file_format[title].append(text)
			
 
				+                pprint(file_format)   # 需要优化提取的内容
			
 
				                 '''
			
 
				                 招标文件 file_format 与投标文件内容对比，投标文件中只要存在file_format内容即可
			
 
				                 '''