#coding:utf-8 """ paths = './采购立项/QZCTG_04.04.V3-2019_科研项目标准招标文件(试行).docx' # paths = './采购立项/副本水轮发电机组并网监测及同期过程状态评价系统研制采购技术规范(102-CYPC-102-004329-2024-1-A)-1 (3).docx' paths = './采购立项/结构化-服务类竞争性谈判文件(标准版)(试行)-采购公告-综合评分法.docx' csv = '数据.xlsx' from docx import Document # 加载Word文档 doc = Document(paths) beg_pag = '' # 读取并打印每段的内容 for para in doc.paragraphs[:200]: # if '_' in para.text: # print(para.text) beg = '' end = '' flag = 0 index = 0 for run in para.runs: index = index + 1 # 打印文本以及其格式 #print(run.text, run.font.name, run.font.size, run.underline) if run.underline: print('run',run.text) flag = 1 continue if flag == 1: if index <=2 and beg == 0: print(run.text, '*****', beg_pag) else: print(run.text, '----', beg) flag = 0 beg = run.text # # 如果run包含域 # if run.fields: # for field in run.fields: # print(f"域名称: {field.name}, 文本: {field.text}") if flag == 1: print('----', beg) beg_pag = para.text """ import re import Levenshtein import requests import json import re import Levenshtein import json aaa = """|序号|项目编码|项目名称|项目特征|单位|工程量|全费用综合单价|合价|备注| |1 |0116050020 01 |踢脚线拆除|1、瓷砖踢脚线,高度15-20cm;2、含砂浆凿除,凿除至墙面基层。|m2 |2000 |14.2 |28400 || |2 |0116050010 01 |地砖及砂浆层凿除|1、施工部位:非设备区域;2、与非施工区域地面交界处做好地砖保护,破坏地砖进行恢复。3 、地砖及砂浆层凿除,凿除至混凝土基层,厚度约5-8cm,以现场实际为准。4、凿除设备和施工方法需考虑降尘,以及震动对电站设备的影响。|m2 |20000 |14.2 |284000 || |3 |0116050010 02 |地砖凿除|1、施工部位:设备区域及周边;2、仅凿除地砖,厚度为1-2cm,以现场实际为准。|m2 |1000 |14.2 |14200 || |4 |0101030020 01 |垃圾清理外运|1、垃圾类型:凿除的地砖、踢脚线及砂浆层、地面铣刨层;2、建筑垃圾场内转运及外运运距由承包方自行考虑,外运地点需满足环保要求,并告知发包方;3、含垃圾消纳费。|m3 |1743 |113.36 |197586.48 || |5 |0116020020 5 01 |基面铣刨|1、地面整体铣刨,深度≥3mm;2、使用无尘设备,考虑震动对电气设备的影响。|m2 |20000 |8.18 |163600 || |6 |0111020030 01 |预制无机磨石施工|1、成品预制无机磨石,颜色与无机磨石整体施工相一致,厚度大于等于20mm;2、预制磨石铺贴,2cm干混地面砂浆DS M20;3、不低于西卡、绰琪、迈拓品牌。具体详见技术方案,材料垂直运输及二次转运由承包人自行考虑|m2 |1000 |621.57 |624570 || |7 |0115020010 01 |护边角钢|1、部位:地坪收边;2、304哑光不锈钢角钢;3、不锈钢角钢规格为50×50×3mm;具体详见图纸及技术方案,材料垂直运输及二次转运由承包人自行考|m |2000 |171.13 |342260|| ||||虑|||||| |8 |0111010010 01 
|抗裂砂浆垫层|1、部位:不具备钢纤维混凝土运输条件的部位;2、成品抗裂砂浆垫层(非现场搅拌砂浆后添加外加剂),具备低收缩早强快干性能;3、成品焊接钢筋网片直径8mm、间距150mm×150mm;4、厚度约5cm,以实际情况为准,成品品牌与无机磨石一致。具体详见技术方案,材料垂直运输及二次转运由承包人自行考虑|m2 |7000 |150.42 |1052940 || |9 |0111010030 01 |C25混凝土|1、C25细石商品混凝土垫层,厚度5-8cm,以现场实际为准;2、覆膜养护具体详见图纸及技术方案,材 料垂直运输及二次转运由承包人自行考虑|m2 |14000 |68.6 |960400 || |10 |01B004 |钢纤维|1、钢纤维于拌合站添加至混凝土内,掺量为每立方米混凝土40-50kg 钢纤维;2、抗拉强度不低于380MPa;具体详见技术方案,材料垂直运输及二次转运由承包人自行考虑|t |50 |6780 |339000 || |11 |0111010020 01 |无机磨石|1、无机磨石地坪,厚度≥10mm,骨料粒径6-8mm,骨料以石英石为主;2、防污抗渗罩面。3、铝镁分隔 条(包含墙脚分隔条);4、无机磨石体系尺寸变化率不大于0.05%,莫氏硬度6级及以上,抗折强度不小于6Mpa,抗压强度不小于30Mpa,无机磨石表面平整度2 米靠尺不大于2mm。5、不低于绰琪鑫磨石、迈拓FREAZZO自然岩采系列、西卡Sikafloor 55 Terrazzo CM系列品牌 。具体详见技术方案,材料垂直运输及二次转运由承包人自行考虑|m2 |20000 |519.77 |10395400 || |12 |0111050060 01 |不锈钢踢脚线|1、施工部位:电站厂房;2、成品哑光拉丝304不锈钢踢脚线,面板整体厚度≥1mm,高度15cm;3、专用卡扣固定,每米不少于两个;4、含成品不锈钢阴阳角线及封头等配件。具体详见技术方案,材料垂直运输及二次转运由承包人自行 考虑|m |10000 |105.73 |1057300 || |13 |0112030010 01 |墙面零星抹灰|1、部位:原踢脚线拆除后的墙面;2、1.20厚干混抹灰砂浆DP M10抹灰具体详见技术方案,材料垂 直运输及二次转运由承包人自行考虑|m2 |2000 |47.96 |95920 || |14 |0114060010 01 |墙面乳胶漆恢复|1、部位:原踢脚线拆除后的墙面;2、墙面修补,抗碱底漆二遍、内墙腻子二遍、面漆二遍。3、不低于多乐士、立邦、华润等产品性能参数。具体详见技术方案,材料垂直运输及二次转运由承包人自行考虑|m2 |500 |22.3 |11150 || |15 |0402050060 15 01 |定置管理划线(环氧地坪漆)|1. 部位:设备周边、通道周边等其他地面;2. 
划线宽度100mm,底漆二遍、中漆一遍、面漆二遍;3.环氧地坪漆不低于巴斯夫、马贝、西卡等产品性能参数;具体详见图纸及技术方案,材料垂直运输及二次转运由承包人自行考虑|㎡|400 |75.8 |30320 || |16 |0109040040 01 |成品地面结构缝装置|1、施工部位:地坪结构缝;2、成品承重型不锈钢结构缝装置,宽度15cm;3、盖板材质:铝合金或者不锈钢面板厚度不小于3mm;4、不锈钢基座,不锈钢滑杆@500,M8金属膨胀螺栓@300(交错排列),3mm不锈钢止水带,阻火带 、填缝胶具体详见技术方案,材料垂直运输及二次转运由承包人自行考虑|m |1000 |178 |178000 || |17 |01B002 |设备防护措施|设备防护措施1、部分施工区域有电气盘柜,施工过程包含防尘、防火、防水、防碰撞、防飞溅、防污染、 防较大震动等防护;2、地面凿除及地坪施工时,对电气设备使用彩钢板、油布等材料做好防护。3、防护方案由承包人报发包方审批。具体详见技术方案,材料垂直运输及二次转运由承包人自行考虑|m |5000 |14.1 |70500 || |||合计(其中,安全文明施工费157500元。不含垃圾清运费用)|合计(其中,安全文明施工费157500元。不含垃圾清运费用)|合计( 其中,安全文明施工费157500元。不含垃圾清运费用)|合计(其中,安全文明施工费157500元。不含垃圾清运费用)|合计(其中,安全文明施工费157500元。不含垃圾清运费用)|||""" aaa2 = """|序号|项目名称|含税报价(元)|其中|其中|其中| |序号|项目名称|含税报价(元)|增值税税率(%)|增值税金额(元)|安全文明施工费(元)| |1 |三峡左岸电站及地下电站地坪整治|15845546.48 |9% |1426099.18 |157500 |""" aaa3 = """|序号|项目名称|服务内容|单位|数量|税率|不含税价格|含税合价|备注|\n|1|建设基础调研|(1)针对国内外物联网技术的应用现状及发展方向进行调研分析;(2)针对公司核心业务现状进行调研分析;(3)针对公司物联网技术应用现状进行调研分析;|项|1|6%|291000|308460|详见表2:人工费分项报价表|\n|2|设计需求分析|(1)面向长江电力及所属单位(公司)用户开展业务需求调研分析;(2)针对物联网及物联管理平台的应用等开展需求分析。|项|1|6%|255000|270300|详见表2:人工费分项报价表|\n|3|物联网技术应用路线设计|结合调研及需求分析情况,明确公司物联网的建设技术路线,完成物联网技术的技术路线、应用实施、安全管理、运维保障等内容的设计,形成设计方案。|项|1|6%|425000|450500|详见表2:人工费分项报价表|\n|4|物联管理平台详细设计|根据物联网技术应用路线设计,开展长江电力物联管理平台详细设计,包括管理平台系统架构、功能架构、部署方式以及相应接口、数据、模型、网络安全等内容,形成相应的技术标准及详细设计方案,满足长江电力物联网技术应用需求。|项|1|6%|510000|540600|详见表2:人工费分项报价表|\n|5|典型应用场景物联网建设设计|针对典型应用场景,基于网络基础设施现状,对前端物联感知设备选型、安装、网络通信链路的建设、后端应用系统的开发、部署以及后续运维等方面提出可操作的实施方案,并提供相应的项目预算及相关计算方法|项|1|6%|595000|630700|详见表2:人工费分项报价表|\n|||与依据。|||||||\n|6|组织专家评审|组织方案评审专题会议,邀请至少5名公司外部专家(职称要求至少为大学教授或者相关企业高工)参加方案评审,并提交评审报告。|项|1|6%|100000|106000||\n|7|设计成果落地支持服务|(1)为后续项目的采购、实施以及关键节点验收等提供技术支持;(2)根据后续建设项目具体实施情况,完善建设规范及设计方案等。|||6%|680000|720800|详见表2:人工费分项报价表|\n|合计|合计|合计||||2856000|3027360||""" 
aaa4="""|等|项目名称|单位|数量|单价(元)|合价(元)|备注|\n|一|工作经费||||24430700||\n|1|劳务费||||18400000||\n|1.1|教授级高工|人日|360|1500|540000|2人×180日|\n|1.2|高级工程师|人日|3000|1200|3600000|10人×300日|\n|1.3|工程师|人日|7500|1000|7500000|15人×500日|\n|1.4|助理工程师|人日|9000|800|7200000|12人×750日|\n|2|咨询费|项|1|364000|364000|支付外部专家咨询费|\n|3|会议费|项|1|1553100|1553100|召开技术讨论、咨询会|\n|4|差旅费|项|1|2853600|2853600|往返交通费、住宿费、出差补贴|\n|5|出版/文献/信息传播/知识产权事务费|项|1|1260000|1260000|论文版面费、资料费等|\n|二|管理费|项|1|1492242|1492242|费率:6%|\n|三|利润|项|1|||费率:%|\n|四|税费|项|1|1581777|1581777|费率:6%|\n|五|合计|元|||27944719|一+二+三+四|"""


def _split_table_rows(table_text):
    """Split pipe-delimited table text into a list of cell lists.

    Every line is expected to look like ``|a|b|c|``: the first and last
    character of the line are dropped and the remainder is split on ``|``.
    """
    return [line[1:-1].split('|') for line in table_text.split('\n')]


aaa_list = _split_table_rows(aaa)
aaa2_list = _split_table_rows(aaa2)
aaa3_list = _split_table_rows(aaa3)
aaa4_list = _split_table_rows(aaa4)


def table_titer(aaa2_list):
    """Collapse the leading header rows of a table into a single header row.

    A leading row counts as a header row while it contains a cell equal to
    '序号', contains '名称' anywhere in its ``str()`` form, or starts with a
    ``None`` cell.  For every column the texts of the header rows are
    concatenated top to bottom; a cell that is ``None`` or repeats the cell
    directly above it is skipped.  The merged texts are written in place into
    the last header row.

    Args:
        aaa2_list: table as a list of rows, each row a list of ``str``/``None``
            cells.  The list is modified in place.

    Returns:
        The table starting at the merged header row.  An empty table is
        returned unchanged.
    """
    if not aaa2_list:
        return aaa2_list

    # Index of the first row that does not look like a header.  When every
    # row looks like a header the last row is treated as the first data row.
    beg_flag = 0
    for i, row in enumerate(aaa2_list):
        beg_flag = i
        if '序号' in row or '名称' in str(row) or row[:1] == [None]:
            continue
        break

    # Row 0 always serves as the header.  Without this clamp a table whose
    # first row is not header-like produced index -1 below, which overwrote
    # the LAST row with row 0 and returned only that row.
    beg_flag = max(beg_flag, 1)
    header_index = beg_flag - 1

    for j in range(len(aaa2_list[0])):
        word_name = aaa2_list[0][j] if aaa2_list[0][j] is not None else ''
        for i in range(1, beg_flag):
            cell = aaa2_list[i][j]
            if cell is None or cell == aaa2_list[i - 1][j]:
                continue
            word_name = word_name + cell
        aaa2_list[header_index][j] = word_name
    return aaa2_list[header_index:]


pdf_aaa2 = [
    ['序号','项目名称','含税报价(元)','其中',None,None],
    [None,None,None,'增值税税率(%)','增值税金额(元)','安全文明施工费(元)'],
    ['1','三峡左岸电站及地下电站地坪整治','15845546.48','9%','1426099.18','157500'],
]

# Bid clearing: extraction of the quotation tables.
# Itemized quotation table, header texts per field:
#   unit price          全费用综合单价
#   line total          合价
#   unit                单位
#   quantity            数量、工程量、工作量
#   name                项目名称、名称
# Quotation summary table, header texts per field:
#   tax rate            增值税税率
#   total price         含税总价、含税报价
#   VAT amount          增值税金额、增值税额
#   tax-excluded total  不含税合价
# Bid documents: table headers, tables that span pages.


def table_head(aaa_list):
    """Normalize a table: merge header rows, strip cells, join split rows.

    After ``table_titer`` has merged the header rows, every cell is stripped
    (``None`` becomes ``''``).  A row in which more than 63% of the cells are
    empty is treated as a fragment of a neighbouring row: the neighbour's
    cell texts are prepended to the fragment's cells and the neighbour's
    cells are blanked.  The neighbour is the following row when the
    fragment's first cell is a number exactly one greater than the previous
    row's first cell, otherwise the preceding row.  A sparse last row that
    contains '合计' is left alone.

    Args:
        aaa_list: table as a list of rows of ``str``/``None`` cells; modified
            in place.

    Returns:
        The normalized table, starting at the merged header row.

    Side effects:
        Prints every row that received merged content, prefixed with 'xxxx'.
    """
    aaa_list = table_titer(aaa_list)
    row_count = len(aaa_list)
    for i in range(row_count):
        row = aaa_list[i]
        empty_num = 0
        for j in range(len(row)):
            # ``None`` cells previously crashed on ``.strip()``.
            row[j] = (row[j] or '').strip()
            if not row[j]:
                empty_num += 1
        if empty_num <= len(row) * 0.63:
            continue
        if i == row_count - 1 and '合计' in str(row):
            continue

        # Default to the preceding row.  Previously the direction was left
        # unassigned (or stale from an earlier row) when both serial numbers
        # parsed but did not differ by exactly 1.
        flag_x = -1
        try:
            if float(row[0]) - float(aaa_list[i - 1][0]) == 1:
                flag_x = 1
        except (TypeError, ValueError):
            pass

        neighbour_index = i + flag_x
        if not 0 <= neighbour_index < row_count:
            # No such neighbour: do not wrap around to the last row and do
            # not run past the end of the table.
            continue
        neighbour = aaa_list[neighbour_index]
        for j in range(len(row)):
            if j < len(neighbour) and neighbour[j] is not None:
                row[j] = neighbour[j] + row[j]
                neighbour[j] = ''
        print('xxxx', row)
    return aaa_list


print('------------->', table_head(pdf_aaa2))
aaa_list = table_head(aaa_list)
aaa2_list = table_head(aaa2_list)
aaa3_list = table_head(aaa3_list)
aaa4_list = table_head(aaa4_list)

# One entry per line: canonical field name, a space, then the header texts
# (separated by '、') that map to that field.
index_list = """unitPrice 全费用综合单价、单价、单价(元)
totalPrice 合价、含税合价、合价(元)
totalPrice2 不含税价格
unit 单位
quantity 数量、工程量、工作量
proNum 序号
projectName 项目名称、名称
taxRate 增值税税率、税率
taxIncludedTotal 含税总价、含税报价
taxExcludedTotal 不含税合价"""
key_words = {}
for a in index_list.split('\n'):
    aa = a.split(' ')
    key_words[aa[0]] = aa[-1].split('、')


def text_sim(word, black_lists):
    """Map a recognized header text to the best matching canonical field.

    Args:
        word: header text to classify.
        black_lists: mapping of field name to the list of header texts that
            are accepted for that field.

    Returns:
        The first field (in mapping order) that has an alias equal to
        ``word`` or with ``Levenshtein.ratio`` above 0.9.  Otherwise the
        field with the highest ratio if that ratio exceeds 0.6 (the earliest
        field wins ties).  ``''`` when nothing qualifies, including when
        ``black_lists`` holds no aliases at all.
    """
    best_scores = {}
    for word_key, aliases in black_lists.items():
        for alias in aliases:
            if alias == word:
                return word_key
            sim_score = Levenshtein.ratio(alias, word)
            if sim_score > 0.9:
                return word_key
            if word_key not in best_scores or sim_score > best_scores[word_key]:
                best_scores[word_key] = sim_score
    if not best_scores:
        # Previously raised IndexError when there was nothing to rank.
        return ''
    best_key = max(best_scores, key=best_scores.get)
    return best_key if best_scores[best_key] > 0.6 else ''


money_json_list = []
taxRate = ''
taxExcludedTotal = ''
taxIncludedTotal = ''
index_list_json = {}

# Quotation summary table: replace every recognized header cell with its
# canonical field name and read the totals from the first data row.
for i in range(len(aaa2_list[0])):
    word_name = text_sim(aaa2_list[0][i], key_words)
    if not word_name:
        continue
    aaa2_list[0][i] = word_name
    if word_name == 'taxRate' and '%' in aaa2_list[1][i]:
        taxRate = aaa2_list[1][i]
    if word_name == 'taxIncludedTotal':
        taxIncludedTotal = aaa2_list[1][i]
    if word_name == 'taxExcludedTotal':
        taxExcludedTotal = aaa2_list[1][i]


def get_money_data(aaa_list, key_words, taxRate):
    """Convert an itemized quotation / bill-of-quantities table to records.

    Args:
        aaa_list: normalized table whose first row is the header.  The header
            is modified in place: its first cell is set to '序号' when no
            header cell equals '序号', and every recognized header cell gets
            ``',' + field name`` appended.
        key_words: mapping of field name to accepted header texts, passed to
            ``text_sim``.
        taxRate: value stored under 'taxRate' in every record unless the
            table has a column recognized as 'taxRate'.

    Returns:
        One dict per data row, in row order, skipping rows in which more than
        90% of the cells are empty.  When several columns map to the same
        field the right-most one wins.  Afterwards an empty 'unit' becomes
        '项', an empty 'quantity' becomes '1', and an empty 'unitPrice' is
        copied from 'totalPrice' when the quantity is '1'.  Returns ``[]``
        when the table or its header is empty or no header cell is
        recognized.
    """
    if not aaa_list or not aaa_list[0]:
        return []

    header = aaa_list[0]
    if '序号' not in header:
        header[0] = '序号'

    # (column index, field name) for every recognized header cell.
    columns = []
    for i in range(len(header)):
        word_name = text_sim(header[i], key_words)
        if word_name:
            header[i] = header[i] + ',' + word_name
            columns.append((i, word_name))
    if not columns:
        return []

    money_json_list = []
    for row in aaa_list[1:]:
        empty_num = sum(1 for cell in row if not cell)
        if empty_num > len(row) * 0.9:
            # Blanked-out fragment left behind by the row merge.
            continue
        record = {
            "proNum": "",
            "projectName": "",
            "unit": "",
            "quantity": "",
            "taxRate": taxRate,
            "unitPrice": "",
            "totalPrice": "",
            "totalPrice2": "",
        }
        for i, word_name in columns:
            # A row shorter than the header contributes an empty value.
            record[word_name] = row[i] if i < len(row) else ''
        money_json_list.append(record)

    for record in money_json_list:
        if not record['unit']:
            record['unit'] = '项'
        if not record['quantity']:
            record['quantity'] = '1'
        if not record['unitPrice'] and record['quantity'] == '1':
            record['unitPrice'] = record['totalPrice']
    return money_json_list


# The first call's result is replaced immediately; it is kept because it
# annotates the header row of aaa_list in place.
money_data = get_money_data(aaa_list, key_words, taxRate)
money_data = get_money_data(aaa4_list, key_words, taxRate)
print(money_data)

# A trailing '合计' (total) row supplies the overall totals and is then
# removed from the itemized records.
if money_data and '合计' in money_data[-1]['projectName']:
    if money_data[-1]['totalPrice']:
        taxIncludedTotal = money_data[-1]['totalPrice']
    if money_data[-1]['totalPrice2']:
        taxExcludedTotal = money_data[-1]['totalPrice2']
    money_data = money_data[:-1]
print(money_data)

# Example of the "calQuoteRes" structure assembled below:
# "calQuoteRes": {
#     "avgPrice": "28393.96",
#     "benchmarkPrice": "28393.96",
#     "suppliers": [
#         {
#             "name": "供应商A",
#             "taxIncludedTotal": "32915.00",
#             "taxExcludedTotal": "28829.20",
#             "deviationRate": "0.01533",
#             "score": "90.333",
#             "finalScore": "28.00"
#         }]

company_name = '供应商名称A'

# Empty template; it remains the value of quote_res_json when there are no
# itemized records.
quote_res_json = {
    "proNum": "",
    "projectName": "",
    "unit": "",
    "quantity": "",
    "taxRate": "",
    "unitPrice": "",
    "totalPrice": "",
    "totalPrice2": "",
}
quote_res_list = []
for money in money_data:
    quote_res_json = {
        "proNum": money['proNum'],
        "projectName": money['projectName'],
        "unit": money['unit'],
        "quantity": money['quantity'],
        "suppliers": [{
            "name": company_name,
            "taxRate": money['taxRate'],
            "finalQuote": {
                "unitPrice": money['unitPrice'],
                "totalPrice": money['totalPrice'],
            },
            "taxCompareRes": {
                "theoryValue": "",
                "deviation": "",
            },
            "finalExcludeTaxPrice": {
                "unitPrice": money['totalPrice2'],
                "totalPrice": money['totalPrice2'],
            },
        }],
    }
    quote_res_list.append(quote_res_json)

cal_quote_res_json = {"suppliers": [], "avgPrice": "", "benchmarkPrice": ""}
supplier = {
    "name": company_name,
    "taxIncludedTotal": taxIncludedTotal,
    "taxExcludedTotal": taxExcludedTotal,
    "deviationRate": "",
    "score": "",
    "finalScore": "",
}
cal_quote_res_json['suppliers'].append(supplier)
result_money_data = { "calMethod": "当0