# -*- coding: utf-8 -*-
# @Author: privacy
# @Date: 2023-12-25 10:19:57
# @Last Modified by: privacy
# @Last Modified time: 2024-02-19 11:43:01
"""URL parsing helpers used to turn spreadsheet URL columns into structured fields."""
from typing import Dict, List, Optional, Tuple
from urllib import parse

import pandas as pd

# Not referenced by any live code in this file; kept for backward compatibility.
all_keys = []


def _first_query_value(url: str, key: str) -> Optional[str]:
    """Return the first value of query-string parameter ``key`` in ``url``.

    Returns ``None`` when the parameter is absent. ``parse_qs`` drops
    parameters with blank values by default, so ``?key=`` also yields ``None``.
    """
    values = parse.parse_qs(parse.urlparse(url).query).get(key)
    return values[0] if values else None


def parse_url(url: str) -> Tuple[str, str, str, str, str, Dict[str, List[str]]]:
    """Split ``url`` into its components.

    Returns:
        ``(scheme, netloc, path, params, fragment, query)`` where ``query`` is
        the query string parsed into a dict mapping each name to its list of
        values.
    """
    parts = parse.urlparse(url)
    return (
        parts.scheme,
        parts.netloc,
        parts.path,
        parts.params,
        parts.fragment,
        parse.parse_qs(parts.query),
    )


def parse_path(url: str) -> str:
    """Return the path component of ``url``."""
    return parse.urlparse(url).path


def parse_appCode(url: str) -> Optional[str]:
    """Return the first ``appCode`` query parameter of ``url``, or ``None``."""
    return _first_query_value(url, 'appCode')


def parse_iframeUrl(url: str) -> Optional[Tuple[Optional[str], Optional[str]]]:
    """Extract the embedded-page path and origin from the fragment of ``url``.

    The fragment is parsed as a query string; its ``url`` parameter is itself
    parsed as a URL to obtain a path, and its ``from`` parameter is returned
    verbatim.

    Returns:
        ``(path, from)`` with ``None`` standing in for whichever element is
        missing or empty, or plain ``None`` when neither is available.
    """
    # Parse the fragment once; both parameters come from the same mapping.
    fragment_params = parse.parse_qs(parse.urlparse(url).fragment)
    embedded_urls = fragment_params.get('url')
    origins = fragment_params.get('from')

    path = parse.urlparse(embedded_urls[0]).path if embedded_urls else ''
    origin = origins[0] if origins else None

    if not path and origin is None:
        return None
    # Report ``from`` even when the embedded URL is absent or has no path, so
    # the value is never silently dropped.
    return (path or None), origin


def parse_appcontext(url: str) -> Optional[str]:
    """Return the first ``appcontext`` query parameter of ``url``, or ``None``."""
    return _first_query_value(url, 'appcontext')


def parse_Edit(url: str) -> Optional[str]:
    """Return the first ``isEdit`` query parameter of ``url``, or ``None``."""
    return _first_query_value(url, 'isEdit')


def parse_Query(url: str) -> Optional[str]:
    """Return the first ``isQuery`` query parameter of ``url``, or ``None``."""
    return _first_query_value(url, 'isQuery')


def parse_editFlag(url: str) -> Optional[str]:
    """Return the first ``editFlag`` query parameter of ``url``, or ``None``."""
    return _first_query_value(url, 'editFlag')


# The string literals below hold disabled one-off conversion jobs (Excel sheet
# in, Excel/JSON out). They are never executed; they are kept as a record of
# how the helpers above were applied.
"""
# df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="基建管理应用")
# df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="数字供应链")
# df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="合同管理")
# df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="安全生产")
# df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="创新管理")
# df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="基建智慧工程")
# df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="并网服务管理")
# df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="基础应用")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="计划预算管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="成本管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="资金管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="核算管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="报账管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="工程财务管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="资产价值管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="物资财务管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="价格管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="税务管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="会计档案")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="共享服务")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="报表管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="综合管理")
# df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="基础应用")
df['tag'] = df['Unnamed: 1'].apply(lambda x: x.split()[2])
df['url'] = df['Unnamed: 1'].apply(lambda x: x.split()[1])
del df['Unnamed: 0']
del df['Unnamed: 1']
df.drop(df[df['tag'] == 'undefined'].index, inplace=True)
df.to_excel("temp.xlsx", sheet_name='Sheet1')
"""

# df['tag'] = df['url'].apply(lambda x: x.split("/")[-1].split("=")[-1])

"""
df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="资产域")
del df['域']
df['domain'] = '资产域'
df['path'] = df['url'].apply(lambda x: parse_path(x))
df['appCode'] = df['url'].apply(lambda x: parse_appCode(x))
del df['url']
print(df)
df.to_json('资产域.json', orient='records', lines=True, force_ascii=False)
"""

"""
df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="财务域")
df['domain'] = '财务域'
df['path'] = df['url'].apply(lambda x: parse_path(x))
df['appCode'] = df['url'].apply(lambda x: parse_appCode(x))
del df['url']
print(df)
df.to_json('财务域.json', orient='records', lines=True, force_ascii=False)
"""

"""
df = pd.read_excel("营销域关联字段查询表.xlsx", sheet_name="营销域")
df['一级标题'] = df['一级标题'].apply(lambda x: x.strip('\''))
df['二级标题'] = df['二级标题'].apply(lambda x: x.strip('\''))
df['三级标题'] = df['三级标题'].apply(lambda x: x.strip('\''))
df['四级标题'] = df['四级标题'].apply(lambda x: x.strip('\'') if isinstance(x, str) else None)
df['url'] = df['url'].apply(lambda x: x.strip('\''))
df['domain'] = '营销域'
df['path'] = df['url'].apply(lambda x: parse_path(x))
df['appcontext'] = df['url'].apply(lambda x: parse_appcontext(x))
df['isEdit'] = df['url'].apply(lambda x: parse_Edit(x))
df['editFlag'] = df['url'].apply(lambda x: parse_editFlag(x))
df['isQuery'] = df['url'].apply(lambda x: parse_Query(x))
print(df)
df.to_json('营销域.json', orient='records', lines=True, force_ascii=False)
"""

"""
df = pd.read_excel("人资域关联字段查询表.xlsx", sheet_name="Sheet1")
df['domain'] = '人资域'
df['netloc'] = '10.10.21.23'
df['path'] = df['url'].apply(lambda x: parse_path(x))
df['appCode'] = df['url'].apply(lambda x: parse_appCode(x))
df['iframe'] = df['url'].apply(lambda x: parse_iframeUrl(x))
del df['url']
print(df)
df.to_json('人资域.json', orient='records', lines=True, force_ascii=False)
"""