# -*- coding: utf-8 -*-
# preprocess.py
# @Author: privacy
# @Date: 2023-12-25 10:19:57
# @Last Modified by: privacy
# @Last Modified time: 2024-01-12 14:26:31
from urllib import parse

import pandas as pd
  8. all_keys = []
  9. def parse_url(url):
  10. query_string = parse.urlparse(url)
  11. return query_string.scheme, query_string.netloc, query_string.path, query_string.params, query_string.fragment, parse.parse_qs(query_string.query)
  12. def parse_path(url):
  13. path_string = parse.urlparse(url).path
  14. return path_string
  15. def parse_appCode(url):
  16. query_string = parse.urlparse(url).query
  17. appCode = parse.parse_qs(query_string).get('appCode')
  18. if appCode:
  19. return appCode[0]
  20. def parse_iframeUrl(url):
  21. fragment_string = parse.urlparse(url).fragment
  22. iframe_url = parse.parse_qs(fragment_string).get('url')
  23. iframe_from = parse.parse_qs(fragment_string).get('from')
  24. if iframe_url:
  25. path = parse.urlparse(iframe_url[0]).path
  26. if path and iframe_from:
  27. return path, iframe_from[0]
  28. elif path:
  29. return path, None
  30. elif iframe_from:
  31. return None, iframe_from[0]
  32. def parse_appcontext(url):
  33. query_string = parse.urlparse(url).query
  34. appcontext = parse.parse_qs(query_string).get('appcontext')
  35. if appcontext:
  36. return appcontext[0]
  37. def parse_Edit(url):
  38. query_string = parse.urlparse(url).query
  39. isEdit = parse.parse_qs(query_string).get('isEdit')
  40. if isEdit:
  41. return isEdit[0]
  42. def parse_Query(url):
  43. query_string = parse.urlparse(url).query
  44. isQuery = parse.parse_qs(query_string).get('isQuery')
  45. if isQuery:
  46. return isQuery[0]
  47. def parse_editFlag(url):
  48. query_string = parse.urlparse(url).query
  49. editFlag = parse.parse_qs(query_string).get('editFlag')
  50. if editFlag:
  51. return editFlag[0]
  52. """
  53. # df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="基建管理应用")
  54. # df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="数字供应链")
  55. # df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="合同管理")
  56. # df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="安全生产")
  57. # df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="创新管理")
  58. # df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="基建智慧工程")
  59. # df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="并网服务管理")
  60. # df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="基础应用")
  61. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="计划预算管理")
  62. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="成本管理")
  63. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="资金管理")
  64. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="核算管理")
  65. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="报账管理")
  66. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="工程财务管理")
  67. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="资产价值管理")
  68. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="物资财务管理")
  69. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="价格管理")
  70. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="税务管理")
  71. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="会计档案")
  72. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="共享服务")
  73. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="报表管理")
  74. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="综合管理")
  75. # df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="基础应用")
  76. df['tag'] = df['Unnamed: 1'].apply(lambda x: x.split()[2])
  77. df['url'] = df['Unnamed: 1'].apply(lambda x: x.split()[1])
  78. del df['Unnamed: 0']
  79. del df['Unnamed: 1']
  80. df.drop(df[df['tag'] == 'undefined'].index, inplace=True)
  81. df.to_excel("temp.xlsx", sheet_name='Sheet1')
  82. """
  83. # df['tag'] = df['url'].apply(lambda x: x.split("/")[-1].split("=")[-1])
  84. """
  85. df = pd.read_excel("资产域关联字段查询表.xlsx", sheet_name="资产域")
  86. del df['域']
  87. df['domain'] = '资产域'
  88. df['path'] = df['url'].apply(lambda x: parse_path(x))
  89. df['appCode'] = df['url'].apply(lambda x: parse_appCode(x))
  90. del df['url']
  91. print(df)
  92. df.to_json('资产域.json', orient='records', lines=True, force_ascii=False)
  93. """
  94. """
  95. df = pd.read_excel("财务域关联字段查询表.xlsx", sheet_name="财务域")
  96. df['domain'] = '财务域'
  97. df['path'] = df['url'].apply(lambda x: parse_path(x))
  98. df['appCode'] = df['url'].apply(lambda x: parse_appCode(x))
  99. del df['url']
  100. print(df)
  101. df.to_json('财务域.json', orient='records', lines=True, force_ascii=False)
  102. """
  103. """
  104. df = pd.read_excel("营销域关联字段查询表.xlsx", sheet_name="营销域")
  105. df['一级标题'] = df['一级标题'].apply(lambda x: x.strip('\''))
  106. df['二级标题'] = df['二级标题'].apply(lambda x: x.strip('\''))
  107. df['三级标题'] = df['三级标题'].apply(lambda x: x.strip('\''))
  108. df['四级标题'] = df['四级标题'].apply(lambda x: x.strip('\'') if isinstance(x, str) else None)
  109. df['url'] = df['url'].apply(lambda x: x.strip('\''))
  110. df['domain'] = '营销域'
  111. df['path'] = df['url'].apply(lambda x: parse_path(x))
  112. df['appcontext'] = df['url'].apply(lambda x: parse_appcontext(x))
  113. df['isEdit'] = df['url'].apply(lambda x: parse_Edit(x))
  114. df['editFlag'] = df['url'].apply(lambda x: parse_editFlag(x))
  115. df['isQuery'] = df['url'].apply(lambda x: parse_Query(x))
  116. print(df)
  117. df.to_json('营销域.json', orient='records', lines=True, force_ascii=False)
  118. """
  119. """
  120. df = pd.read_excel("人资域关联字段查询表.xlsx", sheet_name="Sheet1")
  121. df['domain'] = '人资域'
  122. df['netloc'] = '10.10.21.23'
  123. df['path'] = df['url'].apply(lambda x: parse_path(x))
  124. df['appCode'] = df['url'].apply(lambda x: parse_appCode(x))
  125. df['iframe'] = df['url'].apply(lambda x: parse_iframeUrl(x))
  126. del df['url']
  127. print(df)
  128. df.to_json('人资域.json', orient='records', lines=True, force_ascii=False)
  129. """