# File: KG数据接入.py (8.1 KB)
  1. # !/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # @Author: sprivacy
  4. # @Date: 2022-05-05 10:18:56
  5. # @Last Modified by: privacy
  6. # @Last Modified time: 2022-10-13 14:23:31
  7. import sys
  8. import time
  9. import json
  10. from hashlib import md5
  11. import requests
  12. from pprint import pprint
  13. import pandas as pd
  # Knowledge-set construction: module-level settings and state shared by
  # post_json() and main().

  # Switch for pushing — presumably a dry-run toggle; TODO confirm intended use.
  willpush = True
  # Sent as the "Cookie" request header on every push.
  # NOTE(review): hard-coded session credential — consider loading it from
  # configuration instead of keeping it in the source.
  cookie = "JSESSIONID=40225388-b817-471b-8fc0-7afb72389712"
  # Prefix of the push endpoint URL.
  base_url = 'http://172.16.128.159:8284/'
  # "@id" values already handed to post_json(); used to avoid duplicate pushes.
  pushed = []
  # Number of spreadsheet rows main() has finished processing.
  pushlen = 0
  def post_json(json_obj, token="1664516531417", timeout=30, resume_after=500):
      """
      Submit one JSON entity to the push endpoint, at most once per "@id".

      The entity's "@id" is appended to the module-level ``pushed`` list the
      first time it is seen; later calls with the same "@id" only print the
      current size of ``pushed`` and return. The id is recorded even when the
      request itself is skipped (see ``resume_after``).

      :param json_obj: entity dict; must contain the keys "@id" and "@type".
      :param token: value sent in the "token" request header.
      :param timeout: seconds passed to ``requests.post`` so that an
          unresponsive server raises instead of blocking forever.
      :param resume_after: no request is sent while the module-level row
          counter ``pushlen`` is less than or equal to this value.
      :return: always an empty string.
      """
      entity_id = json_obj['@id']

      # Already seen: report how many ids are tracked and stop.
      if entity_id in pushed:
          print(len(pushed))
          return ''
      pushed.append(entity_id)

      # Skip the network call for the leading rows, or when pushing is
      # switched off through the module-level ``willpush`` flag.
      if pushlen <= resume_after or not willpush:
          return ""

      response = requests.post(
          base_url + "data/api/access/push",
          json=json_obj,
          headers={"token": token, "Cookie": cookie},
          timeout=timeout,
      )
      print(response.text)
      if not response.ok:
          # Make rejected pushes visible instead of relying on the body text.
          print("push failed: HTTP %s for @id %s" % (response.status_code, entity_id))

      if json_obj["@type"] == "相关机构":
          print(json_obj)

      # NOTE(review): the source's indentation was lost; the pause is assumed
      # to follow every request that was actually sent — confirm.
      time.sleep(0.1)
      return ""
  # One entry per multi-valued "experience" column:
  # (column position, entity "@type", minimum number of "/"-separated parts,
  #  ((payload field, index into the parts), ...)).
  _EXPERIENCE_SPECS = (
      (22, "教育经历demo", 4,
       (("时间", 0), ("学校", 1), ("专业", 2), ("学历", -1))),
      (23, "工作经历demo", 4,
       (("时间", 0), ("公司", 1), ("行业", 2), ("职位", 3), ("工作内容", -1))),
      (24, "项目经历demo", 5,
       (("时间", 0), ("公司", 1), ("项目", 2), ("职位", 3), ("成果", -1))),
      (34, "培训和海外经历demo", 4,
       (("时间", 0), ("培训机构", 1), ("培训名称", 2), ("培训内容", -1))),
  )

  # Single-valued person fields and the column position each one is read from.
  _PERSON_COLUMNS = (
      ("姓名", 1), ("性别", 2), ("出生年月", 3), ("婚姻状况", 4),
      ("特长爱好", 5), ("手机号码", 6), ("电子邮箱", 7), ("当前最高学历", 8),
      ("当前最高学历专业", 9), ("研究领域", 10), ("人才标签", 11),
      ("人才特点", 12), ("参加工作时间", 13), ("政治面貌", 14),
      ("当前所在城市", 15), ("当前行业", 16), ("当前职位", 17),
      ("当前年薪", 18), ("意向城市", 19), ("意向职位", 20), ("意向年薪", 21),
      ("语言能力", 25), ("专业证书", 26), ("技术职称", 27), ("入选人才", 28),
      ("知识产权", 29), ("获得荣誉及证明", 30), ("备注信息", 31),
      ("对报名岗位认识及工作设想", 32), ("自我评价及主要工作业绩", 33),
      ("当前公司", 35), ("毕业院校分类", 36), ("工作年限", 37),
      ("专业方向大类", 38), ("最高学历学校", 39), ("研究领域分类", 40),
      ("报名岗位", 41), ("年龄", 42),
  )

  # Columns converted to text right after loading.
  _TEXT_COLUMNS = ('id', '当前年薪(单位:万)', '意向年薪(单位:万)', '工作年限', '年龄')


  def _md5_hex(text):
      """Return the hexadecimal MD5 digest of *text* encoded as UTF-8."""
      return md5(text.encode(encoding='UTF-8')).hexdigest()


  def _org_entity(name):
      """Build the "相关机构" entity for *name*; the MD5 of the name is its id."""
      digest = _md5_hex(name)
      return {
          "@type": "相关机构",
          "@id": digest,
          "@contentType": "struct",
          "@markdel": "0",
          "id": digest,
          "name": [{"@value": name}],
      }


  def _push_experiences(row, spec, orgs, org_list):
      """Push every well-formed item of one experience column.

      Each whitespace-separated item is split on "/". Items with too few parts
      are ignored. For the others, the organisation named in part 1 and the
      experience entity itself are pushed, in that order. ``orgs`` and
      ``org_list`` are updated in place with organisations not seen before in
      this row. Returns the list of ``{"@value": item}`` references.
      """
      column, entity_type, min_parts, fields = spec
      references = []
      for item in row[column].split():
          cols = item.split('/')
          if len(cols) < min_parts:
              continue
          org_name = cols[1]
          if org_name not in orgs:
              org_list.append({"@value": org_name})
              orgs.add(org_name)
          entity = {
              "id": row[0],
              "@type": entity_type,
              "@id": _md5_hex(item),
              "name": [{"@value": item}],
              "@markdel": "0",
              "@contentType": "struct",
          }
          for field, part in fields:
              entity[field] = [{"@value": cols[part]}]
          references.append({"@value": item})
          post_json(_org_entity(org_name))
          post_json(entity)
      return references


  def main(path='../xxx3.xlsx', sheet_name='Sheet1', max_rows=600):
      """
      Read the talent spreadsheet and push one entity graph per row.

      For every row this pushes, through post_json(): the education, work,
      project and training entries (each preceded by its organisation), the
      organisations named in columns 35 and 39, and finally the person entity
      that references them. The module-level ``pushlen`` counter is increased
      after each row and the loop stops once it reaches ``max_rows``.

      NOTE(review): the source's indentation was lost; building and pushing an
      experience entity is assumed to happen only for items that pass the
      length check — confirm.

      :param path: spreadsheet to read.
      :param sheet_name: worksheet inside the spreadsheet.
      :param max_rows: stop once ``pushlen`` reaches this value.
      """
      global pushlen

      df = pd.read_excel(path, sheet_name=sheet_name)
      df = df.fillna(value="")
      for column in _TEXT_COLUMNS:
          df[column] = df[column].apply(str)

      # Plain tuples give explicit positional access; integer keys on a
      # label-indexed Series rely on a deprecated pandas fallback.
      for row in df.itertuples(index=False, name=None):
          org_list = []
          orgs = set()
          edu_list, job_list, pro_list, tra_list = [
              _push_experiences(row, spec, orgs, org_list)
              for spec in _EXPERIENCE_SPECS
          ]

          person = {
              "@id": _md5_hex(row[0]),        # entity id shown on the page
              "id": row[0],                   # auto-increment id
              "@type": "人才特征demo",          # data category
              "name": [{"@value": row[1]}],   # entity name, used for disambiguation
              "@markdel": '0',                # write / delete marker
              "@contentType": "struct",       # resource type
          }
          for field, column in _PERSON_COLUMNS:
              person[field] = [{"@value": row[column]}]
          person["教育经历"] = edu_list
          person["工作经历"] = job_list
          person["项目经历"] = pro_list
          person["培训和海外经历"] = tra_list
          person["相关机构"] = org_list

          post_json(_org_entity(row[35]))
          post_json(_org_entity(row[39]))
          post_json(person)

          pushlen += 1
          if pushlen >= max_rows:
              break
  # Start the import only when this file is executed as a script.
  if __name__ == "__main__":
      main()