KG数据接入.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. # !/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # @Author: sprivacy
  4. # @Date: 2022-05-05 10:18:56
  5. # @Last Modified by: sprivacy
  6. # @Last Modified time: 2022-05-06 16:07:41
  7. import sys
  8. import time
  9. import json
  10. from hashlib import md5
  11. import requests
  12. from pprint import pprint
  13. import pandas as pd
  14. # from unicodedata import name
  15. # from uuid import NAMESPACE_URL
  16. # import locale
  17. # locale.setlocale(locale.LC_CTYPE, 'chinese')
  18. # reload(sys)
  19. # sys.setdefaultencoding("utf-8")
  20. # 各种知识集构建
  21. global cookie, base_url, willpush, pushed, pushlen
  22. willpush = True
  23. cookie = "JSESSIONID=40225388-b817-471b-8fc0-7afb72389712"
  24. base_url = 'http://180.76.188.39:8284/'
  25. pushed = []
  26. pushlen = 0
  27. def post_json(json_obj, token="1654940290763"):
  28. """
  29. 将json数据提交到push接口
  30. :param json_obj:
  31. :return:
  32. """
  33. global willpush, pushed, pushlen
  34. # 是否已推送
  35. if json_obj['@id'] in pushed:
  36. print(len(pushed))
  37. return ''
  38. # 推送
  39. pushed.append(json_obj['@id'])
  40. url = base_url + "data/api/access/push"
  41. headers = {
  42. "token": token,
  43. "Cookie": cookie
  44. }
  45. # print(url)
  46. if pushlen < 300:
  47. return ""
  48. else:
  49. response = requests.post(url, json=json_obj, headers=headers)
  50. result = response.text
  51. print(result)
  52. pass
  53. if json_obj["@type"] == "相关机构":
  54. print(json_obj)
  55. time.sleep(0.1)
  56. return ""
  57. def main():
  58. global pushlen
  59. df = pd.read_excel('xxx3.xlsx', sheet_name='Sheet1')
  60. df = df.fillna(value="")
  61. df['id'] = df['id'].apply(str)
  62. df['当前年薪(单位:万)'] = df['当前年薪(单位:万)'].apply(str)
  63. df['意向年薪(单位:万)'] = df['意向年薪(单位:万)'].apply(str)
  64. df['工作年限'] = df['工作年限'].apply(str)
  65. df['年龄'] = df['年龄'].apply(str)
  66. for row in df.iloc:
  67. edu_list = []
  68. job_list = []
  69. pro_list = []
  70. tra_list = []
  71. org_list = []
  72. orgs = []
  73. for item in row[22].split():
  74. cols = item.split('/')
  75. if len(cols) > 3:
  76. if cols[1] not in orgs:
  77. org_list.append({"@value":cols[1]})
  78. orgs.append(cols[1])
  79. edu_obj = {
  80. "id": row[0],
  81. "@type": "教育经历demo",
  82. "@id": md5(item.encode(encoding='UTF-8')).hexdigest(),
  83. "@contentType": "struct",
  84. "@markdel": "0",
  85. "name": [{"@value": item}],
  86. "时间": [{"@value":cols[0]}],
  87. "学校": [{"@value":cols[1]}],
  88. "专业": [{"@value":cols[2]}],
  89. "学历": [{"@value":cols[-1]}]
  90. }
  91. edu_list.append({"@value": item})
  92. post_json({
  93. "@type": "相关机构",
  94. "@id": md5(cols[1].encode(encoding='UTF-8')).hexdigest(),# 学校名作为 id
  95. "@contentType": "struct",
  96. "@markdel": "0",
  97. "id": md5(cols[1].encode(encoding='UTF-8')).hexdigest(),# 学校名作为 id
  98. "name": [{"@value":cols[1]}],
  99. })
  100. post_json(edu_obj)
  101. for item in row[23].split():
  102. cols = item.split('/')
  103. if len(cols) > 3:
  104. if cols[1] not in orgs:
  105. org_list.append({"@value":cols[1]})
  106. orgs.append(cols[1])
  107. job_obj = {
  108. "id": row[0],
  109. "@type": "工作经历demo",
  110. "@id": md5(item.encode(encoding='UTF-8')).hexdigest(),
  111. "name": [{"@value": item}],
  112. "@markdel": "0",
  113. "@contentType": "struct",
  114. "时间": [{"@value":cols[0]}],
  115. "公司": [{"@value":cols[1]}],
  116. "行业": [{"@value":cols[2]}],
  117. "职位": [{"@value":cols[3]}],
  118. "工作内容": [{"@value":cols[-1]}]
  119. }
  120. job_list.append({"@value": item})
  121. post_json({
  122. "@type": "相关机构",
  123. "@id": md5(cols[1].encode(encoding='UTF-8')).hexdigest(),
  124. "@contentType": "struct",
  125. "@markdel": "0",
  126. "id": md5(cols[1].encode(encoding='UTF-8')).hexdigest(),
  127. "name": [{"@value":cols[1]}]
  128. })
  129. post_json(job_obj)
  130. for item in row[24].split():
  131. cols = item.split('/')
  132. if len(cols) > 4:
  133. if cols[1] not in orgs:
  134. org_list.append({"@value":cols[1]})
  135. orgs.append(cols[1])
  136. pro_obj = {
  137. "id": row[0],
  138. "@type": "项目经历demo",
  139. "@id": md5(item.encode(encoding='UTF-8')).hexdigest(),
  140. "name": [{"@value": item}],
  141. "@markdel": "0",
  142. "@contentType": "struct",
  143. "时间": [{"@value":cols[0]}],
  144. "公司": [{"@value":cols[1]}],
  145. "项目": [{"@value":cols[2]}],
  146. "职位": [{"@value":cols[3]}],
  147. "成果": [{"@value":cols[-1]}]
  148. }
  149. pro_list.append({"@value": item})
  150. post_json({
  151. "@type": "相关机构",
  152. "@id": md5(cols[1].encode(encoding='UTF-8')).hexdigest(),
  153. "@contentType": "struct",
  154. "@markdel": "0",
  155. "id": md5(cols[1].encode(encoding='UTF-8')).hexdigest(),
  156. "name": [{"@value":cols[1]}]
  157. })
  158. post_json(pro_obj)
  159. for item in row[34].split():
  160. cols = item.split('/')
  161. if len(cols) > 3:
  162. if cols[1] not in orgs:
  163. org_list.append({"@value":cols[1]})
  164. orgs.append(cols[1])
  165. tra_obj = {
  166. "id": row[0],
  167. "@type": "培训和海外经历demo",
  168. "@id": md5(item.encode(encoding='UTF-8')).hexdigest(),
  169. "name": [{"@value":item}],
  170. "@markdel": "0",
  171. "@contentType": "struct",
  172. "时间": [{"@value":cols[0]}],
  173. "培训机构": [{"@value":cols[1]}],
  174. "培训名称": [{"@value":cols[2]}],
  175. "培训内容": [{"@value":cols[-1]}]
  176. }
  177. tra_list.append({"@value": item})
  178. post_json({
  179. "@type": "相关机构",
  180. "@id": md5(cols[1].encode(encoding='UTF-8')).hexdigest(),
  181. "@contentType": "struct",
  182. "@markdel": "0",
  183. "id": md5(cols[1].encode(encoding='UTF-8')).hexdigest(),
  184. "name": [{"@value":cols[1]}]
  185. })
  186. post_json(tra_obj)
  187. # break
  188. json_obj = {
  189. "@id": md5(row[0].encode(encoding='UTF-8')).hexdigest(),# 实体 id 页面显示 ID
  190. "id": row[0],# 自增 id
  191. "@type": "人才特征demo",# 数据类目
  192. "name": [{"@value":row[1]}],# 实体名称 消歧
  193. "@markdel": '0',# 写入/删除
  194. "@contentType": "struct",# 资源类型
  195. "姓名": [{"@value":row[1]}],
  196. "性别": [{"@value":row[2]}],
  197. "出生年月": [{"@value":row[3]}],
  198. "婚姻状况": [{"@value":row[4]}],
  199. "特长爱好": [{"@value":row[5]}],
  200. "手机号码": [{"@value":row[6]}],
  201. "电子邮箱": [{"@value":row[7]}],
  202. "当前最高学历": [{"@value":row[8]}],
  203. "当前最高学历专业": [{"@value":row[9]}],
  204. "研究领域": [{"@value":row[10]}],
  205. "人才标签": [{"@value":row[11]}],
  206. "人才特点": [{"@value":row[12]}],
  207. "参加工作时间": [{"@value":row[13]}],
  208. "政治面貌": [{"@value":row[14]}],
  209. "当前所在城市": [{"@value":row[15]}],
  210. "当前行业": [{"@value":row[16]}],
  211. "当前职位": [{"@value":row[17]}],
  212. "当前年薪": [{"@value":row[18]}],
  213. "意向城市": [{"@value":row[19]}],
  214. "意向职位": [{"@value":row[20]}],
  215. "意向年薪": [{"@value":row[21]}],
  216. "教育经历": edu_list,
  217. "工作经历": job_list,
  218. "项目经历": pro_list,
  219. "语言能力": [{"@value":row[25]}],
  220. "专业证书": [{"@value":row[26]}],
  221. "技术职称": [{"@value":row[27]}],
  222. "入选人才": [{"@value":row[28]}],
  223. "知识产权": [{"@value":row[29]}],
  224. "获得荣誉及证明": [{"@value":row[30]}],
  225. "备注信息": [{"@value":row[31]}],
  226. "对报名岗位认识及工作设想": [{"@value":row[32]}],
  227. "自我评价及主要工作业绩": [{"@value":row[33]}],
  228. "培训和海外经历": tra_list,
  229. "当前公司": [{"@value":row[35]}],
  230. "毕业院校分类": [{"@value":row[36]}],
  231. "工作年限": [{"@value":row[37]}],
  232. "专业方向大类": [{"@value":row[38]}],
  233. "最高学历学校": [{"@value":row[39]}],
  234. "研究领域分类": [{"@value":row[40]}],
  235. "报名岗位": [{"@value":row[41]}],
  236. "年龄": [{"@value":row[42]}],
  237. "相关机构": org_list,
  238. }
  239. # post_json({
  240. # "@type": "相关机构",
  241. # "@id": md5(row[35].encode(encoding='UTF-8')).hexdigest(),
  242. # "@contentType": "struct",
  243. # "@markdel": "0",
  244. # "id": md5(row[35].encode(encoding='UTF-8')).hexdigest(),
  245. # "name": [{"@value":row[35]}]
  246. # })
  247. # post_json({
  248. # "@type": "相关机构",
  249. # "@id": md5(row[39].encode(encoding='UTF-8')).hexdigest(),
  250. # "@contentType": "struct",
  251. # "@markdel": "0",
  252. # "id": md5(row[39].encode(encoding='UTF-8')).hexdigest(),
  253. # "name": [{"@value":row[39]}],
  254. # })
  255. post_json(json_obj)
  256. pushlen += 1
# Run the import only when this file is executed directly as a script.
if __name__ == '__main__':
    main()