# KG_Search_API.py (14 KB)
import json
import logging
from hashlib import md5
from pprint import pprint
from typing import Optional

import requests
import uvicorn
from fastapi import FastAPI
  8. app = FastAPI()
  9. host = "172.16.57.71"
  10. # 请求头
  11. headers = {
  12. "token": '369ca613c7a74a58b0f95be2cfd59257',
  13. "Content-Type": "application/json",
  14. }
  15. eduback_dict = {
  16. 26:'博士后',
  17. 24:'博士',
  18. 22:'MBA/EMBA',
  19. 20:'硕士',
  20. 18:'本科',
  21. 16:'大专',
  22. 14:'中专/中技',
  23. 12:'高中',
  24. 10:'初中及以下'
  25. }
  26. def getQaAttachment(data):
  27. nodes = []
  28. ids = []
  29. links = []
  30. for d in data:
  31. if not d['target']['id']:
  32. d['target']['id'] = md5(str(d['target']['entity']).encode(encoding='UTF-8')).hexdigest()
  33. if not d['target']['id'] in ids:
  34. nodes.append({"id":d['target']['id'], "name": str(d['target']['entity']), "target":True, "source":False})
  35. ids.append(d['target']['id'])
  36. if not d['source']['id'] in ids:
  37. nodes.append({"id":d['source']['id'], "name": d['source']['entity'], "source":True, "target":False})
  38. ids.append(d['source']['id'])
  39. links.append({'source': d['source']['id'], 'name':d["relation"], 'target': d['target']['id']})
  40. return nodes, links
  41. def post_json(json_obj, token="1664516531417"):
  42. """
  43. 将json数据提交到push接口
  44. :param json_obj:
  45. :return:
  46. """
  47. # 推送
  48. url = 'http://{}:8284/data/api/access/push'.format(host)
  49. headers = {
  50. "token": token,
  51. # "Cookie": "JSESSIONID=40225388-b817-471b-8fc0-7afb72389712"
  52. }
  53. try:
  54. response = requests.post(url, json=json_obj, headers=headers)
  55. result = response.text
  56. logger.info(result)
  57. except:
  58. logger.error(e)
  59. return "ERROR: {} 数据推送 KG 失败!".format(json)
  60. else:
  61. return 0
  62. @app.get('/person_to_person')
  63. def search_person(query: str, less: Optional[bool] = False):
  64. """
  65. # 人与人的相关关系查询
  66. # 必选参数:
  67. # query: 人名
  68. # 可选参数
  69. # less: 显示少量关系
  70. """
  71. url = 'http://:8085/mpks/api/extra/gremlin'.format(host)
  72. if less:
  73. json_obj = {
  74. # "gremlin": "g.has('name.@value', MATCH, '"+ query +"').outE(['当前公司','最高学历学校']).inV.inE('相关机构').outV.with('*').graph"
  75. "gremlin": "g.has('name.@value', MATCH, '"+ query +"').outE('相关机构').limit(8).inV.inE('相关机构').outV.with('*').graph"
  76. }
  77. else:
  78. json_obj = {
  79. "gremlin": "g.has('name.@value', MATCH, '"+ query +"').outE('相关机构').inV.inE('相关机构').outV.with('*').graph"
  80. }
  81. # 请求查询
  82. try:
  83. r = requests.post(url, headers=headers, json=json_obj)
  84. rst = json.loads(r.text)
  85. except Exception as e:
  86. logger.error(e)
  87. return {"errno":555, "msg": "请求检索失败"}
  88. entity_array = []
  89. nodes = []
  90. links = []
  91. ids = dict([])
  92. # 查询成功
  93. if (rst['errno'] == 0) and rst['data']:
  94. # 所有实体
  95. for entity in rst['data']['vertices']:
  96. if entity["@id"] not in ids:
  97. ids[entity["@id"]] = entity["name"]
  98. nodes.append({
  99. "id": entity["@id"],
  100. "name": entity["name"],
  101. "target": False if (entity["@type"] == "人才特征") else True,
  102. "source": True if (entity["@type"] == "人才特征") else False,
  103. "itemStyle": {
  104. "normal": {
  105. "color": 'red' if (entity["@type"] == "人才特征") else 'blue'
  106. }
  107. },
  108. })
  109. for edge in rst['data']['edges']:
  110. source = {'id': edge['@from'], 'entity': ids[edge['@from']], "refId": None, "entityIndex": None}
  111. target = {'id': edge['@to'], 'entity': ids[edge['@to']], "refId": None, "entityIndex": None}
  112. relation = edge['@label']
  113. entity_array.append({"source": source, "relation": relation, "target": target})
  114. links.append({'source': edge['@from'], 'name': edge['@label'], 'target': edge['@to']})
  115. else:
  116. pprint(rst['errno'])
  117. pprint(rst['msg'])
  118. return {"errno": 0, "msg": [], "graph": entity_array, "nodes": nodes, "links": links}
  119. @app.get('/search_query')
  120. def search_query(query: str):
  121. """
  122. # 搜索接口,模糊搜索
  123. # 參數:
  124. # query: 查詢的關鍵詞
  125. """
  126. entity_array = []
  127. base_url = 'http://{}:8085/mpks/api/search'.format(host)
  128. json_obj = {
  129. "query": query,
  130. "sort": "relevance",
  131. "needCorrect": True,
  132. "saveHistory": False
  133. }
  134. try:
  135. r = requests.post(base_url, headers=headers, json=json_obj)
  136. rst = json.loads(r.text)
  137. except Exception as e:
  138. logger.error(e)
  139. return {"errno":555, "msg": "请求检索失败"}
  140. # 查询结果
  141. if (rst['errno'] == 0) and rst['data']:
  142. # 当前实体
  143. source = {'id': rst['data']['results'][0]['entityList'][0]['@id'], 'entity': rst['data']['results'][0]['entityList'][0]['name'],'refId': None, "entityIndex": None}
  144. # 实体属性关系
  145. for dic in rst['data']['results'][0]['entityList'][0]['properties']:
  146. relation = dic['key']
  147. # 关系
  148. if isinstance(dic['value'], dict) and ("@id" in dic['value']):
  149. target = {"id": dic['value']['@id'], "entity":dic['value']['value'], 'refId': None, "entityIndex": None}
  150. entity_array.append({"source": source, "relation": relation, "target": target})
  151. # 属性
  152. elif (not isinstance(dic['value'], list)) or ("@id" not in dic['value'][0]):
  153. target = {"id": None, "entity":dic['value'], 'refId': None, "entityIndex": None}
  154. entity_array.append({"source": source, "relation": relation, "target": target})
  155. # 关系
  156. else:
  157. for vec in dic['value']:
  158. target = {"id": vec["@id"], "entity": vec["value"], 'refId': None, "entityIndex": None}
  159. entity_array.append({"source": source, "relation": relation, "target": target})
  160. # 边
  161. for edge in rst['data']['results'][0]['entityList'][0]['graphData']['vertices']:
  162. relation = edge['@type'].replace('demo', '')
  163. target = {"id": edge["@id"], "entity": edge["name"], 'refId': None, "entityIndex": None}
  164. entity_array.append({"source": source, "relation": relation, "target": target})
  165. else:
  166. pprint(rst['errno'])
  167. pprint(rst['msg'])
  168. nodes, links = getQaAttachment(entity_array)
  169. return {"errno": 0, "msg": [], "graph": entity_array, "nodes": nodes, "links": links}
  170. @app.get('/search_gremlin')
  171. def search_gremlin(query: Optional[str] = None, _id: Optional[str] = None):
  172. """
  173. # 搜索接口,精确搜索
  174. # 參數:
  175. # query[可选]: 使用实体名称查询
  176. # _id [可选]: 使用实体 id 查询
  177. # 同时填写时 _id 优先
  178. g.has("name.@value",MATCH,"于策").outE("相关机构").inV.inE("相关机构").outV.with("*").graph
  179. """
  180. # 请求地址
  181. url = 'http://{}:8085/mpks/api/extra/gremlin'.format(host)
  182. # 查询语句
  183. if _id:
  184. json_obj = {
  185. "gremlin" : "g.key('"+ _id +"').both.with('*').graph"
  186. }
  187. elif query:
  188. json_obj = {
  189. "gremlin" : "g.has('name.@value', MATCH, '"+ query +"').both.with('*').graph"
  190. }
  191. else:
  192. return {"errno": 3001, "msg": "can not get query or id", "data": []}
  193. # 请求查询
  194. try:
  195. r = requests.post(url, headers=headers, json=json_obj)
  196. rst = json.loads(r.text)
  197. except Exception as e:
  198. logger.error(e)
  199. return {"errno":555, "msg": "请求检索失败"}
  200. entity_array = []
  201. # 查询成功
  202. if (rst['errno'] == 0) and rst['data']:
  203. # 当前实体
  204. source = {'id': rst['data']['vertices'][0]['@id'], 'entity': rst['data']['vertices'][0]['name'],'refId': None, "entityIndex": None}
  205. # 属性
  206. for relation in rst['data']['vertices'][0].keys():
  207. if relation not in ['@context', '@del', '@edge_number', '@formattype', '@fromtype', '@fromurl', '@id', '@kbid', '@nodeid', '@semiid', '@tags', '@type', '_id', '_type', 'alias', 'appId', 'name', 'nodeId', 'tags', '教育经历', '工作经历', '项目经历', '培训和海外经历', '最高学历学校', '当前公司']:
  208. target = {'id': None, 'entity': rst['data']['vertices'][0][relation], 'refId': None, "entityIndex": None}
  209. if rst['data']['vertices'][0][relation]:
  210. entity_array.append({"source": source, "relation": relation, "target": target})
  211. # 关系
  212. if len(rst['data']['vertices']) > 1:
  213. for index in range(1, len(rst['data']['vertices'])):
  214. target = {'id': rst['data']['vertices'][index]['@id'], 'entity': rst['data']['vertices'][index]['name'], 'refId': None, "entityIndex": None}
  215. for edge in rst['data']['edges']:
  216. if edge['@from'] == rst['data']['vertices'][index]['@id']:
  217. entity_array.append({"source": target, "relation": edge['@label'].replace('demo', ''), "target": source})
  218. break
  219. else:
  220. entity_array.append({"source": source, "relation": rst['data']['vertices'][index]['@type'].replace('demo', ''), "target": target})
  221. # 边
  222. # for edge in rst['data']['edges']:
  223. # target = {'id': edge['@to'], 'entity': None, "refId": edge['@from'], "entityIndex": source_index}
  224. # relation = edge['@label']
  225. # entity_array.append({"source": source, "relation": relation, "target": target})
  226. else:
  227. pprint(rst['errno'])
  228. pprint(rst['msg'])
  229. nodes, links = getQaAttachment(entity_array)
  230. return {"errno": 0, "msg": [], "graph": entity_array, "nodes": nodes, "links": links}
  231. @app.post('/update_person')
  232. def update_person(xdpost: dict):
  233. xdpost = xdpost['data']
  234. # 最高学历
  235. high_edu = {"schoolName": None, "major": None, "degree": None}
  236. # 机构列表
  237. org_list = []
  238. # 工作经历
  239. job_list = []
  240. for job in xdpost["hisJob"]:
  241. org_list.append(job['companyName'])
  242. job_list.append(job['companyName'] + job['industry'] + job['jobName'] + job['jobDesc'])
  243. post_json({
  244. "@id": md5(job['companyName'].encode(encoding="UTF-8")).hexdigest(),
  245. "id": md5(job['companyName'].encode(encoding="UTF-8")).hexdigest(),
  246. "@type": "相关机构",
  247. "@contentType": "struct",
  248. "@markdel": "0",
  249. "name": [{"@value": job['companyName']}]
  250. })
  251. post_json({
  252. "@id": md5(job['id'].encode(encoding="UTF-8")).hexdigest(),
  253. "id": job['id'],
  254. "@type": "工作经历",
  255. "name": job['companyName'] + job['industry'] + job['jobName'] + job['jobDesc'],
  256. "@markdel": "0",
  257. "@contentType": "struct",
  258. "时间": [{"@value": job['startTime']+job['endTime']}],
  259. "公司": [{"@value": job['companyName']}],
  260. "行业": [{"@value": job['industry']}],
  261. "职位": [{"@value": job['jobName']}],
  262. "工作内容": [{"@value": job['jobDesc']}]
  263. })
  264. # 教育经历
  265. edu_list = []
  266. for edu in xdpost["hisEdu"]:
  267. if (not high_edu.get("schoolName")) or (high_edu['degree'] < edu['degree']):
  268. high_edu['schoolName'] = edu['schoolName']
  269. high_edu['major'] = edu['major']
  270. high_edu['degree'] = edu['degree']
  271. org_list.append(edu['schoolName'])
  272. edu_list.append(edu['startTime'] + edu['schoolName'] + edu['major'] + eduback_dict[edu['degree']])
  273. post_json({
  274. "@id": md5(edu['schoolName'].encode(encoding="UTF-8")).hexdigest(),
  275. "id": md5(edu['schoolName'].encode(encoding="UTF-8")).hexdigest(),
  276. "@type": "相关机构",
  277. "@contentType": "struct",
  278. "@markdel": "0",
  279. "name": [{"@value": edu['schoolName']}]
  280. })
  281. post_json({
  282. "@id": md5(edu['id'].encode(encoding="UTF-8")).hexdigest(),
  283. "id": edu['id'],
  284. "@type": "教育经历",
  285. "name": edu['startTime'] + edu['schoolName'] + edu['major'] + eduback_dict[edu['degree']],
  286. "@markdel": "0",
  287. "@contentType": "struct",
  288. "时间": [{"@value": edu['startTime']+edu['endTime']}],
  289. "学校": [{"@value": edu['schoolName']}],
  290. "专业": [{"@value": edu['major']}],
  291. "学历": [{"@value": edu['degree']}]
  292. })
  293. # 项目经历
  294. pro_list = []
  295. for pro in xdpost["hisProject"]:
  296. pro_list.append(pro['companyName'] + pro['projectName'] + pro['projectOffice'] + pro['projectDuty'])
  297. post_json({
  298. "@id": md5(pro['id'].encode(encoding="UTF-8")).hexdigest(),
  299. "id": pro['id'],
  300. "@type": "项目经历",
  301. "name": pro['companyName'] + pro['projectName'] + pro['projectOffice'] + pro['projectDuty'],
  302. "@markdel": "0",
  303. "@contentType": "struct",
  304. "时间": [{"@value": pro['startTime']+pro['endTime']}],
  305. "公司": [{"@value": pro['companyName']}],
  306. "项目": [{"@value": pro['projectName']}],
  307. "职位": [{"@value": pro['projectDuty']}],
  308. "成果": [{"@value": pro['projectOffice']}]
  309. })
  310. # 海外培训经历
  311. tra_list = [{"@value": None}]
  312. # 基本信息
  313. json_obj = {
  314. "@id": md5(xdpost['tId'].encode(encoding='UTF-8')).hexdigest(),
  315. "id": xdpost['tId'],
  316. "@type": "人才特征",
  317. "name": xdpost['name'],
  318. "@markdel": '0',
  319. "@contentType": "struct",
  320. "姓名": [{"@value": xdpost['name']}],
  321. "年龄": [{"@value": xdpost['age']}],
  322. "性别": [{"@value": '男' if xdpost['gender'] == "0" else '女'}],
  323. "出生年月": [{"@value": xdpost['birthTime']}],
  324. "手机号码": [{"@value": xdpost['mobile']}],
  325. "电子邮箱": [{"@value": xdpost['email']}],
  326. "政治面貌": [{"@value": xdpost['politics']}],
  327. "参加工作时间": [{"@value": xdpost['workBeginTime']}],
  328. "当前职位": [{"@value": xdpost['currentJob']}],
  329. "意向职位": [{"@value": xdpost['intentJob']}],
  330. "当前年薪": [{"@value": xdpost['currentSalaryYearly']}],
  331. "当前所在城市": [{"@value": None}],
  332. "意向城市": [{"@value": None}],
  333. "意向年薪": [{"@value": xdpost['intentSalaryYearlyMax']}],
  334. "相关机构": org_list,
  335. "教育经历": edu_list,
  336. "工作经历": job_list,
  337. "项目经历": pro_list,
  338. "培训和海外经历": tra_list,
  339. "当前最高学历": [{"@value": eduback_dict[high_edu['degree']]}],
  340. "最高学历学校": [{"@value": high_edu['schoolName']}],
  341. "当前最高学历专业": [{"@value": high_edu['major']}],
  342. "语言能力": [{"@value": val['lanName']} for val in xdpost['language']],
  343. "技术职称": [{"@value": None}],
  344. "研究领域": [{"@value": val['researchName']} for val in xdpost['researchList']],
  345. "研究领域分类": [{"@value": val['researchName']} for val in xdpost['researchList']],
  346. "婚姻状况": [{"@value": None}],
  347. "特长爱好": [{"@value": None}],
  348. "人才标签": [{"@value": None}],
  349. "人才特点": [{"@value": None}],
  350. "当前行业": [{"@value": None}],
  351. "专业证书": [{"@value": None}],
  352. "入选人才": [{"@value": None}],
  353. "知识产权": [{"@value": None}],
  354. "获得荣誉及证明": [{"@value": None}],
  355. "备注信息": [{"@value": None}],
  356. "对报名岗位认识及工作设想": [{"@value": None}],
  357. "自我评价及主要工作业绩": [{"@value": None}],
  358. "当前公司": [{"@value": None}],
  359. "毕业院校分类": [{"@value": None}],
  360. "工作年限": [{"@value": None}],
  361. "专业方向大类": [{"@value": None}],
  362. "报名岗位": [{"@value": None}],
  363. }
  364. post_json(json_obj)
  365. return {"errno": 0, "msg": "Success"}
  366. if __name__ == '__main__':
  367. uvicorn.run(app=app, host='0.0.0.0', port=9000)