Forráskód Böngészése

new file: KGAPI.py
new file: KG搜索.py
new file: KG数据接入.py
new file: README.md

sprivacy 3 éve
commit
6e61e56815
4 módosított fájl, 558 hozzáadás és 0 törlés
  1. 204 0
      KGAPI.py
  2. 83 0
      KG搜索.py
  3. 271 0
      KG数据接入.py
  4. 0 0
      README.md

+ 204 - 0
KGAPI.py

@@ -0,0 +1,204 @@
+from pprint import pprint
+import requests
+import json
+from hashlib import md5
+import uvicorn
+from fastapi import FastAPI
+from typing import Optional
+
+app = FastAPI()
+
def getQaAttachment(data):
    """Convert relation triples into an echarts-style (nodes, links) payload.

    :param data: list of dicts with keys 'source', 'relation', 'target';
        'source'/'target' each carry at least 'id' and 'entity'. A target
        with a falsy id gets a deterministic md5-of-entity id (the input
        dict is mutated in place, as before).
    :return: (nodes, links) — nodes de-duplicated by id; targets are
        flagged target=True, sources source=True; one link per triple.
    """
    nodes = []
    seen = set()  # O(1) membership instead of scanning a list per triple
    links = []
    for d in data:
        source = d['source']
        target = d['target']
        if not target['id']:
            # Derive a stable id from the entity text so repeats collapse.
            target['id'] = md5(str(target['entity']).encode(encoding='UTF-8')).hexdigest()
        if target['id'] not in seen:
            nodes.append({"id": target['id'], "name": str(target['entity']),
                          "target": True, "source": False})
            seen.add(target['id'])
        if source['id'] not in seen:
            nodes.append({"id": source['id'], "name": source['entity'],
                          "source": True, "target": False})
            seen.add(source['id'])
        links.append({'source': source['id'], 'name': d["relation"], 'target': target['id']})
    return nodes, links
+
# Request headers shared by every KG API call below.
# NOTE(review): the token is hard-coded; move it to config/env before release.
headers = {
	"token": '369ca613c7a74a58b0f95be2cfd59257',
	"Content-Type": "application/json",
}
+
+
+
@app.get('/person_to_person')
def search_person(query: str, less: Optional[bool] = False):
	"""
	# Person-to-person relation query.
	# Required:
	# query: person name
	# Optional:
	# less: return a reduced relation set (limit to 8 outgoing edges)
	"""
	url = 'http://180.76.188.39:8085/mpks/api/extra/gremlin'
	# NOTE(review): `query` is spliced into the gremlin string unescaped —
	# a quote in the name breaks/alters the statement; sanitise upstream.
	if less:
		json_obj = {
			"gremlin": "g.has('name.@value', MATCH, '" + query + "').outE('相关机构').limit(8).inV.inE('相关机构').outV.with('*').graph"
		}
	else:
		json_obj = {
			"gremlin": "g.has('name.@value', MATCH, '" + query + "').outE('相关机构').inV.inE('相关机构').outV.with('*').graph"
		}
	# Run the query against the KG service.
	r = requests.post(url, headers=headers, json=json_obj)
	rst = json.loads(r.text)

	entity_array = []
	nodes = []
	links = []
	ids = {}  # @id -> name; also de-duplicates vertices

	if (rst['errno'] == 0) and rst['data']:
		# Every vertex becomes a node; "人才特征demo" entities are sources
		# (drawn red), everything else is a target (drawn blue).
		for entity in rst['data']['vertices']:
			if entity["@id"] not in ids:
				ids[entity["@id"]] = entity["name"]
				is_person = entity["@type"] == "人才特征demo"
				nodes.append({
					"id": entity["@id"],
					"name": entity["name"],
					"target": not is_person,
					"source": is_person,
					"itemStyle": {
						"normal": {
							"color": 'red' if is_person else 'blue'
						}
					},
				})
		for edge in rst['data']['edges']:
			source = {'id': edge['@from'], 'entity': ids[edge['@from']], "refId": None, "entityIndex": None}
			target = {'id': edge['@to'], 'entity': ids[edge['@to']], "refId": None, "entityIndex": None}
			entity_array.append({"source": source, "relation": edge['@label'], "target": target})
			links.append({'source': edge['@from'], 'name': edge['@label'], 'target': edge['@to']})
		return {"errno": 0, "msg": [], "graph": entity_array, "nodes": nodes, "links": links}
	# Upstream failure: propagate errno/msg instead of always reporting 0.
	pprint(rst['errno'])
	pprint(rst.get('msg'))
	return {"errno": rst['errno'], "msg": rst.get('msg', []), "graph": [], "nodes": [], "links": []}
+
+
+
@app.get('/search_query')
def search_query(query: str):
	"""
	# Fuzzy search endpoint.
	# Params:
	# query: the search keyword
	"""
	entity_array = []
	base_url = 'http://180.76.188.39:8085/mpks/api/search'
	json_obj = {
		"query": query,
		"sort": "relevance",
		"needCorrect": True,
		"saveHistory": False
	}
	r = requests.post(base_url, headers=headers, json=json_obj)
	rst = json.loads(r.text)
	if (rst['errno'] == 0) and rst['data']:
		results = rst['data']['results']
		# Guard against an empty hit list (the original raised IndexError).
		if results and results[0]['entityList']:
			entity = results[0]['entityList'][0]
			# The best-matching entity is the source of every emitted triple.
			source = {'id': entity['@id'], 'entity': entity['name'], 'refId': None, "entityIndex": None}
			for dic in entity['properties']:
				relation = dic['key']
				value = dic['value']
				if isinstance(value, dict) and ("@id" in value):
					# Single linked entity.
					target = {"id": value['@id'], "entity": value['value'], 'refId': None, "entityIndex": None}
					entity_array.append({"source": source, "relation": relation, "target": target})
				elif (not isinstance(value, list)) or (not value) or ("@id" not in value[0]):
					# Plain attribute; the `not value` guard fixes an
					# IndexError the original hit on an empty list.
					target = {"id": None, "entity": value, 'refId': None, "entityIndex": None}
					entity_array.append({"source": source, "relation": relation, "target": target})
				else:
					# List of linked entities.
					for vec in value:
						target = {"id": vec["@id"], "entity": vec["value"], 'refId': None, "entityIndex": None}
						entity_array.append({"source": source, "relation": relation, "target": target})
			# Graph neighbours of the entity, relation = vertex type.
			for edge in entity['graphData']['vertices']:
				relation = edge['@type'].replace('demo', '')
				target = {"id": edge["@id"], "entity": edge["name"], 'refId': None, "entityIndex": None}
				entity_array.append({"source": source, "relation": relation, "target": target})
		nodes, links = getQaAttachment(entity_array)
		return {"errno": 0, "msg": [], "graph": entity_array, "nodes": nodes, "links": links}
	# Upstream failure: propagate errno/msg instead of always reporting 0.
	pprint(rst['errno'])
	pprint(rst.get('msg'))
	return {"errno": rst['errno'], "msg": rst.get('msg', []), "graph": [], "nodes": [], "links": []}
+
+
@app.get('/search_gremlin')
def search_gremlin(query: Optional[str] = None, _id: Optional[str] = None):
	"""
	# Exact search endpoint.
	# Params:
	# query[optional]: look up by entity name
	# _id  [optional]: look up by entity id
	# When both are given, _id wins.
	# Example gremlin:
	# g.has("name.@value",MATCH,"于策").outE("相关机构").inV.inE("相关机构").outV.with("*").graph
	"""
	url = 'http://180.76.188.39:8085/mpks/api/extra/gremlin'
	# Build the gremlin statement; _id takes precedence over query.
	# NOTE(review): both values are spliced in unescaped — injection risk.
	if _id:
		json_obj = {
			"gremlin": "g.key('" + _id + "').both.with('*').graph"
		}
	elif query:
		json_obj = {
			"gremlin": "g.has('name.@value', MATCH, '" + query + "').both.with('*').graph"
		}
	else:
		return {"errno": 3001, "msg": "can not get query or id", "data": []}
	# Run the query against the KG service.
	r = requests.post(url, headers=headers, json=json_obj)
	rst = json.loads(r.text)

	# Metadata / separately-handled keys of the root vertex; everything else
	# on the vertex is treated as a plain attribute.
	skipped = {'@context', '@del', '@edge_number', '@formattype', '@fromtype',
	           '@fromurl', '@id', '@kbid', '@nodeid', '@semiid', '@tags',
	           '@type', '_id', '_type', 'alias', 'appId', 'name', 'nodeId',
	           'tags', '教育经历', '工作经历', '项目经历', '培训和海外经历',
	           '最高学历学校', '当前公司'}
	entity_array = []

	# Guard on vertices: the original raised IndexError when data was
	# present but the vertex list was empty.
	if (rst['errno'] == 0) and rst['data'] and rst['data']['vertices']:
		vertices = rst['data']['vertices']
		root = vertices[0]
		source = {'id': root['@id'], 'entity': root['name'], 'refId': None, "entityIndex": None}
		# Non-empty plain attributes of the root vertex.
		for relation in root.keys():
			if relation not in skipped and root[relation]:
				target = {'id': None, 'entity': root[relation], 'refId': None, "entityIndex": None}
				entity_array.append({"source": source, "relation": relation, "target": target})
		# Related vertices: prefer the labelled edge pointing back at the
		# root; otherwise fall back to the vertex type as the relation.
		for other in vertices[1:]:
			target = {'id': other['@id'], 'entity': other['name'], 'refId': None, "entityIndex": None}
			for edge in rst['data']['edges']:
				if edge['@from'] == other['@id']:
					entity_array.append({"source": target, "relation": edge['@label'].replace('demo', ''), "target": source})
					break
			else:
				entity_array.append({"source": source, "relation": other['@type'].replace('demo', ''), "target": target})
		nodes, links = getQaAttachment(entity_array)
		return {"errno": 0, "msg": [], "graph": entity_array, "nodes": nodes, "links": links}
	# Upstream failure: propagate errno/msg instead of always reporting 0.
	pprint(rst['errno'])
	pprint(rst.get('msg'))
	return {"errno": rst['errno'], "msg": rst.get('msg', []), "graph": [], "nodes": [], "links": []}
+
+
if __name__ == '__main__':
	# Serve the API on all interfaces, port 9000.
	uvicorn.run(app=app, host='0.0.0.0', port=9000)

+ 83 - 0
KG搜索.py

@@ -0,0 +1,83 @@
+from pprint import pprint
+import requests
+import json
+
# Request headers shared by all calls below (hard-coded session token).
headers = {
	"token": '369ca613c7a74a58b0f95be2cfd59257',
	"Content-Type": "application/json",
}

json_obj = {"query": "山东师范", # final query to search; may come from clicking a suggestion
# "inputQuery": "山东师范",# query exactly as typed by the user // filter conditions
#"channelId": 1, # channel id, int, optional; absent = no filtering
#"nodeId": 1, # knowledge-tree node id, int, optional; absent = no filtering; only leaf nodes supported
"timeFilter": {}, # optional time-range filter; empty = no time filtering; begin/end are unix timestamps
"tagFilter": ['教育经历'], # optional tag filter; empty = no tag filtering
#"docTypeFilter": "word", # optional document-type filter; absent = no filtering; enum: word|excel|pdf
"sort": "relevance",# sort order: relevance (default), time, or trending
"pn": 1, # page number, 1-based
"ps": 20, # page size, defaults to 20
"resultFields": [],
"searchType":"advanced",
"logId": "8966036931719178386",# string; only send logId when paging, not on fresh searches
"needCorrect": True, # whether to spell-correct the query (see escoreType & rewriteQuery in the response)
"saveHistory": True, # whether to save this query to the search history
"classesFilter":[] # class filter (newly added)
}
base_url = 'http://180.76.188.39:8085/mpks/api/search'
r = requests.post(base_url, headers=headers, json=json_obj)
rst = json.loads(r.text)
pprint(rst)



# Fetch all ancestors of a knowledge-tree node (kept for reference):
# url = 'http://180.76.188.39:8085/mpks/api/extra/tree/getAllParent'
# params = {
# 	'nodeId':1
# }
# response = requests.get(url=url, headers=headers, params=params)
# pprint(json.loads(response.text))



url = 'http://180.76.188.39:8085/mpks/api/extra/gremlin'
json_obj = {
	# 1: look up an entity by id
	# "gremlin": "g.key('b67029eb1341457162598cf84198d7e2').with('*').graph"
	# 2: fetch an entity together with its related entities
	# "gremlin" : "g.key('b67029eb1341457162598cf84198d7e2').out.with('*').graph" 
	# 4: control how many edge hops the returned graph includes
	# "gremlin": "g.key('b67029eb1341457162598cf84198d7e2').out.out.out.with('*').graph" 
	# entity named 测试数据 plus its outgoing edges
	# "gremlin" : "g.has('name.@value', MATCH, '测试数据').out.with('*').graph"
	"gremlin" : "g.has('type', MATCH, '人才特征demo').out.with('*').graph"
}
r = requests.post(url, headers=headers, json=json_obj)
pprint(json.loads(r.text))


# # Fetch the whole knowledge-tree structure (kept for reference):
# url = 'http://180.76.188.39:8085/mpks/api/extra/getNodes'
# response = requests.get(url=url, headers=headers)
# pprint(json.loads(response.text))

# # Fetch all documents attached to a knowledge-tree node (kept for reference):
# url = 'http://180.76.188.39:8085/mpks/api/extra/source/data/list'
# json_obj = {
# 	"contentType": "struct",
# 	"inputTypes": "api_push",
# }


# Query entities by category (kept for reference):
# url = 'http://180.76.188.39:8085/api/datamanage/graph/view'
# json_obj = {
# 	"type": "学校demo",
# 	"wd": "山东师范",
# 	"fuzzy": 1,
# 	"mode": 0,
# 	"pageNumbe": 1,
# 	"pageSize": 10,
# }
# r = requests.post(url, headers=headers, json=json_obj)
# pprint(json.loads(r.text))

+ 271 - 0
KG数据接入.py

@@ -0,0 +1,271 @@
+# !/usr/bin/python
+# -*- coding: utf-8 -*-
+# @Author: sprivacy
+# @Date:   2022-05-05 10:18:56
+# @Last Modified by:   sprivacy
+# @Last Modified time: 2022-05-06 16:07:41
+import sys
+import time
+import json
+from hashlib import md5
+import requests
+from pprint import pprint
+import pandas as pd
+
+# from unicodedata import name
+# from uuid import NAMESPACE_URL
+# import locale
+# locale.setlocale(locale.LC_CTYPE, 'chinese')
+
+# reload(sys)
+# sys.setdefaultencoding("utf-8")
+
# Shared configuration/state for the knowledge-set import.
# NOTE(review): a `global` statement at module level is a no-op — these
# names are module globals regardless.
global cookie, base_url, willpush, pushed, pushlen
willpush = True
cookie = "JSESSIONID=40225388-b817-471b-8fc0-7afb72389712"
base_url = 'http://180.76.188.39:8284/'

pushed = []   # '@id's already pushed in this run (duplicate suppression)
pushlen = 0   # count of person rows processed so far
+
def post_json(json_obj, token="1654940290763"):
	"""
	Push one entity JSON object to the KG ingest (push) endpoint.

	Entities whose '@id' was already pushed in this run are skipped.
	NOTE(review): while the module-global `pushlen` is below 300 nothing is
	actually sent over the wire — this looks like a manual resume offset
	from an interrupted import; confirm before reusing this script.
	:param json_obj: entity dict; must contain '@id' and '@type'
	:param token: ingest API token
	:return: always an empty string
	"""
	global willpush, pushed, pushlen
	# Already pushed in this run?
	if json_obj['@id'] in pushed:
		print(len(pushed))
		return ''
	# Record the id first so retries within this run are suppressed.
	pushed.append(json_obj['@id'])

	url = base_url + "data/api/access/push"
	headers = {
		"token": token,
		"Cookie": cookie
	}
	# print(url)
	if pushlen < 300:
		return ""
	else:
		response = requests.post(url, json=json_obj, headers=headers) 
		result = response.text
		print(result)
		pass
	# Debug aid: echo organisation entities as they go out.
	if json_obj["@type"] == "相关机构":
		print(json_obj)
	time.sleep(0.1)

	return ""
+
+
+
def _org_stub(org_name):
	"""Build a minimal 相关机构 (related organisation) entity, id = md5(name)."""
	org_id = md5(org_name.encode(encoding='UTF-8')).hexdigest()
	return {
		"@type": "相关机构",
		"@id": org_id,
		"@contentType": "struct",
		"@markdel": "0",
		"id": org_id,
		"name": [{"@value": org_name}],
	}


def _push_experiences(row, col, min_parts, type_name, field_cols, org_list, orgs):
	"""Parse one experience column and push each entry plus its organisation.

	Entries are whitespace-separated; each entry's fields are '/'-separated.
	:param row: current spreadsheet row (row[0] is the person id).
	:param col: positional column index holding the experience text.
	:param min_parts: entries with <= this many '/'-fields are skipped.
	:param type_name: '@type' for the pushed experience entities.
	:param field_cols: (field name, column index) pairs; index -1 means
		"last field" (the free-text remainder).
	:param org_list: accumulator of {"@value": org} refs (mutated in place).
	:param orgs: organisation names already seen for this row (mutated).
	:return: list of {"@value": item} refs for the person entity.
	"""
	refs = []
	for item in row[col].split():
		parts = item.split('/')
		if len(parts) <= min_parts:
			continue
		org = parts[1]  # field 1 is always the school/company/institution
		if org not in orgs:
			org_list.append({"@value": org})
			orgs.append(org)
		obj = {
			"id": row[0],
			"@type": type_name,
			"@id": md5(item.encode(encoding='UTF-8')).hexdigest(),
			"@contentType": "struct",
			"@markdel": "0",
			"name": [{"@value": item}],
		}
		for field_name, idx in field_cols:
			obj[field_name] = [{"@value": parts[idx]}]
		refs.append({"@value": item})
		post_json(_org_stub(org))
		post_json(obj)
	return refs


def main():
	"""Read xxx3.xlsx and push every person entity plus its experiences
	and related organisations to the KG ingest endpoint."""
	global pushlen
	df = pd.read_excel('xxx3.xlsx', sheet_name='Sheet1')
	df = df.fillna(value="")
	# Numeric columns must be strings for md5 / JSON purposes.
	df['id'] = df['id'].apply(str)
	df['当前年薪(单位:万)'] = df['当前年薪(单位:万)'].apply(str)
	df['意向年薪(单位:万)'] = df['意向年薪(单位:万)'].apply(str)
	df['工作年限'] = df['工作年限'].apply(str)
	df['年龄'] = df['年龄'].apply(str)
	for row in df.iloc:
		org_list = []
		orgs = []
		# Column layout: 22=education, 23=jobs, 24=projects, 34=training.
		edu_list = _push_experiences(row, 22, 3, "教育经历demo",
			[("时间", 0), ("学校", 1), ("专业", 2), ("学历", -1)], org_list, orgs)
		job_list = _push_experiences(row, 23, 3, "工作经历demo",
			[("时间", 0), ("公司", 1), ("行业", 2), ("职位", 3), ("工作内容", -1)], org_list, orgs)
		pro_list = _push_experiences(row, 24, 4, "项目经历demo",
			[("时间", 0), ("公司", 1), ("项目", 2), ("职位", 3), ("成果", -1)], org_list, orgs)
		tra_list = _push_experiences(row, 34, 3, "培训和海外经历demo",
			[("时间", 0), ("培训机构", 1), ("培训名称", 2), ("培训内容", -1)], org_list, orgs)
		json_obj = {
			"@id": md5(row[0].encode(encoding='UTF-8')).hexdigest(),  # entity id shown in the UI
			"id": row[0],  # source auto-increment id
			"@type": "人才特征demo",  # data category
			"name": [{"@value": row[1]}],  # entity name (used for disambiguation)
			"@markdel": '0',  # write ('0') vs delete flag
			"@contentType": "struct",  # resource type
			"姓名": [{"@value": row[1]}],
			"性别": [{"@value": row[2]}],
			"出生年月": [{"@value": row[3]}],
			"婚姻状况": [{"@value": row[4]}],
			"特长爱好": [{"@value": row[5]}],
			"手机号码": [{"@value": row[6]}],
			"电子邮箱": [{"@value": row[7]}],
			"当前最高学历": [{"@value": row[8]}],
			"当前最高学历专业": [{"@value": row[9]}],
			"研究领域": [{"@value": row[10]}],
			"人才标签": [{"@value": row[11]}],
			"人才特点": [{"@value": row[12]}],
			"参加工作时间": [{"@value": row[13]}],
			"政治面貌": [{"@value": row[14]}],
			"当前所在城市": [{"@value": row[15]}],
			"当前行业": [{"@value": row[16]}],
			"当前职位": [{"@value": row[17]}],
			"当前年薪": [{"@value": row[18]}],
			"意向城市": [{"@value": row[19]}],
			"意向职位": [{"@value": row[20]}],
			"意向年薪": [{"@value": row[21]}],
			"教育经历": edu_list,
			"工作经历": job_list,
			"项目经历": pro_list,
			"语言能力": [{"@value": row[25]}],
			"专业证书": [{"@value": row[26]}],
			"技术职称": [{"@value": row[27]}],
			"入选人才": [{"@value": row[28]}],
			"知识产权": [{"@value": row[29]}],
			"获得荣誉及证明": [{"@value": row[30]}],
			"备注信息": [{"@value": row[31]}],
			"对报名岗位认识及工作设想": [{"@value": row[32]}],
			"自我评价及主要工作业绩": [{"@value": row[33]}],
			"培训和海外经历": tra_list,
			"当前公司": [{"@value": row[35]}],
			"毕业院校分类": [{"@value": row[36]}],
			"工作年限": [{"@value": row[37]}],
			"专业方向大类": [{"@value": row[38]}],
			"最高学历学校": [{"@value": row[39]}],
			"研究领域分类": [{"@value": row[40]}],
			"报名岗位": [{"@value": row[41]}],
			"年龄": [{"@value": row[42]}],
			"相关机构": org_list,
		}
		post_json(json_obj)
		pushlen += 1
+
+
+
if __name__ == '__main__':
	# Run the spreadsheet-to-KG import end-to-end.
	main()

+ 0 - 0
README.md