#!/usr/bin/python
# -*- coding: utf-8 -*-
# xzc/kg_person
# @Author: sprivacy
# @Date:   2022-05-05 10:18:56
# @Last Modified by:   privacy
# @Last Modified time: 2022-10-13 14:23:31
import sys
import time
import json
from hashlib import md5
import requests
from pprint import pprint
import pandas as pd

# Construction of the various knowledge sets.
#
# Module-level configuration and run state shared by post_json() and main().

# Declared as a push on/off flag; nothing in this file reads it yet.
willpush = True
# NOTE(review): session cookie committed to source -- looks like a live
# credential; consider loading it from the environment instead.
cookie = "JSESSIONID=40225388-b817-471b-8fc0-7afb72389712"
# Base URL of the service; endpoint paths are appended to it directly.
base_url = 'http://172.16.128.159:8284/'

# "@id" values already handed to post_json() during this run (used for dedup).
pushed = []
# Number of spreadsheet rows main() has finished processing so far.
pushlen = 0

def post_json(json_obj, token="1664516531417", timeout=30):
	"""Submit one entity to the ``data/api/access/push`` endpoint, at most once per ``@id``.

	The entity's ``@id`` is recorded in the module-level ``pushed`` list on
	first sight; any later call with the same ``@id`` prints the current size
	of ``pushed`` and returns without sending.

	While the module-level row counter ``pushlen`` is 500 or less, the entity
	is only recorded and NOT sent (presumably so that a run can resume after
	rows that were uploaded earlier -- TODO confirm).

	:param json_obj: entity dict; must contain the keys ``@id`` and ``@type``.
	:param token: value sent in the ``token`` request header.
	:param timeout: seconds to wait for the server before ``requests`` raises
		a timeout error, so a stalled connection cannot hang the batch.
	:return: always an empty string.
	"""
	entity_id = json_obj['@id']
	# Already handled during this run: report how many ids are tracked and skip.
	if entity_id in pushed:
		print(len(pushed))
		return ''
	pushed.append(entity_id)

	# Skip window: the id stays marked as handled, but no request is made.
	if pushlen <= 500:
		return ""

	url = base_url + "data/api/access/push"
	headers = {
		"token": token,
		"Cookie": cookie
	}
	response = requests.post(url, json=json_obj, headers=headers, timeout=timeout)
	print(response.text)
	# Organisation entities are echoed in full after being sent.
	if json_obj["@type"] == "相关机构":
		print(json_obj)
	# Short pause between consecutive requests.
	time.sleep(0.1)

	return ""


# main() stops once this many spreadsheet rows have been processed.
_MAX_ROWS = 600


def _value(content):
	"""Wrap *content* in the one-element ``[{"@value": ...}]`` list used for every attribute."""
	return [{"@value": content}]


def _md5_hex(text):
	"""Return the hex MD5 digest of *text* (UTF-8 encoded); serves as a deterministic entity id."""
	return md5(text.encode(encoding='UTF-8')).hexdigest()


def _org_entity(name):
	"""Build the ``相关机构`` (related organisation) entity for *name*, keyed by the MD5 of the name."""
	org_id = _md5_hex(name)
	return {
		"@type": "相关机构",
		"@id": org_id,
		"@contentType": "struct",
		"@markdel": "0",
		"id": org_id,
		"name": _value(name),
	}


def _push_experiences(person_id, cell, type_name, min_parts, fields, seen_orgs, org_refs):
	"""Push every experience record found in one spreadsheet cell.

	*cell* holds whitespace-separated records; each record is a ``/``-separated
	string whose second part is an organisation name. Records with fewer than
	*min_parts* parts are ignored. For each accepted record the organisation
	entity is pushed first, then the experience entity itself.

	:param person_id: value stored under ``id`` in each experience entity.
	:param cell: raw cell text.
	:param type_name: ``@type`` of the experience entities.
	:param min_parts: minimum number of ``/``-separated parts required.
	:param fields: attribute names; all but the last map to parts 0, 1, 2, ...
		in order, and the last one always maps to the final part of the record.
	:param seen_orgs: set of organisation names already referenced for this
		person; updated in place.
	:param org_refs: list of ``{"@value": name}`` organisation references for
		this person; extended in place with first-time names.
	:return: list of ``{"@value": record}`` references to the pushed records.
	"""
	refs = []
	for item in cell.split():
		cols = item.split('/')
		if len(cols) < min_parts:
			continue
		org_name = cols[1]
		if org_name not in seen_orgs:
			seen_orgs.add(org_name)
			org_refs.append({"@value": org_name})
		entity = {
			"id": person_id,
			"@type": type_name,
			"@id": _md5_hex(item),
			"name": _value(item),
			"@markdel": "0",
			"@contentType": "struct",
		}
		for position, field in enumerate(fields[:-1]):
			entity[field] = _value(cols[position])
		entity[fields[-1]] = _value(cols[-1])
		refs.append({"@value": item})
		post_json(_org_entity(org_name))
		post_json(entity)
	return refs


def main():
	"""Read ``../xxx3.xlsx`` (sheet ``Sheet1``) and push one person entity per row.

	For every row, the education (column 22), work (23), project (24) and
	training/overseas (34) cells are split into individual experience
	entities, each pushed together with its organisation. The row's current
	company (35) and highest-degree school (39) are then pushed as
	organisations, followed by the person entity itself. Columns are addressed
	by position. The module-level counter ``pushlen`` is incremented after
	each row and processing stops once it reaches ``_MAX_ROWS``.
	"""
	global pushlen
	df = pd.read_excel('../xxx3.xlsx', sheet_name='Sheet1')
	df = df.fillna(value="")
	# These columns are stringified explicitly before the payload is built.
	for column in ('id', '当前年薪（单位：万）', '意向年薪（单位：万）', '工作年限', '年龄'):
		df[column] = df[column].apply(str)

	# Plain tuples give stable positional access to every column.
	for row in df.itertuples(index=False, name=None):
		seen_orgs = set()
		org_list = []
		edu_list = _push_experiences(
			row[0], row[22], "教育经历demo", 4,
			("时间", "学校", "专业", "学历"),
			seen_orgs, org_list)
		job_list = _push_experiences(
			row[0], row[23], "工作经历demo", 4,
			("时间", "公司", "行业", "职位", "工作内容"),
			seen_orgs, org_list)
		pro_list = _push_experiences(
			row[0], row[24], "项目经历demo", 5,
			("时间", "公司", "项目", "职位", "成果"),
			seen_orgs, org_list)
		tra_list = _push_experiences(
			row[0], row[34], "培训和海外经历demo", 4,
			("时间", "培训机构", "培训名称", "培训内容"),
			seen_orgs, org_list)

		json_obj = {
			"@id": _md5_hex(row[0]),  # entity id shown as the ID on the page
			"id": row[0],  # auto-increment id
			"@type": "人才特征demo",  # data category
			"name": _value(row[1]),  # entity name, used for disambiguation
			"@markdel": "0",  # write / delete marker
			"@contentType": "struct",  # resource type
			"姓名": _value(row[1]),
			"性别": _value(row[2]),
			"出生年月": _value(row[3]),
			"婚姻状况": _value(row[4]),
			"特长爱好": _value(row[5]),
			"手机号码": _value(row[6]),
			"电子邮箱": _value(row[7]),
			"当前最高学历": _value(row[8]),
			"当前最高学历专业": _value(row[9]),
			"研究领域": _value(row[10]),
			"人才标签": _value(row[11]),
			"人才特点": _value(row[12]),
			"参加工作时间": _value(row[13]),
			"政治面貌": _value(row[14]),
			"当前所在城市": _value(row[15]),
			"当前行业": _value(row[16]),
			"当前职位": _value(row[17]),
			"当前年薪": _value(row[18]),
			"意向城市": _value(row[19]),
			"意向职位": _value(row[20]),
			"意向年薪": _value(row[21]),
			"教育经历": edu_list,
			"工作经历": job_list,
			"项目经历": pro_list,
			"语言能力": _value(row[25]),
			"专业证书": _value(row[26]),
			"技术职称": _value(row[27]),
			"入选人才": _value(row[28]),
			"知识产权": _value(row[29]),
			"获得荣誉及证明": _value(row[30]),
			"备注信息": _value(row[31]),
			"对报名岗位认识及工作设想": _value(row[32]),
			"自我评价及主要工作业绩": _value(row[33]),
			"培训和海外经历": tra_list,
			"当前公司": _value(row[35]),
			"毕业院校分类": _value(row[36]),
			"工作年限": _value(row[37]),
			"专业方向大类": _value(row[38]),
			"最高学历学校": _value(row[39]),
			"研究领域分类": _value(row[40]),
			"报名岗位": _value(row[41]),
			"年龄": _value(row[42]),
			"相关机构": org_list,
		}
		# NOTE(review): a blank cell arrives here as "" and is still pushed as
		# an organisation with an empty name -- confirm this is intended.
		post_json(_org_entity(row[35]))
		post_json(_org_entity(row[39]))
		post_json(json_obj)

		pushlen += 1
		if pushlen >= _MAX_ROWS:
			break


# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
	main()