#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author: sprivacy
# @Date: 2022-05-05 10:18:56
# @Last Modified by: privacy
# @Last Modified time: 2022-10-13 14:23:31
- import sys
- import time
- import json
- from hashlib import md5
- import requests
- from pprint import pprint
- import pandas as pd
# Builds the various knowledge-set entities and pushes them to the data API.

# Push toggle; declared here but not read by any code visible in this file.
willpush = True
# Session cookie sent in the "Cookie" header of every push request.
# NOTE(review): hard-coded session credential - consider reading it from the
# environment or a config file instead of committing it to source.
cookie = "JSESSIONID=40225388-b817-471b-8fc0-7afb72389712"
# Root URL of the target service. Endpoint paths are appended to it directly,
# so the trailing slash is required.
base_url = 'http://172.16.128.159:8284/'
# "@id" values that post_json has already seen; used to skip duplicates.
pushed = []
# Number of spreadsheet rows processed so far; incremented by main().
pushlen = 0
def post_json(json_obj, token="1664516531417", timeout=30):
    """Push one JSON entity to the ``data/api/access/push`` endpoint.

    An entity whose ``"@id"`` has been seen before is skipped. While the
    module-level ``pushlen`` counter is at most 500, the entity is recorded
    as seen but no request is sent.

    :param json_obj: entity dict; must contain ``"@id"``, and ``"@type"``
        when a request is actually sent.
    :param token: value sent in the ``token`` request header.
    :param timeout: seconds to wait for the server; without it an
        unresponsive server would block the whole import indefinitely.
    :return: always an empty string.
    """
    entity_id = json_obj['@id']

    # Already handled: print the size of the seen-list and skip.
    if entity_id in pushed:
        print(len(pushed))
        return ''

    # Record the id up front so a repeated entity is never sent twice.
    pushed.append(entity_id)

    # NOTE(review): entities are not sent while at most 500 rows have been
    # counted - presumably to resume after an earlier partial run; confirm.
    if pushlen <= 500:
        return ""

    url = base_url + "data/api/access/push"
    headers = {
        "token": token,
        "Cookie": cookie,
    }
    response = requests.post(url, json=json_obj, headers=headers, timeout=timeout)
    print(response.text)

    if json_obj["@type"] == "相关机构":
        print(json_obj)

    # Short pause between consecutive requests.
    time.sleep(0.1)
    return ""
# The import stops once this many spreadsheet rows have been counted.
_ROW_LIMIT = 600

# One entry per multi-valued column of the sheet:
#   (column position, minimum number of "/"-separated parts an item needs,
#    entity "@type", ((field name, index of the part it is read from), ...))
_EXPERIENCE_SPECS = (
    (22, 4, "教育经历demo",
     (("时间", 0), ("学校", 1), ("专业", 2), ("学历", -1))),
    (23, 4, "工作经历demo",
     (("时间", 0), ("公司", 1), ("行业", 2), ("职位", 3), ("工作内容", -1))),
    (24, 5, "项目经历demo",
     (("时间", 0), ("公司", 1), ("项目", 2), ("职位", 3), ("成果", -1))),
    (34, 4, "培训和海外经历demo",
     (("时间", 0), ("培训机构", 1), ("培训名称", 2), ("培训内容", -1))),
)


def _md5_hex(text):
    """Return the hex MD5 digest of *text* encoded as UTF-8."""
    return md5(text.encode(encoding='UTF-8')).hexdigest()


def _value(content):
    """Wrap *content* in the single-element ``[{"@value": ...}]`` list."""
    return [{"@value": content}]


def _org_entity(org_name):
    """Build the "相关机构" entity for *org_name*; its id is the name's MD5."""
    org_id = _md5_hex(org_name)
    return {
        "@type": "相关机构",
        "@id": org_id,
        "@contentType": "struct",
        "@markdel": "0",
        "id": org_id,
        "name": _value(org_name),
    }


def _push_experiences(row, spec, org_list, seen_orgs):
    """Push every well-formed item of one multi-valued column of *row*.

    Items are whitespace-separated; each item is split on "/" and ignored
    when it has fewer parts than the spec requires. For each accepted item
    the organisation entity (part 1) is pushed first, then the item entity.
    Organisations not yet in *seen_orgs* are appended to *org_list*.

    :return: list of ``{"@value": item}`` references for the accepted items.
    """
    column, min_parts, entity_type, fields = spec
    references = []
    for item in row[column].split():
        parts = item.split('/')
        if len(parts) < min_parts:
            continue

        org_name = parts[1]
        if org_name not in seen_orgs:
            seen_orgs.add(org_name)
            org_list.append({"@value": org_name})

        entity = {
            "id": row[0],
            "@type": entity_type,
            "@id": _md5_hex(item),
            "name": _value(item),
            "@markdel": "0",
            "@contentType": "struct",
        }
        for field_name, part_index in fields:
            entity[field_name] = _value(parts[part_index])

        references.append({"@value": item})
        post_json(_org_entity(org_name))
        post_json(entity)
    return references


def _person_entity(row, edu_list, job_list, pro_list, tra_list, org_list):
    """Build the "人才特征demo" entity for one spreadsheet row."""
    return {
        "@id": _md5_hex(row[0]),  # entity id shown on the page
        "id": row[0],  # auto-increment id
        "@type": "人才特征demo",  # data category
        "name": _value(row[1]),  # entity name, used for disambiguation
        "@markdel": '0',  # write / delete flag
        "@contentType": "struct",  # resource type
        "姓名": _value(row[1]),
        "性别": _value(row[2]),
        "出生年月": _value(row[3]),
        "婚姻状况": _value(row[4]),
        "特长爱好": _value(row[5]),
        "手机号码": _value(row[6]),
        "电子邮箱": _value(row[7]),
        "当前最高学历": _value(row[8]),
        "当前最高学历专业": _value(row[9]),
        "研究领域": _value(row[10]),
        "人才标签": _value(row[11]),
        "人才特点": _value(row[12]),
        "参加工作时间": _value(row[13]),
        "政治面貌": _value(row[14]),
        "当前所在城市": _value(row[15]),
        "当前行业": _value(row[16]),
        "当前职位": _value(row[17]),
        "当前年薪": _value(row[18]),
        "意向城市": _value(row[19]),
        "意向职位": _value(row[20]),
        "意向年薪": _value(row[21]),
        "教育经历": edu_list,
        "工作经历": job_list,
        "项目经历": pro_list,
        "语言能力": _value(row[25]),
        "专业证书": _value(row[26]),
        "技术职称": _value(row[27]),
        "入选人才": _value(row[28]),
        "知识产权": _value(row[29]),
        "获得荣誉及证明": _value(row[30]),
        "备注信息": _value(row[31]),
        "对报名岗位认识及工作设想": _value(row[32]),
        "自我评价及主要工作业绩": _value(row[33]),
        "培训和海外经历": tra_list,
        "当前公司": _value(row[35]),
        "毕业院校分类": _value(row[36]),
        "工作年限": _value(row[37]),
        "专业方向大类": _value(row[38]),
        "最高学历学校": _value(row[39]),
        "研究领域分类": _value(row[40]),
        "报名岗位": _value(row[41]),
        "年龄": _value(row[42]),
        "相关机构": org_list,
    }


def main(excel_path='../xxx3.xlsx', sheet_name='Sheet1'):
    """Read the talent spreadsheet and push one set of entities per row.

    For every row this pushes, in order: the education, work, project and
    training items (each preceded by its organisation), the organisations
    named in columns 35 and 39, and finally the person entity. The
    module-level ``pushlen`` counter is incremented once per row and the
    loop stops when it reaches ``_ROW_LIMIT``.

    :param excel_path: path of the workbook to read.
    :param sheet_name: name of the sheet inside the workbook.
    """
    global pushlen

    df = pd.read_excel(excel_path, sheet_name=sheet_name)
    df = df.fillna(value="")
    # These columns are sent as text (and "id" is hashed), so force strings.
    for column in ('id', '当前年薪(单位:万)', '意向年薪(单位:万)', '工作年限', '年龄'):
        df[column] = df[column].apply(str)

    # Plain tuples give true positional access; integer keys on a
    # label-indexed Series only work through a deprecated fallback.
    for row in df.itertuples(index=False, name=None):
        org_list = []
        seen_orgs = set()  # organisations already referenced by this row

        edu_list, job_list, pro_list, tra_list = [
            _push_experiences(row, spec, org_list, seen_orgs)
            for spec in _EXPERIENCE_SPECS
        ]

        person = _person_entity(
            row, edu_list, job_list, pro_list, tra_list, org_list
        )
        post_json(_org_entity(row[35]))  # current company
        post_json(_org_entity(row[39]))  # school of the highest degree
        post_json(person)

        pushlen += 1
        if pushlen >= _ROW_LIMIT:
            break
# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|