#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author: sprivacy
# @Date:   2022-05-05 10:18:56
# @Last Modified by:   sprivacy
# @Last Modified time: 2022-05-06 16:07:41
"""Build the knowledge sets.

Reads candidate rows from an Excel sheet, turns every row into one "person"
record plus the education / work / project / training records and the
organisation records it mentions, and submits each record to the push API.
"""

import json
import sys
import time
from hashlib import md5
from pprint import pprint

import pandas as pd
import requests

# NOTE(review): `willpush` is defined but never consulted by the code below.
willpush = True
cookie = "JSESSIONID=40225388-b817-471b-8fc0-7afb72389712"
base_url = 'http://180.76.188.39:8284/'
pushed = set()  # "@id" values already handed to post_json (process-wide dedup)
pushlen = 0     # number of spreadsheet rows fully processed so far

# post_json() only talks to the server once this many rows have been processed.
SKIP_ROWS = 300
# Seconds to wait for the push endpoint; without a timeout requests blocks forever.
REQUEST_TIMEOUT = 30

# Columns that are converted to text before the rows are read.
_TEXT_COLUMNS = (
    'id',
    '当前年薪(单位:万)',
    '意向年薪(单位:万)',
    '工作年限',
    '年龄',
)

# Description of the four "experience" cells of a row:
#   (column index, "@type", field count that must be exceeded,
#    {property name: index into the "/"-separated fields})
# In every kind, field 1 is the organisation name.
# NOTE(review): a work entry with exactly four fields passes the check, in
# which case indices 3 and -1 refer to the same field.
_EDUCATION = (22, "教育经历demo", 3,
              {"时间": 0, "学校": 1, "专业": 2, "学历": -1})
_WORK = (23, "工作经历demo", 3,
         {"时间": 0, "公司": 1, "行业": 2, "职位": 3, "工作内容": -1})
_PROJECT = (24, "项目经历demo", 4,
            {"时间": 0, "公司": 1, "项目": 2, "职位": 3, "成果": -1})
_TRAINING = (34, "培训和海外经历demo", 3,
             {"时间": 0, "培训机构": 1, "培训名称": 2, "培训内容": -1})


def _md5_hex(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8."""
    return md5(text.encode(encoding='UTF-8')).hexdigest()


def post_json(json_obj, token="1654940290763"):
    """Submit one JSON record to the push endpoint.

    A record whose "@id" was already passed to this function is ignored (the
    current number of known ids is printed instead).  The id is remembered
    even when the request itself is skipped because fewer than ``SKIP_ROWS``
    rows have been processed.

    :param json_obj: record to push; must contain "@id" and "@type".
    :param token: value sent in the ``token`` request header.
    :return: always an empty string.
    """
    record_id = json_obj['@id']
    # Already pushed?
    if record_id in pushed:
        print(len(pushed))
        return ''
    pushed.add(record_id)

    # The first SKIP_ROWS rows are only registered, not sent.
    if pushlen < SKIP_ROWS:
        return ""

    url = base_url + "data/api/access/push"
    headers = {
        "token": token,
        "Cookie": cookie,
    }
    response = requests.post(url, json=json_obj, headers=headers,
                             timeout=REQUEST_TIMEOUT)
    print(response.text)
    if json_obj["@type"] == "相关机构":
        print(json_obj)
    time.sleep(0.1)  # small pause between consecutive pushes
    return ""


def _push_experiences(cell, person_id, spec, org_values, seen_orgs):
    """Push every well-formed entry of one experience cell.

    *cell* holds whitespace-separated entries, each made of "/"-separated
    fields.  For every entry with enough fields the organisation record and
    the experience record are pushed (in that order).  Organisation names not
    yet in *seen_orgs* are added to it and appended to *org_values*.

    :return: list of ``{"@value": entry}`` references for the person record.
    """
    _, type_name, min_fields, field_map = spec
    references = []
    for item in str(cell).split():
        cols = item.split('/')
        if len(cols) <= min_fields:
            continue

        org_name = cols[1]
        if org_name not in seen_orgs:
            seen_orgs.add(org_name)
            org_values.append({"@value": org_name})

        record = {
            "id": person_id,
            "@type": type_name,
            "@id": _md5_hex(item),
            "name": [{"@value": item}],
            "@markdel": "0",
            "@contentType": "struct",
        }
        for prop, index in field_map.items():
            record[prop] = [{"@value": cols[index]}]
        references.append({"@value": item})

        org_id = _md5_hex(org_name)  # the organisation name is hashed to form its id
        post_json({
            "@type": "相关机构",
            "@id": org_id,
            "@contentType": "struct",
            "@markdel": "0",
            "id": org_id,
            "name": [{"@value": org_name}],
        })
        post_json(record)
    return references


def main(path='xxx3.xlsx', sheet_name='Sheet1'):
    """Read the candidate sheet and push one set of records per row.

    :param path: Excel workbook to read.
    :param sheet_name: name of the sheet inside the workbook.
    """
    global pushlen

    df = pd.read_excel(path, sheet_name=sheet_name)
    df = df.fillna(value="")
    for column in _TEXT_COLUMNS:
        df[column] = df[column].apply(str)

    # Plain tuples give true positional access; integer keys on a
    # label-indexed Series are deprecated in recent pandas versions.
    for cells in df.itertuples(index=False, name=None):
        person_id = cells[0]
        org_list = []
        seen_orgs = set()

        edu_list = _push_experiences(
            cells[_EDUCATION[0]], person_id, _EDUCATION, org_list, seen_orgs)
        job_list = _push_experiences(
            cells[_WORK[0]], person_id, _WORK, org_list, seen_orgs)
        pro_list = _push_experiences(
            cells[_PROJECT[0]], person_id, _PROJECT, org_list, seen_orgs)
        tra_list = _push_experiences(
            cells[_TRAINING[0]], person_id, _TRAINING, org_list, seen_orgs)

        def val(index, cells=cells):
            """Wrap the cell at *index* in the API's value-list shape."""
            return [{"@value": cells[index]}]

        json_obj = {
            "@id": _md5_hex(person_id),  # entity id, shown as ID on the page
            "id": person_id,             # auto-increment id
            "@type": "人才特征demo",       # data category
            "name": val(1),              # entity name (disambiguation)
            "@markdel": '0',             # write / delete flag
            "@contentType": "struct",    # resource type
            "姓名": val(1),
            "性别": val(2),
            "出生年月": val(3),
            "婚姻状况": val(4),
            "特长爱好": val(5),
            "手机号码": val(6),
            "电子邮箱": val(7),
            "当前最高学历": val(8),
            "当前最高学历专业": val(9),
            "研究领域": val(10),
            "人才标签": val(11),
            "人才特点": val(12),
            "参加工作时间": val(13),
            "政治面貌": val(14),
            "当前所在城市": val(15),
            "当前行业": val(16),
            "当前职位": val(17),
            "当前年薪": val(18),
            "意向城市": val(19),
            "意向职位": val(20),
            "意向年薪": val(21),
            "教育经历": edu_list,
            "工作经历": job_list,
            "项目经历": pro_list,
            "语言能力": val(25),
            "专业证书": val(26),
            "技术职称": val(27),
            "入选人才": val(28),
            "知识产权": val(29),
            "获得荣誉及证明": val(30),
            "备注信息": val(31),
            "对报名岗位认识及工作设想": val(32),
            "自我评价及主要工作业绩": val(33),
            "培训和海外经历": tra_list,
            "当前公司": val(35),
            "毕业院校分类": val(36),
            "工作年限": val(37),
            "专业方向大类": val(38),
            "最高学历学校": val(39),
            "研究领域分类": val(40),
            "报名岗位": val(41),
            "年龄": val(42),
            "相关机构": org_list,
        }
        # NOTE: the current company (column 35) and the highest-degree school
        # (column 39) are not pushed as separate organisation records; the
        # code that did so was disabled in the original script.
        post_json(json_obj)
        pushlen += 1


if __name__ == '__main__':
    main()