@@ -8,6 +8,7 @@ import sys
import re
import json
import time
+import platform
from os import walk
import subprocess
import rarfile
@@ -33,62 +34,50 @@ from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
import pdfplumber
from paddlenlp import Taskflow

-class Logger:
-    def __init__(self, name: str, console_handler_level: str = logging.INFO, fmt: str = '%(asctime)s: %(name)s: %(levelname)s: %(filename)s: %(funcName)s: %(message)s'):
-        self.logger = logging.getLogger(name)
-        self.logger.setLevel(logging.INFO)
-        self.fmt = logging.Formatter(fmt)
-        self.set_console_handler(console_handler_level)
-
-    def set_console_handler(self, console_handler_level: str = logging.INFO) -> None:
-        ch = logging.StreamHandler()
-        ch.setLevel(console_handler_level)
-        ch.setFormatter(self.fmt)
-        self.logger.addHandler(ch)
+from logger import Logger
+logger = Logger("resume_parse")
+logger.set_file_handler(filename='journal.log')

-    def set_file_handler(self, filename: str, mode: str = "a", file_handler_level: str = logging.WARNING) -> None:
-        fh = logging.FileHandler(filename, mode=mode, encoding='utf-8')
-        fh.setLevel(file_handler_level)
-        fh.setFormatter(self.fmt)
-        self.logger.addHandler(fh)
-
-    def debug(self, msg):
-        self.logger.debug(msg)
+from rich.console import Console
+console = Console()

-    def info(self, msg):
-        self.logger.info(msg)
-
-    def warning(self, msg):
-        self.logger.warning(msg)
+global ner, ner_tag, base_info_ie, prize_ie, cet_ie, pro_ie, block, block_rev

-    def error(self, msg):
-        self.logger.error(msg)
-
-    def critical(self, msg):
-        self.logger.critical(msg)
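+# NOTE: building a Taskflow pipeline is expensive, so each one is created only
+# if its name is not already bound in this interpreter (e.g. across reloads).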
+if not locals().get("ner"):
+    ner = Taskflow("ner", mode='fast')
+if not locals().get("ner_tag"):
+    ner_tag = Taskflow("ner")
+if not locals().get("base_info_ie"):
+    base_info_ie = Taskflow('information_extraction', schema=["姓名","性别","婚姻状况","邮箱地址","政治面貌","手机号码","籍贯","出生日期","现任职务","参加工作时间","英语水平","计算机水平","工作年限","当前单位","所在城市","职业资格"])
+if not locals().get("prize_ie"):
+    prize_ie = Taskflow('information_extraction', schema=["时间", "奖项"])
+if not locals().get("cet_ie"):
+    cet_ie = Taskflow('information_extraction', schema=["时间","证书"])
+if not locals().get("pro_ie"):
+    pro_ie = Taskflow("information_extraction", schema=["时间","项目名称","机构","职位"], task_path='./model_100')

-logger = Logger("resume_parse")
-logger.set_file_handler(filename='data.log')

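+# SegmentName.json maps the section headings found in resumes to the numeric
+# block ids that block_rev translates back to canonical section names.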
+if not locals().get("block"):
|
|
|
|
+ with open("resources/SegmentName.json", "r", encoding="utf-8") as fp:
|
|
|
|
+ block = json.load(fp)
|
|
|
|
|
|
-from rich.console import Console
-console = Console()

+if not locals().get("block_rev"):
|
|
|
|
+ block_rev = {1:"基本信息", 2:"求职意向", 3:"教育经历", 4:"工作经历", 5:"项目经历", 6:"专业技能", 7:"intro", 8:"兴趣爱好", 9:"语言能力", 10:"证书", 11:"获奖情况", 12:"培训经历", 13:"家庭成员", "other":"其他"}
|
|
|
|
|
|
import uvicorn
from fastapi import BackgroundTasks, FastAPI, File, UploadFile
app = FastAPI()

-ner = Taskflow("ner", mode='fast')
-ner_tag = Taskflow("ner")
-base_info_ie = Taskflow('information_extraction', schema=["姓名","性别","婚姻状况","电子邮箱","政治面貌","手机号码","籍贯","出生日期","现任职务","参加工作时间","英语水平","计算机水平","工作年限","当前单位","所在城市","职业资格"])
-prize_ie = Taskflow('information_extraction', schema=["时间", "奖项"])
-cet_ie = Taskflow('information_extraction', schema=["时间","证书"])
-pro_ie = Taskflow("information_extraction", schema=["时间","项目名称","机构","职位"], task_path='./model_100')
-global block, block_rev
-
-with open("resources/SegmentName.json", "r", encoding="utf-8") as fp:
-    block = json.load(fp)
-block_rev = {1:"基本信息", 2:"求职意向", 3:"教育经历", 4:"工作经历", 5:"项目经历", 6:"专业技能", 7:"intro", 8:"兴趣爱好", 9:"语言能力", 10:"证书", 11:"获奖情况", 12:"培训经历", 13:"家庭成员", "other":"其他"}

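+# Create the working directories at import time so the upload endpoint and
+# background parsing tasks can rely on them.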
+if not os.path.exists("./uploads"):
+    os.mkdir("./uploads")
+if not os.path.exists("./pdf"):
+    os.mkdir("./pdf")
+if not os.path.exists("./cache"):
+    os.mkdir("./cache")

# Basic information (legacy)
@@ -165,22 +154,6 @@ def get_base_info(lines):
        rst["参加工作时间"][0]["text"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
    elif len(dates) == 3:
        rst["参加工作时间"][0]["text"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
-    normal = {
-        "姓名":"name",
-        "性别":"gender",
-        "电子邮箱":"email",
-        "政治面貌":"politics",
-        "手机号码":"mobile",
-        "籍贯":"birthplace",
-        "出生日期":"birth_time",
-        "现任职务":"current_job",
-        "所在城市":"living_city",
-        "参加工作时间":"work_begin_time",
-    }
-    for key in normal.keys():
-        if rst.get(key):
-            rst[normal[key]] = rst[key]
-            del rst[key]
return {key:rst[key][0]["text"] for key in rst.keys()}
|
|
return {key:rst[key][0]["text"] for key in rst.keys()}
|
|
|
|
|
|
|
|
|
|
@@ -961,19 +934,19 @@ def get_lag_list(lines):
    lan_list = []
    re_lan = re.compile(r'(\w+[语话])')
    re_lev = re.compile(r'([公共级四专八]+)')
-    lag_dict = {'lan_name':'', 'level':""}
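+    # Keys are now the Chinese field names; push_back() later maps them onto
+    # the downstream schema via the "tal_language" table in translate.json.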
+    lag_dict = {'语言':'', '熟练度':""}
    for l in lines:
        if not l.strip():
            continue
        lan_name = re.search(re_lan, l)
        lag_lev = re.search(re_lev, l)
        if lag_lev and lag_lev.group(1):
-            lag_dict["level"] = lag_lev.group(1)
+            lag_dict["熟练度"] = lag_lev.group(1)
        if lan_name and lan_name.group(1):
-            if lag_dict["lan_name"]:
+            if lag_dict["语言"]:
                lan_list.append(lag_dict)
-                lag_dict = {'lan_name':'', 'level':""}
-            lag_dict['lan_name'] = lan_name.group(1)
+                lag_dict = {'语言':'', '熟练度':""}
+            lag_dict['语言'] = lan_name.group(1)
    return lan_list

@@ -1151,7 +1124,7 @@ def parse_txt(path, save_dir):
    page = {1: []}
    if len(data.split("\n")) <= 2:
        for line in data.split("\n"):
-            line = line.replace("\xa0", "").replace("【","").replace("】","").replace("教育/培训","教育经历").strip()
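+            # additionally drop "·" bullet characters before splitting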
+            line = line.replace("\xa0", "").replace("【","").replace("】","").replace("教育/培训","教育经历").replace("·","").strip()
            for word in line.split():
                if word in block.keys():
                    chun = block[word]
@@ -1160,7 +1133,7 @@ def parse_txt(path, save_dir):
                    page[chun].append(word)
    else:
        for line in data.split("\n"):
-            line = line.replace("\xa0", "").replace("【","").replace("】","").replace("教育/培训","教育经历")
+            line = line.replace("\xa0", "").replace("【","").replace("】","").replace("教育/培训","教育经历").replace("·","")
            regex = re.compile(u'[\u3000]+',re.UNICODE)
            line = regex.sub('', line.strip())
            if line in block.keys():
@@ -1169,14 +1142,14 @@ def parse_txt(path, save_dir):
            elif line:
                page[chun].append(line)

-    result_data = []
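+    # Collect results keyed by section name rather than as a list of single-key
+    # dicts, so the saved JSON is a flat object that push_back() can index directly.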
+    result_data = dict()
    for key in page.keys():
        for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
            if key == index:
-                result_data.append({block_rev[index]:func(page[index])})
+                result_data[block_rev[index]] = func(page[index])
    filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
    with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-        json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+        json.dump(result_data, fp, indent=4, ensure_ascii=False)

# Plain-text Word parsing
@@ -1198,14 +1171,14 @@ def read_from_word(doc, path, save_dir):
            elif line:
                page[chun].append(line)

-    result_data = []
+    result_data = dict()
    for key in page.keys():
        for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
            if key == index:
-                result_data.append({block_rev[index]:func(page[index])})
+                result_data[block_rev[index]] = func(page[index])
    filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
    with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-        json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+        json.dump(result_data, fp, indent=4, ensure_ascii=False)

# Extract Word tables (done)
@@ -1267,14 +1240,14 @@ def check_word(path, save_dir):
                line = line.replace(k+"\n", k+":")
            page[chun].extend(line.split())

-    result_data = []
+    result_data = dict()
    for key in page.keys():
        for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
            if key == index:
-                result_data.append({block_rev[index]:func(page[index])})
+                result_data[block_rev[index]] = func(page[index])
    filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
    with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-        json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+        json.dump(result_data, fp, indent=4, ensure_ascii=False)

# Parse PDF sentences (done)
@@ -1340,15 +1313,15 @@ def read_from_pdf(path, save_dir):
                result[key].extend(r[key])
            else:
                result[key] = r[key]
-    result_data = []
+    result_data = dict()
    for key in result.keys():
        for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
            if key == index:
-                result_data.append({block_rev[index]:func(result[index])})
+                result_data[block_rev[index]] = func(result[index])

    filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
    with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-        json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+        json.dump(result_data, fp, indent=4, ensure_ascii=False)

# PDF table parsing (done)
@@ -1395,14 +1368,14 @@ def parse_table_from_pdf(path, save_dir):
                line = line.replace(k+"\n", k+":")
            page[chun].extend(line.split())

-    result_data = []
+    result_data = dict()
    for key in page.keys():
        for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
            if key == index:
-                result_data.append({block_rev[index]:func(page[index])})
+                result_data[block_rev[index]] = func(page[index])
    filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
    with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-        json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+        json.dump(result_data, fp, indent=4, ensure_ascii=False)

# Detect PDF format (done)
@@ -1437,6 +1410,54 @@ def decode_path(path):
    return path_name


+# Push the parsed results back
+def push_back(tempdir):
+    for file in os.listdir(tempdir):
+        filename = os.path.join(tempdir, file)
+        with open(filename, "r", encoding="utf-8") as ff:
+            rst = json.load(ff)
+
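+        # translate.json maps the parsed (Chinese) field names onto the
+        # downstream schema's keys, grouped per target table.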
+        with open("./resources/translate.json", "r", encoding="utf-8") as ft:
+            json_obj = json.load(ft)
+
+        for key in json_obj["base"].keys():
+            if rst.get("基本信息"):
+                if rst["基本信息"].get(key):
+                    rst["基本信息"][json_obj["base"][key]] = rst["基本信息"][key]
+                    del rst["基本信息"][key]
+            if rst.get("求职意向"):
+                if rst["求职意向"].get(key):
+                    rst["求职意向"][json_obj["base"][key]] = rst["求职意向"][key]
+                    del rst["求职意向"][key]
+
+        for key in json_obj["tal_vocational_qualification_certificate"].keys():
+            if rst.get("证书"):
+                for idx in range(len(rst["证书"])):
+                    if rst["证书"][idx].get(key):
+                        rst["证书"][idx][json_obj["tal_vocational_qualification_certificate"][key]] = rst["证书"][idx][key]
+                        del rst["证书"][idx][key]
+
+        for key in json_obj["tal_language"].keys():
+            if rst.get("语言能力"):
+                for idx in range(len(rst["语言能力"])):
+                    if rst["语言能力"][idx].get(key):
+                        rst["语言能力"][idx][json_obj["tal_language"][key]] = rst["语言能力"][idx][key]
+                        del rst["语言能力"][idx][key]
+
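+        # Delivery to the talent service is disabled for now; the translated
+        # result is only echoed to the console below.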
+        # url = "http://192.168.1.110:9999/talent/getResumeData"
+        # session = requests.Session()
+        # session.mount('http://', HTTPAdapter(max_retries = 3))
+        # try:
+        #     headers = {
+        #         'contentType':'Application/json'
+        #     }
+        #     response = session.post(url=url, headers=headers, json={"ResumeData":rst}, timeout=10)
+        #     print(response.text)
+        # except Exception as e:
+        #     print(e)
+        console.print(rst, style="red", justify="left")
+
+

# Detect the input format (done)
def detection_type(path, system):
    tempdir = time.strftime("%Y_%m_%dT%H_%M_%S")
@@ -1528,23 +1549,9 @@ def detection_type(path, system):
    # input is a .txt file
    elif os.path.isfile(filename) and filename.endswith('.txt'):
        parse_txt(filename, save_dir=tempdir)
-    # Push the results back
-    for file in os.listdir(tempdir):
-        filename = os.path.join(tempdir, file)
-        with open(filename, "r", encoding="utf-8") as ff:
-            rst = json.load(ff)
-        url = "http://192.168.1.110:9999/talent/getResumeData"
-        session = requests.Session()
-        session.mount('http://', HTTPAdapter(max_retries = 3))
-        try:
-            headers = {
-                'contentType':'Application/json'
-            }
-            response = session.post(url=url, headers=headers, json={"ResumeData":rst}, timeout=10)
-            print(response.text)
-        except Exception as e:
-            print(e)
-        console.print(rst, style="red", justify="left")
+
+    push_back(tempdir)
+

@app.post("/resume_parse")
|
|
@app.post("/resume_parse")
|
|
@@ -1556,24 +1563,9 @@ async def file_upload(background_tasks: BackgroundTasks, file: UploadFile = File
|
|
res = await file.read()
|
|
res = await file.read()
|
|
with open('./uploads/' + file.filename, "wb") as f:
|
|
with open('./uploads/' + file.filename, "wb") as f:
|
|
f.write(res)
|
|
f.write(res)
|
|
-    background_tasks.add_task(detection_type, './uploads/' + file.filename, system)
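+    # Determine the host OS per request; previously `system` was only set
+    # when the module ran under __main__.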
+    background_tasks.add_task(detection_type, './uploads/' + file.filename, platform.system())
    return {"errno": 0, "msg": "Upload Success"}


if __name__ == '__main__':
-    import platform
-    system = platform.system()
-    if (system == "Windows"):
-        logger.info("Windows")
-    elif (system == "Linux"):
-        logger.info("Linux")
-    else:
-        logger.error("Unnot support this system")
-    if not os.path.exists("./uploads"):
-        os.mkdir("./uploads")
-    if not os.path.exists("./pdf"):
-        os.mkdir("./pdf")
-    if not os.path.exists("./cache"):
-        os.mkdir("./cache")
-
-    uvicorn.run(app=app, host="0.0.0.0", port=8320)
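+    # reload=True requires passing the app as an import string ("module:attr")
+    # rather than the app object itself.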
+    uvicorn.run(app="resume_parse:app", host="0.0.0.0", port=8320, reload=True, log_level="info")