소스 검색

modified: custom.py
modified: irafa.py
new file: logger.py
new file: resources/translate.json
modified: resume_parse.py

sprivacy 3 년 전
부모
커밋
a0afd8f21c
5개의 변경된 파일에 324개의 추가 그리고 182개의 삭제
  1. 61 60
      tools/custom.py
  2. 33 9
      tools/irafa.py
  3. 40 0
      tools/logger.py
  4. 85 0
      tools/resources/translate.json
  5. 105 113
      tools/resume_parse.py

+ 61 - 60
tools/custom.py

@@ -2,11 +2,12 @@
 # @Author: privacy
 # @Date:   2022-07-11 09:21:24
 # @Last Modified by:   privacy
-# @Last Modified time: 2022-07-13 15:31:50
+# @Last Modified time: 2022-07-14 11:00:31
 
 # 自定义模板
 
 import re
+import json
 import logging
 from pprint import pprint
 import requests
@@ -16,8 +17,8 @@ from docx.shared import Inches
 
 
 path = "d:\\desktop\\自定义.docx"
-# path = "d:\\desktop\\内部人才市场简历模板.docx"
 
+# 关键词字典
 keywords = [
 	"姓名",
 	"性别",
@@ -76,9 +77,19 @@ keywords = [
 	"其他情况说明",
 	"工作单位及职务",
 	"政治面貌",
-	"职业证书", "资格等级", "取得日期", "学校/培训机构", "专业", "起始时间", "毕业时间", "姓名", "职业", "与本人关系", "计算机水平"
+	"职业证书",
+    "资格等级",
+    "取得日期",
+    "学校/培训机构",
+    "专业",
+    "起始时间",
+    "毕业时间",
+    "职业",
+    "与本人关系",
+    "计算机水平"
 ]
 
+# 解析行内元素
 def parse_line(line):
     result = []
     key = None
@@ -92,50 +103,43 @@ def parse_line(line):
     return result
 
 
+# 解析文档布局
 def parse_layout(path):
     result = []
     doc = Document(path)
     lo = {}
-    tables = doc.tables
-    for _table in tables[:]:
+    for _table in doc.tables[:]:
         for i, row in enumerate(_table.rows[:]):
             row_content = []
             for cell in row.cells[:]:
                 c = cell.text
-                # row_content.append(c)
                 if c not in row_content:
                 	row_content.append(c)
             lo[len(lo.keys())] = row_content
 
-    kwln = -1
-    kwline = None
+    kwln = -1# 关键词行长度
+    kwline = None# 关键词行
     for key in lo.keys():
-        # pdb.set_trace()
         for val in lo[key]:# 通过全关键词,判断此行是否为关键词行
             if val and ''.join(val.split()) not in keywords:# 有非关键字元素,非关键词行,判断是否为关键词行元素
-                # pdb.set_trace()
-                perc = 0
+                perc = 0# 行内关键词数量
                 for c in lo[key]:
-                    # pdb.set_trace()
-                    if c and (''.join(c.split()) in keywords):
+                    if c and (''.join(c.split()) in keywords):# 找到此行有关键词
                         perc += 1
-                    if c and (''.join(c.split()) in keywords) and (perc > len(lo[key])/3):# 非关键词行元素
-                        # print(c)
-                        # print(perc)
-                        # print(lo[key])
-                        perc = 0
-                        result.extend(parse_line(lo[key]))
+                    if c and (''.join(c.split()) in keywords) and (perc > len(lo[key])/3):# 关键词数量超过1/3,判断此行非关键词行元素
+                        perc = 0# 清空行内关键词数
+                        result.extend(parse_line(lo[key]))# 添加并解析普通行级元素
                         break
                 else:# 关键词行元素
                     schema = dict()
-                    for key, val in zip(kwline, lo[key]):
+                    for key, val in zip(kwline, lo[key]):# 合并关键词行和行元素
                         if key:
                             schema[key] = val
                     result.append(schema)
                     break
                 break
         else:
-            # print("{}\t\t此行为关键词行".format(lo[key]))
+            # print("{}:此行为关键词行!".format(lo[key]))
             try:
                 kwline = [''.join(cell.split()) for cell in lo[key]]
             except Exception as e:
@@ -147,20 +151,20 @@ def parse_layout(path):
 # 格式化数据
 def formatter(datalist):
     result = dict()
-
     for d in datalist:
-        if len(d) == 1:
+        if len(d) == 1:# 普通键值对
             for key in d.keys():
                 result[key] = d[key]
-        else:
+        else:# 行级元素
             for k in list(d.keys()):
-                if k == "".join(d[k].split()):
+                if k == "".join(d[k].split()):# 行名
                     d.pop(k)
-                    if result.get(k):
+                    if result.get(k):# 多行元素合并
                         result[k].append(d)
                     else:
                         result[k] = [d]
 
+    ### 时间格式化
     if result.get("出生年月"):
         dates = re.findall(r'\d+' , result["出生年月"])
         if len(dates) == 1:
@@ -168,7 +172,7 @@ def formatter(datalist):
         elif len(dates) == 2:
             result["出生年月"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
         elif len(dates) == 3:
-            result["出生年月"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
+            result["出生年月"] = "{:4d}-{:02d}-{:02d}".format(int(dates[0]), int(dates[1]), int(dates[2]))
 
     if result.get("任职时间"):
         dates = re.findall(r'\d+' , result["任职时间"])
@@ -177,7 +181,7 @@ def formatter(datalist):
         elif len(dates) == 2:
             result["任职时间"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
         elif len(dates) == 3:
-            result["任职时间"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
+            result["任职时间"] = "{:4d}-{:02d}-{:02d}".format(int(dates[0]), int(dates[1]), int(dates[2]))
 
     if result.get("参加工作时间"):
         dates = re.findall(r'\d+' , result["参加工作时间"])
@@ -186,7 +190,7 @@ def formatter(datalist):
         elif len(dates) == 2:
             result["参加工作时间"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
         elif len(dates) == 3:
-            result["参加工作时间"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
+            result["参加工作时间"] = "{:4d}-{:02d}-{:02d}".format(int(dates[0]), int(dates[1]), int(dates[2]))
 
     if result.get("最高学历毕业院校及毕业时间"):
         dates = re.findall(r'\d+' , result["最高学历毕业院校及毕业时间"])
@@ -262,30 +266,20 @@ def formatter(datalist):
                 dates = re.findall(r'\d+' , fam["出生年月"])
                 if len(dates) == 2:
                     result["主要家庭成员及社会关系"][idx]["出生年月"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
-    normal = {
-        "姓名":"name",
-        "性别":"gender",
-        "邮箱地址":"email",
-        "政治面貌(加入时间)":"politics",
-        "联系电话":"mobile",
-        "籍贯":"birthplace",
-        "出生年月":"birth_time",
-        "现任职务":"current_job",
-        "所在城市":"living_city",
-        "参加工作时间":"work_begin_time",
-        "意向岗位":"intent_job",
-        "熟悉专业有何专长":"skills",
-    }
-    edunormal = {
-        "学校":"school_name",
-        "专业":"major",
-        "学历":"degree",
-        "是否全日制":"degree_type",
-    }
+
+    # 转译数据库字段名
+    with open("./resources/translate.json", "r", encoding="utf-8") as ff:
+        json_obj = json.load(ff)
+
+    normal = json_obj["base"]
+    edunormal = json_obj["tal_his_edu"]
+    family = json_obj["tal_family_social_relations"]
+
     for key in normal.keys():
         if result.get(key):
             result[normal[key]] = result[key]
             result.pop(key)
+
     for idx in range(len(result['学习经历'])):
         result['学习经历'][idx]['start_time'] = result['学习经历'][idx]["起止时间"].split("~")[0]
         result['学习经历'][idx]['end_time'] = result['学习经历'][idx]["起止时间"].split("~")[-1]
@@ -293,17 +287,24 @@ def formatter(datalist):
             if result['学习经历'][idx].get(key):
                 result['学习经历'][idx][edunormal[key]] = result['学习经历'][idx][key]
                 result['学习经历'][idx].pop(key)
-    url = "http://192.168.1.110:9999/talent/getResumeData"
-    session = requests.Session()
-    session.mount('http://', HTTPAdapter(max_retries = 3))
-    try:
-        headers = {
-            'contentType':'Application/json'
-        }
-        response = session.post(url=url, headers=headers, json={"ResumeData":result}, timeout=10)
-        print(response.text)
-    except Exception as e:
-        print(e)
+
+    for idx in range(len(result['主要家庭成员及社会关系'])):
+        for key in family.keys():
+            if result['主要家庭成员及社会关系'][idx].get(key):
+                result['主要家庭成员及社会关系'][idx][family[key]] = result['主要家庭成员及社会关系'][idx][key]
+                result['主要家庭成员及社会关系'][idx].pop(key)
+
+    # url = "http://192.168.1.110:9999/talent/getResumeData"
+    # session = requests.Session()
+    # session.mount('http://', HTTPAdapter(max_retries = 3))
+    # try:
+    #     headers = {
+    #         'contentType':'Application/json'
+    #     }
+    #     response = session.post(url=url, headers=headers, json={"ResumeData":result}, timeout=10)
+    #     print(response.text)
+    # except Exception as e:
+    #     print(e)
     return result
 
 

+ 33 - 9
tools/irafa.py

@@ -2,11 +2,11 @@
 # @Author: privacy
 # @Date:   2022-07-07 13:12:17
 # @Last Modified by:   privacy
-# @Last Modified time: 2022-07-13 16:46:02
+# @Last Modified time: 2022-07-14 09:39:42
 
 # 内部人才市场简历模板
 from pprint import pprint
-
+import re
 import docx
 from docx import Document
 from docx.shared import Inches
@@ -75,6 +75,25 @@ def parse_layout(path):
             # print("此行为关键词行")
             kwline = [''.join(cell.split()) for cell in lo[key]]
             kwln = len(lo[key])
+
+    job = {"工作经历":"工作经历"}
+    flag = None
+    for p in doc.paragraphs:
+        text = p.text.replace(":", ":")
+        if ":" in text:
+            text = re.sub(r'(\w+)\W{0,2}:', r'\n\1:', text)
+            for line in text.split("\n"):
+                if line.strip():
+                    i = line.split(":")
+                    if job.get(i[0].strip()):
+                        result.append(job)
+                        job = {"工作经历":"工作经历"}
+                    job[i[0].strip()] = i[1].strip()
+                    flag = i[0].strip()
+        elif flag == "工作描述":
+            job["工作描述"] += '\n' + text.strip()
+    else:
+        result.append(job)
     return result
 
 
@@ -119,13 +138,18 @@ def formatter(datalist):
         if result.get(key):
             result[normal[key]] = result[key]
             result.pop(key)
-    # for idx in range(len(result['学习经历'])):
-    #     result['学习经历'][idx]['start_time'] = result['学习经历'][idx]["起止时间"].split("~")[0]
-    #     result['学习经历'][idx]['end_time'] = result['学习经历'][idx]["起止时间"].split("~")[-1]
-    #     for key in edunormal.keys():
-    #         if result['学习经历'][idx].get(key):
-    #             result['学习经历'][idx][edunormal[key]] = result['学习经历'][idx][key]
-    #             result['学习经历'][idx].pop(key)
+
+    edunormal = {
+        "学校/培训机构":"school_name",
+        "专业":"major",
+        "起始时间":"start_time",
+        "毕业时间":"end_time"
+    }
+    for idx in range(len(result['学习经历'])):
+        for key in edunormal.keys():
+            if result['学习经历'][idx].get(key):
+                result['学习经历'][idx][edunormal[key]] = result['学习经历'][idx][key]
+                result['学习经历'][idx].pop(key)
     return result
 
 if __name__ == "__main__":

+ 40 - 0
tools/logger.py

@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# @Author: privacy
+# @Date:   2022-07-14 13:26:15
+# @Last Modified by:   privacy
+# @Last Modified time: 2022-07-14 13:27:46
+import logging
+
+class Logger:
+    def __init__(self, name: str, console_handler_level: str = logging.INFO, fmt: str = '%(asctime)s: %(name)s: %(levelname)s: %(filename)s: %(lineno)d: %(funcName)s: %(message)s'):
+        self.logger = logging.getLogger(name)
+        self.logger.setLevel(logging.INFO)
+        self.fmt = logging.Formatter(fmt)
+        self.set_console_handler(console_handler_level)
+
+    def set_console_handler(self, console_handler_level: str = logging.INFO) -> None:
+        ch = logging.StreamHandler()
+        ch.setLevel(console_handler_level)
+        ch.setFormatter(self.fmt)
+        self.logger.addHandler(ch)
+
+    def set_file_handler(self, filename: str, mode: str = "a", file_handler_level: str = logging.WARNING) -> None:
+        fh = logging.FileHandler(filename, mode=mode, encoding='utf-8')
+        fh.setLevel(file_handler_level)
+        fh.setFormatter(self.fmt)
+        self.logger.addHandler(fh)
+
+    def debug(self, msg):
+        self.logger.debug(msg)
+
+    def info(self, msg):
+        self.logger.info(msg)
+
+    def warning(self, msg):
+        self.logger.warning(msg)
+
+    def error(self, msg):
+        self.logger.error(msg)
+
+    def critical(self, msg):
+        self.logger.critical(msg)

+ 85 - 0
tools/resources/translate.json

@@ -0,0 +1,85 @@
+{
+    "base":{
+        "姓名":"name",
+        "性别":"gender",
+        "出生年月":"birth_time",
+        "出生日期":"birth_time",
+        "民族":"national",
+        "籍贯":"birthplace",
+        "户籍地":"household_register_address",
+        "参加工作时间":"work_begin_time",
+        "联系电话":"mobile",
+        "手机号码":"mobile",
+        "邮箱地址":"email",
+        "现任职务":"current_job",
+        "提职时间":"promotion_time",
+        "所在城市":"living_city",
+        "意向城市":"意向城市",
+        "意向岗位":"intent_job",
+        "期望职业":"intent_job",
+        "目前年薪":"current_salary_yearl",
+        "政治面貌(加入时间)":"politics",
+        "政治面貌":"politics",
+        "熟悉专业有何专长":"skills"
+    },
+    "tal_his_edu":{
+        "开始时间":"start_time",
+        "毕业时间":"end_time",
+        "学校":"school_name",
+        "专业":"major",
+        "学历":"degree",
+        "学位":"degree_in",
+        "研究方向":"research_direction",
+        "是否全日制":"is_full_time"
+    },
+    "tal_his_job":{
+        "工作单位":"company_name",
+        "职位":"job_name",
+        "开始时间":"start_time",
+        "结束时间":"end_time",
+        "工作描述":"job_desc"
+    },
+    "tal_his_project":{
+        "项目名":"project_name",
+        "公司名":"company_name",
+        "职位":"project_office",
+        "开始时间":"start_time",
+        "结束时间":"end_time",
+        "项目职责":"project_duty",
+        "业绩":"project_performance"
+    },
+    "tal_language":{
+        "语言":"lan_name",
+        "熟练度":"proficiency"
+    },
+    "tal_vocational_qualification_certificate":{
+        "证书名称":"vocational_qualification_certificate_name",
+        "证书":"vocational_qualification_certificate_name",
+        "获得时间":"vocational_certificate_obtaining_time"
+    },
+    "tal_professional_tech_certificate":{
+        "技术资格证明":"professional_tech_certificate_name",
+        "获得时间":"professional_certificate_obtaining_time"
+    },
+    "tal_training_institutions":{
+        "学校/培训机构":"school_training_institutions",
+        "专业":"major",
+        "开始时间":"start_time",
+        "结束时间":"end_time"
+    },
+    "tal_rewards_punishments":{
+        "项目名称":"name",
+        "项目单位":"rewards_punishments_unit",
+        "时间":"rewards_punishments_time"
+    },
+    "tal_family_social_relations":{
+        "称谓":"appellation",
+        "姓名":"name",
+        "出生年月":"birth_time",
+        "政治面貌":"politics",
+        "工作单位":"work_units",
+        "职务":"position",
+        "工作单位及职务":"position"
+    },
+    "其他":"intro"
+}

+ 105 - 113
tools/resume_parse.py

@@ -8,6 +8,7 @@ import sys
 import re
 import json
 import time
+import platform
 from os import walk
 import subprocess
 import rarfile
@@ -33,62 +34,50 @@ from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
 import pdfplumber
 from paddlenlp import Taskflow
 
-class Logger:
-    def __init__(self, name: str, console_handler_level: str = logging.INFO, fmt: str = '%(asctime)s: %(name)s: %(levelname)s: %(filename)s: %(funcName)s: %(message)s'):
-        self.logger = logging.getLogger(name)
-        self.logger.setLevel(logging.INFO)
-        self.fmt = logging.Formatter(fmt)
-        self.set_console_handler(console_handler_level)
 
-    def set_console_handler(self, console_handler_level: str = logging.INFO) -> None:
-        ch = logging.StreamHandler()
-        ch.setLevel(console_handler_level)
-        ch.setFormatter(self.fmt)
-        self.logger.addHandler(ch)
+from logger import Logger
+logger = Logger("resume_parse")
+logger.set_file_handler(filename='journal.log')
 
-    def set_file_handler(self, filename: str, mode: str = "a", file_handler_level: str = logging.WARNING) -> None:
-        fh = logging.FileHandler(filename, mode=mode, encoding='utf-8')
-        fh.setLevel(file_handler_level)
-        fh.setFormatter(self.fmt)
-        self.logger.addHandler(fh)
 
-    def debug(self, msg):
-        self.logger.debug(msg)
+from rich.console import Console
+console = Console()
 
-    def info(self, msg):
-        self.logger.info(msg)
 
-    def warning(self, msg):
-        self.logger.warning(msg)
+global ner, ner_tag, base_info_ie, prize_ie, cet_ie, pro_ie, block, block_rev
 
-    def error(self, msg):
-        self.logger.error(msg)
 
-    def critical(self, msg):
-        self.logger.critical(msg)
+if not locals().get("ner"):
+    ner = Taskflow("ner", mode='fast')
+if not locals().get("ner_tag"):
+    ner_tag = Taskflow("ner")
+if not locals().get("base_info_ie"):
+    base_info_ie = Taskflow('information_extraction', schema=["姓名","性别","婚姻状况","邮箱地址","政治面貌","手机号码","籍贯","出生日期","现任职务","参加工作时间","英语水平","计算机水平","工作年限","当前单位","所在城市","职业资格"])
+if not locals().get("prize_ie"):
+    prize_ie = Taskflow('information_extraction', schema=["时间", "奖项"])
+if not locals().get("cet_ie"):
+    cet_ie = Taskflow('information_extraction', schema=["时间","证书"])
+if not locals().get("pro_ie"):
+    pro_ie = Taskflow("information_extraction", schema=["时间","项目名称","机构","职位"], task_path='./model_100')
 
-logger = Logger("resume_parse")
-logger.set_file_handler(filename='data.log')
+if not locals().get("block"):
+    with open("resources/SegmentName.json", "r", encoding="utf-8") as fp:
+        block = json.load(fp)
 
-from rich.console import Console
-console = Console()
+if not locals().get("block_rev"):
+    block_rev = {1:"基本信息", 2:"求职意向", 3:"教育经历", 4:"工作经历", 5:"项目经历", 6:"专业技能", 7:"intro", 8:"兴趣爱好", 9:"语言能力", 10:"证书", 11:"获奖情况", 12:"培训经历", 13:"家庭成员", "other":"其他"}
 
 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, File, UploadFile
 app = FastAPI()
 
-ner = Taskflow("ner", mode='fast')
-ner_tag = Taskflow("ner")
-base_info_ie = Taskflow('information_extraction', schema=["姓名","性别","婚姻状况","电子邮箱","政治面貌","手机号码","籍贯","出生日期","现任职务","参加工作时间","英语水平","计算机水平","工作年限","当前单位","所在城市","职业资格"])
-prize_ie = Taskflow('information_extraction', schema=["时间", "奖项"])
-cet_ie = Taskflow('information_extraction', schema=["时间","证书"])
-pro_ie = Taskflow("information_extraction", schema=["时间","项目名称","机构","职位"], task_path='./model_100')
-global block, block_rev
-
-with open("resources/SegmentName.json", "r", encoding="utf-8") as fp:
-    block = json.load(fp)
-block_rev = {1:"基本信息", 2:"求职意向", 3:"教育经历", 4:"工作经历", 5:"项目经历", 6:"专业技能", 7:"intro", 8:"兴趣爱好", 9:"语言能力", 10:"证书", 11:"获奖情况", 12:"培训经历", 13:"家庭成员", "other":"其他"}
 
+if not os.path.exists("./uploads"):
+    os.mkdir("./uploads")
+if not os.path.exists("./pdf"):
+    os.mkdir("./pdf")
+if not os.path.exists("./cache"):
+    os.mkdir("./cache")
 
 
 # 基本信息(旧版)
@@ -165,22 +154,6 @@ def get_base_info(lines):
             rst["参加工作时间"][0]["text"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
         elif len(dates) == 3:
             rst["参加工作时间"][0]["text"] = "{:4d}-{:02d}-01".format(int(dates[0]), int(dates[1]))
-    normal = {
-        "姓名":"name",
-        "性别":"gender",
-        "电子邮箱":"email",
-        "政治面貌":"politics",
-        "手机号码":"mobile",
-        "籍贯":"birthplace",
-        "出生日期":"birth_time",
-        "现任职务":"current_job",
-        "所在城市":"living_city",
-        "参加工作时间":"work_begin_time",
-    }
-    for key in normal.keys():
-        if rst.get(key):
-            rst[normal[key]] = rst[key]
-            del rst[key]
     return {key:rst[key][0]["text"] for key in rst.keys()}
 
 
@@ -961,19 +934,19 @@ def get_lag_list(lines):
     lan_list = []
     re_lan = re.compile(r'(\w+[语话])')
     re_lev = re.compile(r'([公共级四专八]+)')
-    lag_dict = {'lan_name':'', 'level':""}
+    lag_dict = {'语言':'', '熟练度':""}
     for l in lines:
         if not l.strip():
             continue
         lan_name = re.search(re_lan, l)
         lag_lev = re.search(re_lev, l)
         if lag_lev and lag_lev.group(1):
-            lag_dict["level"] = lag_lev.group(1)
+            lag_dict["熟练度"] = lag_lev.group(1)
         if lan_name and lan_name.group(1):
-            if lag_dict["lan_name"]:
+            if lag_dict["语言"]:
                 lan_list.append(lag_dict)
-                lag_dict = {'lan_name':'', 'level':""}
-            lag_dict['lan_name'] = lan_name.group(1)
+                lag_dict = {'语言':'', '熟练度':""}
+            lag_dict['语言'] = lan_name.group(1)
     return lan_list
 
 
@@ -1151,7 +1124,7 @@ def parse_txt(path, save_dir):
     page = {1: []}
     if len(data.split("\n")) <= 2:
         for line in data.split("\n"):
-            line = line.replace("\xa0", "").replace("【","").replace("】","").replace("教育/培训","教育经历").strip()
+            line = line.replace("\xa0", "").replace("【","").replace("】","").replace("教育/培训","教育经历").replace("·","").strip()
             for word in line.split():
                 if word in block.keys():
                     chun = block[word]
@@ -1160,7 +1133,7 @@ def parse_txt(path, save_dir):
                     page[chun].append(word)
     else:
         for line in data.split("\n"):
-            line = line.replace("\xa0", "").replace("【","").replace("】","").replace("教育/培训","教育经历")
+            line = line.replace("\xa0", "").replace("【","").replace("】","").replace("教育/培训","教育经历").replace("·","")
             regex = re.compile(u'[\u3000]+',re.UNICODE)
             line = regex.sub('', line.strip())
             if line in block.keys():
@@ -1169,14 +1142,14 @@ def parse_txt(path, save_dir):
             elif line:
                 page[chun].append(line)
 
-    result_data = []
+    result_data = dict()
     for key in page.keys():
         for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
             if key == index:
-                result_data.append({block_rev[index]:func(page[index])})
+                result_data[block_rev[index]] = func(page[index])
     filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
     with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-            json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+            json.dump(result_data, fp, indent=4, ensure_ascii=False)
 
 
 # 纯文本 word 解析
@@ -1198,14 +1171,14 @@ def read_from_word(doc, path, save_dir):
         elif line:
             page[chun].append(line)
 
-    result_data = []
+    result_data = dict()
     for key in page.keys():
         for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
             if key == index:
-                result_data.append({block_rev[index]:func(page[index])})
+                result_data[block_rev[index]] = func(page[index])
     filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
     with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-            json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+            json.dump(result_data, fp, indent=4, ensure_ascii=False)
 
 
 # 提取 word 表格(已完成)
@@ -1267,14 +1240,14 @@ def check_word(path, save_dir):
                 line = line.replace(k+"\n", k+":")
             page[chun].extend(line.split())
 
-    result_data = []
+    result_data = dict()
     for key in page.keys():
         for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
             if key == index:
-                result_data.append({block_rev[index]:func(page[index])})
+                result_data[block_rev[index]] = func(page[index])
     filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
     with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-            json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+            json.dump(result_data, fp, indent=4, ensure_ascii=False)
 
 
 # pdf 解析句子(已完成)
@@ -1340,15 +1313,15 @@ def read_from_pdf(path, save_dir):
                     result[key].extend(r[key])
                 else:
                     result[key] = r[key]
-        result_data = []
+        result_data = dict()
         for key in result.keys():
             for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
                 if key == index:
-                    result_data.append({block_rev[index]:func(result[index])})
+                    result_data[block_rev[index]] = func(result[index])
 
         filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
         with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-            json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+            json.dump(result_data, fp, indent=4, ensure_ascii=False)
 
 
 # pdf 表格解析 (已完成)
@@ -1395,14 +1368,14 @@ def parse_table_from_pdf(path, save_dir):
             line = line.replace(k+"\n", k+":")
         page[chun].extend(line.split())
 
-    result_data = []
+    result_data = dict()
     for key in page.keys():
         for index, func in zip([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [get_base_info, get_job_intention, get_edu_list, get_job_list, get_pro_list, get_other_list, get_other_list, get_other_list, get_lag_list, get_cet_list, get_prize_list, get_cultivate_list]):
             if key == index:
-                result_data.append({block_rev[index]:func(page[index])})
+                result_data[block_rev[index]] = func(page[index])
     filename = os.path.splitext(os.path.split(path)[-1])[0]+'.json'
     with open(os.path.join(save_dir, filename), 'w', encoding="utf-8") as fp:
-            json.dump({"result":result_data}, fp, indent=4, ensure_ascii=False)
+            json.dump(result_data, fp, indent=4, ensure_ascii=False)
 
 
 # 检测 pdf 格式 (已完成)
@@ -1437,6 +1410,54 @@ def decode_path(path):
     return path_name
 
 
+# 结果返回
+def push_back(tempdir):
+    for file in os.listdir(tempdir):
+        filename = os.path.join(tempdir, file)
+        with open(filename, "r", encoding="utf-8") as ff:
+            rst = json.load(ff)
+
+        with open("./resources/translate.json", "r", encoding="utf-8") as ft:
+            json_obj = json.load(ft)
+
+        for key in json_obj["base"].keys():
+            if rst["result"].get("基本信息"):
+                if rst["result"]["基本信息"].get(key):
+                    rst["result"]["基本信息"][json_obj["base"][key]] = rst["result"]["基本信息"][key]
+                    del rst["result"]["基本信息"][key]
+            if rst["result"].get("求职意向"):
+                if rst["result"]["求职意向"].get(key):
+                    rst["result"]["求职意向"][json_obj["base"][key]] = rst["result"]["求职意向"][key]
+                    del rst["result"]["求职意向"][key]
+        
+        for key in json_obj["tal_vocational_qualification_certificate"].keys():
+            if rst["result"].get("证书"):
+                for idx in range(len(rst["result"]["证书"])):
+                    if rst["result"]["证书"][idx].get(key):
+                        rst["result"]["证书"][idx][json_obj["tal_vocational_qualification_certificate"][key]] = rst["result"]["证书"][idx][key]
+                        del rst["result"]["证书"][idx][key]
+        
+        for key in json_obj["tal_language"].keys():
+            if rst["result"].get("语言能力"):
+                for idx in range(len(rst["result"]["语言能力"])):
+                    if rst["result"]["语言能力"][idx].get(key):
+                        rst["result"]["语言能力"][idx][json_obj["tal_language"][key]] = rst["result"]["语言能力"][idx][key]
+                        del rst["result"]["语言能力"][idx][key]
+
+        # url = "http://192.168.1.110:9999/talent/getResumeData"
+        # session = requests.Session()
+        # session.mount('http://', HTTPAdapter(max_retries = 3))
+        # try:
+        #     headers = {
+        #         'contentType':'Application/json'
+        #     }
+        #     response = session.post(url=url, headers=headers, json={"ResumeData":rst}, timeout=10)
+        #     print(response.text)
+        # except Exception as e:
+        #     print(e)
+        console.print(rst, style="red", justify="left")
+
+
 # 检测传入格式(已完成)
 def detection_type(path, system):
     tempdir = time.strftime("%Y_%m_%dT%H_%M_%S")
@@ -1528,23 +1549,9 @@ def detection_type(path, system):
             # 传入为 txt
             elif os.path.isfile(filename) and filename.endswith('.txt'):
                 parse_txt(filename, save_dir=tempdir)
-    # 结果返回
-    for file in os.listdir(tempdir):
-        filename = os.path.join(tempdir, file)
-        with open(filename, "r", encoding="utf-8") as ff:
-            rst = json.load(ff)
-        url = "http://192.168.1.110:9999/talent/getResumeData"
-        session = requests.Session()
-        session.mount('http://', HTTPAdapter(max_retries = 3))
-        try:
-            headers = {
-                'contentType':'Application/json'
-            }
-            response = session.post(url=url, headers=headers, json={"ResumeData":rst}, timeout=10)
-            print(response.text)
-        except Exception as e:
-            print(e)
-        console.print(rst, style="red", justify="left")
+
+        push_back(tempdir)
+
 
 
 @app.post("/resume_parse")
@@ -1556,24 +1563,9 @@ async def file_upload(background_tasks: BackgroundTasks, file: UploadFile = File
     res = await file.read()
     with open('./uploads/' + file.filename, "wb") as f:
         f.write(res)
-    background_tasks.add_task(detection_type, './uploads/' + file.filename, system)
+    background_tasks.add_task(detection_type, './uploads/' + file.filename, platform.system())
     return {"errno": 0, "msg": "Upload Success"}
 
 
 if __name__ == '__main__':
-    import platform
-    system = platform.system()
-    if (system == "Windows"):
-        logger.info("Windows")
-    elif (system == "Linux"):
-        logger.info("Linux")
-    else:
-        logger.error("Unnot support this system")
-    if not os.path.exists("./uploads"):
-        os.mkdir("./uploads")
-    if not os.path.exists("./pdf"):
-        os.mkdir("./pdf")
-    if not os.path.exists("./cache"):
-        os.mkdir("./cache")
- 
-    uvicorn.run(app=app, host="0.0.0.0", port=8320)
+    uvicorn.run(app="resume_parse:app", host="0.0.0.0", port=8320, reload=True, log_level="info")