|
@@ -2,12 +2,16 @@
|
|
|
# @Author: privacy
|
|
|
# @Date: 2022-07-07 13:12:17
|
|
|
# @Last Modified by: privacy
|
|
|
-# @Last Modified time: 2022-07-12 18:02:21
|
|
|
+# @Last Modified time: 2022-07-13 16:46:02
|
|
|
|
|
|
+# 内部人才市场简历模板
|
|
|
+from pprint import pprint
|
|
|
|
|
|
+import docx
|
|
|
from docx import Document
|
|
|
from docx.shared import Inches
|
|
|
|
|
|
+
|
|
|
# Location of the résumé template that the script parses by default.
path = "d:\\desktop\\内部人才市场简历模板.docx"

# Cell texts (with all whitespace removed) that are treated as field labels
# rather than field values.  "姓名" is listed twice on purpose-preserving
# grounds: the list is reproduced exactly as it was, including order.
keywords = [
    "姓名",
    "性别",
    "出生日期",
    "民族",
    "籍贯",
    "健康状况",
    "政治面貌",
    "参加工作时间",
    "外语水平",
    "专业技术资格(取得时间)",
    "计算机水平",
    "熟悉专业有何专长",
    "工作单位",
    "现任职务",
    "任职时间",
    "联系电话",
    "对报名岗位认识及工作设想",
    "意向地区",
    "意向岗位",
    "意向单位",
    "意向专业",
    "职业证书",
    "资格等级",
    "取得日期",
    "学校/培训机构",
    "专业",
    "起始时间",
    "毕业时间",
    "姓名",
    "职业",
    "与本人关系",
]
|
|
@@ -24,38 +28,106 @@ def parse_line(line):
|
|
|
key = None
|
|
|
return result
|
|
|
|
|
|
-doc = Document(path)
|
|
|
-lo = {}
|
|
|
-tables = doc.tables
|
|
|
-for _table in tables[:]:
|
|
|
- for i, row in enumerate(_table.rows[:]):
|
|
|
- row_content = []
|
|
|
- for cell in row.cells[:]:
|
|
|
- c = cell.text
|
|
|
- row_content.append(c)
|
|
|
- lo[len(lo.keys())] = row_content
|
|
|
-
|
|
|
-kwln = -1
|
|
|
-kwline = None
|
|
|
-for key in lo.keys():
|
|
|
- # pdb.set_trace()
|
|
|
- for val in lo[key]:# 通过全关键词,判断此行是否为关键词行
|
|
|
- if val and ''.join(val.split()) not in keywords:# 有非关键字元素,非关键词行,判断是否为关键词行元素
|
|
|
- # pdb.set_trace()
|
|
|
- for c in lo[key]:
|
|
|
+
|
|
|
# Header column that identifies a repeated section -> marker stored in the
# record.  Order matters: only the first matching column tags the record.
_SECTION_MARKERS = (
    ("学校/培训机构", "学习经历"),
    ("与本人关系", "家庭成员"),
    ("意向地区", "职业发展管理"),
    ("职业证书", "职业资格证书"),
)


def _read_table_rows(doc):
    """Return every table row of *doc* as a list of cell texts, in document order."""
    return [
        [cell.text for cell in row.cells]
        for table in doc.tables
        for row in table.rows
    ]


def parse_layout(path):
    """Extract the résumé fields from all tables of the .docx file at *path*.

    Each table row is classified by comparing its cell texts (with all
    whitespace removed) against the module-level ``keywords`` list:

    * header row -- no non-empty cell falls outside ``keywords``; the row is
      remembered as the column header for the rows that follow;
    * mixed row -- contains both labels and values; it is handed to
      ``parse_line`` and whatever that returns is added to the result;
    * value row -- contains no label at all; its cells are paired with the
      most recent header row to build one record dict.  The record also gets
      a ``marker: marker`` entry naming its section when the header contains
      one of the columns listed in ``_SECTION_MARKERS``.

    Args:
        path: Path of the .docx document to read.

    Returns:
        list: The items produced by ``parse_line`` and the record dicts, in
        the order the rows appear in the document.
    """
    result = []
    header = None  # whitespace-stripped cells of the most recent header row

    for row in _read_table_rows(Document(path)):
        stripped = ["".join(text.split()) for text in row]

        has_value = any(
            raw and norm not in keywords for raw, norm in zip(row, stripped)
        )
        if not has_value:
            # Only labels (or empty cells): this row is a header row.
            header = stripped
            continue

        has_label = any(
            raw and norm in keywords for raw, norm in zip(row, stripped)
        )
        if has_label:
            # Labels and values share the row: let parse_line split them.
            result.extend(parse_line(row))
            continue

        if header is None:
            # A value-only row before any header row (e.g. a title line)
            # cannot be labelled; skip it instead of failing on zip(None, ...).
            continue

        schema = {name: value for name, value in zip(header, row) if name}
        for column, marker in _SECTION_MARKERS:
            if column in schema:
                schema[marker] = marker
                break
        result.append(schema)

    return result
|
|
|
+
|
|
|
+
|
|
|
+# 格式化数据
|
|
|
def formatter(datalist):
    """Merge parsed résumé rows into one dict with normalised field names.

    Args:
        datalist: Iterable of dicts.  Two shapes are handled:

            * a dict with exactly one entry is a scalar field and is copied
              into the result as is (a later entry with the same key wins);
            * any other dict is a record of a repeated section.  It is
              expected to carry a marker entry whose value, with whitespace
              removed, equals its own key (e.g. ``"学习经历": "学习经历"``).
              The marker entry is dropped and the remaining entries are
              appended to the list stored under the marker key.  Dicts
              without such a marker are ignored.

    Returns:
        dict: Scalar fields and per-section record lists.  Keys found in the
        translation table below are renamed to their English names; all
        other keys are kept unchanged.

    The dicts in *datalist* are not modified.
    """
    result = {}

    for entry in datalist:
        if len(entry) == 1:
            result.update(entry)
            continue

        markers = [
            key
            for key, value in entry.items()
            if isinstance(value, str) and key == "".join(value.split())
        ]
        if not markers:
            continue

        # Build a fresh record so the caller's dict keeps its marker entry.
        record = {k: v for k, v in entry.items() if k not in markers}
        for marker in markers:
            if result.get(marker):
                result[marker].append(record)
            else:
                result[marker] = [record]

    field_names = {
        "姓名": "name",
        "性别": "gender",
        "邮箱地址": "email",
        "政治面貌": "politics",
        "联系电话": "mobile",
        "籍贯": "birthplace",
        "出生日期": "birth_time",
        "现任职务": "current_job",
        "所在城市": "living_city",
        "参加工作时间": "work_begin_time",
        "意向岗位": "intent_job",
        "熟悉专业有何专长": "skills",
    }
    for source_key, target_key in field_names.items():
        # Membership test rather than truthiness, so fields that are present
        # but empty are renamed too and the output keys stay consistent.
        if source_key in result:
            result[target_key] = result.pop(source_key)

    return result
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Parse the template at the module-level `path`, normalise the rows and
    # pretty-print the resulting dictionary.
    parsed_rows = parse_layout(path)
    pprint(formatter(parsed_rows))
|
|
|
+
|