|
@@ -1190,7 +1190,15 @@ def detection_type(path, system):
|
|
# 传入为 doc
|
|
# 传入为 doc
|
|
logging.info(filename)
|
|
logging.info(filename)
|
|
if filename.endswith('.doc') and not filename.startswith('.~'):
|
|
if filename.endswith('.doc') and not filename.startswith('.~'):
|
|
- doc2pdf(docPath = filename, pdfPath = './', system=system)
|
|
|
|
|
|
+ doc2pdf(docPath = filename, pdfPath = './pdf', system=system)
|
|
|
|
+ newfile = './pdf/' + os.path.splitext(os.path.split(newfile)[-1])[0] + '.pdf'
|
|
|
|
+ if os.path.exists(newfile):
|
|
|
|
+ rst = check_pdf(filename)
|
|
|
|
+ if "Table" in rst:
|
|
|
|
+ parse_table_from_pdf(filename)
|
|
|
|
+ pass
|
|
|
|
+ if "Word" in rst:
|
|
|
|
+ read_from_pdf(filename)
|
|
# 传入为 docx
|
|
# 传入为 docx
|
|
elif os.path.isfile(filename) and filename.endswith('.docx'):
|
|
elif os.path.isfile(filename) and filename.endswith('.docx'):
|
|
check_word(filename)
|
|
check_word(filename)
|
|
@@ -1207,7 +1215,15 @@ def detection_type(path, system):
|
|
parse_txt(filename)
|
|
parse_txt(filename)
|
|
# 传入为 doc
|
|
# 传入为 doc
|
|
elif os.path.isfile(path) and path.endswith('.doc'):
|
|
elif os.path.isfile(path) and path.endswith('.doc'):
|
|
- doc2pdf(docPath = path, pdfPath = './', system=system)
|
|
|
|
|
|
+ doc2pdf(docPath = path, pdfPath = './pdf', system=system)
|
|
|
|
+ newfile = './pdf/' + os.path.splitext(os.path.split(newfile)[-1])[0] + '.pdf'
|
|
|
|
+ if os.path.exists(newfile):
|
|
|
|
+ rst = check_pdf(filename)
|
|
|
|
+ if "Table" in rst:
|
|
|
|
+ parse_table_from_pdf(filename)
|
|
|
|
+ pass
|
|
|
|
+ if "Word" in rst:
|
|
|
|
+ read_from_pdf(filename)
|
|
# 传入为 docx
|
|
# 传入为 docx
|
|
elif os.path.isfile(path) and path.endswith('.docx'):
|
|
elif os.path.isfile(path) and path.endswith('.docx'):
|
|
check_word(path)
|
|
check_word(path)
|