|
@@ -897,7 +897,7 @@ def doc2pdf_linux(docPath, pdfPath):
|
|
需要在linux中下载好libreoffice
|
|
需要在linux中下载好libreoffice
|
|
"""
|
|
"""
|
|
# 注意cmd中的libreoffice要和linux中安装的一致
|
|
# 注意cmd中的libreoffice要和linux中安装的一致
|
|
- cmd = 'libreoffice --headless --convert-to pdf'.split() + [docPath] + ['--outdir'] + [pdfPath]
|
|
|
|
|
|
+ cmd = 'libreoffice6.3 --headless --convert-to pdf'.split() + [docPath] + ['--outdir'] + [pdfPath]
|
|
# cmd = 'libreoffice6.2 --headless --convert-to pdf'.split() + [docPath]
|
|
# cmd = 'libreoffice6.2 --headless --convert-to pdf'.split() + [docPath]
|
|
p = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
|
|
p = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
|
|
p.wait(timeout=30) # 停顿30秒等待转化
|
|
p.wait(timeout=30) # 停顿30秒等待转化
|
|
@@ -1193,12 +1193,12 @@ def detection_type(path, system):
|
|
doc2pdf(docPath = filename, pdfPath = './pdf', system=system)
|
|
doc2pdf(docPath = filename, pdfPath = './pdf', system=system)
|
|
newfile = './pdf/' + os.path.splitext(os.path.split(newfile)[-1])[0] + '.pdf'
|
|
newfile = './pdf/' + os.path.splitext(os.path.split(newfile)[-1])[0] + '.pdf'
|
|
if os.path.exists(newfile):
|
|
if os.path.exists(newfile):
|
|
- rst = check_pdf(filename)
|
|
|
|
|
|
+ rst = check_pdf(newfile)
|
|
if "Table" in rst:
|
|
if "Table" in rst:
|
|
- parse_table_from_pdf(filename)
|
|
|
|
|
|
+ parse_table_from_pdf(newfile)
|
|
pass
|
|
pass
|
|
if "Word" in rst:
|
|
if "Word" in rst:
|
|
- read_from_pdf(filename)
|
|
|
|
|
|
+ read_from_pdf(newfile)
|
|
# 传入为 docx
|
|
# 传入为 docx
|
|
elif os.path.isfile(filename) and filename.endswith('.docx'):
|
|
elif os.path.isfile(filename) and filename.endswith('.docx'):
|
|
check_word(filename)
|
|
check_word(filename)
|
|
@@ -1216,14 +1216,14 @@ def detection_type(path, system):
|
|
# 传入为 doc
|
|
# 传入为 doc
|
|
elif os.path.isfile(path) and path.endswith('.doc'):
|
|
elif os.path.isfile(path) and path.endswith('.doc'):
|
|
doc2pdf(docPath = path, pdfPath = './pdf', system=system)
|
|
doc2pdf(docPath = path, pdfPath = './pdf', system=system)
|
|
- newfile = './pdf/' + os.path.splitext(os.path.split(newfile)[-1])[0] + '.pdf'
|
|
|
|
|
|
+ newfile = './pdf/' + os.path.splitext(os.path.split(path)[-1])[0] + '.pdf'
|
|
if os.path.exists(newfile):
|
|
if os.path.exists(newfile):
|
|
- rst = check_pdf(filename)
|
|
|
|
|
|
+ rst = check_pdf(newfile)
|
|
if "Table" in rst:
|
|
if "Table" in rst:
|
|
- parse_table_from_pdf(filename)
|
|
|
|
|
|
+ parse_table_from_pdf(newfile)
|
|
pass
|
|
pass
|
|
if "Word" in rst:
|
|
if "Word" in rst:
|
|
- read_from_pdf(filename)
|
|
|
|
|
|
+ read_from_pdf(newfile)
|
|
# 传入为 docx
|
|
# 传入为 docx
|
|
elif os.path.isfile(path) and path.endswith('.docx'):
|
|
elif os.path.isfile(path) and path.endswith('.docx'):
|
|
check_word(path)
|
|
check_word(path)
|