#!/usr/bin/python
# -*- coding=utf-8 -*-
# @Create Time: 2024-08-22 14:50:03
# @Last Modified time: 2024-12-02 16:58:14
"""Convert Word documents to PDF via MS Word (Windows) or LibreOffice (Linux)."""
import logging
import os
import subprocess
from glob import glob

try:
    from win32com.client import Dispatch, constants, gencache
except ImportError:
    # pywin32 only exists on Windows; keep the module importable elsewhere so
    # that doc2pdf_linux remains usable.
    Dispatch = constants = gencache = None

# Word enumeration values, spelled out as literals because
# ``win32com.client.constants`` is only populated for early-bound
# (makepy / EnsureDispatch) objects, not for a plain ``Dispatch``.
WD_FORMAT_PDF = 17            # WdSaveFormat.wdFormatPDF
WD_DO_NOT_SAVE_CHANGES = 0    # WdSaveOptions.wdDoNotSaveChanges


def doc2pdf(src: str, dst: str) -> bool:
    """Convert a Word document to PDF using a local MS Word installation.

    Args:
        src: Path of the Word document to open.
        dst: Output path WITHOUT extension; the file is written to
            ``f'{dst}.pdf'``.

    Returns:
        True if the PDF already exists or was written successfully,
        False if the conversion failed (the error is logged, not raised).

    Raises:
        ImportError: If pywin32 is not installed and a conversion is needed.
    """
    target = f'{dst}.pdf'
    # Skip work when the output is already there. The check must use the
    # real output name, including the ".pdf" suffix appended below.
    if os.path.exists(target):
        return True
    if Dispatch is None:
        raise ImportError('pywin32 (win32com) is required for doc2pdf')

    word = Dispatch('Word.Application')
    doc = None  # stays None when Open() itself fails
    converted = False
    try:
        # Open the document read-only, then save it as PDF.
        doc = word.Documents.Open(src, ReadOnly=1)
        doc.SaveAs(target, WD_FORMAT_PDF)
        converted = True
        logging.info('转换成功')
    except Exception:
        # Best-effort conversion: log the failure and report it via the
        # return value instead of propagating.
        logging.exception("转换失败")
    finally:
        try:
            if doc is not None:
                doc.Close(WD_DO_NOT_SAVE_CHANGES)
        finally:
            # Always shut Word down, even if closing the document failed.
            word.Quit(WD_DO_NOT_SAVE_CHANGES)
    return converted


def doc2pdf_linux(src: str, dst: str):
    """Convert a document to PDF with headless LibreOffice.

    Allowed document formats: doc, docx.
    LibreOffice must be installed on the Linux host.

    Args:
        src: Path of the document to convert.
        dst: Output DIRECTORY (passed to ``--outdir``).

    Raises:
        subprocess.TimeoutExpired: If the conversion takes longer than 30 s
            (the child process is killed first).
        subprocess.SubprocessError: If LibreOffice writes to stderr or exits
            with a non-zero status.
    """
    # NOTE: the executable name must match the one installed on the host
    # (for example ``libreoffice6.2``).
    cmd = ['libreoffice', '--headless', '--convert-to', 'pdf', src,
           '--outdir', dst]
    # run() drains both pipes while waiting, so a chatty child cannot block
    # on a full pipe, and it kills the child when the timeout expires.
    result = subprocess.run(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, timeout=30)
    if result.stderr:
        raise subprocess.SubprocessError(result.stderr)
    if result.returncode != 0:
        raise subprocess.SubprocessError(
            f'libreoffice exited with status {result.returncode}')


def default_pdf_stem(wordpath: str) -> str:
    """Return the extension-less output path used by the batch script.

    The PDF is placed in the grandparent directory of the Word file (one
    level above the folder that contains it) and keeps the document's base
    name, with only the final extension removed.
    """
    stem = os.path.splitext(os.path.basename(wordpath))[0]
    out_dir = os.path.dirname(os.path.dirname(wordpath))
    return os.path.join(out_dir, stem)


if __name__ == '__main__':
    for wordpath in glob('./工程量清单(含采购文件和技术规范)/*/*/*.doc*'):
        wordpath = os.path.abspath(wordpath)
        pdfpath = os.path.abspath(default_pdf_stem(wordpath))
        print(wordpath)
        print(pdfpath)
        doc2pdf(wordpath, pdfpath)