1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- #!/usr/bin/python
- # -*- coding=utf-8 -*-
- # @Create Time: 2024-08-22 14:50:03
- # @Last Modified time: 2024-12-02 16:58:14
import logging
import os
import subprocess

from win32com.client import Dispatch, constants, gencache
def doc2pdf(src: str, dst: str):
    """Convert a Word document to PDF through the Word COM automation server.

    Args:
        src: Path of the ``.doc``/``.docx`` file to open (opened read-only).
        dst: Output path WITHOUT extension; the PDF is written to
            ``f'{dst}.pdf'``.

    Returns:
        ``True`` when the target PDF already exists and the conversion is
        skipped; ``None`` otherwise. Conversion failures are logged with
        their traceback and are not re-raised.
    """
    pdf_path = f'{dst}.pdf'
    # Check the file that is actually written, not the extension-less stem;
    # otherwise an existing PDF is never detected and is converted again.
    if os.path.exists(pdf_path):
        return True

    # Literal values of Word's wdFormatPDF / wdDoNotSaveChanges. The
    # `constants` table of win32com is only populated for early-bound
    # (gencache) dispatch, so it cannot be relied on with plain Dispatch.
    wd_format_pdf = 17
    wd_do_not_save_changes = 0

    word = Dispatch('Word.Application')
    doc = None
    try:
        # Open the source document.
        doc = word.Documents.Open(src, ReadOnly=1)
        # Save it in PDF format.
        doc.SaveAs(pdf_path, wd_format_pdf)
        logging.info('转换成功')
    except Exception:
        logging.exception("转换失败")
    finally:
        try:
            # `doc` stays None when Open itself failed; closing it then
            # would raise a second error and hide the original one.
            if doc is not None:
                doc.Close(wd_do_not_save_changes)
        finally:
            # Always shut the Word process down, even if Close failed.
            word.Quit(wd_do_not_save_changes)
def doc2pdf_linux(src: str, dst: str, executable: str = 'libreoffice',
                  timeout: float = 30):
    """Convert a Word document to PDF by running LibreOffice headlessly.

    Supported input formats: doc, docx. LibreOffice must be installed.

    Args:
        src: Path of the document to convert.
        dst: Output DIRECTORY, passed to LibreOffice as ``--outdir``.
        executable: Name or path of the LibreOffice binary. It must match
            the installed one (e.g. ``'libreoffice6.2'`` or ``'soffice'``).
        timeout: Maximum number of seconds to wait for the conversion.

    Raises:
        subprocess.TimeoutExpired: The conversion exceeded ``timeout``; the
            child process is killed before the exception propagates.
        subprocess.SubprocessError: The process exited with a non-zero
            status or wrote anything to stderr.
    """
    cmd = [executable, '--headless', '--convert-to', 'pdf',
           src, '--outdir', dst]
    # subprocess.run drains both pipes while waiting, so a chatty child
    # cannot block on a full pipe buffer the way wait() + PIPE can.
    completed = subprocess.run(cmd, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, timeout=timeout)
    if completed.returncode != 0 or completed.stderr:
        detail = completed.stderr.decode(errors='replace').strip()
        raise subprocess.SubprocessError(
            detail or f'{executable} exited with status {completed.returncode}')
def pdf_output_base(wordpath: str) -> str:
    """Return the extension-less PDF target path for a Word document.

    The target sits two directory levels above the document's own location
    (i.e. in the parent of its containing folder) and reuses the document's
    file name with only the final extension removed, so interior dots
    ("report.v2.docx" -> "report.v2") are preserved.

    Args:
        wordpath: Path of the Word document.

    Returns:
        The output path without the ``.pdf`` suffix, as expected by
        ``doc2pdf``.
    """
    stem = os.path.splitext(os.path.basename(wordpath))[0]
    grandparent = os.path.dirname(os.path.dirname(wordpath))
    return os.path.join(grandparent, stem)


if __name__ == '__main__':
    from glob import glob

    # Batch-convert every Word document found three levels below the data
    # folder.
    for wordpath in glob('./工程量清单(含采购文件和技术规范)/*/*/*.doc*'):
        wordpath = os.path.abspath(wordpath)
        pdfpath = pdf_output_base(wordpath)
        print(wordpath)
        print(pdfpath)
        doc2pdf(wordpath, pdfpath)
|