doc2pdf.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. #!/usr/bin/python
  2. # -*- coding=utf-8 -*-
  3. # @Create Time: 2024-08-22 14:50:03
  4. # @Last Modified time: 2024-12-02 16:58:14
  5. import logging
  6. import subprocess
  7. from win32com.client import Dispatch, constants, gencache
  def doc2pdf(src: str, dst: str):
      """Convert a Word document to PDF through Word's COM automation interface.

      Args:
          src: Path of the Word document to open.
          dst: Output path *without* extension; the PDF is written to ``f'{dst}.pdf'``.

      Returns:
          True if the PDF already exists or the conversion succeeded,
          False if the conversion failed (the error is logged, not raised).
      """
      # Imported locally so the function does not depend on the script entry
      # point having imported ``os`` first.
      import os

      # Literal values from Word's object model. ``win32com.client.constants``
      # may be empty when Word is obtained through plain late-bound ``Dispatch``
      # (it is filled by makepy / ``gencache.EnsureDispatch``), so the numbers
      # are spelled out here instead.
      wd_format_pdf = 17          # WdSaveFormat.wdFormatPDF
      wd_do_not_save_changes = 0  # WdSaveOptions.wdDoNotSaveChanges

      # Check the file that is actually written, not the extension-less base path.
      pdf_path = f'{dst}.pdf'
      if os.path.exists(pdf_path):
          return True

      word = Dispatch('Word.Application')
      doc = None
      try:
          # Open the source document read-only.
          doc = word.Documents.Open(src, ReadOnly=1)
          # Convert it. (``doc.ExportAsFixedFormat`` is an alternative that can
          # also create heading bookmarks.)
          doc.SaveAs(pdf_path, wd_format_pdf)
          logging.info('转换成功')
          return True
      except Exception:
          logging.exception("转换失败")
          return False
      finally:
          # ``doc`` stays None when Open itself failed; closing is then skipped
          # so the original error is not masked by a second one.
          try:
              if doc is not None:
                  doc.Close(wd_do_not_save_changes)
          finally:
              word.Quit(wd_do_not_save_changes)
  def doc2pdf_linux(src: str, dst: str, timeout: float = 30):
      """Convert a Word document to PDF with headless LibreOffice.

      Accepted document formats: doc, docx.
      LibreOffice must be installed on the host.

      Args:
          src: Path of the document to convert.
          dst: Output directory, passed to LibreOffice as ``--outdir``.
          timeout: Seconds to wait for the conversion before giving up.

      Raises:
          subprocess.TimeoutExpired: The conversion did not finish within
              ``timeout`` seconds (the child process is killed first).
          subprocess.SubprocessError: LibreOffice wrote to stderr, or exited
              with a non-zero status (raised as ``CalledProcessError``).
      """
      # NOTE: the executable name must match the one installed on the host
      # (for example ``libreoffice6.2`` on some systems).
      cmd = ['libreoffice', '--headless', '--convert-to', 'pdf', src, '--outdir', dst]
      # ``run`` drains both pipes while it waits, so a chatty child cannot block
      # on a full pipe buffer, and it kills the child if the timeout expires.
      result = subprocess.run(
          cmd,
          stdout=subprocess.PIPE,
          stderr=subprocess.PIPE,
          timeout=timeout,
      )
      if result.stderr:
          raise subprocess.SubprocessError(result.stderr)
      if result.returncode != 0:
          # A failure that printed nothing on stderr must not pass silently.
          raise subprocess.CalledProcessError(
              result.returncode, cmd, output=result.stdout, stderr=result.stderr
          )
  if __name__ == '__main__':
      import os
      from glob import glob

      # Batch-convert every Word document matched by the pattern below.
      # Alternative input set used previously:
      #   glob('./data/0预审查初审详审测试数据/*/*.doc*')
      for wordpath in glob('./工程量清单(含采购文件和技术规范)/*/*/*.doc*'):
          wordpath = os.path.abspath(wordpath)
          # splitext strips only the final extension, so dots inside the file
          # name are kept (joining the split parts used to remove them).
          filename = os.path.splitext(os.path.basename(wordpath))[0]
          # The PDF goes one directory above the folder holding the document.
          target_dir = os.path.dirname(os.path.dirname(wordpath))
          pdfpath = os.path.abspath(os.path.join(target_dir, filename))
          print(wordpath)
          print(pdfpath)
          # doc2pdf appends '.pdf' to the extension-less path itself.
          doc2pdf(wordpath, pdfpath)