Procházet zdrojové kódy

modified: resume_parse.py

sprivacy před 3 roky
rodič
revize
f0d7ab5ca3
Změněn 1 soubor, ve kterém bylo provedeno 8 přidání a 8 odebrání
  1. 8 8
      tools/resume_parse.py

+ 8 - 8
tools/resume_parse.py

@@ -897,7 +897,7 @@ def doc2pdf_linux(docPath, pdfPath):
     需要在linux中下载好libreoffice
     """
     #  注意cmd中的libreoffice要和linux中安装的一致
-    cmd = 'libreoffice --headless --convert-to pdf'.split() + [docPath] + ['--outdir'] + [pdfPath]
+    cmd = 'libreoffice6.3 --headless --convert-to pdf'.split() + [docPath] + ['--outdir'] + [pdfPath]
     # cmd = 'libreoffice6.2 --headless --convert-to pdf'.split() + [docPath]
     p = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
     p.wait(timeout=30)  # 停顿30秒等待转化
@@ -1193,12 +1193,12 @@ def detection_type(path, system):
                 doc2pdf(docPath = filename, pdfPath = './pdf', system=system)
                 newfile = './pdf/' + os.path.splitext(os.path.split(newfile)[-1])[0] + '.pdf'
                 if os.path.exists(newfile):
-                    rst = check_pdf(filename)
+                    rst = check_pdf(newfile)
                     if "Table" in rst:
-                        parse_table_from_pdf(filename)
+                        parse_table_from_pdf(newfile)
                         pass
                     if "Word" in rst:
-                        read_from_pdf(filename)
+                        read_from_pdf(newfile)
             # 传入为 docx
             elif os.path.isfile(filename) and filename.endswith('.docx'):
                 check_word(filename)
@@ -1216,14 +1216,14 @@ def detection_type(path, system):
     # 传入为 doc
     elif os.path.isfile(path) and path.endswith('.doc'):
         doc2pdf(docPath = path, pdfPath = './pdf', system=system)
-        newfile = './pdf/' + os.path.splitext(os.path.split(newfile)[-1])[0] + '.pdf'
+        newfile = './pdf/' + os.path.splitext(os.path.split(path)[-1])[0] + '.pdf'
         if os.path.exists(newfile):
-            rst = check_pdf(filename)
+            rst = check_pdf(newfile)
             if "Table" in rst:
-                parse_table_from_pdf(filename)
+                parse_table_from_pdf(newfile)
                 pass
             if "Word" in rst:
-                read_from_pdf(filename)
+                read_from_pdf(newfile)
     # 传入为 docx
     elif os.path.isfile(path) and path.endswith('.docx'):
         check_word(path)