Browse Source

修复图片保存

sprivacy 1 year ago
parent
commit
73e579578c
1 changed file with 86 additions and 20 deletions
  1. 86 20
      get_info.py

+ 86 - 20
get_info.py

@@ -2,7 +2,7 @@
 # @Author: privacy
 # @Date:   2024-06-11 13:43:14
 # @Last Modified by:   privacy
-# @Last Modified time: 2024-07-03 16:44:17
+# @Last Modified time: 2024-07-04 09:59:10
 
 # import os
 
@@ -124,26 +124,94 @@ def export_image(image: LTImage, path: str) -> str:
 
     if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
         name = _save_jpeg(image, path)
+        return name
 
     elif len(filters) == 1 and filters[0][0] in LITERALS_JPX_DECODE:
         name = _save_jpeg2000(image, path)
+        return name
 
-    elif image.bits == 1:
-        name = _save_bmp(image, width, height, (width + 7) // 8, image.bits, path)
+    # elif image.bits == 1:
+    #     name = _save_bmp(image, width, height, (width + 7) // 8, image.bits, path)
 
-    elif image.bits == 8 and LITERAL_DEVICE_RGB in image.colorspace:
-        name = _save_bmp(image, width, height, width * 3, image.bits * 3, path)
+    # elif image.bits == 8 and LITERAL_DEVICE_RGB in image.colorspace:
+    #     name = _save_bmp(image, width, height, width * 3, image.bits * 3, path)
 
-    elif image.bits == 8 and LITERAL_DEVICE_GRAY in image.colorspace:
-        name = _save_bmp(image, width, height, width, image.bits, path)
+    # elif image.bits == 8 and LITERAL_DEVICE_GRAY in image.colorspace:
+    #     name = _save_bmp(image, width, height, width, image.bits, path)
 
-    elif len(filters) == 1 and filters[0][0] in LITERALS_FLATE_DECODE:
-        name = _save_bytes(image)
+    # elif len(filters) == 1 and filters[0][0] in LITERALS_FLATE_DECODE:
+    #     name = _save_bytes(image)
 
+    # else:
+    #     name = _save_raw(image)
+    data = image.stream.get_data()
+    raw_data = image.stream.get_rawdata()
+
+    if data:
+        if data[:2] == b'\xff\xd8' and data[-2:] == b'\xff\xd9':
+            path += '.jpg'
+            with open(path, 'wb') as file:
+                file.write(data)
+            return path
+        elif data[:8] == b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a':
+            path += '.png'
+            with open(path, 'wb') as file:
+                file.write(data)
+            return path
+        elif data[:2] == b'\x42\x4d':
+            path += '.bmp'
+            with open(path, 'wb') as file:
+                file.write(data)
+            return path
+        elif data[:6] == b'\x47\x49\x46\x38\x37\x61' or data[:6] == b'\x47\x49\x46\x38\x39\x61':
+            path += '.gif'
+            with open(path, 'wb') as file:
+                file.write(data)
+            return path
+        elif data[:2] == b'\x4d\x4d' or data[:2] == b'\x49\x49':
+            path += '.tiff'
+            with open(path, 'wb') as file:
+                file.write(data)
+            return path
+        else:
+            path += '.unk'
+            with open(path, 'wb') as file:
+                file.write(data)
+            return path
+    elif raw_data:
+        if raw_data[:2] == b'\xff\xd8' and raw_data[-2:] == b'\xff\xd9':
+            path += '.jpg'
+            with open(path, 'wb') as file:
+                file.write(raw_data)
+            return path
+        elif raw_data[:8] == b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a':
+            path += '.png'
+            with open(path, 'wb') as file:
+                file.write(raw_data)
+            return path
+        elif raw_data[:2] == b'\x42\x4d':
+            path += '.bmp'
+            with open(path, 'wb') as file:
+                file.write(raw_data)
+            return path
+        elif raw_data[:6] == b'\x47\x49\x46\x38\x37\x61' or raw_data[:6] == b'\x47\x49\x46\x38\x39\x61':
+            path += '.gif'
+            with open(path, 'wb') as file:
+                file.write(raw_data)
+            return path
+        elif raw_data[:2] == b'\x4d\x4d' or raw_data[:2] == b'\x49\x49':
+            path += '.tiff'
+            with open(path, 'wb') as file:
+                file.write(raw_data)
+            return path
+        else:
+            path += '.unk'
+            with open(path, 'wb') as file:
+                file.write(raw_data)
+            return path
     else:
-        name = _save_raw(image)
+        return None
 
-    return name
 
 def _save_jpeg(image: LTImage, path: str) -> str:
     """Save a JPEG encoded image"""
@@ -191,15 +259,11 @@ def _save_jpeg2000(image: LTImage, path: str) -> str:
 
 def _save_bmp(image: LTImage, width: int, height: int, bytes_per_line: int, bits: int, path: str) -> str:
     """Save a BMP encoded image"""
+    data = image.stream.get_data()
     path = path + ".bmp"
     with open(path, "wb") as fp:
-        bmp = BMPWriter(fp, bits, width, height)
-        data = image.stream.get_data()
-        i = 0
-        for y in range(height):
-            bmp.write_line(y, data[i : i + bytes_per_line])
-            i += bytes_per_line
-    return name
+        fp.write(data)
+    return path
 
 def main_parse(pdf_path: str, title_path: str, image_dir: str) -> None:
     texts = []
@@ -421,8 +485,10 @@ class PdfExtractAttr(object):
 
 
 if __name__ == '__main__':
-    pdf_path = './投标文件-修改版9-5-1-1.pdf'
-    title_path = './投标文件-修改版9-5-1-1.json'
+    # pdf_path = './投标文件-修改版9-5-1-1.pdf'
+    pdf_path = './南方电网数字研究院有限公司.pdf'
+    # title_path = './投标文件-修改版9-5-1-1.json'
+    title_path = './南方电网数字研究院有限公司.json'
     image_dir = './extracted_images'
     os.makedirs(image_dir, exist_ok=True)
     main_parse(pdf_path=pdf_path, title_path=title_path, image_dir=image_dir)