signature_test.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. import os
  2. import re
  3. import json
  4. import logging
  5. import requests
  6. import cv2
  7. import numpy as np
  8. # ocr外部接口
  9. class OcrAgent():
  10. def __init__(self, url):
  11. self.url = url
  12. self.datetime_re = r'\d{4}年\d{1,2}月\d{1,2}日至(?:\d{4}年\d{1,2}月\d{1,2}日|长期)'
  13. # 不同类型证书资质正则
  14. self.re_dict = {
  15. "business_license" : r'营业执照',
  16. "deposit": r'^(?:开户许可证|[\u4e00-\u9fff]+存款账户[\u4e00-\u9fff]+)$',
  17. "production_license": r'\b[\u4e00-\u9fff]*许可证\b',
  18. "qualtifications" : r'\b[\u4e00-\u9fff]*证书',
  19. "proof": r'\b[\u4e00-\u9fff]*证明',
  20. }
  21. def get_content(self, image_path):
  22. try:
  23. with open(image_path, 'rb') as image_file:
  24. files = {"file": ("image.jpg", image_file, "image/jpeg")}
  25. # files = {"file": ("image.png", image_file, "image/png")}
  26. response = requests.post(self.url, files=files)
  27. return response.json()
  28. except:
  29. raise ValueError(f"传入图像{image_path}已损坏")
  30. # 用于识别固定位置是否有公司法人签名
  31. def signature_recognition(self, image_path):
  32. content = self.get_content(image_path=image_path)
  33. def remove_red_seal(input_img):
  34. # 分离图片的通道
  35. blue_c, green_c, red_c = cv2.split(input_img)
  36. #利用大津法自动选择阈值
  37. thresh, ret = cv2.threshold(red_c, 0, 255, cv2.THRESH_OTSU)
  38. #对阈值进行调整
  39. filter_condition = int(thresh * 1.0)
  40. #移除红色的印章
  41. _, red_thresh = cv2.threshold(red_c, filter_condition, 255, cv2.THRESH_BINARY)
  42. # 把图片转回3通道
  43. result_img = np.expand_dims(red_thresh, axis=2)
  44. result_img = np.concatenate((result_img, result_img, result_img), axis=-1)
  45. return result_img
  46. # 用于判断固定位置的长方形框内是否存在签名字迹
  47. def ink_recognition(input_img, sign_path, meta: dict):
  48. left = meta["left"]
  49. right = meta["right"]
  50. top = meta["top"]
  51. bottom = meta["bottom"]
  52. cv2.rectangle(input_img, (left, top), (right, bottom), (0, 255, 0), 2) # 绿色框,线宽为2
  53. cv2.imwrite(sign_path, input_img)
  54. if __name__ == "__main__":
  55. url = "http://120.48.103.13:18000/ctr_ocr"
  56. ocr = OcrAgent(url=url)
  57. test_img = "/home/stf/miner_pdf/test_img/example_1.png"
  58. save_path = '/home/stf/miner_pdf/test_img/example_1_roi.png'
  59. sign_path = '/home/stf/miner_pdf/test_img/example_1_sign.png'
  60. input_img = cv2.imread(test_img)
  61. remove_seal = remove_red_seal(input_img)
  62. cv2.imwrite(save_path, remove_seal)
  63. keywords = ['投标函', '法定代表人CA电子印章', '法定代表人:']
  64. key_pos = {}
  65. content = ocr.get_content(save_path)
  66. image_info = content["rawjson"]["ret"]
  67. for info in image_info:
  68. word = info['word']
  69. left = info['rect']['left']
  70. top = info['rect']['top']
  71. width = info['rect']['width']
  72. height = info['rect']['height']
  73. right = left + width
  74. bottom = top + height
  75. for keyword in keywords:
  76. if keyword in word:
  77. key_pos[keyword] = {
  78. "word": word,
  79. "left": left,
  80. "right": right,
  81. "top": top,
  82. "bottom": bottom
  83. }
  84. break
  85. # 如果不存在"投标函"、"法定代表人"等关键字,则返回False
  86. if len(key_pos) == 0:
  87. print("NO FOUND")
  88. # 定位到法定代表人所在位置
  89. if key_pos.get('法定代表人:') is not None and key_pos.get('法定代表人CA电子印章') is not None:
  90. # 此时签名应在两者之间
  91. l = key_pos['法定代表人:']['right']
  92. l_word = key_pos['法定代表人:']['word']
  93. r = key_pos['法定代表人CA电子印章']['left']
  94. r_word = key_pos['法定代表人CA电子印章']['word']
  95. t = min(key_pos['法定代表人:']['top'], key_pos['法定代表人CA电子印章']['top'])
  96. b = max(key_pos['法定代表人:']['bottom'], key_pos['法定代表人CA电子印章']['bottom'])
  97. if l_word[-6:] != '法定代表人:' or r_word != '法定代表人CA电子印章':
  98. print("找寻到签名")
  99. exit(0)
  100. else:
  101. ink_recognition(
  102. input_img=remove_seal,
  103. sign_path=sign_path,
  104. meta={
  105. "left": l,
  106. "right": r,
  107. "top": t,
  108. "bottom": b
  109. }
  110. )
  111. elif key_pos.get('法定代表人CA电子印章') is not None:
  112. # 此时签名应已包含
  113. key_word = key_pos['法定代表人CA电子印章']['word']
  114. key_word = key_word.replace('法定代表人CA电子印章', '').replace('法定代表人:', '')
  115. if key_word != '':
  116. print("found sign")
  117. else:
  118. print("No FOUND")
  119. elif key_pos.get('法定代表人:') is not None:
  120. # 此时签名在右边或已包含
  121. word = key_pos['法定代表人:']['word']
  122. l = key_pos['法定代表人:']['left']
  123. r = l + 100
  124. t = key_pos['法定代表人:']['top']
  125. b = key_pos['法定代表人:']['bottom']
  126. if word[-6:] != '法定代表人:':
  127. print("found sign")
  128. else:
  129. ink_recognition(
  130. input_img=remove_seal,
  131. sign_path=sign_path,
  132. meta={
  133. "left": l,
  134. "right": r,
  135. "top": t,
  136. "bottom": b
  137. }
  138. )
  139. else:
  140. print("NO FOUND")