营业执照信息识别
作者:互联网
Python 依赖
paddlepaddle==2.3.1 paddleocr==2.5.0.3
Python 示例
#!/usr/bin/env python
# coding=utf-8
"""Extract key fields from a Chinese business-licence image using PaddleOCR."""


class BusinessLicense:
    """Run OCR on a business-licence image and pull out its key fields.

    Args:
        img: Image to recognise; passed unchanged to ``PaddleOCR.ocr``.
        **kwargs: Optional OCR settings. Recognised keys are
            ``use_angle_cls`` (default ``True``), ``use_gpu`` (default
            ``False``) and ``lang`` (default ``"ch"``).

    Attributes:
        ocr_cls: The ``PaddleOCR`` engine used by :meth:`parse`.
        img: The image given to the constructor.
        flag: Result of the most recent :meth:`verification_credit_code` call.
    """

    # Value of each character allowed in a unified social credit code.
    # The letters I, O, S, V and Z are not part of the alphabet.
    _CODE_VALUES = {
        char: value
        for value, char in enumerate("0123456789ABCDEFGHJKLMNPQRTUWXY")
    }
    # Positional weights for the first 17 characters of the code.
    _CODE_WEIGHTS = (1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28)
    _CODE_LENGTH = 18

    _CREDIT_LABEL = "统一社会信用代码"
    _COMPANY_SUFFIX = "公司"
    _ADDRESS_LABEL = "住所"
    _LEGAL_LABEL = "法定代表人"

    # Deletes both the ASCII colon and the full-width colon (U+FF1A). The
    # full-width one is written as an escape so that it survives copy/paste
    # and re-encoding of this file.
    _COLON_TABLE = str.maketrans("", "", ":\uff1a")

    def __init__(self, img, **kwargs):
        # Imported lazily so the checksum/parsing helpers stay usable (and
        # importable) on machines without the OCR stack installed.
        from paddleocr import PaddleOCR

        self.ocr_cls = PaddleOCR(
            use_angle_cls=kwargs.get("use_angle_cls", True),
            use_gpu=kwargs.get("use_gpu", False),
            lang=kwargs.get("lang", "ch"),
        )
        self.img = img
        self.flag = False

    def verification_credit_code(self, credit_code):
        """Check the check digit of an 18-character unified social credit code.

        The first 17 characters are weighted and summed; the expected check
        value is ``(31 - sum % 31) % 31`` and must equal the value of the last
        character.

        Args:
            credit_code: The code to validate. ``None``, non-strings, empty
                strings, wrong lengths and characters outside the code
                alphabet are all reported as invalid rather than raising.

        Returns:
            ``True`` if the code is well-formed and its check digit matches,
            otherwise ``False``. The same value is stored in ``self.flag``.
        """
        # Always overwrite the flag so a previous success cannot leak into a
        # later, failing validation.
        self.flag = self._is_valid_credit_code(credit_code)
        return self.flag

    @classmethod
    def _is_valid_credit_code(cls, credit_code):
        """Return whether *credit_code* is well-formed with a matching check digit."""
        if not isinstance(credit_code, str):
            return False
        code = credit_code.strip()
        values = cls._CODE_VALUES
        if len(code) != cls._CODE_LENGTH or any(ch not in values for ch in code):
            return False
        # zip() stops after the 17 weights, leaving the check digit out.
        total = sum(values[ch] * weight for ch, weight in zip(code, cls._CODE_WEIGHTS))
        # The outer modulo maps the "31" case onto check value 0.
        expected = (31 - total % 31) % 31
        return values[code[-1]] == expected

    def parse(self):
        """Run OCR on ``self.img`` and extract the licence fields.

        Returns:
            A dict with the keys ``company_name``, ``credit_code``,
            ``address`` and ``legal_person``; a field that was not found is
            an empty string.
        """
        # NOTE(review): assumes each OCR row ends with a (text, score) pair,
        # which is the shape the pinned paddleocr 2.5.x returns — confirm
        # before upgrading paddleocr.
        ocr_result = self.ocr_cls.ocr(self.img, cls=True) or []
        texts = [row[-1][0] for row in ocr_result]
        return self._extract_fields(texts)

    @classmethod
    def _strip_colons(cls, text):
        """Remove ASCII/full-width colons from *text* and trim whitespace."""
        return text.translate(cls._COLON_TABLE).strip()

    @classmethod
    def _extract_fields(cls, texts):
        """Pick the licence fields out of the recognised text lines, in order."""
        company_name = ""
        credit_code = ""
        address = ""
        legal_person = ""
        clean = cls._strip_colons
        total = len(texts)

        for i, info in enumerate(texts):
            if info.startswith(cls._CREDIT_LABEL):
                credit_code = clean(info.split(cls._CREDIT_LABEL, 1)[1])
            elif info.endswith(cls._COMPANY_SUFFIX) and not info.startswith(
                (cls._ADDRESS_LABEL, cls._LEGAL_LABEL)
            ):
                # Only lines that END with the suffix are company names; a
                # line that merely contains it (e.g. an address) falls
                # through to the branches below instead of being dropped.
                if info.startswith("名称"):
                    company_name = clean(info)[2:].strip()
                elif info.startswith("称"):
                    # OCR sometimes splits the label and keeps only its
                    # second character in front of the name.
                    company_name = clean(info)[1:].strip()
                else:
                    company_name = info.strip()
            elif info.startswith("住"):
                label = clean(info)
                if label == "住":
                    # Label split over two rows: "住", "所", then the value.
                    if i + 2 < total and clean(texts[i + 1]) == "所":
                        address = clean(texts[i + 2])
                elif label == cls._ADDRESS_LABEL:
                    # Label alone on its row; the value is on the next row.
                    if i + 1 < total:
                        address = clean(texts[i + 1])
                elif label.startswith(cls._ADDRESS_LABEL):
                    address = label.split(cls._ADDRESS_LABEL, 1)[1].strip()
            elif info.startswith(cls._LEGAL_LABEL):
                label = clean(info)
                if label == cls._LEGAL_LABEL:
                    if i + 1 < total:
                        legal_person = clean(texts[i + 1])
                else:
                    legal_person = label.split(cls._LEGAL_LABEL, 1)[1].strip()

        return {
            "company_name": company_name,
            "credit_code": credit_code,
            "address": address,
            "legal_person": legal_person,
        }


if __name__ == "__main__":
    img_path = "./zhi.jpeg"
    bl = BusinessLicense(img=img_path)
    data = bl.parse()
    print(data)
    # parse() stores the code under "credit_code".
    print(bl.verification_credit_code(data.get("credit_code")))
标签:info,code,营业执照,信息,replace,strip,credit,ocr,识别 来源: https://www.cnblogs.com/navysummer/p/16583898.html