票据识别（Yolo + OCR）【过程清晰，简单美好】-EW帮帮网

目录：
背景介绍
推荐处理流程
项目结构
Yolo数据集yaml文件
识别影响因素
数据集标注
DeBlur 代码参考（轻量级）
文本框预处理代码参考
核心代码

背景介绍
这是一个简单的 PyTorch 票据识别项目，用于证明整个流程可以跑通，并且能够训练到不错的识别准确率。
后续再增加学习率设计、正则后处理、剪枝、量化等，可以稍微提高精度，加快推理速度，降低模型复杂度。

推荐处理流程
图片预处理（对比度，纠偏，去模糊等）→ Yolo分类票据类型（增值税发票，交易发票，机票等）→ 检测出该类票据的各种文本框
→ 文本框预处理（降噪，去模糊等）→ PaddleOCR识别文本框 → 后处理（正则提取等）→ 存入数据库
我这里只完成了上述的核心部分，其余部分可根据需要补充

项目结构
在这里插入图片描述

Yolo数据集yaml文件
在这里插入图片描述

识别影响因素
●票据种类很多，首先需要明确要操作的是哪一种票据，好定制对应的识别操作模板
●有些扫描的图样并不规整，需要纠偏操作
●图像透视操作以后会有形状损失（仿射变换会好一点）
●票据版面上有很多墨迹干扰，需要DeBlur去模糊
●票据版面上的感兴趣区域难以定位，因为前期处理后，ROI位置会随之变动，难以用规则直接限制
●印章区域如何识别
●不能程序处理的图像应该如何处理（标记出来，转给人工）

数据集标注
使用Labelimg进行yolo格式的数据集标注即可，B站上有各种教程。

DeBlur 代码参考（轻量级）

# 可选调整建议：
# amount 控制锐化强度，值越大越锐利，通常在 1.0~2.0 之间；
# threshold 可避免对低对比区域锐化，防止产生噪点；
# 如果图像为灰度图可加判断：if len(image.shape) == 2: 做适配。
# 如需进一步提升去模糊效果，也可以考虑结合深度学习方法（如DeblurGAN或Real-ESRGAN）

import cv2
import numpy as np


# 定义反锐化掩膜函数（Unsharp Mask），用于图像锐化处理
def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.5, threshold=0):
    """Sharpen ``image`` with an unsharp mask.

    The image is blurred with a Gaussian kernel and the output is
    ``(amount + 1) * image - amount * blurred``, clipped to ``[0, 255]``,
    rounded and converted to ``uint8``.

    Args:
        image: Input image array accepted by ``cv2.GaussianBlur``.
        kernel_size: Gaussian kernel size passed to ``cv2.GaussianBlur``.
        sigma: Gaussian standard deviation passed to ``cv2.GaussianBlur``.
        amount: Sharpening strength; larger values sharpen more.
        threshold: When greater than 0, pixels whose absolute difference
            from the blurred image is below this value keep their original
            value, which avoids amplifying noise in flat regions.

    Returns:
        The sharpened image as a ``uint8`` array of the same shape as
        ``image``.
    """
    blurred = cv2.GaussianBlur(image, kernel_size, sigma)

    # Do all arithmetic in float64. Subtracting two uint8 arrays directly
    # wraps around modulo 256 wherever blurred > image, which would corrupt
    # the low-contrast mask computed below.
    image_f = image.astype(np.float64)
    blurred_f = blurred.astype(np.float64)

    # Boost the difference between the original and its blurred version.
    sharpened = float(amount + 1) * image_f - float(amount) * blurred_f

    # Clamp to the valid 8-bit range, then round and convert.
    sharpened = np.clip(sharpened, 0, 255).round().astype(np.uint8)

    if threshold > 0:
        # Keep the original pixel wherever local contrast is below threshold.
        low_contrast_mask = np.absolute(image_f - blurred_f) < threshold
        np.copyto(sharpened, image, where=low_contrast_mask)

    return sharpened


# 增强亮度和对比度的函数
def adjust_brightness_contrast(image, alpha=1.8, beta=-120):
    """Apply a linear contrast/brightness transform to ``image``.

    Thin wrapper around ``cv2.convertScaleAbs``.

    Args:
        image: Input image array.
        alpha: Scale factor (contrast gain); values above 1.0 increase
            contrast.
        beta: Offset added after scaling (brightness shift); negative values
            darken, positive values brighten.

    Returns:
        The array returned by ``cv2.convertScaleAbs``.

    Note:
        Per the OpenCV documentation, ``convertScaleAbs`` takes the absolute
        value of ``alpha * pixel + beta`` before saturating to 8 bits, so
        with a negative ``beta`` very dark pixels are mapped to
        ``|alpha * pixel + beta|`` rather than to 0.
    """
    return cv2.convertScaleAbs(image, alpha=alpha, beta=beta)

# Path of the source image. It is read with np.fromfile + cv2.imdecode below
# so that paths containing non-ASCII characters also work.
# NOTE(review): these statements run at module level; if this file is imported
# for its functions they execute on import - consider a __main__ guard.
image_path = r"../datasets/Invoice/images/train/17242152.jpg"

# Decode the file contents as a 3-channel colour image.
image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
if image is None:
    # cv2.imdecode reports failure by returning None instead of raising.
    raise ValueError(f"Failed to decode image: {image_path}")

# Step 1: deblur (sharpen).
deblurred = unsharp_mask(image)

# Save the sharpened image; cv2.imwrite returns False when writing fails.
if not cv2.imwrite('deblurred_invoice.jpg', deblurred):
    raise OSError("Failed to write deblurred_invoice.jpg")

# Step 2: adjust brightness and contrast (optional, tunable).
enhanced = adjust_brightness_contrast(deblurred, alpha=1.8, beta=-180)

# Save the final processed image.
if not cv2.imwrite('enhanced_invoice.jpg', enhanced):
    raise OSError("Failed to write enhanced_invoice.jpg")

文本框预处理代码参考

用于票据、身份证、发票等文档图像的前处理步骤，使文字更加清晰、背景更干净，为后续 OCR 或边缘检测做准备。
"""文本框预处理(参数记得根据业务调整)"""
image_path = "../datasets/增值税发票1.jpg"
image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)  # read via imdecode so non-ASCII paths work; NOTE(review): a None result (decode failure) is not checked

# Step 1: adjust contrast and brightness so the text stands out and the
# background is pushed darker, which helps the following steps.
alpha = 1.8  # contrast factor (1.0 = unchanged, > 1.0 increases contrast)
beta = -120  # brightness offset; the negative value lowers overall brightness
adjusted_image = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)  # per pixel: saturate_to_uint8(|alpha * pixel + beta|) per the OpenCV docs (absolute value, not a plain clip)

# Optional debug preview of the adjusted image:
# width = int(adjusted_image.shape[1] * 0.45)
# height = int(adjusted_image.shape[0] * 0.45)
# resized_image = cv2.resize(adjusted_image, (width, height), interpolation=cv2.INTER_AREA)
# cv2.imshow('Adjusted Image', resized_image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()


# Step 2: convert to grayscale to simplify later operations (filtering,
# thresholding).
gray = cv2.cvtColor(adjusted_image, cv2.COLOR_BGR2GRAY)  # BGR colour image -> single-channel grayscale

# Optional debug preview of the grayscale image:
# width = int(adjusted_image.shape[1] * 0.45)
# height = int(adjusted_image.shape[0] * 0.45)
# resized_image = cv2.resize(gray, (width, height), interpolation=cv2.INTER_AREA)
# cv2.imshow('Adjusted Image', resized_image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()


# Step 3: bilateral filter to remove light noise while keeping text edges
# sharp (unlike a Gaussian blur, it preserves edges while denoising).
# Positional arguments follow OpenCV's order (d, sigmaColor, sigmaSpace):
# 13 = pixel neighbourhood diameter, 26 = sigmaColor (intensity-difference
# sigma), 6 = sigmaSpace (spatial sigma).
filter_image = cv2.bilateralFilter(gray, 13, 26, 6)  # edge-preserving denoise

# Optional debug preview of the filtered image:
# width = int(adjusted_image.shape[1] * 0.45)
# height = int(adjusted_image.shape[0] * 0.45)
# resized_image = cv2.resize(filter_image, (width, height), interpolation=cv2.INTER_AREA)
# cv2.imshow('Adjusted Image', resized_image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()


# Step 4: inverted binarisation (text becomes white, background black) to
# ease contour extraction or OCR.
# Global-threshold alternative (disabled): with THRESH_BINARY_INV, pixels
# above the threshold (190 here) become 0 and all others become 255.
# _, binary_image = cv2.threshold(filter_image, 190, 255, cv2.THRESH_BINARY_INV)

# Adaptive thresholding is used instead; the author reports clearly better results.
# 11: neighbourhood (block) size used to compute the local threshold (must be
# odd); 2: constant C subtracted from the Gaussian-weighted local mean.
binary_image = cv2.adaptiveThreshold(filter_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

# Show the binarised result scaled to 45% of the original size.
width = int(adjusted_image.shape[1] * 0.45)
height = int(adjusted_image.shape[0] * 0.45)
resized_image = cv2.resize(binary_image, (width, height), interpolation=cv2.INTER_AREA)
cv2.imshow('Adjusted Image', resized_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

核心代码
C:\baidu_sync\BaiduSyncdisk\PythonFiles\ultralytics_github_20250729\study\yolo11_invoice_文本框检测_自定义.py

# 训练时，使用原始图片
# 推理时，先对图片deblur，再传入模型

import warnings
warnings.filterwarnings('ignore')

from ultralytics import YOLO
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)  # e.g. cuda


# Dataset configuration file
invoice_yaml = '../datasets/Invoice/invoice.yaml'

# Model training (custom hyper-parameters)
if __name__ == '__main__':
    # Load the pretrained model
    model = YOLO("yolo11m.pt")

    # Alternatively, load an already-trained model directly
    # model = YOLO(r"C:\baidu_sync\BaiduSyncdisk\PythonFiles\ultralytics_github_20250729\study\runs\detect\invoice_finish_self\weights\best.pt")

    # If needed, existing weights can also be loaded (e.g. to continue training)
    # model.load(r"C:\baidu_sync\BaiduSyncdisk\PythonFiles\ultralytics_github_20250729\study\runs\detect\train3\weights\best.pt")

    # Inference (object detection), currently disabled
    # Training-set samples: 10849624.jpg, 17242152.jpg, 22749863.jpg
    # results = model(r"C:\baidu_sync\BaiduSyncdisk\PythonFiles\ultralytics_github_20250729\datasets\Invoice\images\val\13625254.jpg",
    #                 rect=True,
    #                 imgsz=1200,
    #                 conf=0.44,
    #                 # device = device
    # )
    # results[0].show()  # display the result

    # metrics = model.val(data=invoice_yaml, imgsz=1200)
    # print(metrics)

    print("推理结束", "**************************************************" * 3)
    # exit()


    # Train the model
    results = model.train(data=invoice_yaml,  # dataset configuration file
                        resume=False,   # do not resume a previous run
                        pretrained=False,  # pretrained-weights flag; NOTE(review): the model above was already built from yolo11m.pt - confirm how this flag interacts with it
                        epochs=1000,
                        batch=4,  # small batch to avoid running out of GPU memory
                        cls=1.0,   # classification loss gain; NOTE(review): Ultralytics' documented default appears to be 0.5, not 1.0 - confirm
                        dfl=1,  # Distribution Focal Loss gain (a weight, not an on/off switch); NOTE(review): documented default appears to be 1.5 - confirm
                        cos_lr=False,  # no cosine LR schedule (linear decay is used)
                        imgsz=1200,   # target input size, chosen to limit GPU memory; NOTE(review): 1200 is not a multiple of 32, Ultralytics may round it to a stride multiple - confirm
                        lr0=0.001,  # initial learning rate
                        lrf=0.001,  # final learning-rate factor (final lr = lr0 * lrf)
                        translate=0,   # disable translation augmentation (keeps the invoice layout intact)
                        scale=0,   # disable scale augmentation
                        fliplr=0,   # disable horizontal-flip augmentation
                        mosaic=0,   # disable mosaic augmentation (layout-sensitive task)
                        erasing=0,   # disable random erasing
                        hsv_h=0.1,  # hue jitter amplitude (0~1)
                        hsv_s=0.1,  # saturation jitter amplitude (author suggests keeping it low for invoices)
                        hsv_v=0.1,  # value (brightness) jitter amplitude
                        rect = True,  # rectangular training mode; the author finds it better suited to text/invoice images
                        device=device,
                        workers=0,  # number of data-loader workers (author recommends 0 on Windows)
                        verbose=True,  # verbose training output
                        patience = 50  # early-stopping patience, in epochs without improvement
    )
    # Author's observation: the box_loss / cls_loss values show that the
    # custom weights were in fact loaded successfully.

    print("训练结束", "**************************************************" * 3)

    # Evaluate the model on the validation set
    metrics = model.val()

    print("评估结束", "**************************************************" * 3)

C:\baidu_sync\BaiduSyncdisk\PythonFiles\ultralytics_github_20250729\study\yolo11_invoice_文本框检测_默认.py

# 训练时，使用原始图片
# 推理时，先对图片deblur，再传入模型

import warnings
warnings.filterwarnings('ignore')

from ultralytics import YOLO
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)  # e.g. cuda


# Dataset configuration file
invoice_yaml = '../datasets/Invoice/invoice.yaml'

# Model training (library default hyper-parameters)
if __name__ == '__main__':
    # Load the pretrained model
    model = YOLO("yolo11m.pt")

    # Alternatively, load an already-trained model directly
    # model = YOLO(r"C:\baidu_sync\BaiduSyncdisk\PythonFiles\ultralytics_github_20250729\study\runs\detect\invoice_finish_default\weights\best.pt")

    # If needed, existing weights can also be loaded (e.g. to continue training)
    # model.load(r"C:\baidu_sync\BaiduSyncdisk\PythonFiles\ultralytics_github_20250729\study\runs\detect\train3\weights\best.pt")

    # Inference (object detection), currently disabled
    # Training-set samples: 10849624.jpg, 17242152.jpg, 22749863.jpg
    # results = model(r"C:\baidu_sync\BaiduSyncdisk\PythonFiles\ultralytics_github_20250729\datasets\Invoice\images\val\13625254.jpg",
    #                 rect=True,
    #                 imgsz=1200,
    #                 conf=0.3,
    #                 device = device
    # )
    # results[0].show()  # display the result


    print("推理结束", "**************************************************" * 3)
    # exit()


    # Train the model; everything not listed here uses the library defaults
    results = model.train(data=invoice_yaml,  # dataset configuration file
                        # resume=False,   # whether to resume training (default False)
                        # pretrained=True,  # whether to use pretrained weights (default True)
                        epochs=10,
                        batch=4,  # keep small to avoid running out of GPU memory
    )

    print("训练结束", "**************************************************" * 3)

    # Evaluate the model on the validation set
    metrics = model.val()

    print("评估结束", "**************************************************" * 3)

C:\baidu_sync\BaiduSyncdisk\PythonFiles\CV_liuyuan\OCR_Paddle\票据检测识别_yolo2ocr.py

# 训练时，使用原始图片
# 推理时，先对图片deblur，再传入模型

import warnings
warnings.filterwarnings('ignore')

import numpy as np, cv2
from paddleocr import PaddleOCR
from ultralytics import YOLO
from utils.deblur import unsharp_mask, adjust_brightness_contrast

# from utils.text_preprocess import  # 对截取出的文本框进行预处理，方便后续OCR识别（可选）


print("数据准备，模型准备", "**************************************************" * 3)

# Path of the raw invoice image (the name is rebound to the decoded image below)
img_raw = "data/invoice1/13625254.jpg"  # other samples: 10849624, 13625254, 17242152, 22749863

# Load the already-trained YOLO detection model (detects the invoice's text boxes)
model = YOLO("invoice_recognition_model/invoice_finish_self.pt")

# Create the PaddleOCR instance (recognises the text inside each box).
# Document-orientation classification, unwarping and text-line orientation
# are all switched off; model directories point to local copies.
ocr = PaddleOCR(
    use_doc_orientation_classify=False,
    # lang='en',
    doc_orientation_classify_model_dir=r"C:\paddle_models\PP-LCNet_x1_0_doc_ori",
    text_detection_model_dir=r"C:\paddle_models\PP-OCRv5_server_det",
    text_recognition_model_dir=r"C:\paddle_models\PP-OCRv5_server_rec_infer",
    use_doc_unwarping=False,
    doc_unwarping_model_dir=r"C:\paddle_models\UVDoc",
    use_textline_orientation=False,
    # text_line_orientation_model_dir="C:\paddle_models\PP-LCNet_x0_25_textline_ori",
)


print("票据图片预处理", "**************************************************" * 3)

# Read the colour image via np.fromfile + cv2.imdecode so non-ASCII paths work
img_raw = cv2.imdecode(np.fromfile(img_raw, dtype=np.uint8), cv2.IMREAD_COLOR)

# [Pre-detection preprocessing] Step 1: deblur (sharpen)
deblurred = unsharp_mask(img_raw)

# [Pre-detection preprocessing] Step 2: adjust brightness and contrast (optional, tunable)
enhanced = adjust_brightness_contrast(deblurred, alpha=1.8, beta=-180)

# [Pre-detection preprocessing] Step 3 (not implemented here, optional depending
# on the image): skew correction - extract edges from the grayscale image with
# an adaptive-threshold Canny operator, then deskew via a Hough transform.


print("Yolo 检测文本框", "**************************************************" * 3)

# Inference: detect the target fields of the invoice on the enhanced image
results = model(source=enhanced,
                rect=True,
                imgsz=1200,
                conf=0.29
                )
results[0].show()  # display the detection result

# Mapping from class id to label name
id2name = results[0].names  # {0: '购买方名称', 1: '纳税人识别号', 2: '服务名称', 3: '数量', 4: '金额', 5: '发票号码'}


# results是一个列表，只有1个元素（此处只推理了一张图片）。
# type(results[0]) = <class 'ultralytics.engine.results.Results'>
# results[0].names = {0: '购买方名称', 1: '纳税人识别号', 2: '服务名称', 3: '数量', 4: '金额', 5: '发票号码'}
# results[0].orig_img.shape = (845, 1443, 3)  # （H,W,C）

# 核心是每个元素的boxes属性
# type(results[0].boxes) = <class 'ultralytics.engine.results.Boxes'>
# results[0].boxes.cls  = tensor([   2.,     5.,     4.,     3.,     1.,     0.,      1.])
# results[0].boxes.conf = tensor([0.9443, 0.8806, 0.8699, 0.8635, 0.6659, 0.6443, 0.5550])
# results[0].boxes.shape = torch.Size([7, 6])
# results[0].boxes.data.shape = torch.Size([7, 6])
# results[0].boxes.xywh.shape = torch.Size([7, 4])
# results[0].boxes.xywhn.shape = torch.Size([7, 4])  # n:归一化坐标
# results[0].boxes.xyxy.shape = torch.Size([7, 4])
# results[0].boxes.xyxyn.shape = torch.Size([7, 4])  # n:归一化坐标

# print(results[0].boxes)
# ultralytics.engine.results.Boxes object with attributes:
# cls: tensor([2., 5., 4., 3., 1., 0., 1.])
# conf: tensor([0.9443, 0.8806, 0.8699, 0.8635, 0.6659, 0.6443, 0.5550])
# data: tensor([[1.2014e+02, 3.1602e+02, 4.2491e+02, 4.1378e+02, 9.4428e-01, 2.0000e+00],
#         [9.9623e+02, 6.6175e+01, 1.1500e+03, 1.1560e+02, 8.8058e-01, 5.0000e+00],
#         [9.0427e+02, 3.1837e+02, 1.0705e+03, 3.9286e+02, 8.6989e-01, 4.0000e+00],
#         [6.5411e+02, 3.1618e+02, 7.7279e+02, 3.8745e+02, 8.6345e-01, 3.0000e+00],
#         [1.9674e+02, 2.1012e+02, 7.9345e+02, 2.4433e+02, 6.6592e-01, 1.0000e+00],
#         [2.0384e+02, 1.8056e+02, 7.9884e+02, 2.1874e+02, 6.4429e-01, 0.0000e+00],
#         [1.7624e+02, 2.1539e+02, 7.9203e+02, 2.5171e+02, 5.5499e-01, 1.0000e+00]])
# id: None
# is_track: False
# orig_shape: (845, 1443)
# shape: torch.Size([7, 6])
# xywh: tensor([[ 272.5238,  364.9023,  304.7654,   97.7650],
#         [1073.1395,   90.8899,  153.8099,   49.4291],
#         [ 987.3865,  355.6122,  166.2319,   74.4934],
#         [ 713.4526,  351.8164,  118.6826,   71.2672],
#         [ 495.0966,  227.2293,  596.7053,   34.2094],
#         [ 501.3398,  199.6515,  595.0074,   38.1783],
#         [ 484.1343,  233.5509,  615.7881,   36.3267]])
# xywhn: tensor([[0.1889, 0.4318, 0.2112, 0.1157],
#         [0.7437, 0.1076, 0.1066, 0.0585],
#         [0.6843, 0.4208, 0.1152, 0.0882],
#         [0.4944, 0.4164, 0.0822, 0.0843],
#         [0.3431, 0.2689, 0.4135, 0.0405],
#         [0.3474, 0.2363, 0.4123, 0.0452],
#         [0.3355, 0.2764, 0.4267, 0.0430]])
# xyxy: tensor([[ 120.1410,  316.0198,  424.9065,  413.7849],
#         [ 996.2346,   66.1754, 1150.0444,  115.6044],
#         [ 904.2705,  318.3655, 1070.5024,  392.8589],
#         [ 654.1114,  316.1828,  772.7939,  387.4500],
#         [ 196.7440,  210.1246,  793.4493,  244.3340],
#         [ 203.8361,  180.5623,  798.8435,  218.7407],
#         [ 176.2402,  215.3876,  792.0284,  251.7143]])
# xyxyn: tensor([[0.0833, 0.3740, 0.2945, 0.4897],
#         [0.6904, 0.0783, 0.7970, 0.1368],
#         [0.6267, 0.3768, 0.7419, 0.4649],
#         [0.4533, 0.3742, 0.5355, 0.4585],
#         [0.1363, 0.2487, 0.5499, 0.2892],
#         [0.1413, 0.2137, 0.5536, 0.2589],
#         [0.1221, 0.2549, 0.5489, 0.2979]])


print("OCR 处理部分", "**************************************************" * 3)

# One record is appended per non-empty crop: label, confidence, box, texts.
ocr_results = []

# Walk every YOLO result (a single image was inferred, so there is one).
for i, detection in enumerate(results):
    boxes = detection.boxes
    print(f"处理第 {i + 1} 个YOLO检测结果，共检测到 {len(boxes)} 个文本框。")

    # Each element yielded here is itself a Boxes object holding one box.
    for j, det_box in enumerate(boxes):
        # Label name and confidence of this box.
        class_id = int(det_box.cls.detach().cpu().item())
        label_name = id2name[class_id]
        label_conf = det_box.conf.detach().cpu().item()

        # Integer pixel coordinates of the box corners.
        x1, y1, x2, y2 = (int(v) for v in det_box.xyxy[0])
        print(f"  - 处理第 {j + 1} 个文本框，坐标: [{x1}, {y1}, {x2}, {y2}]")

        # Crop from the enhanced image, i.e. the same image YOLO was run on.
        cropped_img = enhanced[y1:y2, x1:x2]
        if cropped_img.size <= 0:
            print("    - WARNING: Cropped image is empty or invalid. Skipping.")
            continue

        print(f"    - 裁剪图像尺寸:H*W = {cropped_img.shape[0]}x{cropped_img.shape[1]}")

        # Debug: show the cropped region
        # cv2.imshow(f"Cropped Image {j+1}", cropped_img)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()

        # Run PaddleOCR on the crop and inspect the first result entry once.
        result = ocr.ocr(cropped_img)
        first_page = result[0] if result else None
        has_texts = bool(first_page) and 'rec_texts' in first_page

        # Log only the recognised text content.
        if has_texts:
            print(f"    - 识别文本: {result[0]['rec_texts']}")

        # The result is accepted only when both texts and scores are present.
        if has_texts and 'rec_scores' in first_page:
            texts = list(first_page['rec_texts'])
        else:
            texts = []
            print("    - WARNING: PaddleOCR did not return a valid result for this crop.")

        ocr_results.append({
            "label_name": label_name,
            "label_conf": label_conf,
            "box_coords": [x1, y1, x2, y2],
            "text": texts,
        })


print("最终OCR识别结果", "**************************************************" * 3)
if not ocr_results:
    print("没有识别到任何文本。请检查图像和模型。")
for res in ocr_results:
    print(f"【{res['label_name']}】文本框xyxy坐标: {res['box_coords']}, 置信度: {res['label_conf']:.2f}, 【识别文本】: {res['text']}")

# 【发票号码】文本框xyxy坐标: [1005, 73, 1162, 120], 置信度: 0.82, 【识别文本】: ['13625254']
# 【服务名称】文本框xyxy坐标: [115, 316, 429, 404], 置信度: 0.81, 【识别文本】: ['货物或应税劳务、服务名称', '*企业管理服务*物业管理费']
# 【金额】文本框xyxy坐标: [899, 329, 1080, 419], 置信度: 0.81, 【识别文本】: ['额', '1153.40']
# 【数量】文本框xyxy坐标: [655, 323, 781, 415], 置信度: 0.62, 【识别文本】: ['数量', '1']
# 【纳税人识别号】文本框xyxy坐标: [193, 213, 823, 253], 置信度: 0.60, 【识别文本】: ['微庆藏标料技有限公司', '税人识别号：', '91500112MA5YXE2J33']
# 【购买方名称】文本框xyxy坐标: [215, 174, 820, 221], 置信度: 0.29, 【识别文本】: ['能85213829092600618259', '称：重庆麒森科技有限公司']


print("后处理部分，正则表达式提取关键字段信息，然后入库等等", "**************************************************" * 3)
# Post-processing placeholder: design regular expressions for the actual
# business fields here; the better the earlier stages perform, the simpler
# this extraction logic becomes.

票据识别(Yolo + OCR）【过程清晰，简单美好】

网站公告

今日签到

热门文章

最新发布