自学内容网 自学内容网

[PaddlePaddle飞桨] PaddleOCR图像小模型部署

PaddleOCR的GitHub项目地址:https://github.com/PaddlePaddle/PaddleOCR
推荐环境:

PaddlePaddle >= 2.1.2
Python >= 3.7
CUDA >= 10.1 
CUDNN >= 7.6

pip下载指令:

python -m pip install paddlepaddle-gpu==2.5.1 -i https://pypi.tuna.tsinghua.edu.cn/simple  

pip install paddleocr==2.7.3

小模型配置代码:

from paddleocr import PaddleOCR

# The `lang` argument selects the recognition language; PaddleOCR supports
# several, for example `ch`, `en`, `fr`, `german`, `korean`, `japan`.
# Build the engine a single time at import: construction downloads the
# models and loads them into memory, so it only needs to run once.
OCR = PaddleOCR(use_angle_cls=True, use_gpu=True, lang="ch")

图片文件保存代码:

import io
import cv2
import os
import uuid
import numpy as np
def save_image_file(file_path, file_name, file_content):
    """Decode an uploaded image stream and write it to disk under a unique name.

    The stored file name is a fresh UUID4 plus the extension of ``file_name``,
    so uploads sharing an original name do not overwrite each other.  The
    image is decoded with ``cv2.IMREAD_COLOR`` and re-encoded by
    ``cv2.imwrite``; it is not copied byte-for-byte.

    Args:
        file_path: Directory to store the image in; created if missing.
        file_name: Original file name; only its extension is used.
        file_content: Binary file-like object whose ``read()`` returns the
            encoded image bytes.

    Returns:
        The full path of the written file, or ``None`` when the stream is
        empty, cannot be decoded as an image, or cannot be written.
    """
    # Generate a unique file name that keeps the original extension.
    unique_filename = str(uuid.uuid4()) + os.path.splitext(file_name)[1]

    # Make sure the target directory exists; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(file_path, exist_ok=True)

    try:
        raw_bytes = file_content.read()
        if not raw_bytes:
            raise ValueError("image stream is empty")

        # Convert the byte stream to an ndarray and decode it.
        nparr = np.frombuffer(raw_bytes, np.uint8)
        file_nd_array = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if file_nd_array is None:
            # imdecode signals undecodable data by returning None.
            raise ValueError("content could not be decoded as an image")

        # Build the full destination path.
        file_path_with_file_name = os.path.join(file_path, unique_filename)

        # imwrite reports some failures through its return value instead of
        # raising; never hand back a path to a file that was not written.
        if not cv2.imwrite(filename=file_path_with_file_name, img=file_nd_array):
            raise OSError(f"could not write image to {file_path_with_file_name}")

        return file_path_with_file_name

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Error saving file: {e}")
        return None

获取OCR结果代码:

import os
# Get the OCR result (a list) for the given image file.
def get_text_with_ocr(file_path_with_file_name):
    """Run the module-level ``OCR`` engine on an image file.

    Args:
        file_path_with_file_name: Path of the image to recognise.  ``None``
            or an empty string is accepted so the failure value of an
            upstream save step can be passed straight through.

    Returns:
        Whatever ``OCR.ocr`` returns for the file, or ``None`` when the path
        is missing, empty, or does not refer to an existing regular file.
    """
    # os.path functions raise TypeError on None, so test for it first;
    # isfile also rejects directories, which exists() would let through.
    if not file_path_with_file_name or not os.path.isfile(file_path_with_file_name):
        return None
    return OCR.ocr(file_path_with_file_name)

图像文字提取代码:

# OCR (extract the text contained in an image)
def optical_character_recognition(file_content, file_name):
    """Save an uploaded image and return the text recognised in it.

    The image is stored under ``'.' + STATIC_IMAGE_PATH + "/"``
    (``STATIC_IMAGE_PATH`` is not defined in this snippet and must be
    provided at module level), then passed to the OCR engine.

    Args:
        file_content: Binary file-like object holding the encoded image.
        file_name: Original file name; used for its extension when saving.

    Returns:
        The recognised text lines joined with ``"\\n"``.  An empty string is
        returned when the image could not be saved or no text was found.
    """
    file_path_without_file_name = '.' + STATIC_IMAGE_PATH + "/"
    # save_image_file creates the directory itself, so no separate check here.
    file_path_with_file_name = save_image_file(file_path_without_file_name, file_name, file_content)
    if file_path_with_file_name is None:
        # Saving failed (already reported by save_image_file); nothing to OCR.
        return ""

    ocr_result = get_text_with_ocr(file_path_with_file_name)
    if not ocr_result:
        return ""

    # Keep only the text of each detection: item[1][0] is the recognised
    # string.  A per-page entry may be None/empty when nothing was detected,
    # so skip those instead of iterating over None.
    return '\n'.join(
        item[1][0]
        for sublist in ocr_result
        if sublist
        for item in sublist
    )

原文地址:https://blog.csdn.net/qq_45831414/article/details/140333027

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!