《OpenCV》图像的透视变换--处理发票
目录
在计算机视觉领域,透视变换是一种常用的技术,用于将图像从一个视角转换到另一个视角。透视变换在图像处理中有着广泛的应用,例如在文档扫描、车牌识别、以及本文将要介绍的发票处理中。本文将详细介绍如何使用Python和OpenCV库对发票图像进行透视变换,并对其进行后续处理。
1. 引言
在实际应用中,我们经常会遇到需要从图像中提取特定区域的需求。例如,在处理发票时,我们可能需要将发票从背景中提取出来,并将其转换为一个标准的矩形图像,以便后续的OCR(光学字符识别)处理。透视变换正是实现这一目标的关键技术。
2. 准备工作
在开始之前,我们需要安装并导入必要的Python库:
import numpy as np
import cv2
3. 图像预处理
首先,我们需要加载并预处理图像。为了便于处理,我们将图像的高度调整为500像素,并保持其宽高比不变。
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Scale ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two ``shape`` entries are (height, width).
        width: Target width in pixels. When given it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels; used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when neither ``width`` nor ``height`` is given,
        otherwise the output of ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]
    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target = (int(src_w * scale), height)
    # The size tuple is ordered (width, height).
    return cv2.resize(image, target, interpolation=inter)
# Load the invoice photo. cv2.imread returns None (it does not raise) when the
# file is missing or cannot be decoded, so fail early with a clear message
# instead of crashing later on ``img.shape``.
image_path = './images/fapiao.jpg'
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError("Cannot read image: " + image_path)
# Scale factor that maps coordinates on the 500-px-high working copy back to
# the full-resolution original.
ratio = img.shape[0] / 500.0
orig = img.copy()
img = resize(orig, height=500)
4. 轮廓检测
接下来,我们需要检测图像中的轮廓。首先将图像转换为灰度图,然后使用二值化处理来突出边缘。
# Convert the working copy to grayscale and binarise it with Otsu's method.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, edged = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# Index -2 selects the contour list whether findContours returns two values
# or three (the return signature differs between OpenCV major versions).
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Draw every contour in red (BGR) on a copy so ``img`` stays untouched.
img_contours = cv2.drawContours(img.copy(), cnts, -1, (0, 0, 255), 1)
5. 获取最大轮廓
我们假设发票是图像中最大的轮廓,因此我们可以通过计算轮廓的面积来找到最大的轮廓。
# The invoice is assumed to be the contour that encloses the largest area.
cnts_by_area = sorted(cnts, key=cv2.contourArea, reverse=True)
screenCnt = cnts_by_area[0]
# Approximate it by a closed polygon, with a tolerance of 2% of its perimeter.
peri = cv2.arcLength(screenCnt, True)
screenCnt = cv2.approxPolyDP(screenCnt, 0.02 * peri, True)
6. 透视变换
找到发票的轮廓后,我们需要对其进行透视变换,将其转换为一个标准的矩形图像。下面的 four_point_transform 依赖两个辅助函数:order_points(对四个顶点排序)和 distance(计算两点间的欧氏距离),它们的定义见第9节的完整代码。
def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

    Args:
        image: Source image handed to ``cv2.warpPerspective``.
        pts: The four corner points; they are sorted by ``order_points``.

    Returns:
        The warped image. Its width and height are the longer of each pair
        of opposite edges of the quadrilateral, truncated to integers.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output size: take the longer edge of each opposite pair.
    bottom_edge = distance(bottom_right, bottom_left)
    top_edge = distance(top_right, top_left)
    out_w = max(int(bottom_edge), int(top_edge))

    left_edge = distance(top_left, bottom_left)
    right_edge = distance(top_right, bottom_right)
    out_h = max(int(left_edge), int(right_edge))

    # Destination corners, listed in the same order as ``corners``.
    target = np.array(
        [[0, 0], [out_w, 0], [out_w, out_h], [0, out_h]],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))
# The corners were found on the resized working copy; multiply by ``ratio``
# to express them in the coordinates of the full-resolution original.
corners = screenCnt.reshape(4, 2) * ratio
warped = four_point_transform(orig, corners)
# Rotate the result a quarter turn counter-clockwise
# (presumably because the invoice was photographed sideways; verify per image).
warped = cv2.rotate(warped, cv2.ROTATE_90_COUNTERCLOCKWISE)
7. 二值化处理
为了便于后续的OCR处理,我们需要对透视变换后的图像进行二值化处理。
# Grayscale conversion followed by an Otsu threshold.
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
_, warped_th = cv2.threshold(warped, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# Morphological clean-up with a 2x2 kernel: two erosions, then one dilation.
kernel = np.ones((2, 2), dtype=np.uint8)
warped_th = cv2.dilate(
    cv2.erode(warped_th, kernel, iterations=2),
    kernel,
    iterations=1,
)
# Normalise the output height to 800 px, keeping the aspect ratio.
warped_th = resize(warped_th, height=800)
8. 结果展示
最后,我们将处理后的图像显示出来。如需保存结果,可以调用 cv2.imwrite(完整代码中该行已被注释掉)。其中 cv_show 是一个显示图片的辅助函数,定义见第9节的完整代码。
# Show the rectified invoice in a resizable window and wait for a key press.
cv2.namedWindow('xx', cv2.WINDOW_NORMAL)
cv2.imshow("xx", warped)
cv2.waitKey(0)
# Empty blue and red planes. They are sized from ``warped_th`` itself rather
# than a hard-coded 800x544, because cv2.merge needs all planes to share one
# shape and the resized width depends on the input image.
b = np.zeros_like(warped_th)
r = np.zeros_like(warped_th)
# Lift pure-black pixels to 100 so they remain visible in the green plane.
warped_th[warped_th == 0] = 100
# Planes are merged in B, G, R order: the binarised image becomes the green one.
AA = cv2.merge((b, warped_th, r))
cv_show('AA', AA)
9. 完整代码
import numpy as np
import cv2
# Helper that changes the size of an image
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Scale ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two ``shape`` entries are (height, width).
        width: Target width in pixels. When given it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels; used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``. It selects how
            pixels are interpolated while resizing; the default
            ``cv2.INTER_AREA`` means area interpolation.

    Returns:
        ``image`` itself when neither ``width`` nor ``height`` is given,
        otherwise the output of ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]
    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target = (int(src_w * scale), height)
    # The size tuple is ordered (width, height).
    return cv2.resize(image, target, interpolation=inter)
# Helper that displays an image
def cv_show(name, image):
    """Show ``image`` in a window named ``name``, then call ``cv2.waitKey(0)``."""
    cv2.imshow(name, image)
    # A delay of 0 makes waitKey wait for a key press before returning.
    cv2.waitKey(0)
# Euclidean distance between two points
def distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components (x, y) of each point are used.

    Args:
        p1: First point; any indexable with at least two numeric entries.
        p2: Second point, same layout as ``p1``.

    Returns:
        The distance as a NumPy float64 value.
    """
    # Convert to float64 *before* subtracting: with narrow or unsigned integer
    # coordinates (e.g. uint8) the difference would wrap around and the
    # squares could overflow in the input dtype.
    dx = np.asarray(p1[0], dtype=np.float64) - np.asarray(p2[0], dtype=np.float64)
    dy = np.asarray(p1[1], dtype=np.float64) - np.asarray(p2[1], dtype=np.float64)
    return np.hypot(dx, dy)
# Sort the four corners of the source quadrilateral
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    The top-left corner is the point with the smallest ``x + y`` and the
    bottom-right the one with the largest; the top-right corner has the
    smallest ``y - x`` and the bottom-left the largest.

    Args:
        pts: Array-like of shape (N, 2) holding (x, y) points, normally N == 4.

    Returns:
        A ``float32`` array of shape (4, 2) in the order described above.

    Raises:
        ValueError: If the sum/difference rule selects the same point for two
            corners (for example a diamond-shaped outline whose sums or
            differences tie), which would give a degenerate quadrilateral.
    """
    # float64 keeps ``y - x`` from wrapping on unsigned integer input and lets
    # plain Python lists be passed in.
    pts = np.asarray(pts, dtype=np.float64)
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)  # y - x for every point

    picks = [
        int(np.argmin(coord_sum)),   # top-left
        int(np.argmin(coord_diff)),  # top-right
        int(np.argmax(coord_sum)),   # bottom-right
        int(np.argmax(coord_diff)),  # bottom-left
    ]
    if len(set(picks)) != 4:
        raise ValueError(
            "cannot order corners unambiguously: the same point was selected "
            "for more than one corner"
        )
    return pts[picks].astype("float32")
def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

    Args:
        image: Source image handed to ``cv2.warpPerspective``.
        pts: The four corner points; they are sorted by ``order_points``.

    Returns:
        The warped image. Its width and height are the longer of each pair
        of opposite edges of the quadrilateral, truncated to integers.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Compute the output width and height from the edge lengths.
    bottom_edge = distance(bottom_right, bottom_left)
    top_edge = distance(top_right, top_left)
    out_w = max(int(bottom_edge), int(top_edge))

    left_edge = distance(top_left, bottom_left)
    right_edge = distance(top_right, bottom_right)
    out_h = max(int(left_edge), int(right_edge))

    # Destination corners, listed in the same order as ``corners``.
    target = np.array(
        [[0, 0], [out_w, 0], [out_w, out_h], [0, out_h]],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))
# Load the invoice photo. cv2.imread returns None (it does not raise) when the
# file is missing or cannot be decoded, so fail early with a clear message.
image_path = './images/fapiao.jpg'
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError("Cannot read image: " + image_path)
cv_show('image', img)
# Scale factor that maps coordinates on the 500-px-high working copy back to
# the full-resolution original.
ratio = img.shape[0] / 500.0
orig = img.copy()
img = resize(orig, height=500)
cv_show('1', img)

# Contour detection
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edged = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# Index -2 selects the contour list whether findContours returns two values
# or three (the return signature differs between OpenCV major versions).
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
if len(cnts) == 0:
    raise ValueError("No contours found in " + image_path)
img_contours = cv2.drawContours(img.copy(), cnts, -1, (0, 0, 255), 1)
cv_show('img_contours', img_contours)

# Pick the largest contour (the invoice is assumed to be the biggest shape)
screenCnt = max(cnts, key=cv2.contourArea)
# Perimeter
peri = cv2.arcLength(screenCnt, True)
# Contour approximation, with a tolerance of 2% of the perimeter
screenCnt = cv2.approxPolyDP(screenCnt, 0.02 * peri, True)
print(screenCnt.shape)
# The approximation is not guaranteed to be a quadrilateral; report that
# clearly instead of failing inside reshape(4, 2) below.
if len(screenCnt) != 4:
    raise ValueError(
        "Expected a 4-corner outline for the invoice, got %d points" % len(screenCnt)
    )
img_contour = cv2.drawContours(img.copy(), [screenCnt], -1, (0, 255, 0), 2)
cv_show('img_contour', img_contour)

# Perspective transform; corners are scaled back to full resolution first
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
warped = cv2.rotate(warped, cv2.ROTATE_90_COUNTERCLOCKWISE)
# cv2.imwrite('image/invoice_new.jpg', warped)
cv2.namedWindow('xx', cv2.WINDOW_NORMAL)
cv2.imshow("xx", warped)
cv2.waitKey(0)

# Binarisation
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
warped_th = cv2.threshold(warped, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
kernel = np.ones((2, 2), np.uint8)
# warped_th = cv2.morphologyEx(warped_th,cv2.MORPH_CLOSE,kernel)
warped_th = cv2.erode(warped_th, kernel, iterations=2)
warped_th = cv2.dilate(warped_th, kernel, iterations=1)
warped_th = resize(warped_th, height=800)
cv_show('warped_th', warped_th)

# Empty blue and red planes. They are sized from ``warped_th`` itself rather
# than a hard-coded 800x544, because cv2.merge needs all planes to share one
# shape and the resized width depends on the input image.
b = np.zeros_like(warped_th)
r = np.zeros_like(warped_th)
# b[b==0]=255
# r[r==0]=255
# Lift pure-black pixels to 100 so they remain visible in the green plane.
warped_th[warped_th == 0] = 100
# Planes are merged in B, G, R order: the binarised image becomes the green one.
AA = cv2.merge((b, warped_th, r))
cv_show('AA', AA)
10. 结果展示
(1)、原图
(2)、处理后的图
11. 结论
通过本文的介绍,我们学习了如何使用Python和OpenCV对发票图像进行透视变换。透视变换不仅可以帮助我们提取图像中的特定区域,还可以为后续的图像处理任务(如OCR)提供标准化的输入。
原文地址:https://blog.csdn.net/qq_61600833/article/details/145252921
免责声明:本站文章内容转载自网络资源,如侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!