【问题标题】:How to detect and align tilted images after cropping裁剪后如何检测和对齐倾斜的图像
【发布时间】:2021-06-04 21:29:40
【问题描述】:

我已经在我的解决方案上实现了一个效果很好的裁剪算法。问题是当图像倾斜时,裁剪会起作用,但它会像图像一样显示背景空间。

种植流程:

第一步:

第二步:

最终结果:

我已经搜索/尝试了多种解决方案,但无法获得不错的结果,或者我的想法不正确。

预期的结果是这样的:

编辑[最终结果]:

import cv2
import numpy as np

def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)

def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between 
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between 
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in 
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], 
                    [0, height - 1]], dtype = "float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

def get_image_width_height(image):
    image_width = image.shape[1]  # current image's width
    image_height = image.shape[0]  # current image's height
    return image_width, image_height

def calculate_scaled_dimension(scale, image):
    image_width, image_height = get_image_width_height(image)
    ratio_of_new_with_to_old = scale / image_width


    dimension = (scale, int(image_height * ratio_of_new_with_to_old))
    return dimension

def scale_image(image, size):
    image_resized_scaled = cv2.resize(
        image,
        calculate_scaled_dimension(
            size,
            image
        ),
        interpolation=cv2.INTER_AREA
    )
    return image_resized_scaled

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

image = cv2.imread('images/damina_cc_back.jpg')
original_image = image.copy()

image = scale_image(image, 500)

# convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screen_cnt = None

# loop over our contours
for c in cnts:
    # approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)

    if len(approx) == 4:
        screen_cnt = approx
        transformed = perspective_transform(image, screen_cnt)
        break

# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 1)

(h, w) = transformed.shape[:2]

if (h > w):
    rotated = rotate_image(transformed, 90)
else:
    rotated = transformed

cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)

【问题讨论】:

    标签: python opencv image-processing


    【解决方案1】:

    要在裁剪后对齐图像,我们可以使用perspective transformation。首先,我们将矩形的四个角分成cv2.approxPolyDP() 给我们的各个点。我们使用此函数将点重新排列为顺时针方向(左上、右上、右下、左下):

    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)
    

    这个函数为我们提供了 ROI 的边界框坐标

    现在有了孤立的角点,我们可以使用cv2.getPerspectiveTransform()获得变换矩阵,并使用cv2.warpPerspective()实际获得变换后的图像。

    def perspective_transform(image, corners):
        # Order points in clockwise order
        ordered_corners = order_corner_points(corners)
        top_l, top_r, bottom_r, bottom_l = ordered_corners
    
        # Determine width of new image which is the max distance between 
        # (bottom right and bottom left) or (top right and top left) x-coordinates
        width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
        width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
        width = max(int(width_A), int(width_B))
    
        # Determine height of new image which is the max distance between 
        # (top right and bottom right) or (top left and bottom left) y-coordinates
        height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
        height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
        height = max(int(height_A), int(height_B))
    
        # Construct new points to obtain top-down view of image in 
        # top_r, top_l, bottom_l, bottom_r order
        dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], 
                        [0, height - 1]], dtype = "float32")
    
        # Convert to Numpy format
        ordered_corners = np.array(ordered_corners, dtype="float32")
    
        # Find perspective transform matrix
        matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
    
        # Return the transformed image
        return cv2.warpPerspective(image, matrix, (width, height))
    

    这是结果

    我们可以用这个函数来旋转图像

    def rotate_image(image, angle):
        # Grab the dimensions of the image and then determine the center
        (h, w) = image.shape[:2]
        (cX, cY) = (w / 2, h / 2)
    
        # grab the rotation matrix (applying the negative of the
        # angle to rotate clockwise), then grab the sine and cosine
        # (i.e., the rotation components of the matrix)
        M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
        cos = np.abs(M[0, 0])
        sin = np.abs(M[0, 1])
    
        # Compute the new bounding dimensions of the image
        nW = int((h * sin) + (w * cos))
        nH = int((h * cos) + (w * sin))
    
        # Adjust the rotation matrix to take into account translation
        M[0, 2] += (nW / 2) - cX
        M[1, 2] += (nH / 2) - cY
    
        # Perform the actual rotation and return the image
        return cv2.warpAffine(image, M, (nW, nH))
    

    旋转后的最终结果:

    完整代码

    import cv2
    import numpy as np
    
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)
    
    def perspective_transform(image, corners):
        # Order points in clockwise order
        ordered_corners = order_corner_points(corners)
        top_l, top_r, bottom_r, bottom_l = ordered_corners
    
        # Determine width of new image which is the max distance between 
        # (bottom right and bottom left) or (top right and top left) x-coordinates
        width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
        width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
        width = max(int(width_A), int(width_B))
    
        # Determine height of new image which is the max distance between 
        # (top right and bottom right) or (top left and bottom left) y-coordinates
        height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
        height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
        height = max(int(height_A), int(height_B))
    
        # Construct new points to obtain top-down view of image in 
        # top_r, top_l, bottom_l, bottom_r order
        dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], 
                        [0, height - 1]], dtype = "float32")
    
        # Convert to Numpy format
        ordered_corners = np.array(ordered_corners, dtype="float32")
    
        # Find perspective transform matrix
        matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
    
        # Return the transformed image
        return cv2.warpPerspective(image, matrix, (width, height))
    
    def rotate_image(image, angle):
        # Grab the dimensions of the image and then determine the center
        (h, w) = image.shape[:2]
        (cX, cY) = (w / 2, h / 2)
    
        # grab the rotation matrix (applying the negative of the
        # angle to rotate clockwise), then grab the sine and cosine
        # (i.e., the rotation components of the matrix)
        M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
        cos = np.abs(M[0, 0])
        sin = np.abs(M[0, 1])
    
        # Compute the new bounding dimensions of the image
        nW = int((h * sin) + (w * cos))
        nH = int((h * cos) + (w * sin))
    
        # Adjust the rotation matrix to take into account translation
        M[0, 2] += (nW / 2) - cX
        M[1, 2] += (nH / 2) - cY
    
        # Perform the actual rotation and return the image
        return cv2.warpAffine(image, M, (nW, nH))
    
    image = cv2.imread('1.PNG')
    original_image = image.copy()
    
    # convert the image to grayscale, blur it, and find edges in the image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 11, 17, 17)
    edged = cv2.Canny(gray, 30, 200)
    
    cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
    screen_cnt = None
    
    # loop over our contours
    for c in cnts:
        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    
        if len(approx) == 4:
            screen_cnt = approx
            transformed = perspective_transform(original_image, screen_cnt)
            break
    
    # Draw ROI
    cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 3)
    
    # Rotate image
    rotated = rotate_image(transformed, -90)
    
    cv2.imshow("image", original_image)
    cv2.imshow("ROI", image)
    cv2.imshow("transformed", transformed)
    cv2.imshow("rotated", rotated)
    cv2.waitKey(0)
    

    【讨论】:

    • 谢谢你,我相信这会很有帮助,你能告诉我如何将 4 点转换为计数,以便我可以使用你的代码吗?我有这个: cv2.rectangle(image, (best_box[0], best_box[1]), (best_box[2], best_box[3]), (0, 255, 0), 1) 但我需要通过countour perpective_transform 函数
    • 你好,我想我明白了 :) 你能看到我的编辑并检查它是否看起来不错,也许建议一些优化?再次感谢您。
    • 另外,这段代码是否考虑了文档及其更正?如果没有,那很难做到吗?
    【解决方案2】:

    我假设您正在寻找找到边缘(或者可能是某些分位数)的最小和最大 u 和 v 位置,以找到裁剪的矩形。即遍历所有标记为边缘的图像像素并更新 u/v/ min/max 值。

    如果计算时间对您来说不是问题,您可以简单地保持算法不变,另外循环多次旋转并更新每个旋转的特殊值。伪代码:

    for v
      for u
        if (u,v) is edge
          for rotation_matrix
            (ur, vr) = rotation_matrix * (u,v)
            update boundary for given rotation matrix
    

    最后你可以选择最小的旋转矩阵的边界框。

    如果上述算法对您的用例来说太慢,您也可以尝试使用 opencv HoughLinesP 函数提取主轴。这当然不适用于所有类型的图像,但对于身份证来说可能就足够了。

    最后,要应用旋转校正,请参阅this 教程。

    【讨论】:

      猜你喜欢
      • 2019-03-12
      • 2017-01-10
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2013-01-28
      • 2020-10-11
      • 2018-10-19
      • 2018-10-13
      相关资源
      最近更新 更多