【问题标题】:Calculating skew of text OpenCV计算文本OpenCV的倾斜
【发布时间】:2014-06-04 19:37:29
【问题描述】:

我正在尝试计算图像中文本的倾斜度,以便进行校正以获得最佳 OCR 结果。

目前这是我正在使用的功能:

/**
 * Estimates the skew angle of the foreground in an image from the
 * minimum-area rectangle that encloses it.
 *
 * Pixels at or below the fixed threshold (225) are treated as foreground.
 * The mask is eroded with a 5x3 rectangle before the rectangle is fitted.
 *
 * @param img Image accessed as single-channel 8-bit data. It is modified in
 *            place: on return it holds the eroded binary mask with the
 *            fitted rectangle drawn on top of it.
 * @return The rectangle angle in degrees, folded into [-45, 45], or 0 when
 *         no foreground pixel is left after the erosion.
 */
double compute_skew(Mat &img)
{
    // Binarize: values above 225 become 255, everything else 0.
    cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);

    // Invert colors so that the former dark pixels are the non-zero ones.
    cv::bitwise_not(img, img);

    // Shrink the foreground with a 5x3 rectangular kernel
    // (presumably to drop isolated specks before fitting the box).
    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
    cv::erode(img, img, element);

    // Collect the coordinates of every non-zero pixel.
    std::vector<cv::Point> points;
    cv::Mat_<uchar>::iterator it = img.begin<uchar>();
    cv::Mat_<uchar>::iterator end = img.end<uchar>();
    for (; it != end; ++it)
        if (*it)
            points.push_back(it.pos());

    // Nothing survived thresholding/erosion: there is no rectangle to fit,
    // and minAreaRect must not be handed an empty point set.
    if (points.empty())
        return 0.;

    cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));

    // Fold the rectangle angle so that it describes the smaller rotation.
    // Legacy OpenCV reports angles in [-90, 0); values below -45 refer to
    // the other rectangle side and are shifted by 90.
    // NOTE(review): newer OpenCV releases are reported to return angles in
    // (0, 90]; the second branch folds those the same way and never fires
    // for the legacy range. Confirm against the OpenCV version in use.
    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;
    else if (angle > 45.)
        angle -= 90.;

    // Draw the fitted rectangle onto the mask for visual inspection.
    cv::Point2f vertices[4];
    box.points(vertices);
    for(int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1, CV_AA);

    return angle;
}

当我在调试中查看该角度时,我得到 0.000000

但是,当我给它这个图像时,我得到了大约 16 度倾斜的正确结果:

如何正确检测第一张图片中的歪斜?

【问题讨论】:

    标签: c++ opencv skew


    【解决方案1】:

    还有其他几种方法可以获得倾斜度,1)通过霍夫变换 2)通过水平投影轮廓。在不同的角度箱中旋转图像并计算水平投影。具有最大水平直方图值的角度是偏斜角。

    我提供了以下 1) 的实现。我认为它比您使用的包围盒(最小外接矩形)方法更好,因为后者要求您彻底清除图像中的所有噪声,而这在大多数情况下是不可能的。

    您应该知道,如果噪声太大,该方法将无法正常工作。您可以根据希望把哪种类型的“线”视为图像中最主要的“线”,用不同的方式来降低噪声。我为此提供了两种预处理方法(preprocess1 和 preprocess2)。请务必多尝试调整参数和阈值等。

    结果(全部使用 preprocess2 运行,全部使用相同的参数集运行)

    代码

    #include <opencv2/opencv.hpp>
    using namespace cv;
    using namespace std;
    
    /**
     * Finds the dominant straight line in a binary image with a hand-written
     * Hough transform and reports its inclination.
     *
     * Every interior pixel of `im` whose value is exactly 255 votes for all
     * lines through it, parameterised by 180 angle bins (one degree each) and
     * by the signed distance to the image centre. The strongest line is drawn
     * in red on `orig`, its angle is printed to stdout, and `orig` is shown
     * in a window named "orig".
     *
     * @param im   Image accessed as single-channel 8-bit data; read only.
     * @param orig Image the detected line is drawn on (modified in place).
     * @param skew Receives the line angle in degrees: 90 for a vertical
     *             line, 0 when no pixel voted. May be null.
     */
    void hough_transform(Mat& im,Mat& orig,double* skew)
    {
        const int angleBins = 180;
        const double halfW = .5*im.cols;
        const double halfH = .5*im.rows;
        // Distance from the image centre to a corner: upper bound for |rho|.
        const double max_r = sqrt(halfW*halfW+halfH*halfH);

        // cos/sin of each angle bin, computed once instead of once per pixel.
        double cosTab[angleBins];
        double sinTab[angleBins];
        for(int t=0;t<angleBins;t++)
        {
            const double binTheta = (double)t/angleBins*CV_PI;
            cosTab[t] = cos(binTheta);
            sinTab[t] = sin(binTheta);
        }

        // Accumulator: one row per angle bin, one column per (rho + max_r).
        Mat acc = Mat::zeros(Size(static_cast<int>(2*max_r),angleBins),CV_32SC1);
        const int cenx = im.cols/2;
        const int ceny = im.rows/2;
        bool voted = false;
        for(int y=1;y<im.rows-1;y++)
        {
            const uchar* row = im.ptr<uchar>(y);
            for(int x=1;x<im.cols-1;x++)
            {
                if(row[x]!=255)
                    continue;
                for(int t=0;t<angleBins;t++)
                {
                    const double r = (x-cenx)*cosTab[t]+(y-ceny)*sinTab[t]+max_r;
                    const int col = int(r);
                    // Defensive: never write outside the accumulator.
                    if(col<0 || col>=acc.cols)
                        continue;
                    acc.at<int>(t,col)++;
                    voted = true;
                }
            }
        }

        if(!voted)
        {
            // No white pixel contributed, so there is no line to report.
            // Without this check the all-zero accumulator would select
            // bin (0,0) and announce a bogus vertical line.
            if(skew)
                *skew=0;
            cout<<"skew angle "<<0<<endl;
            imshow("orig",orig);
            return;
        }

        /*debug (view on an 8-bit copy so the vote counts stay intact)
        Mat acc8,cmap;
        normalize(acc,acc8,255,0,NORM_MINMAX);
        convertScaleAbs(acc8,acc8);
        applyColorMap(acc8,cmap,COLORMAP_JET);
        imshow("cmap",cmap);
        imshow("acc",acc8);*/

        // Locate the peak on the raw vote counts. Searching a copy that was
        // squeezed into 8 bits would let every bin that rounds to 255 tie
        // with the real maximum.
        Point maxLoc;
        minMaxLoc(acc,0,0,0,&maxLoc);
        const double theta = (double)maxLoc.y/angleBins*CV_PI;
        const double rho = maxLoc.x-max_r;

        double skewangle;
        if(std::abs(sin(theta))<0.000001)//check vertical
        {
            //when vertical, line equation becomes
            //x = rho (relative to the image centre)
            Point2d p1 = Point2d(rho+cenx,0);
            Point2d p2 = Point2d(rho+cenx,im.rows);
            line(orig,p1,p2,Scalar(0,0,255),1);
            skewangle=90;
            cout<<"skew angle "<<" 90"<<endl;
        }else
        {
            //convert normal form back to slope intercept form
            //y = mx + b, using the same integer centre the votes used
            const double m = -cos(theta)/sin(theta);
            const double b = rho/sin(theta)+ceny-m*cenx;
            Point2d p1 = Point2d(0,b);
            Point2d p2 = Point2d(im.cols,im.cols*m+b);
            line(orig,p1,p2,Scalar(0,0,255),1);
            // p2 lies to the right of p1, so this is the slope angle.
            skewangle = atan2(p2.y-p1.y,p2.x-p1.x)*180./CV_PI;
            cout<<"skew angle "<<skewangle<<endl;
        }
        if(skew)
            *skew=skewangle;
        imshow("orig",orig);
    }
    
    /**
     * Produces a binary edge map from the squared Sobel gradient magnitude.
     *
     * For every interior pixel the horizontal and vertical 3x3 Sobel
     * responses are squared and summed; the result is rescaled to 0..255,
     * converted to 8 bits and thresholded at 50. The one-pixel border of
     * the gradient map is never written and keeps its initial zero.
     *
     * @param im Image accessed as single-channel 8-bit data; read only.
     * @return 8-bit single-channel image holding 255 where the rescaled
     *         response exceeds 50 and 0 elsewhere.
     */
    Mat preprocess1(Mat& im)
    {
        Mat magnitude = Mat::zeros(im.size(),CV_32SC1);

        for(int row=1; row<im.rows-1; ++row)
        {
            const uchar* above = im.ptr<uchar>(row-1);
            const uchar* here  = im.ptr<uchar>(row);
            const uchar* below = im.ptr<uchar>(row+1);
            int* out = magnitude.ptr<int>(row);

            for(int col=1; col<im.cols-1; ++col)
            {
                const int left = col-1;
                const int right = col+1;

                // Right column minus left column (horizontal derivative).
                const int dx = (above[right]-above[left])
                             + 2*(here[right]-here[left])
                             + (below[right]-below[left]);
                // Bottom row minus top row (vertical derivative).
                const int dy = (below[left]-above[left])
                             + 2*(below[col]-above[col])
                             + (below[right]-above[right]);

                out[col] = dx*dx + dy*dy;
            }
        }

        normalize(magnitude,magnitude,255,0,NORM_MINMAX);
        magnitude.convertTo(magnitude,CV_8UC1);
        threshold(magnitude,magnitude,50,255,THRESH_BINARY);
        return magnitude;
    }
    
    /**
     * Keeps only the pixels that sit on the lower edge of a bright region.
     *
     * 1) Assumes white on black and applies a local (Gaussian adaptive)
     *    threshold with a 15x15 neighbourhood and an offset of -2.
     * 2) Lets an interior pixel through only if it is white, all three
     *    pixels directly above it are white, and at least one of the three
     *    pixels directly below it is black (bottom of a text line).
     *
     * @param im Single-channel 8-bit image; read only.
     * @return 8-bit mask of the same size with 255 at the selected pixels.
     */
    Mat preprocess2(Mat& im)
    {
        Mat binary;
        //binary=255-im;
        adaptiveThreshold(im,binary,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,15,-2);

        Mat bottomEdges = Mat::zeros(im.size(),CV_8UC1);
        for(int row=1; row<binary.rows-1; ++row)
        {
            const uchar* above = binary.ptr<uchar>(row-1);
            const uchar* here  = binary.ptr<uchar>(row);
            const uchar* below = binary.ptr<uchar>(row+1);
            uchar* out = bottomEdges.ptr<uchar>(row);

            for(int col=1; col<binary.cols-1; ++col)
            {
                if(here[col]!=255)
                    continue;

                const bool aboveAllWhite =
                    above[col-1]!=0 && above[col]!=0 && above[col+1]!=0;
                const bool belowHasBlack =
                    below[col-1]==0 || below[col]==0 || below[col+1]==0;

                if(aboveAllWhite && belowHasBlack)
                    out[col] = 255;
            }
        }
        return bottomEdges;
    }
    /**
     * Rotates an image about its centre and enlarges the canvas so that the
     * whole rotated image fits.
     *
     * @param im       Image to rotate; read only.
     * @param thetaRad Rotation angle in radians. It is converted to degrees
     *                 and passed to getRotationMatrix2D.
     * @return The rotated image (Lanczos interpolation), or an empty Mat
     *         when `im` is empty.
     */
    Mat rot(Mat& im,double thetaRad)
    {
        // Nothing to warp; warpAffine must not be given an empty source.
        if(im.empty())
            return Mat();

        // Width and height of the axis-aligned box around the rotated image.
        const double nw = std::abs(sin(thetaRad))*im.rows+std::abs(cos(thetaRad))*im.cols;
        const double nh = std::abs(cos(thetaRad))*im.rows+std::abs(sin(thetaRad))*im.cols;

        // Rotation about the centre of the enlarged canvas.
        cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5,nh*.5), thetaRad*180/CV_PI, 1);

        // Offset that centres the source on the enlarged canvas, as a 3x1
        // column vector. Its third entry stays 0, so the product below
        // applies only the rotation part of rot_mat to the offset.
        Mat pos = Mat::zeros(Size(1,3),CV_64FC1);
        pos.at<double>(0)=(nw-im.cols)*.5;
        pos.at<double>(1)=(nh-im.rows)*.5;
        Mat res = rot_mat*pos;
        rot_mat.at<double>(0,2) += res.at<double>(0);
        rot_mat.at<double>(1,2) += res.at<double>(1);

        cv::Mat rotated;
        // The canvas size is truncated to whole pixels.
        cv::warpAffine(im, rotated, rot_mat,
                       Size(static_cast<int>(nw),static_cast<int>(nh)),
                       cv::INTER_LANCZOS4);
        return rotated;
    }
    
    int main(int argc, char** argv)
    {
        string src="C:/data/skew.png";
        Mat im= imread(src);
        Mat gray;
        cvtColor(im,gray,CV_BGR2GRAY);
    
        Mat preprocessed = preprocess2(gray);
        imshow("preprocessed2",preprocessed);
        double skew;
        hough_transform(preprocessed,im,&skew);
        Mat rotated = rot(im,skew* CV_PI/180);
        imshow("corrected",rotated);
    
        waitKey(0);
        return 0;
    }
    

    【讨论】:

    • 我改变了我的想法,你的照片很好。但代码不是。以错误的角度旋转我的图像...
    • 你能发布图片吗?也许我可以看看他们是否可以工作
    • 感谢您的回复。你能帮我像stackoverflow.com/questions/23125359/…那样裁剪文本吗?我没能把该链接中的做法应用到我的代码中。可能是在我需要再次旋转之后。
    • 谢谢你,先生,你是一个活的救星
    【解决方案2】:

    您发布的方法有其自己的“理想二值化”假设。阈值直接影响过程。利用 otsu 阈值,或者考虑使用DFT 来获得通用解决方案。

    Otsu(大津)阈值法尝试:

    /**
     * Demo: binarises an image with Otsu's threshold, fits a minimum-area
     * rectangle around the inverted foreground and shows the mask before and
     * after the rectangle is drawn on it.
     *
     * @return 0 on success, 1 when the image cannot be read or contains no
     *         foreground pixel after thresholding.
     */
    int main()
    {
        Mat input = imread("your text");
        // Stop here rather than pass an empty image to cvtColor.
        if (input.empty())
            return 1;
        cvtColor(input, input, CV_BGR2GRAY);

        // With THRESH_OTSU the threshold is chosen automatically; the value
        // 100 passed here is not the one that ends up being applied.
        Mat img;
        cv::threshold(input, img, 100, 255, cv::THRESH_OTSU);

        // Invert so that the formerly dark pixels are non-zero.
        cv::bitwise_not(img, img);
        imshow("img ", img);
        waitKey(0);

        vector<Point> points;
        findNonZero(img, points);
        // No foreground at all: there is nothing to fit a rectangle to.
        if (points.empty())
            return 1;
        cv::RotatedRect box = cv::minAreaRect(points);

        // Estimated skew in degrees, folded into [-45, 45]. It is computed
        // for inspection (e.g. in a debugger) and not used further below.
        // NOTE(review): the second branch covers OpenCV releases reported to
        // return angles in (0, 90]; it never fires for the legacy
        // [-90, 0) range. Confirm against the OpenCV version in use.
        double angle = box.angle;
        if (angle < -45.)
            angle += 90.;
        else if (angle > 45.)
            angle -= 90.;

        // Outline the fitted rectangle on the mask.
        cv::Point2f vertices[4];
        box.points(vertices);
        for(int i = 0; i < 4; ++i)
            cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0));
        imshow("img ", img);
        waitKey(0);

        return 0;
    }
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 2016-02-03
      • 1970-01-01
      • 1970-01-01
      • 2023-03-23
      • 2014-05-21
      • 2013-07-19
      • 2012-11-15
      相关资源
      最近更新 更多