【问题标题】:detect text contours in image检测图像中的文本轮廓
【发布时间】:2016-02-24 10:49:55
【问题描述】:

我有一个程序,可以对包含文本的图像进行二值化。程序中有一个可选的裁剪功能,它会检测文本轮廓并据此裁剪图像。但在某些情况下,它无法检测到所有的文本轮廓。

如果您使用-d 参数,程序会在输出图像中将文本轮廓绘制为矩形而不进行裁剪

文本轮廓检测(和矩形绘制)的逻辑在detect_text_box函数中

命令

/var/txtbin /var/in.png -d /var/out.png

代码

/*
 *  Compile
 *  # g++ txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs`
 *
 *  Get opencv version
 *  # pkg-config --modversion opencv
 *
 *  Run
 *  # ./txtbin input.jpg output.png
 */

#include "string"
#include "fstream"
#include "/var/bin/opencv/include/opencv2/opencv.hpp"
//#include "/usr/include/opencv2/opencv.hpp"
#include "/usr/include/boost/tuple/tuple.hpp"

using namespace std;
using namespace cv;
using namespace boost;

void CalcBlockMeanVariance(Mat& Img, Mat& Res, float blockSide=21, float contrast=0.01){
    /*
     *  Builds in Res an image of the same size as Img in which the blocks
     *  that show contrast have been filled in from their surroundings.
     *
     *  Img:        input image; it is scaled by 255 before inpainting, so it
     *              is presumably expected in the [0, 1] range
     *  Res:        output image, CV_32FC1, values scaled back by 1/255
     *  blockSide:  set greater for larger fonts in image and vice versa
     *  contrast:   set smaller for lower contrast image
     */

    Mat source;
    Img.convertTo(source, CV_32FC1);

    //  One cell of Res per block of the source image
    Res = Mat::zeros(Img.rows / blockSide, Img.cols / blockSide, CV_32FC1);

    //  A block keeps its mean only when its standard deviation exceeds the
    //  contrast threshold; every other cell is set to zero
    Scalar blockMean, blockStdDev;
    for(int y = 0; y < Img.rows - blockSide; y += blockSide){
        for(int x = 0; x < Img.cols - blockSide; x += blockSide){
            Mat block = source(Range(y, y + blockSide + 1), Range(x, x + blockSide + 1));
            meanStdDev(block, blockMean, blockStdDev);

            Res.at<float>(y / blockSide, x / blockSide) = (blockStdDev[0] > contrast) ? blockMean[0] : 0;
        }
    }

    //  Source image shrunk to one pixel per block
    Mat shrunk;
    resize(source, shrunk, Res.size());

    //  Cells holding a mean above 0.02 are the ones to be inpainted
    Mat fillMask;
    threshold(Res, fillMask, 0.02, 1.0, THRESH_BINARY);
    fillMask.convertTo(fillMask, CV_8UC1);

    //  inpaint() is fed 8-bit data
    shrunk.convertTo(shrunk, CV_8UC1, 255);

    Mat filled;
    inpaint(shrunk, fillMask, filled, 5, INPAINT_TELEA);

    //  Back to the input size and to float values
    resize(filled, Res, Img.size());
    Res.convertTo(Res, CV_32FC1, 1.0 / 255.0);
}

tuple<int, int, int, int> detect_text_box(string input, Mat& res, bool draw_contours=false){
    Mat large = imread(input);

    bool test_output = false;

    int
        top = large.rows,
        bottom = 0,
        left = large.cols,
        right = 0;

    int
        rect_bottom,
        rect_right;

    Mat rgb;
    // downsample and use it for processing
    pyrDown(large, rgb);
    pyrDown(rgb, rgb);
    Mat small;
    cvtColor(rgb, small, CV_BGR2GRAY);
    // morphological gradient
    Mat grad;
    Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
    // binarize
    Mat bw;
    threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
    // connect horizontally oriented regions
    Mat connected;
    morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
    morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
    // find contours
    Mat mask = Mat::zeros(bw.size(), CV_8UC1);
    vector<vector<Point> > contours;
    vector<Vec4i> hierarchy;
    findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

    Scalar color = Scalar(0, 255, 0);
    Scalar color2 = Scalar(0, 0, 255);
    int thickness = 2;

    // filter contours
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
        Rect rect = boundingRect(contours[idx]);
        Mat maskROI(mask, rect);
        maskROI = Scalar(0, 0, 0);
        // fill the contour
        drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
        // ratio of non-zero pixels in the filled region
        double r = (double)countNonZero(maskROI) / (rect.width * rect.height);

        // assume at least 25% of the area is filled if it contains text
        if (r > 0.25 && 
        (rect.height > 8 && rect.width > 8) // constraints on region size
        // these two conditions alone are not very robust. better to use something 
        //like the number of significant peaks in a horizontal projection as a third condition
        ){
            if(draw_contours){
                rectangle(res, Rect(rect.x * 4, rect.y * 4, rect.width * 4, rect.height * 4), color, thickness);
            }

            if(test_output){
                rectangle(rgb, rect, color, thickness);
            }

            if(rect.y < top){
                top = rect.y;
            }
            rect_bottom = rect.y + rect.height;
            if(rect_bottom > bottom){
                bottom = rect_bottom;
            }
            if(rect.x < left){
                left = rect.x;
            }
            rect_right = rect.x + rect.width;
            if(rect_right > right){
                right = rect_right;
            }
        }
    }

    if(draw_contours){
        rectangle(res, Point(left * 4, top * 4), Point(right * 4, bottom * 4), color2, thickness);
    }

    if(test_output){
        rectangle(rgb, Point(left, top), Point(right, bottom), color2, thickness);
        imwrite(string("test_text_contours.jpg"), rgb);
    }

    return make_tuple(left * 4, top * 4, (right - left) * 4, (bottom - top) * 4);
}

int main(int argc, char* argv[]){
    string input;
    string output = "output.png";

    int
        width = 0,
        height = 0,
        blockside = 9;

    bool
        crop = false,
        draw = false;

    float margin = 0;

    cout << "OpenCV version: " << CV_VERSION << endl;

    //  Return error if arguments are missing
    if(argc < 3){
        cerr << "\nUsage: txtbin input [options] output\n\n"
            "Options:\n"
            "\t-w <number>          -- set max width (keeps aspect ratio)\n"
            "\t-h <number>          -- set max height (keeps aspect ratio)\n"
            "\t-c                   -- crop text content contour\n"
            "\t-m <number>          -- add margins (number in %)\n"
            "\t-b <number>          -- set blockside\n"
            "\t-d                   -- draw text content contours (debugging)\n" << endl;
        return 1;
    }

    //  Parse arguments
    for(int i = 1; i < argc; i++){
        if(i == 1){
            input = string(argv[i]);

            //  Return error if input file is invalid
            ifstream stream(input.c_str());
            if(!stream.good()){
                cerr << "Error: Input file is invalid!" << endl;
                return 1;
            }
        }
        else if(string(argv[i]) == "-w"){
            width = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-h"){
            height = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-c"){
            crop = true;
        }
        else if(string(argv[i]) == "-m"){
            margin = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-b"){
            blockside = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-d"){
            draw = true;
        }
        else if(i == argc - 1){
            output = string(argv[i]);
        }
    }

    Mat Img = imread(input, CV_LOAD_IMAGE_GRAYSCALE);
    Mat res;
    Img.convertTo(Img, CV_32FC1, 1.0 / 255.0);
    CalcBlockMeanVariance(Img, res, blockside);
    res = 1.0 - res;
    res = Img + res;
    threshold(res, res, 0.85, 1, THRESH_BINARY);

    int
        txt_x,
        txt_y,
        txt_width,
        txt_height;

    if(crop || draw){
        tie(txt_x, txt_y, txt_width, txt_height) = detect_text_box(input, res, draw);
    }

    if(crop){
        //res = res(Rect(txt_x, txt_y, txt_width, txt_height)).clone();
        res = res(Rect(txt_x, txt_y, txt_width, txt_height));
    }

    if(margin){
        int border = res.cols * margin / 100;
        copyMakeBorder(res, res, border, border, border, border, BORDER_CONSTANT, Scalar(255, 255, 255));
    }

    float
        width_input = res.cols,
        height_input = res.rows;

    bool resized = false;

    //  Downscale image
    if(width > 0 && width_input > width){
        float scale = width_input / width;
        width_input /= scale;
        height_input /= scale;
        resized = true;
    }
    if(height > 0 && height_input > height){
        float scale = height_input / height;
        width_input /= scale;
        height_input /= scale;
        resized = true;
    }
    if(resized){
        resize(res, res, Size(round(width_input), round(height_input)));
    }

    imwrite(output, res * 255);

    return 0;
}

图像 1 输入

图像 1 输出

图像 2 输入

图像 2 输出

更新

我将您的代码放在一个类中,但出现错误

这个类被命名为XYcut,在下面的代码中我得到一个编译错误

// Groups rectangles: two rectangles are put together when the top-left corner
// of one lies closer than max_distance2 (compared against ed2's result) to the
// top-right corner of the other.
// NOTE(review): per the compiler error quoted below, ed2 is a non-static member
// of XYcut here, so calling it needs `this`. Either capture it
// ([this, max_distance2]) or declare ed2 static - confirm against the class.
int n_labels = partition(filteredRects, labels, [max_distance2](const cv::Rect& lhs, const cv::Rect& rhs){
    if(XYcut::ed2(lhs.tl(), cv::Point(rhs.br().x, rhs.tl().y)) < max_distance2){
        return true;
    }
    if(XYcut::ed2(rhs.tl(), cv::Point(lhs.br().x, lhs.tl().y)) < max_distance2){
        return true;
    }
    return false;
});

错误

 error: ‘this’ was not captured for this lambda function
   if(XYcut::ed2(lhs.tl(), cv::Point(rhs.br().x, rhs.tl().y)) < max_distance2){

如何在这个 lambda 中引用 XYcut 类的 ed2 方法?

类和方法

这是方法

// Squared Euclidean distance between two points.
int XYcut::ed2(const cv::Point& lhs, const cv::Point& rhs){
    const int dx = lhs.x - rhs.x;
    const int dy = lhs.y - rhs.y;
    return dx * dx + dy * dy;
}

【问题讨论】:

    标签: c++ opencv


    【解决方案1】:

    我只是想提出一种不同的方法。它基于XY-Cut 算法,并且由于您的文本是轴对齐的,因此效果很好。


    在您的输入图像上,计算 XY-Cut,并获取边界框:

    您看到您正确识别了一组字符,而不是整个单词。所以,首先我们移除非常小的矩形,它们只是噪声:

    然后我们将彼此非常接近的矩形分组。您可以为此使用 cv::partition 和适当的谓词:

    现在每个单词都有一个边界框。您最终可以获得每条线的边界框。在您的第二张图片上,您将获得:

    这是我使用的代码:

    #include <opencv2\opencv.hpp>
    #include <iostream>
    #include <vector>
    using namespace std;
    using namespace cv;
    
    Mat3b dbg;
    
    // Splits roi into horizontal bands, one per run of consecutive rows that
    // contain at least one non-zero pixel of src.
    //
    // src: single-channel image, non-zero pixels are foreground
    // roi: region of src to split
    //
    // Returns the bands in src coordinates. The result is empty when roi cannot
    // be split: either it holds no foreground at all, or the only band is roi
    // itself. Every rectangle considered is also drawn onto the global dbg image.
    vector<Rect> XYCut_projH(const Mat1b& src, Rect roi)
    {
        rectangle(dbg, roi, Scalar(255, 0, 0));

        // Single column holding the maximum of every row of roi
        Mat1b projH;
        reduce(src(roi), projH, 1, CV_REDUCE_MAX);

        // Alternating first row (inclusive) and end row (exclusive) of each run
        vector<int> coords;
        bool bOut = true;

        for (int i = 0; i < projH.rows; ++i)
        {
            if (bOut && projH(i) > 0)
            {
                coords.push_back(i);
                bOut = false;
            }
            else if (!bOut && projH(i) == 0)
            {
                coords.push_back(i);
                bOut = true;
            }
        }

        // Close a run that reaches the last row
        if (!bOut)
        {
            coords.push_back(projH.rows);
        }

        // "i + 1 < size()" instead of "i < size() - 1": the latter wraps around
        // for an empty vector and reads past its end
        vector<Rect> rects;
        for (size_t i = 0; i + 1 < coords.size(); i += 2)
        {
            Rect r(0, coords[i], src.cols, coords[i + 1] - coords[i]);
            // Move to src coordinates and clip to roi
            r = (r + roi.tl()) & roi;
            rects.push_back(r);

            rectangle(dbg, r, Scalar(0, 255, 0));
        }

        // A single band equal to roi means that no cut was possible
        if ((rects.size() == 1) && (rects[0] == roi))
        {
            return vector<Rect>();
        }

        return rects;
    }
    
    // Splits roi into vertical bands, one per run of consecutive columns that
    // contain at least one non-zero pixel of src.
    //
    // src: single-channel image, non-zero pixels are foreground
    // roi: region of src to split
    //
    // Returns the bands in src coordinates. The result is empty when roi cannot
    // be split: either it holds no foreground at all, or the only band is roi
    // itself. Every rectangle considered is also drawn onto the global dbg image.
    vector<Rect> XYCut_projV(const Mat1b& src, Rect roi)
    {
        rectangle(dbg, roi, Scalar(255, 0, 0));

        // Single row holding the maximum of every column of roi
        Mat1b projV;
        reduce(src(roi), projV, 0, CV_REDUCE_MAX);

        // Alternating first column (inclusive) and end column (exclusive) of each run
        vector<int> coords;
        bool bOut = true;

        for (int i = 0; i < projV.cols; ++i)
        {
            if (bOut && projV(i) > 0)
            {
                coords.push_back(i);
                bOut = false;
            }
            else if (!bOut && projV(i) == 0)
            {
                coords.push_back(i);
                bOut = true;
            }
        }

        // Close a run that reaches the last column
        if (!bOut)
        {
            coords.push_back(projV.cols);
        }

        // "i + 1 < size()" instead of "i < size() - 1": the latter wraps around
        // for an empty vector and reads past its end
        vector<Rect> rects;
        for (size_t i = 0; i + 1 < coords.size(); i += 2)
        {
            Rect r(coords[i], 0, coords[i + 1] - coords[i], src.rows);
            // Move to src coordinates and clip to roi
            r = (r + roi.tl()) & roi;
            rects.push_back(r);

            rectangle(dbg, r, Scalar(0, 255, 0));
        }

        // A single band equal to roi means that no cut was possible
        if ((rects.size() == 1) && (rects[0] == roi))
        {
            return vector<Rect>();
        }

        return rects;
    }
    
    void XYCut_step(const Mat1b& src, Rect roi, vector<Rect>& rects, bool bAlternate)
    {
        vector<Rect> step;
        if (bAlternate)
        {
            step = XYCut_projH(src, roi);
    
            if (step.empty())
            {
                rects.push_back(roi);
                return;
            }
        }
        else
        {
            step = XYCut_projV(src, roi);
    
            if (step.empty())
            {
                rects.push_back(roi);
                return;
            }
        }
    
        for (int i = 0; i < step.size(); ++i)
        {
            XYCut_step(src, step[i], rects, !bAlternate);
        }
    }
    
    void XYCut(const Mat1b& src, vector<Rect>& rects)
    {
        bool bAlternate = true;
        Rect roi(0, 0, src.cols, src.rows);
    
        XYCut_step(src, roi, rects, bAlternate);
    }
    
    // Squared Euclidean distance between two points.
    int ed2(const Point& lhs, const Point& rhs)
    {
        const int dx = lhs.x - rhs.x;
        const int dy = lhs.y - rhs.y;
        return dx * dx + dy * dy;
    }
    
    int main()
    {
        // Load image
        Mat1b img = imread("path_to_image", IMREAD_GRAYSCALE);
        cvtColor(img, dbg, COLOR_GRAY2BGR);
    
        // invert image, if needed
        img = ~img;
    
        // Apply XY Cut
        vector<Rect> rects;
        XYCut(img, rects);
    
        // Show XY results
        Mat3b xyres;
        cvtColor(img, xyres, COLOR_GRAY2BGR);
        for (int i = 0; i < rects.size(); ++i)
        {
            rectangle(xyres, rects[i], Scalar(0, 0, 255), 2);
        }
    
        //imshow("XY-Cut Result", xyres);
        //waitKey(1);
    
        // Remove small bounding boxes (noise)
        int min_area = 10;
        vector<Rect> filteredRects;
        for (const auto& r : rects)
        {
            if (r.area() > min_area)
            {
                filteredRects.push_back(r);
            }
        }
    
        // Show Filtered results
        Mat3b filtres;
        cvtColor(img, filtres, COLOR_GRAY2BGR);
        for (int i = 0; i < filteredRects.size(); ++i)
        {
            rectangle(filtres, filteredRects[i], Scalar(255, 0, 0), 2);
        }
    
        //imshow("Filtered Result", filtres);
        //waitKey(1);
    
        // Group near rectangles
        int max_distance = 10;
    
        vector<int> labels;
        int max_distance2 = max_distance*max_distance;
        int n_labels = partition(filteredRects, labels, [max_distance2](const Rect& lhs, const Rect& rhs)
        {
            if (ed2(lhs.tl(), Point(rhs.br().x, rhs.tl().y)) < max_distance2) { return true; }
            if (ed2(rhs.tl(), Point(lhs.br().x, lhs.tl().y)) < max_distance2) { return true; }
            return false;
        });
    
        // Make a bounding box for rects grouped together
        vector<vector<Point>> pts(n_labels);
        for (int i = 0; i < filteredRects.size(); ++i)
        {
            pts[labels[i]].push_back(filteredRects[i].tl());
            pts[labels[i]].push_back(filteredRects[i].br());
        }
    
        // Show Grouped results
        vector<Rect> groupedRects(n_labels);
        for (int i = 0; i < pts.size(); ++i)
        {
            groupedRects[i] = boundingRect(pts[i]);
        }
    
    
        // Show Grouped results
        Mat3b groupres;
        cvtColor(img, groupres, COLOR_GRAY2BGR);
        for (int i = 0; i < groupedRects.size(); ++i)
        {
            rectangle(groupres, groupedRects[i], Scalar(0, 255, 0), 2);
        }
    
    
        //imshow("Grouped Result", groupres);
        //waitKey(1);
    
    
    
    
        return 0;
    }
    

    【讨论】:

    • 当我尝试编译你的代码时,出现以下错误:`# g++ org.cpp -o ./test/test pkg-config opencv --cflags --libs` — `org.cpp: In function 'int main()':` `org.cpp:175:22: 错误:ISO C++ 禁止声明无类型的 'r' [-fpermissive]`(出错的代码行:`for (const auto& r : rects)`);`org.cpp:175:26: 错误:C++98 模式下不允许使用基于范围的 'for' 循环`(出错的代码行:`for (const auto& r : rects)`)
    • 需要开启C++11特性:"-std=c++11"
    • 有没有一种快速的方法可以将所有矩形合并为一个矩形?
    • 所有矩形 ...哪些? 快速的方式 ...你分析过它吗?这里的解决方案应该足够快。但是,您可以将矩形 OR 在一起,例如:rect = rect1 | rect2 生成包含 rect1 和 rect2 的最小面积矩形
    • 我只需要一个矩形来限制所有过滤的矩形。我需要这个来剪切图像,所以只剩下带有文本轮廓的区域。在我的问题底部做了一个简短的更新我希望你能为我解决这个问题。我是 C++ 新手 :)
    猜你喜欢
    • 2018-07-13
    • 2014-04-04
    • 2018-09-30
    • 1970-01-01
    • 1970-01-01
    • 2020-10-25
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多