如果我的代码中有任何您没看懂的地方,请告诉我。这个思路最大的缺陷是:
1:如果主框线中存在由噪声造成的断裂,框线网格会被拆分成多个独立的 blob(连通区域)。
2:如果表格中可能填有手写文本,那么当字母与框的边缘重叠时,效果可能会很差。
3:它完全不做方向检查(您也许会想改进这一点,因为我认为这并不太难,而且能得到更准确的文本框位置)。也就是说,它依赖于各个框与 x、y 轴大致对齐;如果框倾斜得比较厉害,得到的所有框角坐标都会有明显偏移(不过它仍然应该能找到这些框)。
我稍微调整了阈值,使所有文本都与框的边缘分离;如有必要,您可以把阈值再调低一些,直到主框线开始断裂为止。此外,如果您担心框线断裂,可以把所有足够大的 blob 都加入最终图像。
基本上,第一步是调整阈值,使其处于最稳定的截止值(很可能是仍能保持框线连通的最低值),以便将文本和噪声与框分离。
第二步是找到最大的正 blob(应该就是框线网格)。如果框线没有全部连在一起,您可能需要取最大的几个 blob……不过这样会变得很棘手,所以请尽量调好阈值,使整个网格成为单个 blob。
最后一步是获取各个矩形,为此我只需查找负 blob(忽略第一个,即外部区域)。
代码如下(抱歉它是用 C++ 写的,但希望您能理解这个思路,反正您也会自己重新实现):
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <stdio.h>
#include <opencv2/opencv.hpp>
using namespace cv;
// Attempts to find the largest connected group of non-zero pixels (assumed to
// be the interconnected boundaries of the textbox grid).
//
// targetImage  - image whose pixels are read as uchar; the caller in this file
//                passes a thresholded single-channel image. It is not modified
//                (the function works on a clone).
// connectivity - neighbourhood (4 or 8) forwarded to floodFill.
//
// Returns an image that is zero everywhere except for the pixels of the
// largest blob found. If the input contains no non-zero pixel, an empty Mat
// is returned.
//
// NOTE(review): the whole image is cloned once per blob, so the cost grows
// with the number of blobs (noise specks included).
Mat biggestComponent(Mat targetImage, int connectivity=8)
{
    Mat inputImage = targetImage.clone();
    Mat finalImage;
    int greatestBlobSize = 0;

    std::cout<<"Top"<<std::endl;
    std::cout<<inputImage.rows<<std::endl;
    std::cout<<inputImage.cols<<std::endl;

    for(int i=0;i<inputImage.cols;i++)
    {
        for(int ii=0;ii<inputImage.rows;ii++)
        {
            if(inputImage.at<uchar>(ii,i)==0)
            {
                continue;
            }

            // Snapshot taken before the fill so the erased blob can be
            // recovered afterwards by subtraction.
            Mat lastImage = inputImage.clone();

            // floodFill writes the bounding box through this pointer, so it
            // must refer to a real Rect object (an uninitialized Rect* here
            // is undefined behaviour).
            Rect boundBox;

            // Erase the blob (paint it 0) so it is not visited again; the
            // return value is used as the blob's size.
            int blobSize = floodFill(inputImage, cv::Point(i,ii), Scalar(0),&boundBox,Scalar(200),Scalar(255),connectivity);

            if(greatestBlobSize<blobSize)
            {
                greatestBlobSize=blobSize;
                std::cout<<blobSize<<std::endl;
                // Only the pixels just erased differ between the snapshot and
                // the current image, so the difference isolates this blob.
                finalImage = lastImage-inputImage;
            }
        }
    }
    return finalImage;
}
// Takes an image that only has outlines of boxes (non-zero pixels on a zero
// background) and gets a handle for each textbox.
//
// processedImage - image whose pixels are read as uchar; it is not modified
//                  (the function works on a clone).
// connectivity   - neighbourhood (4 or 8) forwarded to floodFill.
//
// Returns the bounding rectangle of every zero-valued region except the first
// one encountered. The scan runs column by column starting at the top-left
// pixel, and the first zero region is assumed to be the area outside the
// grid -- NOTE(review): this does not hold if the grid touches the top-left
// corner of the image.
std::vector<Rect> boxCorners(Mat processedImage, int connectivity=4)
{
    std::vector<Rect> boxHandles;
    Mat inputImage = processedImage.clone();
    bool outerRegionFlag = true;

    std::cout<<inputImage.rows<<std::endl;
    std::cout<<inputImage.cols<<std::endl;

    for(int i=0;i<inputImage.cols;i++)
    {
        for(int ii=0;ii<inputImage.rows;ii++)
        {
            if(inputImage.at<uchar>(ii,i)!=0)
            {
                continue;
            }

            // Paint the region 255 so it is not visited again; floodFill
            // reports the region's bounding box through boundBox.
            Rect boundBox;
            floodFill(inputImage, cv::Point(i,ii), Scalar(255),&boundBox,Scalar(0),Scalar(50),connectivity);

            if(outerRegionFlag)
            {
                // The first region is the outer zone of the page: fill it,
                // but do not report it as a box.
                outerRegionFlag=false;
                continue;
            }
            boxHandles.push_back(boundBox);
        }
    }
    return boxHandles;
}
// Draws the outline of every rectangle in `boxes` onto a blank image.
//
// originalImage - only its size and type are used; its pixels are not read
//                 or modified.
// boxes         - rectangles to draw.
//
// Returns a new all-zero image of the same size and type as originalImage
// with each rectangle outlined using Scalar(255).
Mat drawTestBoxes(Mat originalImage, std::vector<Rect> boxes)
{
    // Allocate the blank canvas directly instead of cloning the input and
    // multiplying it by zero.
    Mat outImage = Mat::zeros(originalImage.size(), originalImage.type());

    for(std::size_t i=0;i<boxes.size();++i)
    {
        rectangle(outImage,boxes[i],Scalar(255));
    }
    return outImage;
}
int main() {
Mat image;
Mat thresholded;
Mat processed;
image = imread( "Images/W2.png", 1 );
Mat channel[3];
split(image, channel);
threshold(channel[0],thresholded,150,255,1);
std::cout<<"Coputing biggest object"<<std::endl;
processed = biggestComponent(thresholded);
std::vector<Rect> textBoxes = boxCorners(processed);
Mat finalBoxes = drawTestBoxes(image,textBoxes);
namedWindow("Original", WINDOW_AUTOSIZE );
imshow("Original", channel[0]);
namedWindow("Thresholded", WINDOW_AUTOSIZE );
imshow("Thresholded", thresholded);
namedWindow("Processed", WINDOW_AUTOSIZE );
imshow("Processed", processed);
namedWindow("Boxes", WINDOW_AUTOSIZE );
imshow("Boxes", finalBoxes);
std::cout<<"waiting for user input"<<std::endl;
waitKey(0);
return 0;
}