tesserocr 未检测到任何文本,因为文本高度较小或文本大小较小。通过裁剪文本区域并使用该图像,pytesseract 可以提取文本。
由于文本尺寸较小,使用轮廓检测和膨胀操作来定位文本区域也不起作用。为了检测文本区域,我按照 this 解决方案使用 EAST 模型提取所有文本区域,并将这些区域合并为一个区域。将合并后的区域图像传递给 tesseract 即可返回文本。要运行此脚本,您需要下载模型(可在 here 找到)并安装所需的依赖项。
Python 脚本:
import numpy as np
import cv2
from imutils.object_detection import non_max_suppression
import matplotlib.pyplot as plt
import pytesseract
# Paths and tuning knobs for the script; adjust them to your environment.
TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"  # I am using windows
IMAGE_PATH = "r9Do4.png"
EAST_MODEL_PATH = "frozen_east_text_detection.pb"
# Size must be a multiple of 32. Smaller sizes have not been tested; they may
# increase speed but might decrease accuracy.
INPUT_SIZE = 640
MIN_CONFIDENCE = 0.5
EAST_OUTPUT_LAYERS = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
# Per-channel mean subtracted by blobFromImage (given in RGB order, hence swapRB=True).
EAST_MEAN = (123.68, 116.78, 103.94)


def decode_predictions(scores, geometry, min_confidence=MIN_CONFIDENCE):
    """Turn the EAST score/geometry volumes into candidate boxes.

    Args:
        scores: array indexed as ``scores[0, 0, y, x]`` holding the text
            probability of each output cell.
        geometry: array indexed as ``geometry[0, c, y, x]``; channels 0-3 are
            the distances used to build the box and channel 4 is the
            rotation angle.
        min_confidence: cells scoring below this value are ignored.

    Returns:
        ``(rects, confidences)`` where ``rects`` is a list of
        ``(startx, starty, endx, endy)`` integer tuples in the coordinate
        system of the network input and ``confidences`` holds the matching
        scores.
    """
    rows, cols = scores.shape[2:4]
    rects = []  # bounding box coordinates for text regions
    confidences = []  # probability associated with each entry in rects

    for y in range(rows):
        scores_row = scores[0, 0, y]
        dist0 = geometry[0, 0, y]
        dist1 = geometry[0, 1, y]
        dist2 = geometry[0, 2, y]
        dist3 = geometry[0, 3, y]
        angles = geometry[0, 4, y]

        for x in range(cols):
            if scores_row[x] < min_confidence:
                continue

            # The output maps are smaller than the network input; each cell
            # is mapped back to input coordinates by multiplying by 4.
            offset_x = x * 4.0
            offset_y = y * 4.0

            angle = angles[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            box_h = dist0[x] + dist2[x]
            box_w = dist1[x] + dist3[x]

            end_x = int(offset_x + (cos * dist1[x]) + (sin * dist2[x]))
            # The sine term is subtracted for the Y coordinate, matching the
            # reference EAST decoder shipped with the OpenCV samples.
            end_y = int(offset_y - (sin * dist1[x]) + (cos * dist2[x]))
            start_x = int(end_x - box_w)
            start_y = int(end_y - box_h)

            rects.append((start_x, start_y, end_x, end_y))
            confidences.append(float(scores_row[x]))

    return rects, confidences


def merge_boxes(boxes, image_height, image_width, bottom_pad=5):
    """Return the union of ``boxes`` clamped to the image, or ``None``.

    Args:
        boxes: sequence of ``(startx, starty, endx, endy)`` boxes.
        image_height: height of the image the boxes refer to.
        image_width: width of the image the boxes refer to.
        bottom_pad: extra pixels added below the union before clamping.

    Returns:
        ``(startx, starty, endx, endy)`` covering every box, or ``None`` when
        ``boxes`` is empty or the clamped region has no area.
    """
    if len(boxes) == 0:
        return None

    start_x = max(0, min(int(box[0]) for box in boxes))
    start_y = max(0, min(int(box[1]) for box in boxes))
    end_x = min(image_width, max(int(box[2]) for box in boxes))
    end_y = min(image_height, max(int(box[3]) for box in boxes) + bottom_pad)

    # Clamping can collapse a box that lies entirely outside the image.
    if end_x <= start_x or end_y <= start_y:
        return None
    return start_x, start_y, end_x, end_y


def main():
    """Detect the text region with EAST, crop it and OCR it with Tesseract."""
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

    image = cv2.imread(IMAGE_PATH, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: {}".format(IMAGE_PATH))
    original = image.copy()  # untouched copy; rectangles are drawn on `image`
    height, width = image.shape[:2]

    resized = cv2.resize(image, (INPUT_SIZE, INPUT_SIZE))
    net = cv2.dnn.readNet(EAST_MODEL_PATH)
    blob = cv2.dnn.blobFromImage(
        resized, 1.0, (INPUT_SIZE, INPUT_SIZE), EAST_MEAN, swapRB=True, crop=False
    )
    net.setInput(blob)
    scores, geometry = net.forward(EAST_OUTPUT_LAYERS)

    rects, confidences = decode_predictions(scores, geometry)
    # Apply non-maxima suppression to suppress weak and overlapping boxes.
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # Scale the boxes from network-input coordinates back to the original image.
    ratio_w = width / float(INPUT_SIZE)
    ratio_h = height / float(INPUT_SIZE)
    scaled_boxes = []
    for (start_x, start_y, end_x, end_y) in boxes:
        start_x = int(start_x * ratio_w)
        start_y = int(start_y * ratio_h)
        end_x = int(end_x * ratio_w)
        end_y = int(end_y * ratio_h)
        cv2.rectangle(image, (start_x, start_y), (end_x, end_y), (255, 0, 0), 2)
        scaled_boxes.append([start_x, start_y, end_x, end_y])

    cv2.imshow("BB img", image)
    cv2.waitKey(0)

    # Combine the bounding boxes to get the text region.
    region = merge_boxes(scaled_boxes, height, width)
    if region is None:
        raise SystemExit("No text regions detected.")
    crop_sx, crop_sy, crop_ex, crop_ey = region
    cropped = original[crop_sy:crop_ey, crop_sx:crop_ex]

    cv2.imshow("Cropped Text Region", cropped)
    cv2.waitKey(0)

    # cv2.imread returns channels in BGR order, so convert with BGR2GRAY.
    gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
    thr = cv2.threshold(
        src=gray,
        thresh=0,
        maxval=255,
        type=cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV,
    )[1]
    txt = pytesseract.image_to_string(thr, lang='eng', config='--psm 11')
    print(txt.strip())


if __name__ == "__main__":
    main()
这是脚本的输出
文本区域的边界框
裁剪的文本区域
提取的文本
\
正如帖子中提到的,您的所有图像都很相似,因此您可以对此脚本稍加修改,用它从这些图像中提取文本。脚本运行相当快,处理这张图片需要 2.1 秒。