使用Opencv检测图像中的文本区域

Mee*_*yal 22 opencv python-2.7 python-tesseract

我有一个图像,想要检测其中的文本区域.

我试过TiRG_RAW_20110219项目,但结果并不理想.如果输入图像是http://imgur.com/yCxOvQS,GD38rCa,则它将生成http://imgur.com/yCxOvQS,GD38rCa#1作为输出.

谁能提出一些替代方案.我想通过仅将文本区域作为输入发送来改善tesseract的输出.

Ami*_*aha 55

import cv2


def captch_ex(file_name):
    """Outline likely text regions of an image and show the result in a window.

    Pixels brighter than gray level 180 are kept, then dilated so that
    neighbouring characters merge into blobs. The bounding rectangle of every
    blob that is at least 35 px wide or 35 px tall is drawn on the image.
    The annotated image is displayed with ``cv2.imshow`` and the call blocks
    until a key is pressed.

    Args:
        file_name: Path of the image file to analyse.

    Raises:
        IOError: If the image cannot be read (``cv2.imread`` returned None).
    """
    img = cv2.imread(file_name)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, which would otherwise fail later in cvtColor.
        raise IOError("could not read image: %s" % file_name)

    img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Remove the noisy (dark) portion: build a mask of bright pixels, apply it
    # to the grayscale image, then binarise the masked image.
    _, mask = cv2.threshold(img2gray, 180, 255, cv2.THRESH_BINARY)
    image_final = cv2.bitwise_and(img2gray, img2gray, mask=mask)
    # For black text use cv2.THRESH_BINARY_INV here instead.
    _, new_img = cv2.threshold(image_final, 180, 255, cv2.THRESH_BINARY)

    # The kernel size controls the direction of the dilation: a larger x
    # dilates more horizontally, a larger y dilates more vertically.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    # More iterations means stronger dilation.
    dilated = cv2.dilate(new_img, kernel, iterations=9)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
    # (image, contours, hierarchy) in 3.x; the contours are always the
    # second-to-last element, so index from the end to support all versions.
    contours = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

    for contour in contours:
        # Rectangle bounding the contour.
        x, y, w, h = cv2.boundingRect(contour)

        # Don't plot small false positives that aren't text.
        if w < 35 and h < 35:
            continue

        # Draw the rectangle around the contour on the loaded image.
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)

        # To OCR each region instead, crop it from an unmodified copy of the
        # image, e.g. ``cropped = clean_copy[y:y + h, x:x + w]``, and pass the
        # crop to the OCR engine; false detections simply yield no text.

    # Show the image with the detected regions outlined and wait for a key.
    cv2.imshow('captcha_result', img)
    cv2.waitKey()


# Example invocation: replace 'your_image.jpg' with the path of the image to analyse.
file_name = 'your_image.jpg'
captch_ex(file_name)
Run Code Online (Sandbox Code Playgroud)

点击查看结果

点击查看结果

  • @AmitKushwaha +1很棒的答案!我正在使用OpenCV 3.1.0,而cv2.findContours()返回三个值:image,contours,hierarchy.你的例子唯一需要的是在`contours`前面添加一个变量 (4认同)
  • cv2.findContours() 函数不再返回图像。所以,该语句必须改为 `contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)`,更多详细信息请参考此:[想要查找轮廓 -> ValueError: 没有足够的值来解包 (预期 3,得到 2),出现这个](/sf/ask/3791524131/) (2认同)

nat*_*ncy 11

由于没有人发布完整的解决方案,这里有一种方法。通过观察到所需的文本是白色的并且单词是水平对齐的,我们可以使用颜色分割来提取和 OCR 字母。

  1. 执行颜色分割。我们加载图像,转换为 HSV 格式,定义下/上限范围,并使用 cv2.inRange 进行颜色分割以获取二值掩码

  2. 膨胀以连接文本字符。我们使用 cv2.getStructuringElement 创建一个水平形状的内核,然后使用 cv2.dilate 进行膨胀,将单个字母组合成单个轮廓

  3. 去除非文本轮廓。我们使用 cv2.findContours 找到轮廓,并按纵横比过滤以去除非文本轮廓。由于文本处于水平方向,如果某个轮廓的纵横比小于预定义的阈值,我们就使用 cv2.drawContours 填充该轮廓,从而将其删除

  4. 执行 OCR。我们将膨胀后的图像与初始掩码做按位与运算,仅隔离出文本字符,然后反转图像,使文本为黑色、背景为白色。最后,我们将图像送入 Pytesseract OCR


这是每个步骤的可视化:

输入图像

从颜色分割生成的掩码

# Load the image and convert it from BGR to HSV so that colours can be
# selected with simple per-channel bounds.
image = cv2.imread('1.jpg')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Lower/upper HSV bounds of the colour range to keep: low saturation and high
# value, i.e. the white text.
lower = np.array([0, 0, 218])
upper = np.array([157, 54, 255])
# Colour segmentation: binary mask of the pixels lying within [lower, upper].
mask = cv2.inRange(hsv, lower, upper)
Run Code Online (Sandbox Code Playgroud)

膨胀后的图像:文本字符已连接成轮廓,并已通过纵横比过滤去除非文本轮廓

# A kernel that is wider than it is tall merges the characters of one text
# line into a single blob when the mask is dilated.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dilate = cv2.dilate(mask, kernel, iterations=5)

# findContours returns either 2 or 3 values depending on the OpenCV version;
# pick out the contour list in both cases.
found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if len(found) == 2:
    cnts = found[0]
else:
    cnts = found[1]

# Keep only wide blobs (aspect ratio of at least 5); every other contour is
# treated as non-text and erased by filling it with black.
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    ar = w / float(h)
    if ar >= 5:
        continue
    cv2.drawContours(dilate, [c], -1, (0, 0, 0), -1)
Run Code Online (Sandbox Code Playgroud)

对两个掩码做按位与运算并反转,得到可供 OCR 使用的结果

# AND the filtered dilated image with the original mask so that only the mask
# pixels inside the remaining contours survive, then invert (255 - x) so the
# kept pixels become black on a white background.
result = 255 - cv2.bitwise_and(dilate, mask)
# --psm 6 tells Tesseract to assume a single uniform block of text.
data = pytesseract.image_to_string(result, lang='eng',config='--psm 6')
print(data)
Run Code Online (Sandbox Code Playgroud)

使用 --psm 6 配置(假设图像是单个统一的文本块)时 Pytesseract OCR 的输出结果。更多配置选项请参见此处

All women become
like their mothers.
That is their tragedy.
No man does.

That's his.

OSCAR WILDE
Run Code Online (Sandbox Code Playgroud)

完整代码

import os

import cv2
import numpy as np
import pytesseract

# Use the default Windows install location of Tesseract when it exists.
# Otherwise leave pytesseract's own setting untouched so the script also runs
# on systems where the `tesseract` executable is found another way (e.g. PATH).
_TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = _TESSERACT_EXE

# Load image, convert to HSV format, define lower/upper ranges, and perform
# color segmentation to create a binary mask
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message rather than inside cvtColor.
    raise IOError("could not read image: 1.jpg")
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Low saturation and high value select the white text.
lower = np.array([0, 0, 218])
upper = np.array([157, 54, 255])
mask = cv2.inRange(hsv, lower, upper)

# Create horizontal kernel and dilate to connect text characters
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dilate = cv2.dilate(mask, kernel, iterations=5)

# Find contours and filter using aspect ratio.
# findContours returns 2 or 3 values depending on the OpenCV version, so pick
# the contour list accordingly.
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    ar = w / float(h)
    # Text lines are much wider than tall; remove anything else by filling
    # the contour with black.
    if ar < 5:
        cv2.drawContours(dilate, [c], -1, (0, 0, 0), -1)

# Bitwise-and dilated image with mask, invert so the text is black on white,
# then OCR. --psm 6 assumes a single uniform block of text.
result = 255 - cv2.bitwise_and(dilate, mask)
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

# Show the intermediate images until a key is pressed, then clean up.
cv2.imshow('mask', mask)
cv2.imshow('dilate', dilate)
cv2.imshow('result', result)
cv2.waitKey()
cv2.destroyAllWindows()
Run Code Online (Sandbox Code Playgroud)

HSV 下/上颜色范围是使用此 HSV 颜色阈值脚本确定的

import cv2
import numpy as np

def nothing(x):
    """Do nothing; placeholder callback that ignores the value ``x``."""
    return None

# Load image
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message rather than inside cvtColor.
    raise IOError("could not read image: 1.jpg")

# The source image never changes, so convert it to HSV once instead of on
# every iteration of the display loop.
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# Create a window
cv2.namedWindow('image')

# Create trackbars for color change
# Hue is from 0-179 for Opencv
cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
cv2.createTrackbar('VMax', 'image', 0, 255, nothing)

# Set default value for Max HSV trackbars
cv2.setTrackbarPos('HMax', 'image', 179)
cv2.setTrackbarPos('SMax', 'image', 255)
cv2.setTrackbarPos('VMax', 'image', 255)

# Last printed (hMin, sMin, vMin, hMax, sMax, vMax); None until first print
previous = None

while True:
    # Get current positions of all trackbars
    hMin = cv2.getTrackbarPos('HMin', 'image')
    sMin = cv2.getTrackbarPos('SMin', 'image')
    vMin = cv2.getTrackbarPos('VMin', 'image')
    hMax = cv2.getTrackbarPos('HMax', 'image')
    sMax = cv2.getTrackbarPos('SMax', 'image')
    vMax = cv2.getTrackbarPos('VMax', 'image')

    # Set minimum and maximum HSV values to display
    lower = np.array([hMin, sMin, vMin])
    upper = np.array([hMax, sMax, vMax])

    # Color threshold and keep only the pixels inside the selected range
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(image, image, mask=mask)

    # Print if there is a change in HSV value
    current = (hMin, sMin, vMin, hMax, sMax, vMax)
    if current != previous:
        print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % current)
        previous = current

    # Display result image; press 'q' to quit
    cv2.imshow('image', result)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()
Run Code Online (Sandbox Code Playgroud)


Mik*_*ord 6

如果您不介意动手,您可以尝试将这些文本区域扩展为一个更大的矩形区域,然后将其一次性全部输入到 Tesseract 中。

我还建议尝试对图像进行多次阈值处理,并将每个图像分别输入 Tesseract,看看是否有帮助。您可以将输出与字典单词进行比较,以自动确定特定 OCR 结果是否良好。