提取文本OpenCV

Question

提取文本OpenCV

Cli*_*lip 141 c++ text opencv image-processing bounding-box

我试图在图像中找到文本的边界框,目前正在使用这种方法:

// calculate the local variances of the grayscale image
Mat t_mean, t_mean_2;
Mat grayF;
outImg_gray.convertTo(grayF, CV_32F);
int winSize = 35;
blur(grayF, t_mean, cv::Size(winSize,winSize));
blur(grayF.mul(grayF), t_mean_2, cv::Size(winSize,winSize));
Mat varMat = t_mean_2 - t_mean.mul(t_mean);
varMat.convertTo(varMat, CV_8U);

// threshold the high variance regions
Mat varMatRegions = varMat > 100;

Run Code Online (Sandbox Code Playgroud)

给出这样的图像时:

在此输入图像描述

然后,当我显示varMatRegions我得到这个图像:

在此输入图像描述

正如你所看到的那样,它将左侧的文本块与卡片的标题结合起来,对于大多数卡片而言,这种方法效果很好,但在较繁忙的卡片上它可能会导致问题.

这些轮廓连在一起之所以不好,是因为这会使轮廓的边界框几乎占据整张卡片.

任何人都可以建议一种不同的方式来查找文本以确保正确检测文本吗？

200分,谁能在这两张卡上方找到文字.

在此输入图像描述

Answer 1

dha*_*hka 124

我在下面的程序中使用了基于渐变的方法.添加了生成的图像.请注意,我正在使用图像的缩小版本进行处理.

c ++版本

The MIT License (MIT)

Copyright (c) 2014 Dhanushka Dangampola

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

#include "stdafx.h"

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>

using namespace cv;
using namespace std;

#define INPUT_FILE              "1.jpg"
#define OUTPUT_FOLDER_PATH      string("")

int _tmain(int argc, _TCHAR* argv[])
{
    Mat large = imread(INPUT_FILE);
    Mat rgb;
    // downsample and use it for processing
    pyrDown(large, rgb);
    Mat small;
    cvtColor(rgb, small, CV_BGR2GRAY);
    // morphological gradient
    Mat grad;
    Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
    // binarize
    Mat bw;
    threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
    // connect horizontally oriented regions
    Mat connected;
    morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
    morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
    // find contours
    Mat mask = Mat::zeros(bw.size(), CV_8UC1);
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
    // filter contours
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
    {
        Rect rect = boundingRect(contours[idx]);
        Mat maskROI(mask, rect);
        maskROI = Scalar(0, 0, 0);
        // fill the contour
        drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
        // ratio of non-zero pixels in the filled region
        double r = (double)countNonZero(maskROI)/(rect.width*rect.height);

        if (r > .45 /* assume at least 45% of the area is filled if it contains text */
            && 
            (rect.height > 8 && rect.width > 8) /* constraints on region size */
            /* these two conditions alone are not very robust. better to use something 
            like the number of significant peaks in a horizontal projection as a third condition */
            )
        {
            rectangle(rgb, rect, Scalar(0, 255, 0), 2);
        }
    }
    imwrite(OUTPUT_FOLDER_PATH + string("rgb.jpg"), rgb);

    return 0;
}

Run Code Online (Sandbox Code Playgroud)

python版本

The MIT License (MIT)

Copyright (c) 2017 Dhanushka Dangampola

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

import cv2
import numpy as np

# Text-region detection: morphological gradient -> Otsu threshold ->
# horizontal closing -> contour filtering by fill ratio and size.
large = cv2.imread('1.jpg')
# cv2.imread returns None instead of raising when the file cannot be read
if large is None:
    raise IOError("could not read image '1.jpg'")
# downsample and use the smaller image for processing
rgb = cv2.pyrDown(large)
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

# morphological gradient
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)

# binarize (with THRESH_OTSU the threshold is chosen automatically)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# connect horizontally oriented regions
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
# using RETR_EXTERNAL instead of RETR_CCOMP
# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
# (image, contours, hierarchy) in 3.x; taking the last two items works in all of them
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]

mask = np.zeros(bw.shape, dtype=np.uint8)

for idx in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[idx])
    # clear whatever earlier contours drew inside this bounding box
    mask[y:y+h, x:x+w] = 0
    # fill the contour
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    # ratio of non-zero pixels in the filled region
    r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)

    # keep regions that are at least 45% filled and larger than 8x8 pixels
    if r > 0.45 and w > 8 and h > 8:
        cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2)

cv2.imshow('rects', rgb)
# imshow only paints the window while the GUI event loop runs; block until a key is pressed
cv2.waitKey(0)
cv2.destroyAllWindows()

Run Code Online (Sandbox Code Playgroud)

在此输入图像描述

+1,到目前为止最准确的结果,干得好!:P (3认同)
我只是看看他的方法.我看到的主要区别是他正在使用Sobel滤镜,而我正在使用形态渐变滤镜.我认为形态滤波器和下采样使很多不那么强的边缘变平.索贝尔可能会发出更多噪音. (3认同)
@ascenator 当您将 OTSU 与阈值类型结合使用时，它使用 Otsu 的阈值而不是指定的阈值。请参阅[此处](http://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html?highlight=threshold#threshold)。 (2认同)
@DforTye获取填充轮廓的水平投影(cv :: reduce),然后对其进行阈值处理(例如,使用平均值或中值高度).如果您可视化此结果,它看起来就像一个条形码.我想,当时,我正在考虑计算条数,并对其施加一个阈值.现在我想,如果该区域足够干净,如果我们可以将它提供给OCR并获得每个检测到的角色的置信水平以确保该区域包含文本,它也可能有所帮助. (2认同)

Answer 2

Lov*_*ill 122

您可以通过查找近边元素(灵感来自LPD)来检测文本:

#include <iostream>

#include "opencv2/opencv.hpp"

/**
 * Finds bounding boxes of wide, text-like regions in a BGR image.
 *
 * Pipeline: grayscale -> horizontal Sobel derivative -> Otsu binarization ->
 * morphological closing with a 17x3 rectangle (merges neighbouring edges
 * into blobs) -> external contours -> bounding boxes.
 *
 * @param img BGR input image; an empty image yields an empty result.
 * @return Bounding boxes of contours with more than 100 points whose box is
 *         wider than it is tall.
 */
std::vector<cv::Rect> detectLetters(cv::Mat img)
{
    std::vector<cv::Rect> boundRect;
    // cvtColor rejects an empty image (e.g. a failed imread), so report "no boxes" instead
    if (img.empty())
        return boundRect;

    cv::Mat img_gray, img_sobel, img_threshold, element;
    cvtColor(img, img_gray, CV_BGR2GRAY);
    cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);
    cv::threshold(img_sobel, img_threshold, 0, 255, CV_THRESH_OTSU+CV_THRESH_BINARY);
    element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
    cv::morphologyEx(img_threshold, img_threshold, CV_MOP_CLOSE, element); //Does the trick
    std::vector< std::vector< cv::Point> > contours;
    // named constants for the former literals 0 (external contours only) and 1 (keep every contour point)
    cv::findContours(img_threshold, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
    std::vector<cv::Point> poly; // reused for each approximated contour
    for( size_t i = 0; i < contours.size(); i++ )
    {
        // skip contours with 100 points or fewer
        if (contours[i].size() <= 100)
            continue;
        cv::approxPolyDP( cv::Mat(contours[i]), poly, 3, true );
        cv::Rect appRect( boundingRect( cv::Mat(poly) ));
        // keep only boxes that are wider than tall
        if (appRect.width>appRect.height)
            boundRect.push_back(appRect);
    }
    return boundRect;
}

Run Code Online (Sandbox Code Playgroud)

用法:

int main(int argc,char** argv)
{
    //Read
    cv::Mat img1=cv::imread("side_1.jpg");
    cv::Mat img2=cv::imread("side_2.jpg");
    //Detect
    std::vector<cv::Rect> letterBBoxes1=detectLetters(img1);
    std::vector<cv::Rect> letterBBoxes2=detectLetters(img2);
    //Display
    for(int i=0; i< letterBBoxes1.size(); i++)
        cv::rectangle(img1,letterBBoxes1[i],cv::Scalar(0,255,0),3,8,0);
    cv::imwrite( "imgOut1.jpg", img1);  
    for(int i=0; i< letterBBoxes2.size(); i++)
        cv::rectangle(img2,letterBBoxes2[i],cv::Scalar(0,255,0),3,8,0);
    cv::imwrite( "imgOut2.jpg", img2);  
    return 0;
}

Run Code Online (Sandbox Code Playgroud)

结果:

a. element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3)); imgOut1 imgOut2

b. element = getStructuringElement(cv::MORPH_RECT, cv::Size(30, 30)); imgOut1 imgOut2

对于所提到的其他图像,结果类似.

车牌检测器. (6认同)
+1,好结果.什么是"LPD"？ (5认同)
假设有`cv::Rect a;`.将其扩大n:`a.x -= n/2; a.y -= n/2; a.width += n; a.height += n;`. (4认同)
[图书](http://www.amazon.com/Mastering-OpenCV-Practical-Computer-Projects/dp/1849517827).[代码](https://github.com/MasteringOpenCV/code/tree/master/Chapter5_NumberPlateRecognition). (3认同)
对于某些卡片,边界框不会包含所有文本,例如半个字母被截断.比如这张卡:http://i.imgur.com/tX3XrwH.jpg如何用"n"扩展每个边界框的高度和宽度？谢谢你的解决方案,它很棒! (2认同)
嗨，我如何使用 python cv2 实现相同的结果？ (2认同)

Answer 3

ana*_*ana 46

这是我用来检测文本块的另一种方法:

将图像转换为灰度
应用阈值(简单二进制阈值,精选值为150作为阈值)
应用扩张加厚图像中的线条,从而产生更紧凑的物体和更少的空白碎片.使用较高的迭代次数值,因此扩张非常繁重(13次迭代,也可以精心挑选以获得最佳结果).
使用opencv findContours函数识别结果图像中对象的轮廓.
绘制了一个包围每个轮廓对象的边界框(矩形) - 每个框架构成一个文本块.
可选地,丢弃不太可能是您要查找的对象(例如文本块)的区域,因为上面的算法也可能找到相交或嵌套的对象(如第一张卡片的整个顶部区域),其中一些可能与您的目的无关.

下面是使用pyopencv在python中编写的代码,它应该很容易移植到C++.

import cv2

# Text-block detection: inverse binary threshold -> heavy dilation ->
# external contours -> size-filtered bounding boxes.
image = cv2.imread("card.png")
# cv2.imread returns None instead of raising when the file cannot be read
if image is None:
    raise IOError('could not read image "card.png"')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY) # grayscale
_,thresh = cv2.threshold(gray,150,255,cv2.THRESH_BINARY_INV) # threshold
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
dilated = cv2.dilate(thresh,kernel,iterations = 13) # dilate
# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
# (image, contours, hierarchy) in 3.x; taking the last two items works in all of them
contours, hierarchy = cv2.findContours(dilated,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)[-2:] # get contours

# for each contour found, draw a rectangle around it on original image
for contour in contours:
    # get rectangle bounding contour
    [x,y,w,h] = cv2.boundingRect(contour)

    # discard areas that are too large (both dimensions above 300 px)
    if h>300 and w>300:
        continue

    # discard areas that are too small (either dimension below 40 px)
    if h<40 or w<40:
        continue

    # draw rectangle around contour on original image
    cv2.rectangle(image,(x,y),(x+w,y+h),(255,0,255),2)

# write original image with added contours to disk  
cv2.imwrite("contoured.jpg", image)

Run Code Online (Sandbox Code Playgroud)

原始图片是您帖子中的第一张图片.

在预处理(灰度,阈值和扩展 - 所以在第3步之后)后,图像看起来像这样:

扩张的图像

下面是结果图像(最后一行中的"contoured.jpg"); 图像中对象的最终边界框如下所示:

在此输入图像描述

您可以看到左侧的文本块被检测为一个单独的块,与周围环境分隔.

使用具有相同参数的相同脚本(除了为第二个图像更改的阈值类型,如下所述),以下是其他2张卡的结果:

在此输入图像描述

调整参数

参数(阈值,扩张参数)针对该图像和该任务(查找文本块)进行了优化,并且如果需要,可以针对其他卡片图像或要找到的其他类型的对象进行调整.

对于阈值处理(步骤2),我使用黑色阈值.对于文本比背景浅的图像(例如帖子中的第二个图像),应使用白色阈值,因此请将阈值类型替换为cv2.THRESH_BINARY.对于第二个图像,我还使用了略高的阈值(180).改变阈值和膨胀迭代次数等参数,会在划定图像中对象的边界时得到不同的灵敏度.

查找其他对象类型:

例如,在第一张图像中将膨胀减少到5次迭代,可以让我们对图像中的对象进行更精细的定界,粗略地找到图像中的所有单词(而不是文本块):

在此输入图像描述

知道一个单词的粗略大小,在这里我丢弃了太小(宽度或高度低于20像素)或太大(超过100像素宽度或高度)的区域,以忽略不太可能是单词的对象,以获得结果上面的图片.

你太棒了！我会在早上试试这个。 (2认同)

Answer 4

rtk*_*eta 26

@dhanushka的方法看起来最有希望,但我想用Python来试一试,所以出于兴趣把它翻译了过来:

import cv2
import numpy as np
from cv2 import boundingRect, countNonZero, cvtColor, drawContours, findContours, getStructuringElement, imread, morphologyEx, pyrDown, rectangle, threshold

# Python port of the gradient-based text detector: morphological gradient ->
# Otsu threshold -> horizontal closing -> contour filtering by fill ratio and size.
# NOTE: image_path is not defined in this snippet; set it to the input file before running.
large = imread(image_path)
# imread returns None instead of raising when the file cannot be read
if large is None:
    raise IOError('could not read image: %s' % image_path)
# downsample and use it for processing
rgb = pyrDown(large)
# apply grayscale
small = cvtColor(rgb, cv2.COLOR_BGR2GRAY)
# morphological gradient
morph_kernel = getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = morphologyEx(small, cv2.MORPH_GRADIENT, morph_kernel)
# binarize
_, bw = threshold(src=grad, thresh=0, maxval=255, type=cv2.THRESH_BINARY+cv2.THRESH_OTSU)
morph_kernel = getStructuringElement(cv2.MORPH_RECT, (9, 1))
# connect horizontally oriented regions
connected = morphologyEx(bw, cv2.MORPH_CLOSE, morph_kernel)
mask = np.zeros(bw.shape, np.uint8)
# find contours; findContours returns 2 values in OpenCV 2.x/4.x and 3 in 3.x,
# and the last two are always (contours, hierarchy)
contours, hierarchy = findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# filter contours: like the C++ original, start at contour 0 and follow the
# first hierarchy field of each entry until it turns negative
idx = 0 if len(contours) > 0 else -1
while idx >= 0:
    x, y, rect_width, rect_height = boundingRect(contours[idx])
    # view into the mask covering only this contour's bounding box
    mask_roi = mask[y:y+rect_height, x:x+rect_width]
    # clear whatever earlier contours drew inside this bounding box
    mask_roi[:] = 0
    # fill the contour (drawn in place on the full mask)
    drawContours(mask, contours, idx, (255, 255, 255), cv2.FILLED)
    # ratio of non-zero pixels in the filled region, measured inside the bounding box only
    r = float(countNonZero(mask_roi)) / (rect_width * rect_height)
    if r > 0.45 and rect_height > 8 and rect_width > 8:
        # drawn in place; the return value is not used because some bindings return None
        rectangle(rgb, (x, y+rect_height), (x+rect_width, y), (0,255,0),3)
    idx = int(hierarchy[0][idx][0])

Run Code Online (Sandbox Code Playgroud)

现在显示图像:

from PIL import Image
# rgb was loaded through OpenCV, whose channel order is BGR; PIL expects RGB,
# so convert before display to avoid swapped red/blue channels
Image.fromarray(cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)).show()

Run Code Online (Sandbox Code Playgroud)

这不是最Pythonic的脚本,但我尽量让它与原始的C++代码保持相似,以便读者对照阅读.

它的工作原理几乎和原版一样好.我很乐意阅读建议如何改进/修复它以完全类似于原始结果.

感谢您提供python版本.很多人会觉得这很有用.+1 (2认同)

Answer 5

her*_*tao 15

您可以尝试由Chucai Yi和Yingli Tian开发的这种方法.

他们还共享一个软件(基于Opencv-1.0,它应该在Windows平台下运行.),你可以使用(虽然没有可用的源代码).它将生成图像中的所有文本边界框(以彩色阴影显示).通过应用于您的样本图像,您将获得以下结果:

注意:为了使结果更加健壮,您可以进一步将相邻的框合并在一起.

更新:如果您的最终目标是识别图像中的文本,则可以进一步查看gttext,这是一个OCR免费软件和带有文本的彩色图像的Ground Truthing工具.源代码也可用.

有了这个,您可以获得如下所示的识别出的文本:

Answer 6

Far*_*yev 5

以上代码JAVA版本:谢谢@William

/**
 * Finds bounding boxes of wide, text-like regions in an image.
 *
 * Pipeline: grayscale -> horizontal Sobel derivative -> Otsu binarization ->
 * morphological closing with a 15x5 rectangle -> external contours ->
 * polygon approximation -> bounding boxes.
 *
 * @param img input image in BGR channel order (as produced by Imgcodecs.imread);
 *            a null or empty image yields an empty list
 * @return bounding boxes that are wider than they are tall
 */
public static List<Rect> detectLetters(Mat img){    
    List<Rect> boundRect=new ArrayList<>();
    // cvtColor rejects an empty Mat (e.g. a failed imread), so report "no boxes" instead
    if (img == null || img.empty()) {
        return boundRect;
    }

    Mat img_gray =new Mat(), img_sobel=new Mat(), img_threshold=new Mat();
    // imread delivers BGR data, so use the BGR conversion (as the C++ version does)
    Imgproc.cvtColor(img, img_gray, Imgproc.COLOR_BGR2GRAY);
    Imgproc.Sobel(img_gray, img_sobel, CvType.CV_8U, 1, 0, 3, 1, 0, Core.BORDER_DEFAULT);
    // named flags for the former literal 8 (binary threshold with Otsu's automatic level)
    Imgproc.threshold(img_sobel, img_threshold, 0, 255, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU);
    Mat element=Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(15,5));
    Imgproc.morphologyEx(img_threshold, img_threshold, Imgproc.MORPH_CLOSE, element);
    List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
    Mat hierarchy = new Mat();
    // named constants for the former literals 0 (external contours only) and 1 (keep every point)
    Imgproc.findContours(img_threshold, contours, hierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_NONE);

    for (MatOfPoint contour : contours) {
        MatOfPoint2f curve = new MatOfPoint2f();
        MatOfPoint2f approx = new MatOfPoint2f();

        // approxPolyDP works on float points: convert, simplify, then write the
        // simplified polygon back into the contour as integer points
        contour.convertTo(curve, CvType.CV_32FC2);
        Imgproc.approxPolyDP(curve, approx, 2, true);
        approx.convertTo(contour, CvType.CV_32S);

        Rect appRect = Imgproc.boundingRect(contour);
        // keep only boxes that are wider than tall
        if (appRect.width>appRect.height) {
            boundRect.add(appRect);
        }
    }

    return boundRect;
}

Run Code Online (Sandbox Code Playgroud)

并在实践中使用此代码:

        // load the OpenCV native library before any OpenCV call
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        Mat img1=Imgcodecs.imread("abc.png");
        // imread returns an empty Mat rather than throwing when the file cannot be read
        if (img1.empty()) {
            throw new IllegalStateException("Could not read image: abc.png");
        }
        List<Rect> letterBBoxes1=Utils.detectLetters(img1);

        // draw every detected box in green on the loaded image
        for (Rect box : letterBBoxes1) {
            Imgproc.rectangle(img1, box.br(), box.tl(), new Scalar(0,255,0), 3, 8, 0);
        }
        Imgcodecs.imwrite("abc1.png", img1);

Run Code Online (Sandbox Code Playgroud)

归档时间：	11 年，4 月前
查看次数：	102551 次
最近记录：	5 年，9 月前