Him*_*dar 10 python opencv image image-processing computer-vision
我有这些图片
我想删除背景中的文本。只有captcha characters
应该保留(即 K6PwKA、YabVzu)。任务是稍后使用 tesseract 识别这些字符。
这是我尝试过的,但它并没有给出很好的准确性。
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\HPO2KOR\AppData\Local\Tesseract-OCR\tesseract.exe"
img = cv2.imread("untitled.png")
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_filtered = cv2.inRange(gray_image, 0, 75)
cv2.imwrite("cleaned.png", gray_filtered)
Run Code Online (Sandbox Code Playgroud)
我该如何改进?
注意: 我尝试了针对这个问题提出的所有建议,但没有一个对我有用。
编辑: 根据 Elias 的说法,我尝试使用 photoshop 找到验证码文本的颜色,方法是将其转换为灰度,结果介于 [100, 105] 之间。然后我根据这个范围对图像进行阈值处理。但是我得到的结果并没有从tesseract给出令人满意的结果。
gray_filtered = cv2.inRange(gray_image, 100, 105)
cv2.imwrite("cleaned.png", gray_filtered)
gray_inv = ~gray_filtered
cv2.imwrite("cleaned.png", gray_inv)
data = pytesseract.image_to_string(gray_inv, lang='eng')
Run Code Online (Sandbox Code Playgroud)
输出 :
'KEP wKA'
Run Code Online (Sandbox Code Playgroud)
结果 :
编辑 2:
def get_text(img_name):
lower = (100, 100, 100)
upper = (104, 104, 104)
img = cv2.imread(img_name)
img_rgb_inrange = cv2.inRange(img, lower, upper)
neg_rgb_image = ~img_rgb_inrange
cv2.imwrite('neg_img_rgb_inrange.png', neg_rgb_image)
data = pytesseract.image_to_string(neg_rgb_image, lang='eng')
return data
Run Code Online (Sandbox Code Playgroud)
给出:
和文本作为
GXuMuUZ
Run Code Online (Sandbox Code Playgroud)
有什么办法可以软化一点
nat*_*ncy 11
以下是两种可能的方法和一种纠正扭曲文本的方法:
方法一:形态学运算+轮廓滤波
获取二值图像。加载图像,灰度,然后是大津的阈值。
删除文本轮廓。创建一个矩形核,cv2.getStructuringElement
然后执行形态学操作以去除噪声。
过滤并去除小噪音。 查找轮廓并使用轮廓区域过滤以去除小颗粒。我们通过填充轮廓有效地去除了噪声cv2.drawContours
执行 OCR。我们反转图像然后应用轻微的
高斯模糊。然后我们使用带有配置选项的Pytesseract进行OCR --psm 6
,将图像视为单个文本块。查看Tesseract 提高其他方法的质量以改进检测和Pytesseract 配置选项以进行其他设置。
输入图像->
Binary ->
Morph 开场
轮廓区域过滤->
Invert->
应用模糊以获得结果
OCR 的结果
YabVzu
Run Code Online (Sandbox Code Playgroud)
代码
import cv2
import pytesseract
import numpy as np
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, grayscale, Otsu's threshold
image = cv2.imread('2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Morph open to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
# Find contours and remove small noise
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
area = cv2.contourArea(c)
if area < 50:
cv2.drawContours(opening, [c], -1, 0, -1)
# Invert and apply slight Gaussian blur
result = 255 - opening
result = cv2.GaussianBlur(result, (3,3), 0)
# Perform OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey()
Run Code Online (Sandbox Code Playgroud)
方法#2:颜色分割
观察到要提取的所需文本与图像中的噪声具有可区分的对比度,我们可以使用颜色阈值来隔离文本。这个想法是转换为 HSV 格式,然后使用颜色阈值来获得使用较低/较高颜色范围的蒙版。从我们使用相同的过程到 Pytesseract 的 OCR。
输入图像->
遮罩->
结果
代码
import cv2
import pytesseract
import numpy as np
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('2.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)
# Invert image and OCR
invert = 255 - mask
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)
cv2.imshow('mask', mask)
cv2.imshow('invert', invert)
cv2.waitKey()
Run Code Online (Sandbox Code Playgroud)
更正扭曲的文本
当图像水平时,OCR 效果最佳。为了确保文本是 OCR 的理想格式,我们可以执行透视变换。在去除所有噪声以隔离文本后,我们可以执行近似变形以将单个文本轮廓组合成单个轮廓。从这里我们可以找到使用旋转外框cv2.minAreaRect
,然后执行四点透视变换使用imutils.perspective.four_point_transform
。从清洁过的面具继续,这是结果:
Mask ->
Morph close->
检测到的旋转边界框->
结果
与其他图像一起输出
更新代码以包含透视变换
import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)
# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
# Find rotated bounding box then perspective transform
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
rect = cv2.minAreaRect(cnts[0])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image,[box],0,(36,255,12),2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))
# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)
cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()
Run Code Online (Sandbox Code Playgroud)
注意:颜色阈值范围是使用此 HSV 阈值脚本确定的
import cv2
import numpy as np
def nothing(x):
pass
# Load image
image = cv2.imread('2.png')
# Create a window
cv2.namedWindow('image')
# Create trackbars for color change
# Hue is from 0-179 for Opencv
cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
cv2.createTrackbar('VMax', 'image', 0, 255, nothing)
# Set default value for Max HSV trackbars
cv2.setTrackbarPos('HMax', 'image', 179)
cv2.setTrackbarPos('SMax', 'image', 255)
cv2.setTrackbarPos('VMax', 'image', 255)
# Initialize HSV min/max values
hMin = sMin = vMin = hMax = sMax = vMax = 0
phMin = psMin = pvMin = phMax = psMax = pvMax = 0
while(1):
# Get current positions of all trackbars
hMin = cv2.getTrackbarPos('HMin', 'image')
sMin = cv2.getTrackbarPos('SMin', 'image')
vMin = cv2.getTrackbarPos('VMin', 'image')
hMax = cv2.getTrackbarPos('HMax', 'image')
sMax = cv2.getTrackbarPos('SMax', 'image')
vMax = cv2.getTrackbarPos('VMax', 'image')
# Set minimum and maximum HSV values to display
lower = np.array([hMin, sMin, vMin])
upper = np.array([hMax, sMax, vMax])
# Convert to HSV format and color threshold
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, lower, upper)
result = cv2.bitwise_and(image, image, mask=mask)
# Print if there is a change in HSV value
if((phMin != hMin) | (psMin != sMin) | (pvMin != vMin) | (phMax != hMax) | (psMax != sMax) | (pvMax != vMax) ):
print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % (hMin , sMin , vMin, hMax, sMax , vMax))
phMin = hMin
psMin = sMin
pvMin = vMin
phMax = hMax
psMax = sMax
pvMax = vMax
# Display result image
cv2.imshow('image', result)
if cv2.waitKey(10) & 0xFF == ord('q'):
break
cv2.destroyAllWindows()
Run Code Online (Sandbox Code Playgroud)
您的代码会产生比这更好的结果。在这里,我设置的阈值upperb
和lowerb
值基于直方图CDF
值和阈值。按ESC
按钮获取下一张图像。
这段代码不必要地复杂,需要以各种方式进行优化。可以重新排序代码以跳过一些步骤。我保留它,因为某些部分可能会帮助其他部分。通过保持具有高于特定阈值的区域的轮廓可以去除一些现有的噪声。欢迎对其他降噪方法提出任何建议。
可以在此处找到用于获取 4 个角点以进行透视变换的类似更简单的代码,
代码说明:
通过在原始图像中绘制矩形和角点来标记 ROI
拉直投资回报率并提取它
代码:
## Press ESC button to get next image
import cv2
import cv2 as cv
import numpy as np
frame = cv2.imread('extra/c1.png')
#frame = cv2.imread('extra/c2.png')
## keeping a copy of original
print(frame.shape)
original_frame = frame.copy()
original_frame2 = frame.copy()
## Show the original image
winName = 'Original'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Apply median blur
frame = cv2.medianBlur(frame,9)
## Show the original image
winName = 'Median Blur'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
#kernel = np.ones((5,5),np.uint8)
#frame = cv2.dilate(frame,kernel,iterations = 1)
# Otsu's thresholding
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret2,thresh_n = cv.threshold(frame,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
frame = thresh_n
## Show the original image
winName = 'Otsu Thresholding'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## invert color
frame = cv2.bitwise_not(frame)
## Show the original image
winName = 'Invert Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Dilate image
kernel = np.ones((5,5),np.uint8)
frame = cv2.dilate(frame,kernel,iterations = 1)
##
## Show the original image
winName = 'SUB'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
img_gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
cv.imshow(winName, img_gray & frame)
cv.waitKey(0)
## Show the original image
winName = 'Dilate Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Get largest contour from contours
contours, hierarchy = cv2.findContours(frame, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
## Get minimum area rectangle and corner points
rect = cv2.minAreaRect(max(contours, key = cv2.contourArea))
print(rect)
box = cv2.boxPoints(rect)
print(box)
## Sorted points by x and y
## Not used in this code
print(sorted(box , key=lambda k: [k[0], k[1]]))
## draw anchor points on corner
frame = original_frame.copy()
z = 6
for b in box:
cv2.circle(frame, tuple(b), z, 255, -1)
## show original image with corners
box2 = np.int0(box)
cv2.drawContours(frame,[box2],0,(0,0,255), 2)
cv2.imshow('Detected Corners',frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
## /sf/ask/813915371/
def subimage(image, center, theta, width, height):
shape = ( image.shape[1], image.shape[0] ) # cv2.warpAffine expects shape in (length, height)
matrix = cv2.getRotationMatrix2D( center=center, angle=theta, scale=1 )
image = cv2.warpAffine( src=image, M=matrix, dsize=shape )
x = int(center[0] - width / 2)
y = int(center[1] - height / 2)
image = image[ y:y+height, x:x+width ]
return image
## Show the original image
winName = 'Dilate Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
## use the calculated rectangle attributes to rotate and extract it
frame = subimage(original_frame, center=rect[0], theta=int(rect[2]), width=int(rect[1][0]), height=int(rect[1][1]))
original_frame = frame.copy()
cv.imshow(winName, frame)
cv.waitKey(0)
perspective_transformed_image = frame.copy()
## Apply median blur
frame = cv2.medianBlur(frame,11)
## Show the original image
winName = 'Median Blur'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
#kernel = np.ones((5,5),np.uint8)
#frame = cv2.dilate(frame,kernel,iterations = 1)
# Otsu's thresholding
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret2,thresh_n = cv.threshold(frame,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
frame = thresh_n
## Show the original image
winName = 'Otsu Thresholding'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## invert color
frame = cv2.bitwise_not(frame)
## Show the original image
winName = 'Invert Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Dilate image
kernel = np.ones((5,5),np.uint8)
frame = cv2.dilate(frame,kernel,iterations = 1)
##
## Show the original image
winName = 'SUB'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
img_gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
frame = img_gray & frame
frame[np.where(frame==0)] = 255
cv.imshow(winName, frame)
cv.waitKey(0)
hist,bins = np.histogram(frame.flatten(),256,[0,256])
cdf = hist.cumsum()
cdf_normalized = cdf * hist.max()/ cdf.max()
print(cdf)
print(cdf_normalized)
hist_image = frame.copy()
## two decresing range algorithm
low_index = -1
for i in range(0, 256):
if cdf[i] > 0:
low_index = i
break
print(low_index)
tol = 0
tol_limit = 20
broken_index = -1
past_val = cdf[low_index] - cdf[low_index + 1]
for i in range(low_index + 1, 255):
cur_val = cdf[i] - cdf[i+1]
if tol > tol_limit:
broken_index = i
break
if cur_val < past_val:
tol += 1
past_val = cur_val
print(broken_index)
##
lower = min(frame.flatten())
upper = max(frame.flatten())
print(min(frame.flatten()))
print(max(frame.flatten()))
#img_rgb_inrange = cv2.inRange(frame_HSV, np.array([lower,lower,lower]), np.array([upper,upper,upper]))
img_rgb_inrange = cv2.inRange(frame, (low_index), (broken_index))
neg_rgb_image = ~img_rgb_inrange
## Show the original image
winName = 'Final'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, neg_rgb_image)
cv.waitKey(0)
kernel = np.ones((3,3),np.uint8)
frame = cv2.erode(neg_rgb_image,kernel,iterations = 1)
winName = 'Final Dilate'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
##
winName = 'Final Subtracted'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
img2 = np.zeros_like(perspective_transformed_image)
img2[:,:,0] = frame
img2[:,:,1] = frame
img2[:,:,2] = frame
frame = img2
cv.imshow(winName, perspective_transformed_image | frame)
cv.waitKey(0)
##
import matplotlib.pyplot as plt
plt.plot(cdf_normalized, color = 'b')
plt.hist(hist_image.flatten(),256,[0,256], color = 'r')
plt.xlim([0,256])
plt.legend(('cdf','histogram'), loc = 'upper left')
plt.show()
Run Code Online (Sandbox Code Playgroud)
1. 中值滤波器:
2.大津阈值:
3. 反转:
4. 倒像膨胀:
5. 屏蔽提取:
6. 转换的 ROI 点:
7.透视校正图像:
8. 中值模糊:
9.大津阈值:
10.倒像:
11. ROI 提取:
12.夹紧:
13. 膨胀:
14. 最终投资回报率:
15. 步骤 11 图像的直方图:
归档时间: |
|
查看次数: |
7520 次 |
最近记录: |