Geo*_*sto 13 python algorithm image-processing cv2
我试图用Pythony从微观样本中计算一种疾病的孢子数量,但到目前为止还没有取得多大成功.
因为孢子的颜色与背景相似,而且许多都很接近.
按照样品的照相显微镜检查.
图像处理代码:
import numpy as np
import argparse
import imutils
import cv2
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to the input image")
ap.add_argument("-o", "--output", required=True,
help="path to the output image")
args = vars(ap.parse_args())
counter = {}
image_orig = cv2.imread(args["image"])
height_orig, width_orig = image_orig.shape[:2]
image_contours = image_orig.copy()
colors = ['Yellow']
for color in colors:
image_to_process = image_orig.copy()
counter[color] = 0
if color == 'Yellow':
lower = np.array([70, 150, 140]) #rgb(151, 143, 80)
upper = np.array([110, 240, 210]) #rgb(212, 216, 106)
image_mask = cv2.inRange(image_to_process, lower, upper)
image_res = cv2.bitwise_and(
image_to_process, image_to_process, mask=image_mask)
image_gray = cv2.cvtColor(image_res, cv2.COLOR_BGR2GRAY)
image_gray = cv2.GaussianBlur(image_gray, (5, 5), 50)
image_edged = cv2.Canny(image_gray, 100, 200)
image_edged = cv2.dilate(image_edged, None, iterations=1)
image_edged = cv2.erode(image_edged, None, iterations=1)
cnts = cv2.findContours(
image_edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if imutils.is_cv2() else cnts[1]
for c in cnts:
if cv2.contourArea(c) < 1100:
continue
hull = cv2.convexHull(c)
if color == 'Yellow':
cv2.drawContours(image_contours, [hull], 0, (0, 0, 255), 1)
counter[color] += 1
print("{} esporos {}".format(counter[color], color))
cv2.imwrite(args["output"], image_contours)
Run Code Online (Sandbox Code Playgroud)
该算法计算了11个孢子
但在图像中包含27个孢子
如何使这更准确?
Ric*_*ard 13
首先,我们将在下面使用一些初步代码:
import numpy as np
import cv2
from matplotlib import pyplot as plt
from skimage.morphology import extrema
from skimage.morphology import watershed as skwater
def ShowImage(title,img,ctype):
if ctype=='bgr':
b,g,r = cv2.split(img) # get b,g,r
rgb_img = cv2.merge([r,g,b]) # switch it to rgb
plt.imshow(rgb_img)
elif ctype=='hsv':
rgb = cv2.cvtColor(img,cv2.COLOR_HSV2RGB)
plt.imshow(rgb)
elif ctype=='gray':
plt.imshow(img,cmap='gray')
elif ctype=='rgb':
plt.imshow(img)
else:
raise Exception("Unknown colour type")
plt.title(title)
plt.show()
Run Code Online (Sandbox Code Playgroud)
作为参考,这是您的原始图像:
#Read in image
img = cv2.imread('cells.jpg')
ShowImage('Original',img,'bgr')
Run Code Online (Sandbox Code Playgroud)
Otsu的方法是分割颜色的一种方法.该方法假设可以将图像的像素强度绘制成双峰直方图,并找到该直方图的最佳分离器.我应用以下方法.
#Convert to a single, grayscale channel
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
#Threshold the image to binary using Otsu's method
ret, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
ShowImage('Grayscale',gray,'gray')
ShowImage('Applying Otsu',thresh,'gray')
Run Code Online (Sandbox Code Playgroud)
所有这些小斑点都很烦人,我们可以通过扩张来摆脱它们:
#Adjust iterations until desired result is achieved
kernel = np.ones((3,3),np.uint8)
dilated = cv2.dilate(thresh, kernel, iterations=5)
ShowImage('Dilated',dilated,'gray')
Run Code Online (Sandbox Code Playgroud)
我们现在需要确定流域的峰值并给它们单独的标签.这样做的目的是生成一组像素,使得每个单元在其中具有像素,并且没有两个单元具有其标识符像素接触.
为此,我们执行距离变换,然后过滤掉距离单元中心太远的距离.
#Calculate distance transformation
dist = cv2.distanceTransform(dilated,cv2.DIST_L2,5)
ShowImage('Distance',dist,'gray')
Run Code Online (Sandbox Code Playgroud)
#Adjust this parameter until desired separation occurs
fraction_foreground = 0.6
ret, sure_fg = cv2.threshold(dist,fraction_foreground*dist.max(),255,0)
ShowImage('Surely Foreground',sure_fg,'gray')
Run Code Online (Sandbox Code Playgroud)
就算法而言,上述图像中的每个白色区域是单独的单元.
现在我们通过减去最大值来识别未知区域,即将由分水岭算法标记的区域:
# Finding unknown region
unknown = cv2.subtract(dilated,sure_fg.astype(np.uint8))
ShowImage('Unknown',unknown,'gray')
Run Code Online (Sandbox Code Playgroud)
未知区域应在每个细胞周围形成完整的甜甜圈.
接下来,我们给出距离变换唯一标签产生的每个不同区域,然后在最终执行分水岭变换之前标记未知区域:
# Marker labelling
ret, markers = cv2.connectedComponents(sure_fg.astype(np.uint8))
ShowImage('Connected Components',markers,'rgb')
# Add one to all labels so that sure background is not 0, but 1
markers = markers+1
# Now, mark the region of unknown with zero
markers[unknown==np.max(unknown)] = 0
ShowImage('markers',markers,'rgb')
dist = cv2.distanceTransform(dilated,cv2.DIST_L2,5)
markers = skwater(-dist,markers,watershed_line=True)
ShowImage('Watershed',markers,'rgb')
Run Code Online (Sandbox Code Playgroud)
现在,单元格的总数是唯一标记的数量减去1(忽略背景):
len(set(markers.flatten()))-1
Run Code Online (Sandbox Code Playgroud)
在这种情况下,我们得到23.
您可以通过调整距离阈值,扩张程度或者使用h-maxima(局部阈值最大值)来使这或多或少变得准确.但要注意过度拟合; 也就是说,不要认为对单个图像进行调整会在各处获得最佳效果.
您还可以通过算法略微改变参数,以了解计数中的不确定性.这可能看起来像这样
import numpy as np
import cv2
import itertools
from matplotlib import pyplot as plt
from skimage.morphology import extrema
from skimage.morphology import watershed as skwater
def CountCells(dilation=5, fg_frac=0.6):
#Read in image
img = cv2.imread('cells.jpg')
#Convert to a single, grayscale channel
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
#Threshold the image to binary using Otsu's method
ret, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
#Adjust iterations until desired result is achieved
kernel = np.ones((3,3),np.uint8)
dilated = cv2.dilate(thresh, kernel, iterations=dilation)
#Calculate distance transformation
dist = cv2.distanceTransform(dilated,cv2.DIST_L2,5)
#Adjust this parameter until desired separation occurs
fraction_foreground = fg_frac
ret, sure_fg = cv2.threshold(dist,fraction_foreground*dist.max(),255,0)
# Finding unknown region
unknown = cv2.subtract(dilated,sure_fg.astype(np.uint8))
# Marker labelling
ret, markers = cv2.connectedComponents(sure_fg.astype(np.uint8))
# Add one to all labels so that sure background is not 0, but 1
markers = markers+1
# Now, mark the region of unknown with zero
markers[unknown==np.max(unknown)] = 0
markers = skwater(-dist,markers,watershed_line=True)
return len(set(markers.flatten()))-1
#Smaller numbers are noisier, which leads to many small blobs that get
#thresholded out (undercounting); larger numbers result in possibly fewer blobs,
#which can also cause undercounting.
dilations = [4,5,6]
#Small numbers equal less separation, so undercounting; larger numbers equal
#more separation or drop-outs. This can lead to over-counting initially, but
#rapidly to under-counting.
fracs = [0.5, 0.6, 0.7, 0.8]
for params in itertools.product(dilations,fracs):
print("Dilation={0}, FG frac={1}, Count={2}".format(*params,CountCells(*params)))
Run Code Online (Sandbox Code Playgroud)
给出结果:
Dilation=4, FG frac=0.5, Count=22
Dilation=4, FG frac=0.6, Count=23
Dilation=4, FG frac=0.7, Count=17
Dilation=4, FG frac=0.8, Count=12
Dilation=5, FG frac=0.5, Count=21
Dilation=5, FG frac=0.6, Count=23
Dilation=5, FG frac=0.7, Count=20
Dilation=5, FG frac=0.8, Count=13
Dilation=6, FG frac=0.5, Count=20
Dilation=6, FG frac=0.6, Count=23
Dilation=6, FG frac=0.7, Count=24
Dilation=6, FG frac=0.8, Count=14
Run Code Online (Sandbox Code Playgroud)
取计数值的中位数是将不确定性纳入单个数字的一种方法.
请记住,StackOverflow的许可要求您提供适当的 归属.在学术工作中,这可以通过引用来完成.