Sri*_*ati 5 python algorithm performance rectangles computational-geometry
我有左上角的坐标和矩形列表的右下角说(a,b)和(c,d).我想检测并删除矩形内的矩形.可以留下重叠的矩形.
我有一个10,000个矩形的数据集,我想要一个有效的方法来解决这个问题.
目前我这样做,
import pandas
data = pd.read_csv('dataset.csv')
l = list(range(len(data)-1))
for i in range(len(data)):
length = len(l)
if i >= length:
break
for j in range(i+1, length):
if j >= len(l):
break
if (data.iloc[l[i]]['a'] >= data.iloc[l[j]]['a']) and (data.iloc[l[i]]['b'] <= data.iloc[l[j]]['b']) and (data.iloc[l[i]]['c'] <= data.iloc[l[j]]['c']) and (data.iloc[l[i]]['d'] >= data.iloc[l[j]]['d']):
l.pop(j)
Run Code Online (Sandbox Code Playgroud)
我在按照矩形区域的降序对数据集进行排序后实现了这种算法,因为矩形区域较大,不适合具有较小区域的矩形.在这里,我检测到它是否在另一个矩形内,从列表l中弹出矩形的索引.每次元素加载时,它都会减少迭代次数.
这需要几个小时才能解决,我需要一种有效的方法来解决它甚至数十万个样本.
请帮忙!
这是一个你可以尝试的分而治之的小算法。
我假设一旦您可以快速枚举每对碰撞矩形,您还可以在恒定时间内检查一个是否完全包含在另一个中。
所以,我们只需找到碰撞的矩形即可。
首先,将其概括如下:假设您有两组矩形A和B,并且您只想查找
(a, b)使得矩形a来自A、b来自B、a和b相交的对。
首先,想法。考虑以下由水平线部分分隔的两组矩形A的示例:BL
+----+ +-----+
| A1 | | B1 |
| | +-----+ +-----+
+----+ | A2 |
+-----+ +-----+
| A3 |
_____________________________|_____|________ L
| |
+-------------------+##+ |
| |##| |
| B2 +##|--+
| |
+----------------------+
Run Code Online (Sandbox Code Playgroud)
该行将L集合A和B细分为三个子集:
A above L: {A1, A2} (short: A>L)
A intersects L: {A3} (short: A=L)
A below L: {} (short: A<L)
B above L: {B1} (B>L)
B intersects L: {} (B=L)
B below L: {B2} (B<L)
Run Code Online (Sandbox Code Playgroud)
请注意,只有以下组中的矩形可以发生碰撞:
A<L A=L A>L
B<L y y N
B=L y y y
B>L N y y
Run Code Online (Sandbox Code Playgroud)
也就是说,如果我们想要找到A和之间的所有碰撞B,一旦找到合适的线,我们就可以忽略vs.和vs.L之间的碰撞
。因此,我们得到以下分而治之算法:当且不为空时,找到一条合适的线(大致)最大化消除的碰撞检查的数量,将和分为三组,递归地进行七个子组碰撞,忽略两个子组组合。A<LB>LA>LB<LABAB
假设如果矩形是“小”,并且组A=L和B=L大部分是空的,这将(大致)在每一步中将集合的大小减半,因此我们获得了一个算法,该算法平均运行在类似的情况下,而O(n*log(n))不是O(n*n)。
一旦掌握了任意A和的一般情况B,就可以获取整组矩形R并使用 运行算法A = R; B = R。
以下是 Python 的粗略草图:
def isSubinterval(aStart, aEnd, bStart, bEnd):
return aStart >= bStart and aEnd <= bEnd
def intersects(aStart, aEnd, bStart, bEnd):
return not (aEnd < bStart or aStart > bEnd)
class Rectangle:
def __init__(self, l, r, b, t):
self.left = l
self.right = r
self.bottom = b
self.top = t
def isSubrectangle(self, other):
return (
isSubinterval(self.left, self.right, other.left, other.right) and
isSubinterval(self.bottom, self.top, other.bottom, other.top)
)
def intersects(self, other):
return (
intersects(self.left, self.right, other.left, other.right) and
intersects(self.bottom, self.top, other.bottom, other.top)
)
def __repr__(self):
return ("[%f,%f]x[%f,%f]" % (self.left, self.right, self.bottom, self.top))
def boundingBox(rects):
infty = float('inf')
b = infty
t = - infty
l = infty
r = - infty
for rect in rects:
b = min(b, rect.bottom)
l = min(l, rect.left)
r = max(r, rect.right)
t = max(t, rect.top)
return Rectangle(l, r, b, t)
class DividingLine:
def __init__(self, isHorizontal, position):
self.isHorizontal = isHorizontal
self.position = position
def isAbove(self, rectangle):
if self.isHorizontal:
return rectangle.bottom > self.position
else:
return rectangle.left > self.position
def isBelow(self, rectangle):
if self.isHorizontal:
return rectangle.top < self.position
else:
return rectangle.right < self.position
def enumeratePossibleLines(boundingBox):
NUM_TRIED_LINES = 5
for i in range(1, NUM_TRIED_LINES + 1):
w = boundingBox.right - boundingBox.left
yield DividingLine(False, boundingBox.left + w / float(NUM_TRIED_LINES + 1) * i)
h = boundingBox.top - boundingBox.bottom
yield DividingLine(True, boundingBox.bottom + h / float(NUM_TRIED_LINES + 1) * i)
def findGoodDividingLine(rects_1, rects_2):
bb = boundingBox(rects_1 + rects_2)
bestLine = None
bestGain = 0
for line in enumeratePossibleLines(bb):
above_1 = len([r for r in rects_1 if line.isAbove(r)])
below_1 = len([r for r in rects_1 if line.isBelow(r)])
above_2 = len([r for r in rects_2 if line.isAbove(r)])
below_2 = len([r for r in rects_2 if line.isBelow(r)])
# These groups are separated by the line, no need to
# perform all-vs-all collision checks on those groups!
gain = above_1 * below_2 + above_2 * below_1
if gain > bestGain:
bestGain = gain
bestLine = line
return bestLine
# Collides all rectangles from list `rects_1` with
# all rectangles from list `rects_2`, and invokes
# `onCollision(a, b)` on every colliding `a` and `b`.
def collideAllVsAll(rects_1, rects_2, onCollision):
if rects_1 and rects_2: # if one list empty, no collisions
line = findGoodDividingLine(rects_1, rects_2)
if line:
above_1 = [r for r in rects_1 if line.isAbove(r)]
below_1 = [r for r in rects_1 if line.isBelow(r)]
above_2 = [r for r in rects_2 if line.isAbove(r)]
below_2 = [r for r in rects_2 if line.isBelow(r)]
intersect_1 = [r for r in rects_1 if not (line.isAbove(r) or line.isBelow(r))]
intersect_2 = [r for r in rects_2 if not (line.isAbove(r) or line.isBelow(r))]
collideAllVsAll(above_1, above_2, onCollision)
collideAllVsAll(above_1, intersect_2, onCollision)
collideAllVsAll(intersect_1, above_2, onCollision)
collideAllVsAll(intersect_1, intersect_2, onCollision)
collideAllVsAll(intersect_1, below_2, onCollision)
collideAllVsAll(below_1, intersect_2, onCollision)
collideAllVsAll(below_1, below_2, onCollision)
else:
for r1 in rects_1:
for r2 in rects_2:
if r1.intersects(r2):
onCollision(r1, r2)
Run Code Online (Sandbox Code Playgroud)
这是一个小演示:
rects = [
Rectangle(1,6,9,10),
Rectangle(4,7,6,10),
Rectangle(1,5,6,7),
Rectangle(8,9,8,10),
Rectangle(6,9,5,7),
Rectangle(8,9,1,6),
Rectangle(7,9,2,4),
Rectangle(2,8,2,3),
Rectangle(1,3,1,4)
]
def showInterestingCollision(a, b):
if a is not b:
if a.left < b.left:
print("%r <-> %r collision" % (a, b))
collideAllVsAll(rects, rects, showInterestingCollision)
Run Code Online (Sandbox Code Playgroud)
至少在这种情况下,它确实检测到了所有有趣的碰撞:
[1.000000,6.000000]x[9.000000,10.000000] <-> [4.000000,7.000000]x[6.000000,10.000000] collision
[1.000000,5.000000]x[6.000000,7.000000] <-> [4.000000,7.000000]x[6.000000,10.000000] collision
[4.000000,7.000000]x[6.000000,10.000000] <-> [6.000000,9.000000]x[5.000000,7.000000] collision
[6.000000,9.000000]x[5.000000,7.000000] <-> [8.000000,9.000000]x[1.000000,6.000000] collision
[7.000000,9.000000]x[2.000000,4.000000] <-> [8.000000,9.000000]x[1.000000,6.000000] collision
[2.000000,8.000000]x[2.000000,3.000000] <-> [8.000000,9.000000]x[1.000000,6.000000] collision
[2.000000,8.000000]x[2.000000,3.000000] <-> [7.000000,9.000000]x[2.000000,4.000000] collision
[1.000000,3.000000]x[1.000000,4.000000] <-> [2.000000,8.000000]x[2.000000,3.000000] collision
Run Code Online (Sandbox Code Playgroud)
这是一个更真实的演示:
from random import random
from matplotlib import pyplot as plt
def randomRect():
w = random() * 0.1
h = random() * 0.1
centerX = random() * (1 - w)
centerY = random() * (1 - h)
return Rectangle(
centerX - w/2, centerX + w/2,
centerY - h/2, centerY + h/2
)
randomRects = [randomRect() for _ in range(0, 500)]
for r in randomRects:
plt.fill(
[r.left, r.right, r.right, r.left],
[r.bottom, r.bottom, r.top, r.top],
'b-',
color = 'k',
fill = False
)
def markSubrectanglesRed(a, b):
if a is not b:
if a.isSubrectangle(b):
plt.fill(
[a.left, a.right, a.right, a.left],
[a.bottom, a.bottom, a.top, a.top],
'b-',
color = 'r',
alpha = 0.4
)
plt.fill(
[b.left, b.right, b.right, b.left],
[b.bottom, b.bottom, b.top, b.top],
'b-',
color = 'b',
fill = False
)
collideAllVsAll(randomRects, randomRects, markSubrectanglesRed)
plt.show()
Run Code Online (Sandbox Code Playgroud)
该图以红色显示所有消除的矩形,以蓝色显示封闭的矩形:
下面是一个具有单次碰撞的小示例的准二元空间划分的边界框(黄色)和选定的分割线(青色)的可视化:
对于 10000 个“大小合理”的随机矩形(交叉率与图像中大致相同),它会在 18 秒内计算出所有碰撞,尽管代码距离优化还很远。