Bio*_*eek 5 python hash equality bioinformatics ete3
我有一个自定义对象列表,我想从中删除重复项.通常情况下,你会被定义都做到这一点__eq__,并__hash__为您的对象,然后取set对象的列表中.我已经定义了__eq__,但是我无法找到一种好的方法来实现__hash__它为相等的对象返回相同的值.
更具体地说,我有一个派生自ete3工具包中的Tree类的类.如果Robinson-Foulds距离为零,我已将两个对象定义为相等.
from ete3 import Tree
class MyTree(Tree):
def __init__(self, *args, **kwargs):
super(MyTree, self).__init__(*args, **kwargs)
def __eq__(self, other):
rf = self.robinson_foulds(other, unrooted_trees=True)
return not bool(rf[0])
newicks = ['((D, C), (A, B),(E));',
'((D, B), (A, C),(E));',
'((D, A), (B, C),(E));',
'((C, D), (A, B),(E));',
'((C, B), (A, D),(E));',
'((C, A), (B, D),(E));',
'((B, D), (A, C),(E));',
'((B, C), (A, D),(E));',
'((B, A), (C, D),(E));',
'((A, D), (B, C),(E));',
'((A, C), (B, D),(E));',
'((A, B), (C, D),(E));']
trees = [MyTree(newick) for newick in newicks]
print len(trees) # 12
print len(set(trees)) # also 12, not what I want!
Run Code Online (Sandbox Code Playgroud)
双方print len(trees)并print len(set(trees))返回12,但是这不是我想要的,因为几个对象都是彼此相等:
from itertools import product
for t1, t2 in product(newicks, repeat=2):
if t1 != t2:
mt1 = MyTree(t1)
mt2 = MyTree(t2)
if mt1 == mt2:
print t1, '==', t2
Run Code Online (Sandbox Code Playgroud)
返回:
((D, C), (A, B),(E)); == ((C, D), (A, B),(E));
((D, C), (A, B),(E)); == ((B, A), (C, D),(E));
((D, C), (A, B),(E)); == ((A, B), (C, D),(E));
((D, B), (A, C),(E)); == ((C, A), (B, D),(E));
((D, B), (A, C),(E)); == ((B, D), (A, C),(E));
((D, B), (A, C),(E)); == ((A, C), (B, D),(E));
((D, A), (B, C),(E)); == ((C, B), (A, D),(E));
((D, A), (B, C),(E)); == ((B, C), (A, D),(E));
((D, A), (B, C),(E)); == ((A, D), (B, C),(E));
((C, D), (A, B),(E)); == ((D, C), (A, B),(E));
((C, D), (A, B),(E)); == ((B, A), (C, D),(E));
((C, D), (A, B),(E)); == ((A, B), (C, D),(E));
((C, B), (A, D),(E)); == ((D, A), (B, C),(E));
((C, B), (A, D),(E)); == ((B, C), (A, D),(E));
((C, B), (A, D),(E)); == ((A, D), (B, C),(E));
((C, A), (B, D),(E)); == ((D, B), (A, C),(E));
((C, A), (B, D),(E)); == ((B, D), (A, C),(E));
((C, A), (B, D),(E)); == ((A, C), (B, D),(E));
((B, D), (A, C),(E)); == ((D, B), (A, C),(E));
((B, D), (A, C),(E)); == ((C, A), (B, D),(E));
((B, D), (A, C),(E)); == ((A, C), (B, D),(E));
((B, C), (A, D),(E)); == ((D, A), (B, C),(E));
((B, C), (A, D),(E)); == ((C, B), (A, D),(E));
((B, C), (A, D),(E)); == ((A, D), (B, C),(E));
((B, A), (C, D),(E)); == ((D, C), (A, B),(E));
((B, A), (C, D),(E)); == ((C, D), (A, B),(E));
((B, A), (C, D),(E)); == ((A, B), (C, D),(E));
((A, D), (B, C),(E)); == ((D, A), (B, C),(E));
((A, D), (B, C),(E)); == ((C, B), (A, D),(E));
((A, D), (B, C),(E)); == ((B, C), (A, D),(E));
((A, C), (B, D),(E)); == ((D, B), (A, C),(E));
((A, C), (B, D),(E)); == ((C, A), (B, D),(E));
((A, C), (B, D),(E)); == ((B, D), (A, C),(E));
((A, B), (C, D),(E)); == ((D, C), (A, B),(E));
((A, B), (C, D),(E)); == ((C, D), (A, B),(E));
((A, B), (C, D),(E)); == ((B, A), (C, D),(E));
Run Code Online (Sandbox Code Playgroud)
所以我的问题是:
__hash__对我的案子来说什么是好的实施才能set(trees)起作用?__hash__未定义的情况下从列表中删除相同的对象?