合并两个已排序的迭代器并去除重复项

Kir*_*lov 5 python

我需要合并两个已排序的迭代器，为此我写了这个函数：

def merge_no_repeat(iter1, iter2, key=None):
    """
    Merge two sorted iterables into one sorted stream without key duplicates.

    When both inputs currently hold an element with the same key, only the
    element from ``iter1`` is yielded; the one from ``iter2`` is discarded.
    ``None`` is a legal element value: exhaustion is tracked with a private
    sentinel rather than with ``None``.

    a = iter([(2, 'a'), (4, 'a'), (6, 'a')])
    b = iter([(1, 'b'), (2, 'b'), (3, 'b'), (4, 'b'), (5, 'b'), (6, 'b'), (7, 'b'), (8, 'b')])
    key = lambda item: item[0]
    merge_no_repeat(a, b, key) ->
                iter([(1, 'b'), (2, 'a'), (3, 'b'), (4, 'a'), (5, 'b'), (6, 'a'), (7, 'b'), (8, 'b')])
    :param iter1: iterable sorted in ascending key order
    :param iter2: iterable sorted in ascending key order
    :param key: callable returning the sort key of an element, default: lambda x: x
    :return: generator yielding the merged elements
    """
    if key is None:
        key = lambda x: x
    # Unique marker for "this input is exhausted"; unlike None it can never
    # collide with a real element.
    exhausted = object()
    # iter() is a no-op on iterators and lets callers pass lists/tuples too.
    iter1, iter2 = iter(iter1), iter(iter2)
    element1 = next(iter1, exhausted)
    element2 = next(iter2, exhausted)
    while element1 is not exhausted and element2 is not exhausted:
        # Compute each key once per step instead of once per comparison.
        key1, key2 = key(element1), key(element2)
        if key1 > key2:
            yield element2
            element2 = next(iter2, exhausted)
        else:
            # Covers both "equal" and "less than". Using a plain else also
            # guarantees progress when the keys are unordered (e.g. NaN).
            yield element1
            element1 = next(iter1, exhausted)
            if key1 == key2:
                # Same key on both sides: drop iter2's copy.
                element2 = next(iter2, exhausted)
    # At most one input still has data; emit its pending element and the rest.
    if element1 is not exhausted:
        yield element1
        for element in iter1:
            yield element
    elif element2 is not exhausted:
        yield element2
        for element in iter2:
            yield element
Run Code Online (Sandbox Code Playgroud)

这个函数可以正常工作，但我认为它太复杂了。能否使用 Python 标准库让这个函数更简单？

409*_*ict 0

您可以使用:

def merge_no_repeat(iter1, iter2, key=None):
    """
    Merge two sorted iterables into one sorted stream without key duplicates.

    Elements of ``iter2`` whose key equals the key of the pending ``iter1``
    element are dropped, so on a tie the ``iter1`` element is the one kept.
    ``None`` is a legal element value: exhaustion of ``iter1`` is tracked
    with a private sentinel rather than with ``None``.

    :param iter1: iterable sorted in ascending key order
    :param iter2: iterable sorted in ascending key order
    :param key: callable returning the sort key of an element, default: lambda x: x
    :return: generator yielding the merged elements
    """
    if key is None:
        key = lambda x: x
    # Unique marker for "iter1 is exhausted"; cannot collide with real data.
    exhausted = object()
    # iter() is a no-op on iterators and lets callers pass lists/tuples too.
    iter1 = iter(iter1)
    ref = next(iter1, exhausted)
    for elem in iter2:
        # Cache the key so it is not recomputed for every iter1 element
        # that sorts before this one.
        key_elem = key(elem)
        while ref is not exhausted and key_elem > key(ref):
            # Catch up with lower values from iter1.
            yield ref
            ref = next(iter1, exhausted)
        if ref is exhausted or key_elem < key(ref):
            # elem sorts strictly before the pending iter1 element (or iter1
            # is done); an equal key falls through and is dropped.
            yield elem
    if ref is not exhausted:
        # ref was already pulled out of iter1, so it must be emitted
        # explicitly before draining what is left of iter1.
        yield ref
        for elem in iter1:
            yield elem
Run Code Online (Sandbox Code Playgroud)

我假设迭代器在完全耗尽之前不会返回 None 值，因为您的原始代码中已经有 `if element1 is None:` 和 `elif element2 is None:` 这两个判断。


例子:

>>> from operator import itemgetter
>>> list(merge_no_repeat(
...     iter([(2, 'a'), (4, 'a'), (6, 'a')]),
...     iter([(1, 'b')]),
...     itemgetter(0)))
[(1, 'b'), (2, 'a'), (4, 'a'), (6, 'a')]
>>> list(merge_no_repeat(
...     iter([(2, 'a'), (4, 'a'), (6, 'a')]),
...     iter([(1, 'b'),(7, 'b'), (8, 'b')]),
...     itemgetter(0)))
[(1, 'b'), (2, 'a'), (4, 'a'), (6, 'a'), (7, 'b'), (8, 'b')]
>>> list(merge_no_repeat(
...     iter([(2, 'a'), (4, 'a'), (6, 'a')]),
...     iter([(1, 'b'),(3, 'b'), (4,'b'),(5,'b'),(7, 'b'), (8, 'b')]),
...     itemgetter(0)))
[(1, 'b'), (2, 'a'), (3, 'b'), (4, 'a'), (5, 'b'), (6, 'a'), (7, 'b'), (8, 'b')]
Run Code Online (Sandbox Code Playgroud)