Python 的 multiprocessing 进程池(Pool)结束后内存未被释放

Jal*_*eks 9 python memory multiprocessing python-3.x

使用 Python 的 multiprocessing Pool.map() 之后,内存没有被归还。尽管使用 Pool 的函数已经退出、所有资源都已关闭,我甚至尝试删除 Pool 变量并显式调用垃圾收集器,仍然有超过 1 GB 的内存被占用。

在下面显示的代码中,如果取消注释 pool.map() 上方的两行(并注释掉 pool.map() 那一行),一切看起来都正常;但是一旦使用 multiprocessing,内存在离开函数后似乎就不会再被释放。

因为在实际代码中还会调用其他几个使用 multiprocessing 的函数,这种内存占用甚至会不断累积,最终耗尽所有内存。
(遗憾的是,我无法为这个次要的第二个问题(内存不断累积)构造出一个最小示例,但是主要问题一旦解决,第二个问题应该也会随之消失。)

这是 Linux 上的 Python 3.7.3,非常欢迎任何至少解释甚至解决此问题的帮助。

最小示例代码:

import gc
from time import sleep
from memory_profiler import profile
import numpy as np

def waitat(where, t):
    """Print a checkpoint label, then pause so live memory usage can be inspected.

    Gives the user a chance to look at the process in some task manager
    program while it sits at a known point of the script.

    Args:
        where: Label printed to stdout to identify the checkpoint.
        t: Number of seconds to sleep after printing.
    """
    # Flush explicitly: when stdout is redirected to a file or pipe it is
    # block-buffered, and the label would otherwise only show up after the
    # pause it is supposed to announce.
    print(where, flush=True)
    sleep(t)

@profile
def parallel_convert_all_to_hsv(imgs: np.ndarray) -> np.ndarray:
    """Convert a batch of RGB images to HSV using a multiprocessing pool.

    Pauses (via ``waitat``) at several checkpoints so the memory usage of the
    process can be watched from outside while the pool is created, used and
    torn down.

    Args:
        imgs: Array of images; it is iterated by ``pool.map`` and every item
            is handed to ``skimage.color.rgb2hsv`` in a worker process.

    Returns:
        A new array built with ``np.array`` from the list of converted images.

    Raises:
        BaseException: Whatever ``pool.map`` raised (including
            ``KeyboardInterrupt``) is re-raised after the workers have been
            terminated and joined.
    """
    from skimage.color import rgb2hsv
    import multiprocessing as mp
    print("going parallel")
    pool = mp.Pool()
    try:
        # images_converted = [] # there is no memory problem when using commented lines below, instead of pool.map(…) line
        # for img in imgs:
        #     images_converted.append(rgb2hsv(img))
        images_converted = pool.map(rgb2hsv, imgs)
    except BaseException:
        # No result exists when map() is interrupted or fails, so carrying on
        # would only trip over an unbound ``images_converted`` further down.
        # Stop the workers, reap them, and let the caller see the real error.
        pool.terminate()
        pool.join()
        raise
    waitat("after pool.map",5)

    # Orderly shutdown: no more tasks, then wait for the workers to exit.
    pool.close()
    pool.join()

    waitat("before del pool",5)
    del pool    # memory should now be freed here?
    # Drop the local references to the module and the function as well, so
    # nothing in this frame can keep pool-related objects alive.
    mp = None
    rgb2hsv = None

    waitat("after del pool",5)
    print("copying over")
    # pool.map returned a list of per-image arrays; pack them into one array.
    res = np.array(images_converted)
    waitat("before del image_hsv in function",5)
    del images_converted
    return res

@profile
def doit():
    """Run the memory experiment end to end.

    Creates a batch of random images, converts them with
    ``parallel_convert_all_to_hsv``, then releases the arrays one by one,
    pausing at each step (via ``waitat``) so memory usage can be observed.
    """
    print("create random images")
    max_images = 700
    rgb_images = np.random.rand(max_images, 300, 300, 3)

    waitat("before going parallel", 5)
    hsv_images = parallel_convert_all_to_hsv(rgb_images)
    hsv_size = hsv_images.nbytes
    print("images_converted has %i bytes" % hsv_size)
    # how to clean up Pool's memory at latest here?

    waitat("before deleting original images", 5)
    # Release the input batch; only the converted array should remain.
    del rgb_images
    waitat("memory should be as before going parallel + %i bytes" % hsv_size, 10)
    # Release the converted batch too.
    del hsv_images
    waitat("nearly end, memory should be as before", 15)
    # Force a full collection (generation 2) before the last checkpoint.
    gc.collect(2)
    waitat("end, memory should be as before", 15)

# Run the experiment only when executed as a script. Code that uses
# multiprocessing must keep its entry point behind this guard: with the
# "spawn" start method every worker re-imports the main module, and an
# unguarded call would start the whole experiment again in each worker.
if __name__ == "__main__":
    doit()

Run Code Online (Sandbox Code Playgroud)

使用Memory Profiler输出,显示问题:

import gc
from time import sleep
from memory_profiler import profile
import numpy as np

def waitat(where, t):
    """Print a checkpoint label, then pause so live memory usage can be inspected.

    Gives the user a chance to look at the process in some task manager
    program while it sits at a known point of the script.

    Args:
        where: Label printed to stdout to identify the checkpoint.
        t: Number of seconds to sleep after printing.
    """
    # Flush explicitly: when stdout is redirected to a file or pipe it is
    # block-buffered, and the label would otherwise only show up after the
    # pause it is supposed to announce.
    print(where, flush=True)
    sleep(t)

@profile
def parallel_convert_all_to_hsv(imgs: np.ndarray) -> np.ndarray:
    """Convert a batch of RGB images to HSV using a multiprocessing pool.

    Pauses (via ``waitat``) at several checkpoints so the memory usage of the
    process can be watched from outside while the pool is created, used and
    torn down.

    Args:
        imgs: Array of images; it is iterated by ``pool.map`` and every item
            is handed to ``skimage.color.rgb2hsv`` in a worker process.

    Returns:
        A new array built with ``np.array`` from the list of converted images.

    Raises:
        BaseException: Whatever ``pool.map`` raised (including
            ``KeyboardInterrupt``) is re-raised after the workers have been
            terminated and joined.
    """
    from skimage.color import rgb2hsv
    import multiprocessing as mp
    print("going parallel")
    pool = mp.Pool()
    try:
        # images_converted = [] # there is no memory problem when using commented lines below, instead of pool.map(…) line
        # for img in imgs:
        #     images_converted.append(rgb2hsv(img))
        images_converted = pool.map(rgb2hsv, imgs)
    except BaseException:
        # No result exists when map() is interrupted or fails, so carrying on
        # would only trip over an unbound ``images_converted`` further down.
        # Stop the workers, reap them, and let the caller see the real error.
        pool.terminate()
        pool.join()
        raise
    waitat("after pool.map",5)

    # Orderly shutdown: no more tasks, then wait for the workers to exit.
    pool.close()
    pool.join()

    waitat("before del pool",5)
    del pool    # memory should now be freed here?
    # Drop the local references to the module and the function as well, so
    # nothing in this frame can keep pool-related objects alive.
    mp = None
    rgb2hsv = None

    waitat("after del pool",5)
    print("copying over")
    # pool.map returned a list of per-image arrays; pack them into one array.
    res = np.array(images_converted)
    waitat("before del image_hsv in function",5)
    del images_converted
    return res

@profile
def doit():
    """Run the memory experiment end to end.

    Creates a batch of random images, converts them with
    ``parallel_convert_all_to_hsv``, then releases the arrays one by one,
    pausing at each step (via ``waitat``) so memory usage can be observed.
    """
    print("create random images")
    max_images = 700
    rgb_images = np.random.rand(max_images, 300, 300, 3)

    waitat("before going parallel", 5)
    hsv_images = parallel_convert_all_to_hsv(rgb_images)
    hsv_size = hsv_images.nbytes
    print("images_converted has %i bytes" % hsv_size)
    # how to clean up Pool's memory at latest here?

    waitat("before deleting original images", 5)
    # Release the input batch; only the converted array should remain.
    del rgb_images
    waitat("memory should be as before going parallel + %i bytes" % hsv_size, 10)
    # Release the converted batch too.
    del hsv_images
    waitat("nearly end, memory should be as before", 15)
    # Force a full collection (generation 2) before the last checkpoint.
    gc.collect(2)
    waitat("end, memory should be as before", 15)

# Run the experiment only when executed as a script. Code that uses
# multiprocessing must keep its entry point behind this guard: with the
# "spawn" start method every worker re-imports the main module, and an
# unguarded call would start the whole experiment again in each worker.
if __name__ == "__main__":
    doit()

Run Code Online (Sandbox Code Playgroud)

非并行代码的输出(没有出现问题的地方)(注意:下面列出的输出中实际执行的是 pool.map() 那一行,并且结束时仍占用约 1.1 GB 内存,因此它看起来是并行版本的输出):

$ python3 -m memory_profiler pool-mem-probs.py
create random images
before going parallel
going parallel
after pool.map
before del pool
after del pool
copying over
before del image_hsv in function
Filename: pool-mem-probs.py

Line #    Mem usage    Increment   Line Contents
================================================
    11   1481.2 MiB   1481.2 MiB   @profile
    12                             def parallel_convert_all_to_hsv(imgs: np.ndarray) -> np.ndarray:
    13   1487.2 MiB      6.0 MiB       from skimage.color import rgb2hsv
    14   1487.2 MiB      0.0 MiB       import multiprocessing as mp
    15   1487.2 MiB      0.0 MiB       print("going parallel")
    16   1488.6 MiB      1.4 MiB       pool = mp.Pool()
    17   1488.6 MiB      0.0 MiB       try:
    18                                     # images_converted = []  # there is no memory problem when using commented lines below, instead of pool.map(…) line
    19                                     # for img in imgs:
    20                                     #     images_converted.append(rgb2hsv(img))
    21   2930.9 MiB   1442.3 MiB           images_converted = pool.map(rgb2hsv, imgs)
    22                                 except KeyboardInterrupt:
    23                                     pool.terminate()
    24   2930.9 MiB      0.0 MiB       waitat("after pool.map",5)
    25                                 
    26   2930.9 MiB      0.0 MiB       pool.close()
    27   2931.0 MiB      0.1 MiB       pool.join()
    28                                 
    29   2931.0 MiB      0.0 MiB       waitat("before del pool",5)
    30   2931.0 MiB      0.0 MiB       pool = None
    31   2931.0 MiB      0.0 MiB       del pool    # memory should now be freed here?
    32   2931.0 MiB      0.0 MiB       mp = None
    33   2931.0 MiB      0.0 MiB       rgb2hsv = None
    34                                 
    35   2931.0 MiB      0.0 MiB       waitat("after del pool",5)
    36   2931.0 MiB      0.0 MiB       print("copying over")
    37   4373.0 MiB   1441.9 MiB       res = np.array(images_converted)
    38   4373.0 MiB      0.0 MiB       waitat("before del image_hsv in function",5)
    39   4016.6 MiB      0.0 MiB       images_converted = None
    40   4016.6 MiB      0.0 MiB       del images_converted
    41   4016.6 MiB      0.0 MiB       return res


images_converted has 1512000000 bytes
before deleting original images
memory should be as before going parallel + 1512000000 bytes
nearly end, memory should be as before
end, memory should be as before
Filename: pool-mem-probs.py

Line #    Mem usage    Increment   Line Contents
================================================
    43     39.1 MiB     39.1 MiB   @profile
    44                             def doit():
    45     39.1 MiB      0.0 MiB       print("create random images")
    46     39.1 MiB      0.0 MiB       max_images = 700
    47   1481.2 MiB   1442.1 MiB       images = np.random.rand(max_images, 300, 300,3)
    48                             
    49   1481.2 MiB      0.0 MiB       waitat("before going parallel",5)
    50   4016.6 MiB   2535.4 MiB       images_converted = parallel_convert_all_to_hsv(images)
    51   4016.6 MiB      0.0 MiB       print("images_converted has %i bytes" % images_converted.nbytes)
    52                                 # how to clean up Pool's memory at latest here?
    53                             
    54   4016.6 MiB      0.0 MiB       waitat("before deleting original images",5)
    55   2574.6 MiB      0.0 MiB       images = None
    56   2574.6 MiB      0.0 MiB       del images
    57   2574.6 MiB      0.0 MiB       waitat("memory should be as before going parallel + %i bytes" % images_converted.nbytes ,10)
    58   1132.7 MiB      0.0 MiB       images_converted = None
    59   1132.7 MiB      0.0 MiB       del images_converted
    60   1132.7 MiB      0.0 MiB       waitat("nearly end, memory should be as before" ,15)
    61   1132.7 MiB      0.0 MiB       gc.collect(2)
    62   1132.7 MiB      0.0 MiB       waitat("end, memory should be as before" ,15)    
Run Code Online (Sandbox Code Playgroud)

Shp*_*ter 4

垃圾回收的分代阈值(generation threshold)可能是造成问题的原因,请查看 gc.get_threshold() 的返回值。

可以尝试在代码中加入:

gc.disable()  # turn off automatic garbage collection
Run Code Online (Sandbox Code Playgroud)