Python Pickling 和多重处理

Cas*_*iel 4 python pickle multiprocessing

我正在尝试使用 multiprocessing(多进程)来解决我的内存问题,但是我的函数无法被 pickle(序列化),而且我不知道为什么。我的主要代码如下:

def main():
    """Run ``file_unpacking`` in a child process and collect its results.

    The child puts five items on a queue, in this order: channel-1 samples,
    channel-2 samples, the list of headers, the date and the time. They are
    read back in the same order and returned as a tuple.

    Returns:
        tuple: ``(Chan1, Chan2, Start_Header, Date, Time)``.

    Note:
        On Windows the child process re-imports the main module, so the
        module-level call to this function must sit under an
        ``if __name__ == "__main__":`` guard. Without the guard every child
        tries to start another child while it is still being imported.
    """
    print("starting main")
    q = Queue()
    p = Process(target=file_unpacking, args=("hellow world", q))
    p.start()

    # Drain the queue BEFORE joining. A process that has put items on a
    # multiprocessing queue does not exit until the buffered data has been
    # flushed to the pipe, so joining first can deadlock once the payload
    # is larger than the pipe buffer.
    Chan1 = q.get()
    Chan2 = q.get()
    Start_Header = q.get()
    Date = q.get()
    Time = q.get()

    # join() without a timeout only returns after the child has exited, so
    # no is_alive()/terminate() follow-up is needed.
    p.join()
    print("The results are in")
    return Chan1, Chan2, Start_Header, Date, Time

def file_unpacking(args, q):
    """Unpack the numbered data files and send the longest clean run to *q*.

    Files are read in order. Consecutive readable files are accumulated into
    one run; a corrupted file (``header is None``) closes the current run and
    starts a new one. The run holding the most channel-1 samples is selected
    and put on the queue as five items, in this order: channel-1 samples,
    channel-2 samples, the run's list of headers, then elements 0 and 1 of
    the run's first header (sent as the date and the time).

    Args:
        args: Unused; kept so existing ``Process(target=..., args=...)``
            calls keep working.
        q: Queue-like object; only its ``put`` method is used.

    Note:
        ``path``, ``fileName3``, ``Data_Sets1``, ``Data_Sets2`` and
        ``Headers`` are not defined here and are expected to exist at module
        level. The three lists are appended to in place.
    """
    print("starting unpacking")
    samples_per_file = 10000
    fileName1 = "050913-00012"
    # The instance itself is not used below; the construction is kept
    # because the visible constructor prints a message.
    unpacker = UnpackingClass()

    # Accumulators for the run currently being built. They are re-bound
    # inside this function, which makes them local names, so they must be
    # initialised here before the first append (otherwise UnboundLocalError).
    temp_1 = []
    temp_2 = []
    temp_3 = []

    for fileNumber in range(0, 44):
        fileName = fileName1 + str(fileNumber) + fileName3
        # NOTE(review): the visible definition of unpackFile takes
        # (path, fileName) with no ``self``, hence the call through the
        # class. On Python 2 that only works if unpackFile is a
        # @staticmethod -- confirm.
        header, data1, data2 = UnpackingClass.unpackFile(path, fileName)

        if header is None:
            logging.warning("curropted file found at " + fileName)
            # A corrupted file ends the current run: store it, start anew.
            Data_Sets1.append(temp_1)
            Data_Sets2.append(temp_2)
            Headers.append(temp_3)
            temp_1 = []
            temp_2 = []
            temp_3 = []
        else:
            logging.info(fileName + " is good!")
            temp_3.append(header)
            for i in range(samples_per_file):
                temp_1.append(data1[i])
                temp_2.append(data2[i])

    # Store the run that was still open when the loop finished.
    Data_Sets1.append(temp_1)
    Data_Sets2.append(temp_2)
    Headers.append(temp_3)

    # Pick the run with the most channel-1 samples (first one on ties).
    lengths = [len(run) for run in Data_Sets1]
    index = lengths.index(max(lengths))

    Chan1 = Data_Sets1[index]
    Chan2 = Data_Sets2[index]
    Start_Header = Headers[index]
    # Date/time come from the FIRST header of the chosen run. ``index`` is
    # the run number and has no meaning as a position inside the run's own
    # header list. An empty run (every file corrupted) has no header.
    if Start_Header:
        Date = Start_Header[0][0]
        Time = Start_Header[0][1]
    else:
        Date = None
        Time = None
    print("done unpacking")
    q.put(Chan1)
    q.put(Chan2)
    q.put(Start_Header)
    q.put(Date)
    q.put(Time)
Run Code Online (Sandbox Code Playgroud)

目前解包方法放在一个单独的 Python 文件中,该文件导入了 struct 和 os。它会读取一个部分为文本、部分为二进制的文件,构建数据,然后关闭文件。这部分主要是繁琐的例行工作,所以我暂时不贴出来,但如果有帮助的话我会补上。它的开头如下:

class UnpackingClass:
    def __init__(self):
        print "Unpacking Class"
    def unpackFile(path,fileName):
        import struct
        import os
    .......
Run Code Online (Sandbox Code Playgroud)

然后,我只需调用 main() 来启动整个流程,但除了无限循环的 pickle 错误之外,我什么也没有得到。

长话短说,我不知道如何 pickle(序列化)一个函数。所有内容都定义在文件的顶层,所以我不知所措。

这是错误消息

Traceback (most recent call last):
 File "<string>", line 1, in <module>
 File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\multiprocessing\forking.py", line 373, in main
prepare(preparation_data)
 File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\multiprocessing\forking.py", line 488, in prepare
'__parents_main__', file, path_name, etc
 File "A:\598\TestCode\test1.py", line 142, in <module>
Chan1, Chan2, Start_Header, Date, Time = main()
 File "A:\598\TestCode\test1.py", line 43, in main
p.start()
  File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\multiprocessing\process.py", line 130, in start
self._popen = Popen(self)
  File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\multiprocessing\forking.py", line 271, in __init__
dump(process_obj, to_child, HIGHEST_PROTOCOL)
  File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\multiprocessing\forking.py", line 193, in dump
ForkingPickler(file, protocol).dump(obj)
  File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\pickle.py", line 224, in dump
self.save(obj)
  File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
 File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\pickle.py", line 419, in save_reduce
save(state)
 File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
 File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\pickle.py", line 649, in save_dict
self._batch_setitems(obj.iteritems())
 File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\pickle.py", line 681, in _batch_setitems
save(v)
 File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
  File "C:\Users\Casey\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.1.0.1371.win-x86_64\lib\pickle.py", line 748, in save_global
(obj, module, name))
pickle.PicklingError: Can't pickle <function file_unpacking at 0x0000000007E1F048>: it's    not found as __main__.file_unpacking
Run Code Online (Sandbox Code Playgroud)

Mik*_*rns 5

如果您想做任何并行计算,函数的 pickle(序列化)是一件非常重要的事情。Python 的 pickle 和 multiprocessing 在并行计算方面相当糟糕,因此如果您不反对使用标准库之外的工具,我建议使用 dill 进行序列化,并用 pathos.multiprocessing 作为 multiprocessing 的替代。dill 可以序列化 Python 中的几乎所有内容,而 pathos.multiprocessing 使用 dill 来提供更强大的并行 CPU 使用。有关更多信息,请参阅:

multiprocessing 和 dill 可以一起做什么?

或者这个简单的例子:

Python 2.7.6 (default, Nov 12 2013, 13:26:39) 
[GCC 4.2.1 Compatible Apple Clang 4.1 ((tags/Apple/clang-421.11.66))] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import dill
>>> from pathos.multiprocessing import ProcessingPool
>>> 
>>> def squared(x):
...   return x**2
... 
>>> pool = ProcessingPool(4)
>>> pool.map(squared, range(10))
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
>>> res = pool.amap(squared, range(10))
>>> res.get()
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
>>> res = pool.imap(squared, range(10))
>>> list(res)
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
>>> 
>>> def add(x,y):
...   return x+y
... 
>>> pool.map(add, range(10), range(10))
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
>>> res = pool.amap(add, range(10), range(10))
>>> res.get()
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
>>> res = pool.imap(add, range(10), range(10))
>>> list(res)
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
Run Code Online (Sandbox Code Playgroud)

dill 和 pathos 都可以在这里找到:https://github.com/uqfoundation