提问者：Aut*_*ner（得分 3）　标签：python、python-multiprocessing
我正在尝试使用multiprocessing.Pool.下载和提取 zip 文件。但是每次执行脚本时,只会下载3 个zip ,并且目录中看不到剩余的文件(CPU % 也达到 100%)。有人可以帮助我如何解决这个问题/建议更好的方法并遵循我尝试过的代码段。我对多处理完全陌生。我的目标是在不达到最大 CPU 的情况下并行下载多个文件。
import StringIO
import os
import sys
import zipfile
from multiprocessing import Pool, cpu_count
import requests
# Directory containing this script; downloads are extracted here and the
# directory is made importable.
filePath = os.path.dirname(os.path.abspath(__file__))
print("filePath is {} ".format(filePath))
sys.path.append(filePath)

# Archives to fetch.  NOTE(review): the movielists URL appears twice, so the
# same archive is downloaded twice — possibly a typo in the original list.
url = [
    "http://mlg.ucd.ie/files/datasets/multiview_data_20130124.zip",
    "http://mlg.ucd.ie/files/datasets/movielists_20130821.zip",
    "http://mlg.ucd.ie/files/datasets/bbcsport.zip",
    "http://mlg.ucd.ie/files/datasets/movielists_20130821.zip",
    "http://mlg.ucd.ie/files/datasets/3sources.zip",
]
def download_zips(url):
    """Download the zip archive at *url* and extract it into ``filePath``.

    Runs inside a multiprocessing worker.  All failures are caught and
    reported per-URL: an unhandled exception in a worker is re-raised by
    ``Pool.map`` in the parent, which is why some archives silently never
    appeared in the original version.
    """
    # ``response.content`` is bytes; under Python 3 it must be wrapped in
    # io.BytesIO — the Python-2-only StringIO module cannot hold bytes.
    from io import BytesIO

    file_name = url.split("/")[-1]
    try:
        response = requests.get(url)
        # Surface HTTP errors instead of handing an error page to ZipFile.
        response.raise_for_status()
        with zipfile.ZipFile(BytesIO(response.content)) as sourceZip:
            print("\n Downloaded {} ".format(file_name))
            sourceZip.extractall(filePath)
            print("extracted {} \n".format(file_name))
    except Exception as e:
        # Report and keep the worker alive for the remaining URLs.
        print("failed on {}: {}".format(file_name, e))
if __name__ == "__main__":
    # One worker per CPU core; map blocks until every URL is processed.
    worker_count = cpu_count()
    print("There are {} CPUs on this machine ".format(worker_count))
    worker_pool = Pool(worker_count)
    results = worker_pool.map(download_zips, url)
    worker_pool.close()
    worker_pool.join()
Run Code Online (Sandbox Code Playground)
下面的输出
filePath is C:\Users\Documents\GitHub\Python-Examples-Internet\multi_processing
There are 4 CPUs on this machine
filePath is C:\Users\Documents\GitHub\Python-Examples-Internet\multi_processing
filePath is C:\Users\Documents\GitHub\Python-Examples-Internet\multi_processing
filePath is C:\Users\Documents\GitHub\Python-Examples-Internet\multi_processing
filePath is C:\Users\Documents\GitHub\Python-Examples-Internet\multi_processing
Downloaded bbcsport.zip
extracted bbcsport.zip
Downloaded 3sources.zip
extracted 3sources.zip
Downloaded multiview_data_20130124.zip
Downloaded movielists_20130821.zip
Downloaded movielists_20130821.zip
extracted multiview_data_20130124.zip
extracted movielists_20130821.zip
extracted movielists_20130821.zip
Run Code Online (Sandbox Code Playground)
我在您的函数中做了一些小调整,效果很好。请注意:
".../movielists_20130821.zip" 在您的列表中出现了两次，因此相同的内容会被下载两次（可能是笔误？）。".../multiview_data_20130124.zip"、".../movielists_20130821.zip" 和 ".../3sources.zip" 解压后会各自生成一个新目录；而 ".../bbcsport.zip" 解压时会把其中的文件直接放进根文件夹，即您当前的工作目录（见下图）。也许您漏掉了这一项检查？
import sys, os
import zipfile
import requests
from multiprocessing import Pool, cpu_count
from functools import partial
from io import BytesIO
def download_zip(url, filePath):
    """Fetch the zip archive at *url* and unpack it into *filePath*.

    Exceptions are printed rather than raised, so one bad URL does not
    abort the whole worker pool.
    """
    try:
        archive_name = url.split("/")[-1]
        payload = requests.get(url).content
        # Wrap the downloaded bytes so ZipFile can read them in memory.
        with zipfile.ZipFile(BytesIO(payload)) as archive:
            print(" Downloaded {} ".format(archive_name))
            archive.extractall(filePath)
            print(" extracted {}".format(archive_name))
    except Exception as e:
        print(e)
if __name__ == "__main__":
    filePath = os.path.dirname(os.path.abspath(__file__))
    print("filePath is %s " % filePath)
    # sys.path.append(filePath) # why do you need this?

    # NOTE: the movielists URL is listed twice, so it is downloaded twice.
    urls = [
        "http://mlg.ucd.ie/files/datasets/multiview_data_20130124.zip",
        "http://mlg.ucd.ie/files/datasets/movielists_20130821.zip",
        "http://mlg.ucd.ie/files/datasets/bbcsport.zip",
        "http://mlg.ucd.ie/files/datasets/movielists_20130821.zip",
        "http://mlg.ucd.ie/files/datasets/3sources.zip",
    ]

    workers = cpu_count()
    print("There are {} CPUs on this machine ".format(workers))
    pool = Pool(workers)
    # Bind the shared destination directory so Pool.map can hand each
    # worker a single URL argument.
    download_func = partial(download_zip, filePath=filePath)
    results = pool.map(download_func, urls)
    pool.close()
    pool.join()
Run Code Online (Sandbox Code Playground)
归档时间:
查看次数: 7149 次
最近记录: