sku*_*ght 3 python multithreading tqdm
我想用 tqdm 显示从三个链接下载各个文件的进度,并希望用多线程同时下载这三个文件,同时更新各自的进度条。但运行脚本后,屏幕上出现了许多行重复的进度条,似乎是多个线程在同时刷新 tqdm 进度条。请问应该如何用多线程下载文件,并为每个下载各保留一个进度条,而不让重复的进度条占满整个屏幕?下面是我的代码。
import os
import sys
import requests
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor as PE
def get_filename(url):
    """Derive a local file name for *url*.

    The last component of the URL's path is used when it has an extension.
    Otherwise a HEAD request is sent and, if the response carries a
    ``Location`` header, the last path component of that redirect target is
    used instead. Query strings and fragments are ignored in both cases so
    that ``.../file.exe?token=abc`` yields ``file.exe`` rather than a name
    containing ``?token=abc``.

    Args:
        url: The download URL.

    Returns:
        The file name. When the URL has no extension and the server sends no
        ``Location`` header, this is the extension-less last path component.
    """
    # Function-scope import so the block does not depend on a file-level
    # import that the surrounding script does not have.
    from urllib.parse import urlsplit

    filename = os.path.basename(urlsplit(url).path)
    fname, extension = os.path.splitext(filename)
    if extension:
        return filename

    # No extension in the URL itself: see whether the server redirects to a
    # concrete file.
    header = requests.head(url).headers
    if "Location" in header:
        return os.path.basename(urlsplit(header["Location"]).path)
    return fname
def get_file_size(url):
    """Return the size in bytes that the server reports for *url*.

    A HEAD request is sent to *url*. If the response has a non-zero
    ``Content-Length`` that value is returned. Otherwise, if the response has
    a ``Location`` header (and no ``status`` header), a second HEAD request is
    sent to the redirect target and its ``Content-Length`` is used.

    Args:
        url: The download URL.

    Returns:
        The size as an ``int``, or ``None`` when no usable size is reported.
    """
    header = requests.head(url).headers

    # Header values are strings; convert before comparing with zero. The
    # previous string-vs-int comparison was always true, so a redirect
    # response reporting "0" was returned as the size and never followed.
    reported = header.get("Content-Length")
    if reported is not None and int(reported) != 0:
        return int(reported)

    if "Location" in header and "status" not in header:
        redirect_header = requests.head(header["Location"]).headers
        redirected = redirect_header.get("Content-Length")
        if redirected is not None:
            return int(redirected)

    # Size unknown; made explicit instead of falling off the end.
    return None
def download_file(url, filename=None):
    """Stream *url* into the user's ``Downloads`` folder with a progress bar.

    Args:
        url: The URL to download.
        filename: Name of the file to create inside the ``Downloads`` folder
            of the user's home directory. When falsy, the name is obtained
            from ``get_filename(url)``.
    """
    # Destination folder: <home>/Downloads, created on demand.
    target_dir = os.path.join(Path.home(), "Downloads")
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)

    if not filename:
        filename = get_filename(url)
    total_bytes = get_file_size(url)
    destination = os.path.join(target_dir, filename)
    block = 1024

    bar_options = dict(
        unit="B",
        unit_scale=True,
        unit_divisor=block,
        desc=filename,
        total=total_bytes,
        file=sys.stdout,
    )
    with open(destination, "wb") as out:
        with requests.get(url, stream=True) as response:
            with tqdm(**bar_options) as progress:
                for piece in response.iter_content(chunk_size=block):
                    # write() returns the number of bytes written, which is
                    # the amount the bar advances by.
                    progress.update(out.write(piece))
if __name__ == "__main__":
    urls = [
        "http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
        "https://www.vmware.com/go/getworkstation-win",
        "https://download.geany.org/geany-1.36_setup.exe",
    ]
    # One worker per URL so all downloads run at the same time.
    with PE(max_workers=len(urls)) as ex:
        # Executor.map only re-raises a worker's exception when its result is
        # retrieved from the returned iterator. Drain it so that a failed
        # download is reported instead of being silently discarded.
        for _ in ex.map(download_file, urls):
            pass
Run Code Online (Sandbox Code Playgroud)
我修改了代码,做法取自《Use tqdm with concurrent.futures?》这个问题。
def download_file(url, filename=None):
    """Stream *url* into the user's ``Downloads`` folder (no progress output).

    Args:
        url: The URL to download.
        filename: Name of the file to create inside the ``Downloads`` folder
            of the user's home directory. When falsy, the name is obtained
            from ``get_filename(url)``.
    """
    # Destination folder: <home>/Downloads, created on demand.
    target_dir = os.path.join(Path.home(), "Downloads")
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)

    if not filename:
        filename = get_filename(url)
    destination = os.path.join(target_dir, filename)
    block = 1024

    with open(destination, "wb") as out:
        with requests.get(url, stream=True) as response:
            for piece in response.iter_content(chunk_size=block):
                out.write(piece)
if __name__ == "__main__":
    # Local import: only ThreadPoolExecutor (as PE) is imported at file level.
    from concurrent.futures import as_completed

    urls = [
        "http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
        "https://www.vmware.com/go/getworkstation-win",
        "https://download.geany.org/geany-1.36_setup.exe",
    ]
    with PE() as ex:
        # A Future is not an iterable of progress steps, so passing it to
        # tqdm(...) created a bar that was never iterated, updated or closed.
        # Instead, keep one manually driven bar per download, keyed by its
        # future, each pinned to its own terminal row.
        bars = {}
        for row, url in enumerate(urls):
            future = ex.submit(download_file, url)
            bars[future] = tqdm(
                total=get_file_size(url),
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                desc=get_filename(url),
                position=row,
                file=sys.stdout,
            )
        try:
            # download_file(url) does not report per-chunk progress, so each
            # bar can only be completed once its download has finished.
            for future in as_completed(bars):
                future.result()  # re-raises any error from the worker
                bar = bars[future]
                if bar.total:
                    bar.update(bar.total - bar.n)
        finally:
            for bar in bars.values():
                bar.close()
Run Code Online (Sandbox Code Playgroud)
但是修改代码之后,进度条不再更新……
我的问题: tqdm 有重复的进度条
我对并发下载没有问题,但是在实现 tqdm 以更新每个链接的个人进度时遇到问题,下面是我想要实现的目标: 理想情况下每个下载都应该有进度条。
我使用了其中一种解决方案:
if __name__ == "__main__":
    urls = [
        "http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
        "https://www.vmware.com/go/getworkstation-win",
        "https://download.geany.org/geany-1.36_setup.exe",
    ]
    # A single bar counts finished downloads; it is opened before the pool
    # and closed after the pool has shut down.
    with tqdm(total=len(urls)) as pbar, ThreadPoolExecutor() as ex:
        futures = list(map(lambda link: ex.submit(download_file, link), urls))
        for future in as_completed(futures):
            # result() also re-raises any exception from the worker.
            result = future.result()
            pbar.update(1)
Run Code Online (Sandbox Code Playgroud)
但这就是结果: 在此处输入图像描述
大致思路如下(格式可按你的需要调整):
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import requests
def download_file(url):
    """Fetch *url* as a stream, discarding the data, and return *url*.

    Args:
        url: The URL to request.

    Returns:
        The same *url*, so the caller can tell which download finished.
    """
    with requests.get(url, stream=True) as response:
        body = response.iter_content(chunk_size=50000)
        # Consume the whole body; the chunks themselves are not kept.
        for _piece in body:
            continue
    return url
if __name__ == "__main__":
    urls = [
        "http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
        "https://www.vmware.com/go/getworkstation-win",
        "https://download.geany.org/geany-1.36_setup.exe",
    ]
    worker_count = len(urls)
    # One overall bar that advances by one for every finished download.
    with tqdm(total=worker_count) as pbar, ThreadPoolExecutor(max_workers=worker_count) as ex:
        futures = []
        for url in urls:
            futures.append(ex.submit(download_file, url))
        for future in as_completed(futures):
            # result() also re-raises any exception from the worker.
            result = future.result()
            pbar.update(1)
Run Code Online (Sandbox Code Playgroud)
如果事先知道每个下载的长度,可以这样模拟:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import requests
import time
import random
def download_file(url, pbar, steps=30, max_delay=0.5):
    """Simulate a download by advancing *pbar* in randomly delayed steps.

    Args:
        url: Identifier of the simulated download; returned unchanged.
        pbar: Progress bar (any object with an ``update(n)`` method) that is
            advanced by 1 after every step.
        steps: Number of steps to simulate. Defaults to 30.
        max_delay: Upper bound, in seconds, of the random pause before each
            step. Defaults to 0.5.

    Returns:
        The same *url*, so the caller can tell which download finished.
    """
    for _ in range(steps):
        # random.random() is in [0, 1), so each pause is below max_delay.
        time.sleep(max_delay * random.random())
        pbar.update(1)
    return url
if __name__ == "__main__":
    urls = [
        "http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
        "https://www.vmware.com/go/getworkstation-win",
        "https://download.geany.org/geany-1.36_setup.exe",
    ]
    # Each simulated download advances the shared bar 30 times. Derive the
    # total and the pool size from the URL list rather than hard-coding 90
    # and 3, so the bar still reaches 100% if the list changes.
    steps_per_download = 30
    with tqdm(total=steps_per_download * len(urls)) as pbar:
        with ThreadPoolExecutor(max_workers=len(urls)) as ex:
            futures = [ex.submit(download_file, url, pbar) for url in urls]
            for future in as_completed(futures):
                # Called only to re-raise any exception from the worker.
                future.result()
Run Code Online (Sandbox Code Playgroud)