下面是我逐行编写的代码(大约有900 页,每行 10 行和 5 个数据)有什么办法可以使它更快。目前将数据导出到 csv需要80 分钟。有什么方法可以对页面进行并行请求并提高此代码的效率。
import requests
from urllib3.exceptions import InsecureRequestWarning
import csv
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from bs4 import BeautifulSoup as bs
f = csv.writer(open('GEM.csv', 'w', newline=''))
f.writerow(['Bidnumber', 'Items', 'Quantitiy', 'Department', 'Enddate'])
def scrap_bid_data():
page_no = 1
while page_no < 910:
print('Hold on creating URL to fetch data...')
url = 'https://bidplus.gem.gov.in/bidlists?bidlists&page_no=' + str(page_no)
print('URL created: ' + url)
scraped_data = requests.get(url, verify=False)
soup_data = bs(scraped_data.text, 'lxml')
extracted_data = soup_data.find('div', {'id': 'pagi_content'})
if len(extracted_data) == 0: …Run Code Online (Sandbox Code Playgroud) python multithreading asynchronous threadpool python-multithreading