Jho*_*nny 6 python urllib3 python-requests
我将 `requests` 包与 `urllib3.util.retry.Retry()` 一起使用,发送数以万计的查询。我想统计查询次数,以及在成功取回所需数据之前所需的尝试次数。我的目标是建立一个衡量 API 可靠性的指标。
为了说明这个问题,我们假设 `requests` 的 Response 对象包含以下数据:
from requests import Session
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
def create_session():
    """Build a session whose HTTP and HTTPS adapters share one retry policy.

    The policy allows up to 15 retries with a 0.5 backoff factor, applies
    only to GET requests, and treats the listed status codes as retryable.

    Returns:
        The configured ``Session`` with an ``HTTPAdapter`` mounted on both
        the ``http://`` and ``https://`` prefixes.
    """
    retry_policy = Retry(
        total=15,
        backoff_factor=0.5,
        status_forcelist=[401, 408, 429, 500, 502, 504],
        allowed_methods=frozenset(["GET"]),
    )
    session = Session()
    # Each prefix gets its own adapter instance configured with the policy.
    for prefix in ('http://', 'https://'):
        session.mount(prefix, HTTPAdapter(max_retries=retry_policy))
    return session
# Endpoints to query; the sample URL always answers with HTTP 500.
urls = ['https://httpbin.org/status/500']
count_queries = len(urls)
count_attempts = 0
with create_session() as s:
    for url in urls:
        response = s.get(url)
        # NOTE: `total_retries` is a hypothetical attribute used only to
        # illustrate the desired API; this line reads it as if it existed.
        count_attempts = count_attempts + response.total_retries
Run Code Online (Sandbox Code Playgroud)
由于没有这样的变量,我正在寻找替代方法来计算重试总数。
虽然我无法找到解决此问题的方法,但我在搜索过程中进行了以下观察,这可能会有所帮助:
`urllib3` 将重试历史记录存储在 Retry 对象中。`urllib3.HTTPResponse` 保存着最后一个 Retry 对象(docs)。`urllib3.HTTPResponse`(准确地说,是其未解码的主体)存储在 `requests.Response.raw` 中,但仅当 `stream=True` 时才如此(docs)。据我了解,我无法访问这些数据。另外,一个类似问题的回答给出了一种可能的解决方案:它对 `Retry` 类进行了子类化,本质上是调用一个回调函数,把字符串打印到日志记录器。这可以改为递增计数器,而不是打印日志。但是,如果可能的话,我更希望像上面那样跟踪某一次特定 `get` 的重试,而不是统计使用同一会话的所有 `get` 的重试。我使用的是 Python 3.9、urllib3 1.26.8、requests 2.26.0。
下面是一个相当冗长的解决方案,思路与这个答案一致。它在会话级别对请求和重试进行计数(但这并不是我首选的方法)。
import requests
from urllib3.util.retry import Retry
class RequestTracker:
    """Running tally of registered queries and retries.

    Both counters start at zero and grow by one per ``register_*`` call;
    they are exposed through read-only properties.
    """

    def __init__(self):
        self._queries = 0
        self._retries = 0

    @property
    def queries(self):
        """Number of queries registered so far."""
        return self._queries

    @property
    def retries(self):
        """Number of retries registered so far."""
        return self._retries

    def register_query(self):
        """Record one more query."""
        self._queries = self._queries + 1

    def register_retry(self):
        """Record one more retry."""
        self._retries = self._retries + 1
class RetryTracker(Retry):
    """Retry policy that reports each granted retry to a tracker object.

    Accepts one extra keyword argument, ``request_tracker``: an object with
    a ``register_retry()`` method (or ``None`` to disable tracking). All
    other arguments are forwarded to ``Retry`` unchanged.
    """

    def __init__(self, *args, **kwargs):
        # Remove the custom keyword so it is not forwarded to Retry.__init__.
        self._request_tracker = kwargs.pop('request_tracker', None)
        super().__init__(*args, **kwargs)

    def new(self, **kw):
        """Create the follow-up Retry instance, carrying the tracker along."""
        kw['request_tracker'] = self._request_tracker
        return super().new(**kw)

    def increment(self, method, url, *args, **kwargs):
        """Return the next Retry object and register the retry it permits.

        The parent call is made first: when it raises (for example because
        the retry budget is exhausted) no further attempt takes place, so
        nothing is registered and the exception propagates unchanged.
        """
        next_retry = super().increment(method, url, *args, **kwargs)
        # Explicit None check: a tracker object must never be skipped just
        # because it happens to evaluate as falsy.
        if self._request_tracker is not None:
            self._request_tracker.register_retry()
        return next_retry
class RetrySession(requests.Session):
    """Session that registers a query each time a request is prepared."""

    def __init__(self, retry):
        """Store the tracker object passed as ``retry``.

        Args:
            retry: object exposing ``register_query()``; it is called once
                per ``prepare_request`` invocation.
        """
        super().__init__()
        self._requests_count = retry

    def prepare_request(self, request):
        """Register one query, then delegate to ``Session.prepare_request``."""
        tracker = self._requests_count
        tracker.register_query()
        prepared = super().prepare_request(request)
        return prepared
class RequestManager:
    """Lazily build and hand out a session configured with retry logic.

    Args:
        request_tracker: optional tracker object. When given, retries are
            reported through ``RetryTracker`` and the session is a
            ``RetrySession``; otherwise a plain ``Retry`` policy and a plain
            ``requests.Session`` are used.
    """

    def __init__(self, request_tracker=None):
        # session settings
        self.__session = None
        self.__request_tracker = request_tracker
        # retry logic specification
        retry_options = dict(
            total=11,
            backoff_factor=1,
            status_forcelist=[401, 408, 429, 500, 502, 504],
            allowed_methods=frozenset(["GET"]),
        )
        if self.__request_tracker is not None:
            retry_options['request_tracker'] = self.__request_tracker
            self.__retries = RetryTracker(**retry_options)
        else:
            self.__retries = Retry(**retry_options)

    @property
    def session(self):
        """Return the managed session, creating it on first access."""
        if self.__session is None:
            # create new session
            if self.__request_tracker is not None:
                self.__session = RetrySession(self.__request_tracker)
            else:
                self.__session = requests.Session()
            # Mount the retry logic on both schemes so that plain-HTTP
            # requests are retried (and tracked) the same way as HTTPS ones.
            for prefix in ('http://', 'https://'):
                adapter = requests.adapters.HTTPAdapter(max_retries=self.__retries)
                self.__session.mount(prefix, adapter)
        return self.__session

    @session.setter
    def session(self, value):
        """Reject any attempt to replace the managed session.

        Raises:
            AttributeError: always.
        """
        raise AttributeError('Setting session attribute is prohibited.')
# Wire a tracker into the manager and fetch every URL through its session.
request_tracker = RequestTracker()
request_manager = RequestManager(request_tracker=request_tracker)
session = request_manager.session
urls = ['https://httpbin.org/status/500']
with session as s:
    for url in urls:
        try:
            response = s.get(url)
        except requests.exceptions.RequestException:
            # Once the retry budget is exhausted the request raises (e.g.
            # RetryError for a status in status_forcelist). The counters
            # have been updated by then, so move on to the next URL
            # instead of aborting before the totals are reported.
            response = None
print(request_tracker.queries)
print(request_tracker.retries)
Run Code Online (Sandbox Code Playgroud)