From b1e35d0d6d82f278513b82f9dcf4be6edba5f9ff Mon Sep 17 00:00:00 2001 From: filak Date: Fri, 7 Jan 2022 00:11:31 +0100 Subject: [PATCH 1/2] Update app.py --- sickle/app.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sickle/app.py b/sickle/app.py index 86e35995..a4c8faa1 100644 --- a/sickle/app.py +++ b/sickle/app.py @@ -89,6 +89,7 @@ def __init__(self, endpoint, default_retry_after=60, class_mapping=None, encoding=None, + custom_http_adapter=None, **request_args): self.endpoint = endpoint @@ -113,6 +114,10 @@ def __init__(self, endpoint, self.request_args = request_args self.session = requests.Session() + if custom_http_adapter: + self.session.mount(endpoint, custom_http_adapter) + + def harvest(self, **kwargs): # pragma: no cover """Make HTTP requests to the OAI server. From c98ce36f4ff617bb156a95258cb6dd3f3375302c Mon Sep 17 00:00:00 2001 From: filak Date: Fri, 7 Jan 2022 01:03:02 +0100 Subject: [PATCH 2/2] Update app.py --- sickle/app.py | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/sickle/app.py b/sickle/app.py index a4c8faa1..0517abbf 100644 --- a/sickle/app.py +++ b/sickle/app.py @@ -12,6 +12,8 @@ import time import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry from sickle.iterator import BaseOAIIterator, OAIItemIterator from sickle.response import OAIResponse @@ -56,11 +58,11 @@ class Sickle(object): use the value from the retry-after header (if present) and will wait the specified number of seconds between retries. :type max_retries: int - :param retry_status_codes: HTTP status codes to retry (default will only retry on 503) + :param retry_status_codes: HTTP status codes to retry (default will retry on 429, 500, 502, 503 and 504) :type retry_status_codes: iterable - :param default_retry_after: default number of seconds to wait between retries in case no retry-after header is found - on the response (defaults to 60 seconds) - :type default_retry_after: int + :param retry_backoff_factor: Backoff factor to apply between retries after the second try, + if no Retry-After header is sent by the server. Default: 2.0 + :type retry_backoff_factor: float :type protocol_version: str :param class_mapping: A dictionary that maps OAI verbs to classes representing OAI items. If not provided, @@ -86,7 +88,8 @@ def __init__(self, endpoint, iterator=OAIItemIterator, max_retries=0, retry_status_codes=None, - default_retry_after=60, + default_retry_after=None, + retry_backoff_factor=2, class_mapping=None, encoding=None, custom_http_adapter=None, @@ -105,14 +108,24 @@ def __init__(self, endpoint, else: raise TypeError( "Argument 'iterator' must be subclass of %s" % BaseOAIIterator.__name__) - self.max_retries = max_retries - self.retry_status_codes = retry_status_codes or [503] - self.default_retry_after = default_retry_after + + if default_retry_after is not None: + logger.warning("default_retry_after is no longer supported, please use retry_backoff_factor instead.") + + retry_adapter = requests.adapters.HTTPAdapter(max_retries=Retry( + total=max_retries, + backoff_factor=retry_backoff_factor, + status_forcelist=retry_status_codes or [429, 500, 502, 503, 504], + method_whitelist=frozenset(['GET', 'POST']) + )) + self.session = requests.Session() + self.session.mount('https://', retry_adapter) + self.session.mount('http://', retry_adapter) + self.oai_namespace = OAI_NAMESPACE % self.protocol_version self.class_mapping = class_mapping or DEFAULT_CLASS_MAP self.encoding = encoding self.request_args = request_args - self.session = requests.Session() if custom_http_adapter: self.session.mount(endpoint, custom_http_adapter) @@ -125,23 +138,17 @@ def harvest(self, **kwargs): # pragma: no cover :rtype: :class:`sickle.OAIResponse` """ http_response = self._request(kwargs) - for _ in range(self.max_retries): - if self._is_error_code(http_response.status_code) \ - and http_response.status_code in self.retry_status_codes: - retry_after = self.get_retry_after(http_response) - logger.warning( - "HTTP %d! Retrying after %d seconds..." % (http_response.status_code, retry_after)) - time.sleep(retry_after) - http_response = self._request(kwargs) - http_response.raise_for_status() if self.encoding: http_response.encoding = self.encoding return OAIResponse(http_response, params=kwargs) def _request(self, kwargs): if self.http_method == 'GET': - return self.session.get(self.endpoint, params=kwargs, **self.request_args) - return self.session.post(self.endpoint, data=kwargs, **self.request_args) + response = self.session.get(self.endpoint, params=kwargs, **self.request_args) + else: + response = self.session.post(self.endpoint, data=kwargs, **self.request_args) + response.raise_for_status() + return response def ListRecords(self, ignore_deleted=False, **kwargs): """Issue a ListRecords request.