From 8482899c62d90f960fc9cc79cba68720475a4ac2 Mon Sep 17 00:00:00 2001 From: Alireza Davoudian Date: Fri, 17 Nov 2023 15:51:00 +0100 Subject: [PATCH] Added error handling --- daget/__main__.py | 11 +++++++++-- daget/exceptions.py | 15 ++++++++++++--- daget/utils.py | 31 +++++++++++++++++++++++++------ 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/daget/__main__.py b/daget/__main__.py index 2bc2b64..5d522bb 100755 --- a/daget/__main__.py +++ b/daget/__main__.py @@ -22,8 +22,15 @@ def main(): # get doi/url and resolve to landing page try: url = get_redirect_url(args.url) + + if get_file_list_from_repo(url) is None: + raise RepoError(f'Landing page is not supported {url}') except ResolveError as err: - print(bcolors.FAIL, f'error resolving {args.url}', bcolors.ENDC) + print(bcolors.FAIL, f'Error resolving {args.url}: {err}', bcolors.ENDC) + exit(1) + except Exception as e: + # Catch any other unexpected exceptions, including URLError + print(bcolors.FAIL, f'{e}', bcolors.ENDC) exit(1) print(f'landing page: {url}') @@ -37,7 +44,7 @@ def main(): if len(os.listdir(desitnation)) != 0: print(bcolors.FAIL, f'{desitnation} must be a empty directory or new directory path', bcolors.ENDC) exit(1) - + print(f'destination: {desitnation}') files = get_file_list_from_repo(url) diff --git a/daget/exceptions.py b/daget/exceptions.py index 87ec07d..cf3b7a0 100644 --- a/daget/exceptions.py +++ b/daget/exceptions.py @@ -1,5 +1,14 @@ class ResolveError(ValueError): - pass + def __init__(self, message, http_response_code=None): + super().__init__(message) + self.http_response_code = http_response_code -class RepoError(Exception): - pass \ No newline at end of file +class RepoError(ValueError): + def __init__(self, message, url, supported_urls=None, http_response_code=None): + super().__init__(message) + self.url = url + self.supported_urls = supported_urls or ["dataverse.harvard.edu", "dataverse.no", "snd.se/catalogue", "su.figshare.com", "figshare.scilifelab.se", "zenodo.org"] + self.http_response_code = http_response_code + + if url not in self.supported_urls: + raise self \ No newline at end of file diff --git a/daget/utils.py b/daget/utils.py index 9f94551..5715e44 100644 --- a/daget/utils.py +++ b/daget/utils.py @@ -1,20 +1,39 @@ +import re +import socket import urllib, urllib.error from daget.exceptions import RepoError, ResolveError def get_redirect_url(url): # if url provided is a shorthand doi (TODO: check with regex) - if not url.startswith(('http://', 'https://')): + if not re.match(r'^https?://', url): url = 'https://doi.org/' + url opener = urllib.request.build_opener() opener.addheaders = [('User-Agent', 'daget')] urllib.request.install_opener(opener) - try: - r = urllib.request.urlopen(url) - return r.geturl() - except urllib.error.HTTPError: - raise ResolveError(f"{url} not found") + + try: + response = urllib.request.urlopen(url) + return response.geturl() + + except urllib.error.HTTPError as e: + # Catch HTTP errors and extract relevant information + error_message = f"HTTPError: {e.code} - {e.reason}" + raise ResolveError(f"{url} not found. {error_message}") + + except urllib.error.URLError as e: + # Catch URL errors (e.g., network issues) and provide relevant information + if isinstance(e.reason, str): + error_message = f"URLError: {e.reason}" + else: + error_message = f"URLError: {str(e.reason)}" + + # Additional handling for socket.gaierror + if isinstance(e.reason, socket.gaierror): + error_message += f", errno: {e.reason.errno}, strerror: {e.reason.strerror}" + + raise ResolveError(f"Error connecting to {url}. {error_message}") def download_file(url, target): opener = urllib.request.build_opener()