From e55642126f767437f5bb83d236a05e6b13735b01 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 13 Oct 2020 22:43:06 -0300 Subject: [PATCH 1/6] Retry download if closed connection without response --- audiomate/corpus/io/voxforge.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/audiomate/corpus/io/voxforge.py b/audiomate/corpus/io/voxforge.py index d5b464d..aad9b92 100644 --- a/audiomate/corpus/io/voxforge.py +++ b/audiomate/corpus/io/voxforge.py @@ -90,7 +90,14 @@ def download_files(self, file_urls, target_path): target_file_path = os.path.join(target_path, file_name) url_to_target[file_url] = target_file_path - dl_result = download.download_files(url_to_target, num_threads=self.num_workers) + while True: + try: + dl_result = download.download_files(url_to_target, num_threads=self.num_workers) + except: + logger.info('Failed to download file. Remote end closed connection without response. Trying again in 5 seconds...') + time.sleep(5) + continue + break downloaded_files = [] for url, status, path_or_msg in dl_result: From 39d6cf43af4c19de3da717dca716dcb9f18d12fe Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 13 Oct 2020 22:52:48 -0300 Subject: [PATCH 2/6] Retry download if closed connection without response. Add exception type --- audiomate/corpus/io/voxforge.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/audiomate/corpus/io/voxforge.py b/audiomate/corpus/io/voxforge.py index aad9b92..b2d973a 100644 --- a/audiomate/corpus/io/voxforge.py +++ b/audiomate/corpus/io/voxforge.py @@ -2,6 +2,7 @@ import re import tarfile import shutil +import time import requests @@ -93,8 +94,8 @@ def download_files(self, file_urls, target_path): while True: try: dl_result = download.download_files(url_to_target, num_threads=self.num_workers) - except: - logger.info('Failed to download file. Remote end closed connection without response. Trying again in 5 seconds...') + except ConnectionError as e: + logger.info('Remote end closed connection without response. Trying again in 5 seconds...', e) time.sleep(5) continue break From 3fb13890a14b3b49750bfdabd15bd4375f685438 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 13 Oct 2020 22:58:02 -0300 Subject: [PATCH 3/6] Retry download if closed connection without response. Add exception type and error. --- audiomate/corpus/io/voxforge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audiomate/corpus/io/voxforge.py b/audiomate/corpus/io/voxforge.py index b2d973a..58ebf28 100644 --- a/audiomate/corpus/io/voxforge.py +++ b/audiomate/corpus/io/voxforge.py @@ -95,7 +95,7 @@ def download_files(self, file_urls, target_path): try: dl_result = download.download_files(url_to_target, num_threads=self.num_workers) except ConnectionError as e: - logger.info('Remote end closed connection without response. Trying again in 5 seconds...', e) + logger.info('Remote end closed connection without response. Trying again in 5 seconds... %s', e) time.sleep(5) continue break From eb2f711243182ae74847dbc0a36a85aa00f5a7e0 Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 2 Dec 2020 20:38:30 -0300 Subject: [PATCH 4/6] retry on download error for tatoeba --- audiomate/corpus/io/tatoeba.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/audiomate/corpus/io/tatoeba.py b/audiomate/corpus/io/tatoeba.py index 9ec18a7..ce4e143 100644 --- a/audiomate/corpus/io/tatoeba.py +++ b/audiomate/corpus/io/tatoeba.py @@ -8,6 +8,7 @@ from audiomate.utils import download from audiomate.utils import textfile from . import base +import time logger = logutil.getLogger() @@ -136,7 +137,17 @@ def _download_audio_files(self, records, target_path): audio_file = os.path.join(audio_folder, '{}.mp3'.format(record[0])) os.makedirs(audio_folder, exist_ok=True) - download_url = 'https://audio.tatoeba.org/sentences/{}/{}.mp3'.format(record[2], record[0]) + download_url = 'https://audio.tatoeba.org/sentences/{}/{}.mp3'.format(record[2], record[0]) + while True: + try: + download.download_file(download_url, audio_file) + except ConnectionError as e: + logger.info('Remote end closed connection without response. Trying again in 5 seconds...', e) + logger.info('Remote end closed connection without response. Trying again in 5 seconds... %s', e) + time.sleep(5) + continue + break + download.download_file(download_url, audio_file) From 678a6eda1151917c9a9935074a91d7e306793dda Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 2 Dec 2020 20:44:55 -0300 Subject: [PATCH 5/6] retry on download error for tatoeba --- audiomate/corpus/io/tatoeba.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/audiomate/corpus/io/tatoeba.py b/audiomate/corpus/io/tatoeba.py index ce4e143..5981f4c 100644 --- a/audiomate/corpus/io/tatoeba.py +++ b/audiomate/corpus/io/tatoeba.py @@ -137,16 +137,16 @@ def _download_audio_files(self, records, target_path): audio_file = os.path.join(audio_folder, '{}.mp3'.format(record[0])) os.makedirs(audio_folder, exist_ok=True) - download_url = 'https://audio.tatoeba.org/sentences/{}/{}.mp3'.format(record[2], record[0]) - while True: - try: - download.download_file(download_url, audio_file) - except ConnectionError as e: - logger.info('Remote end closed connection without response. Trying again in 5 seconds...', e) - logger.info('Remote end closed connection without response. Trying again in 5 seconds... %s', e) - time.sleep(5) - continue - break + download_url = 'https://audio.tatoeba.org/sentences/{}/{}.mp3'.format(record[2], record[0]) + while True: + try: + download.download_file(download_url, audio_file) + except ConnectionError as e: + logger.info('Remote end closed connection without response. Trying again in 5 seconds...', e) + logger.info('Remote end closed connection without response. Trying again in 5 seconds... %s', e) + time.sleep(5) + continue + break download.download_file(download_url, audio_file) From 32c761301e620a6cf62f87f261cdc242cdd0ef2a Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 2 Dec 2020 20:47:03 -0300 Subject: [PATCH 6/6] retry on download error for tatoeba --- audiomate/corpus/io/tatoeba.py | 1 - 1 file changed, 1 deletion(-) diff --git a/audiomate/corpus/io/tatoeba.py b/audiomate/corpus/io/tatoeba.py index 5981f4c..5fa479c 100644 --- a/audiomate/corpus/io/tatoeba.py +++ b/audiomate/corpus/io/tatoeba.py @@ -142,7 +142,6 @@ def _download_audio_files(self, records, target_path): try: download.download_file(download_url, audio_file) except ConnectionError as e: - logger.info('Remote end closed connection without response. Trying again in 5 seconds...', e) logger.info('Remote end closed connection without response. Trying again in 5 seconds... %s', e) time.sleep(5) continue