From 802261bf1fe70b06cea41ccc17a1e13b289662fe Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 30 Apr 2026 20:43:07 -0400 Subject: [PATCH 1/4] fix: log missing transcripts, but do not raise A transcript file may be missing because the transcripts were never properly imported. But regardless of how we get in that state, we shouldn't break course export. The best we can do is log the error and then have get_video_transcript_data return None (i.e. we have no data for this transcript). --- edxval/__init__.py | 2 +- edxval/api.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/edxval/__init__.py b/edxval/__init__.py index cde3e292..d05eb3c0 100644 --- a/edxval/__init__.py +++ b/edxval/__init__.py @@ -2,4 +2,4 @@ init """ -__version__ = '4.0.0' +__version__ = '4.0.1' diff --git a/edxval/api.py b/edxval/api.py index f7cb8619..4c185dd0 100644 --- a/edxval/api.py +++ b/edxval/api.py @@ -291,7 +291,13 @@ def get_video_transcript_data(video_id, language_code): video_transcript = VideoTranscript.get_or_none(video_id, language_code) if video_transcript: try: - return dict(file_name=video_transcript.filename, content=video_transcript.transcript.file.read()) + return dict( + file_name=video_transcript.filename, + content=video_transcript.transcript.file.read() + ) + except FileNotFoundError as f_err: + logger.error('Transcript for video %s not found: %s', video_id, f_err.filename) + return None except Exception: logger.exception( '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', From 33464df822572949932173865c5782e25e051b8d Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 1 May 2026 19:17:12 -0400 Subject: [PATCH 2/4] fix: missing transcript should not fail export Before this commit, get_video_transcript_data() would raise a FileNotFoundError when the transcript file was missing. 
This can happen if there are references to edx-val video_ids in the course content, but the corresponding transcript files were never properly exported. This exception was never handled anywhere in edx-val, so it would fail the course export entirely. Now we catch the FileField's underlying FileNotFoundError and re-raise a TranscriptNotFoundError, and then catch that a couple layers up in create_transcripts_xml(). This means that a missing transcript will not fail other transcripts for the same VideoBlock (though in practice, if one is broken, the others are likely broken as well). --- edxval/api.py | 9 +++++---- edxval/tests/test_api.py | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/edxval/api.py b/edxval/api.py index 4c185dd0..8f26114a 100644 --- a/edxval/api.py +++ b/edxval/api.py @@ -296,8 +296,9 @@ def get_video_transcript_data(video_id, language_code): content=video_transcript.transcript.file.read() ) except FileNotFoundError as f_err: - logger.error('Transcript for video %s not found: %s', video_id, f_err.filename) - return None + err_msg = f"Transcript for video {video_id} not found: {f_err.filename}" + logger.error(err_msg) + raise TranscriptNotFoundError(err_msg) from f_err except Exception: logger.exception( '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', @@ -1137,9 +1138,9 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir): static_dir=static_file_dir ) transcript_files_map[language_code] = transcript_filename - except TranscriptsGenerationException: + except (TranscriptsGenerationException, TranscriptNotFoundError): # we don't want to halt export in this case, just log and move to the next transcript. 
- logger.exception('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id) + logger.error('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id) continue SubElement( diff --git a/edxval/tests/test_api.py b/edxval/tests/test_api.py index a3d56934..86bd8b0e 100644 --- a/edxval/tests/test_api.py +++ b/edxval/tests/test_api.py @@ -40,6 +40,7 @@ VideoSortField, ) from edxval.config.waffle import OVERRIDE_EXISTING_IMPORTED_TRANSCRIPTS +from edxval.exceptions import TranscriptNotFoundError from edxval.models import ( LIST_MAX_ITEMS, CourseVideo, @@ -2791,7 +2792,7 @@ def test_get_video_transcript_data_exception(self, mock_logger): """ video_id = 'medium-soaker' language_code = 'zh' - with self.assertRaises(IOError): + with self.assertRaises(TranscriptNotFoundError): api.get_video_transcript_data(video_id, language_code) mock_logger.exception.assert_called_with( From 80f86a71f9515a58863830454a60f5dbe411bb79 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 1 May 2026 19:32:58 -0400 Subject: [PATCH 3/4] temp: fix test logger mock verification --- edxval/tests/test_api.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/edxval/tests/test_api.py b/edxval/tests/test_api.py index 86bd8b0e..338c1397 100644 --- a/edxval/tests/test_api.py +++ b/edxval/tests/test_api.py @@ -2792,13 +2792,12 @@ def test_get_video_transcript_data_exception(self, mock_logger): """ video_id = 'medium-soaker' language_code = 'zh' + with self.assertRaises(TranscriptNotFoundError): api.get_video_transcript_data(video_id, language_code) - mock_logger.exception.assert_called_with( - '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', - video_id, - language_code, + mock_logger.error.assert_called_with( + "Transcript for video medium-soaker not found: /mnt/edx-val/non/existent/transcript/path" ) def test_get_video_transcript_data_not_found(self): From 
b40eea44df77616f4deb467e893b248bbdd2dc6a Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 1 May 2026 19:57:13 -0400 Subject: [PATCH 4/4] fix: modify test to run properly on other envs --- edxval/tests/test_api.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/edxval/tests/test_api.py b/edxval/tests/test_api.py index 338c1397..3af0162d 100644 --- a/edxval/tests/test_api.py +++ b/edxval/tests/test_api.py @@ -2796,9 +2796,11 @@ def test_get_video_transcript_data_exception(self, mock_logger): with self.assertRaises(TranscriptNotFoundError): api.get_video_transcript_data(video_id, language_code) - mock_logger.error.assert_called_with( - "Transcript for video medium-soaker not found: /mnt/edx-val/non/existent/transcript/path" - ) + args, _kwargs = mock_logger.error.call_args + logged_msg = args[0] + assert logged_msg.startswith("Transcript for video medium-soaker not found:") + # Exact path varies depending on how test settings config MEDIA + assert logged_msg.endswith("non/existent/transcript/path") def test_get_video_transcript_data_not_found(self): """