From 0224b792d0ba8cf7343475da1e653360ce2ee501 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 30 Apr 2026 20:43:07 -0400 Subject: [PATCH] fix: missing transcript should not fail export Before this commit, get_video_transcript_data() would raise a FileNotFoundError when a transcript file was missing. This can happen if there are references to edx-val video_ids in the course content, but the corresponding transcript files were never properly exported. This exception was never handled anywhere in edx-val, so it would fail the course export entirely. Now we catch the FileField's underlying FileNotFoundError and re-raise a TranscriptNotFoundError, and then catch that a couple layers up in create_transcripts_xml(). This means that a missing transcript will not fail other transcripts for the same VideoBlock (though in practice, if one is broken, the others are likely broken as well). --- edxval/__init__.py | 2 +- edxval/api.py | 13 ++++++++++--- edxval/tests/test_api.py | 14 ++++++++------ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/edxval/__init__.py b/edxval/__init__.py index cde3e292..d05eb3c0 100644 --- a/edxval/__init__.py +++ b/edxval/__init__.py @@ -2,4 +2,4 @@ init """ -__version__ = '4.0.0' +__version__ = '4.0.1' diff --git a/edxval/api.py b/edxval/api.py index f7cb8619..8f26114a 100644 --- a/edxval/api.py +++ b/edxval/api.py @@ -291,7 +291,14 @@ def get_video_transcript_data(video_id, language_code): video_transcript = VideoTranscript.get_or_none(video_id, language_code) if video_transcript: try: - return dict(file_name=video_transcript.filename, content=video_transcript.transcript.file.read()) + return dict( + file_name=video_transcript.filename, + content=video_transcript.transcript.file.read() + ) + except FileNotFoundError as f_err: + err_msg = f"Transcript for video {video_id} not found: {f_err.filename}" + logger.error(err_msg) + raise TranscriptNotFoundError(err_msg) from f_err except Exception: 
logger.exception( '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', @@ -1131,9 +1138,9 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir): static_dir=static_file_dir ) transcript_files_map[language_code] = transcript_filename - except TranscriptsGenerationException: + except (TranscriptsGenerationException, TranscriptNotFoundError): # we don't want to halt export in this case, just log and move to the next transcript. - logger.exception('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id) + logger.error('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id) continue SubElement( diff --git a/edxval/tests/test_api.py b/edxval/tests/test_api.py index a3d56934..3af0162d 100644 --- a/edxval/tests/test_api.py +++ b/edxval/tests/test_api.py @@ -40,6 +40,7 @@ VideoSortField, ) from edxval.config.waffle import OVERRIDE_EXISTING_IMPORTED_TRANSCRIPTS +from edxval.exceptions import TranscriptNotFoundError from edxval.models import ( LIST_MAX_ITEMS, CourseVideo, @@ -2791,14 +2792,15 @@ def test_get_video_transcript_data_exception(self, mock_logger): """ video_id = 'medium-soaker' language_code = 'zh' - with self.assertRaises(IOError): + + with self.assertRaises(TranscriptNotFoundError): api.get_video_transcript_data(video_id, language_code) - mock_logger.exception.assert_called_with( - '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', - video_id, - language_code, - ) + args, _kwargs = mock_logger.error.call_args + logged_msg = args[0] + assert logged_msg.startswith("Transcript for video medium-soaker not found:") + # Exact path varies depending on how test settings config MEDIA + assert logged_msg.endswith("non/existent/transcript/path") def test_get_video_transcript_data_not_found(self): """