From 0224b792d0ba8cf7343475da1e653360ce2ee501 Mon Sep 17 00:00:00 2001
From: David Ormsbee <dave@axim.org>
Date: Thu, 30 Apr 2026 20:43:07 -0400
Subject: [PATCH] fix: missing transcript should not fail export

Before this commit, get_video_transcript_data() would raise a
FileNotFoundError when a transcript file was missing. This can happen if
there are references to edx-val video_ids in the course content, but the
corresponding transcript files were never properly exported. This
exception was never handled anywhere in edx-val, so it would fail the
course export entirely.

Now we catch the FileField's underlying FileNotFoundError and raise a
TranscriptNotFoundError from it, and then catch that a couple layers up
in create_transcripts_xml(). This means that a missing transcript is
logged and skipped, and no longer prevents the other transcripts for the
same VideoBlock from being exported (though in practice, if one is
broken, the others are likely broken as well).
---
 edxval/__init__.py       |  2 +-
 edxval/api.py            | 13 ++++++++++---
 edxval/tests/test_api.py | 14 ++++++++------
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/edxval/__init__.py b/edxval/__init__.py
index cde3e292..d05eb3c0 100644
--- a/edxval/__init__.py
+++ b/edxval/__init__.py
@@ -2,4 +2,4 @@
 init
 """
 
-__version__ = '4.0.0'
+__version__ = '4.0.1'
diff --git a/edxval/api.py b/edxval/api.py
index f7cb8619..8f26114a 100644
--- a/edxval/api.py
+++ b/edxval/api.py
@@ -291,7 +291,14 @@ def get_video_transcript_data(video_id, language_code):
     video_transcript = VideoTranscript.get_or_none(video_id, language_code)
     if video_transcript:
         try:
-            return dict(file_name=video_transcript.filename, content=video_transcript.transcript.file.read())
+            return dict(
+                file_name=video_transcript.filename,
+                content=video_transcript.transcript.file.read()
+            )
+        except FileNotFoundError as f_err:
+            err_msg = f"Transcript for video {video_id} not found: {f_err.filename}"
+            logger.error(err_msg)
+            raise TranscriptNotFoundError(err_msg) from f_err
         except Exception:
             logger.exception(
                 '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
@@ -1131,9 +1138,9 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
                 static_dir=static_file_dir
             )
             transcript_files_map[language_code] = transcript_filename
-        except TranscriptsGenerationException:
+        except (TranscriptsGenerationException, TranscriptNotFoundError):
             # we don't want to halt export in this case, just log and move to the next transcript.
-            logger.exception('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id)
+            logger.error('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id)
             continue
 
         SubElement(
diff --git a/edxval/tests/test_api.py b/edxval/tests/test_api.py
index a3d56934..3af0162d 100644
--- a/edxval/tests/test_api.py
+++ b/edxval/tests/test_api.py
@@ -40,6 +40,7 @@
     VideoSortField,
 )
 from edxval.config.waffle import OVERRIDE_EXISTING_IMPORTED_TRANSCRIPTS
+from edxval.exceptions import TranscriptNotFoundError
 from edxval.models import (
     LIST_MAX_ITEMS,
     CourseVideo,
@@ -2791,14 +2792,15 @@ def test_get_video_transcript_data_exception(self, mock_logger):
         """
         video_id = 'medium-soaker'
         language_code = 'zh'
-        with self.assertRaises(IOError):
+
+        with self.assertRaises(TranscriptNotFoundError):
             api.get_video_transcript_data(video_id, language_code)
 
-        mock_logger.exception.assert_called_with(
-            '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
-            video_id,
-            language_code,
-        )
+        args, _kwargs = mock_logger.error.call_args
+        logged_msg = args[0]
+        assert logged_msg.startswith("Transcript for video medium-soaker not found:")
+        # Exact path varies depending on how test settings config MEDIA
+        assert logged_msg.endswith("non/existent/transcript/path")
 
     def test_get_video_transcript_data_not_found(self):
         """