From 82d413165f1cc34f7c1263df4baf78db2090e142 Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Fri, 27 Mar 2026 23:30:18 -0500
Subject: [PATCH 1/6] feat: send bot error message to dev channel instead

---
 .../core/services/outbox_consumer.py          | 19 +----
 .../unit/telegram_bot/test_outbox_consumer.py | 81 +++++++++----------
 2 files changed, 44 insertions(+), 56 deletions(-)

diff --git a/apps/telegram-bot/core/services/outbox_consumer.py b/apps/telegram-bot/core/services/outbox_consumer.py
index 6415235..1ec3991 100644
--- a/apps/telegram-bot/core/services/outbox_consumer.py
+++ b/apps/telegram-bot/core/services/outbox_consumer.py
@@ -4,7 +4,7 @@
 import redis.asyncio as aioredis
 
 from core.config import settings
-from core.services.message_sender import send_item_message
+from core.services.message_sender import send_item_message, send_debug_channel
 from fastfetchbot_shared.utils.logger import logger
 
 _redis: aioredis.Redis | None = None
@@ -42,7 +42,9 @@ async def _consume_loop() -> None:
 
             if error:
                 logger.warning(f"[{job_id}] Scrape failed: {error}")
-                await _send_error_to_chat(chat_id, error)
+                await send_debug_channel(
+                    f"[Scrape Error] job_id={job_id}\nchat_id: {chat_id}\n\n{error}"
+                )
             else:
                 metadata_item = payload.get("metadata_item")
                 if metadata_item and chat_id:
@@ -63,19 +65,6 @@ async def _consume_loop() -> None:
             await asyncio.sleep(1)
 
 
-async def _send_error_to_chat(chat_id: int | str, error: str) -> None:
-    """Send an error notification to the user's chat."""
-    try:
-        from core.services.bot_app import application
-
-        await application.bot.send_message(
-            chat_id=chat_id,
-            text=f"Sorry, an error occurred while processing your request:\n\n{error}",
-        )
-    except Exception as e:
-        logger.error(f"Failed to send error message to chat {chat_id}: {e}")
-
-
 async def start(bot_id: int) -> None:
     """Start the outbox consumer as a background asyncio task.
 
diff --git a/tests/unit/telegram_bot/test_outbox_consumer.py b/tests/unit/telegram_bot/test_outbox_consumer.py
index d3d1fb4..67f6cd8 100644
--- a/tests/unit/telegram_bot/test_outbox_consumer.py
+++ b/tests/unit/telegram_bot/test_outbox_consumer.py
@@ -136,7 +136,39 @@ async def brpop_side_effect(*args, **kwargs):
 
 class TestConsumeLoopError:
     @pytest.mark.asyncio
-    async def test_sends_error_to_chat(self, mock_redis):
+    async def test_sends_error_to_debug_channel(self, mock_redis):
+        payload = _make_payload(error="scraper failed", chat_id=99, job_id="j42")
+        call_count = 0
+
+        async def brpop_side_effect(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return ("scrape:outbox", payload)
+            raise asyncio.CancelledError()
+
+        mock_redis.brpop = AsyncMock(side_effect=brpop_side_effect)
+
+        with patch(
+            "core.services.outbox_consumer.aioredis.from_url",
+            return_value=mock_redis,
+        ), patch(
+            "core.services.outbox_consumer.send_debug_channel",
+            new_callable=AsyncMock,
+        ) as mock_debug:
+            from core.services.outbox_consumer import _consume_loop
+
+            await _consume_loop()
+
+            mock_debug.assert_awaited_once()
+            msg = mock_debug.call_args[0][0]
+            assert "scraper failed" in msg
+            assert "j42" in msg
+            assert "99" in msg
+
+    @pytest.mark.asyncio
+    async def test_does_not_send_error_to_user_chat(self, mock_redis):
+        """Scrape errors should NOT notify the user."""
         payload = _make_payload(error="scraper failed", chat_id=99)
         call_count = 0
 
@@ -153,14 +185,18 @@ async def brpop_side_effect(*args, **kwargs):
             "core.services.outbox_consumer.aioredis.from_url",
             return_value=mock_redis,
         ), patch(
-            "core.services.outbox_consumer._send_error_to_chat",
+            "core.services.outbox_consumer.send_debug_channel",
             new_callable=AsyncMock,
-        ) as mock_err:
+        ), patch(
+            "core.services.outbox_consumer.send_item_message",
+            new_callable=AsyncMock,
+        ) as mock_send:
             from core.services.outbox_consumer import _consume_loop
 
             await _consume_loop()
 
-            mock_err.assert_awaited_once_with(99, "scraper failed")
+            # User should receive no message at all
+            mock_send.assert_not_awaited()
 
 
 # ---------------------------------------------------------------------------
@@ -222,43 +258,6 @@ async def brpop_side_effect(*args, **kwargs):
         assert call_count == 3  # 2 None returns, then cancel
 
 
-# ---------------------------------------------------------------------------
-# _send_error_to_chat
-# ---------------------------------------------------------------------------
-
-
-class TestSendErrorToChat:
-    @pytest.mark.asyncio
-    async def test_sends_error_message(self):
-        mock_bot = AsyncMock()
-        mock_app = MagicMock()
-        mock_app.bot = mock_bot
-
-        # _send_error_to_chat uses: from core.services.bot_app import application
-        with patch("core.services.bot_app.application", mock_app):
-            from core.services.outbox_consumer import _send_error_to_chat
-
-            await _send_error_to_chat(42, "something went wrong")
-
-        mock_bot.send_message.assert_awaited_once()
-        call_kwargs = mock_bot.send_message.call_args.kwargs
-        assert call_kwargs["chat_id"] == 42
-        assert "something went wrong" in call_kwargs["text"]
-
-    @pytest.mark.asyncio
-    async def test_handles_send_failure_gracefully(self):
-        mock_bot = AsyncMock()
-        mock_bot.send_message = AsyncMock(side_effect=RuntimeError("telegram down"))
-        mock_app = MagicMock()
-        mock_app.bot = mock_bot
-
-        with patch("core.services.bot_app.application", mock_app):
-            from core.services.outbox_consumer import _send_error_to_chat
-
-            # Should not raise
-            await _send_error_to_chat(42, "error")
-
-
 # ---------------------------------------------------------------------------
 # start / stop
 # ---------------------------------------------------------------------------

From b542a68a29276c4f760883baf282c6ac48603fbc Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Sat, 28 Mar 2026 01:16:32 -0500
Subject: [PATCH 2/6] feat: add exception handling

---
 apps/api/src/main.py                          | 21 ++++-
 .../core/services/message_sender.py           | 39 +++++----
 apps/telegram-bot/core/webhook/server.py      | 33 +++++---
 .../fastfetchbot_file_export/pdf_export.py    | 22 ++++--
 .../fastfetchbot_file_export/transcribe.py    | 79 ++++++++++---------
 .../video_download.py                         | 10 +--
 .../shared/fastfetchbot_shared/exceptions.py  | 26 ++++++
 .../services/scrapers/bluesky/scraper.py      |  1 +
 .../services/scrapers/common.py               |  5 +-
 .../services/scrapers/douban/__init__.py      |  5 +-
 .../scrapers/general/firecrawl_client.py      |  3 +-
 .../services/scrapers/general/zyte.py         |  3 +-
 .../services/scrapers/instagram/__init__.py   |  4 +-
 .../services/scrapers/scraper_manager.py      |  3 +-
 .../services/scrapers/threads/__init__.py     |  2 +-
 .../services/scrapers/twitter/__init__.py     | 13 ++-
 .../services/scrapers/weibo/scraper.py        | 22 +++---
 .../services/scrapers/xiaohongshu/adaptar.py  | 21 ++---
 .../services/scrapers/zhihu/__init__.py       | 38 ++++-----
 .../services/telegraph/__init__.py            |  2 +-
 .../fastfetchbot_shared/utils/network.py      | 19 +++--
 21 files changed, 230 insertions(+), 141 deletions(-)
 create mode 100644 packages/shared/fastfetchbot_shared/exceptions.py

diff --git a/apps/api/src/main.py b/apps/api/src/main.py
index dea43d9..70c28f5 100644
--- a/apps/api/src/main.py
+++ b/apps/api/src/main.py
@@ -1,6 +1,7 @@
 import sentry_sdk
 
 from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
 from contextlib import asynccontextmanager
 from starlette.middleware.base import BaseHTTPMiddleware
 
@@ -8,6 +9,7 @@
 from src.routers import inoreader, scraper_routers, scraper
 from src.config import settings
 from fastfetchbot_shared.utils.logger import logger
+from fastfetchbot_shared.exceptions import FastFetchBotError
 
 SENTRY_DSN = ""
 
@@ -38,12 +40,27 @@ def __init__(self, app):
 
     async def dispatch(self, request: Request, call_next):
         logger.info(f"{request.method} {request.url}")
-        response = await call_next(request)
-        return response
+        try:
+            response = await call_next(request)
+            return response
+        except Exception:
+            logger.exception(f"Unhandled error during {request.method} {request.url}")
+            raise
 
 
 def create_app():
     fastapi_app = FastAPI(lifespan=lifespan)
+
+    @fastapi_app.exception_handler(FastFetchBotError)
+    async def fastfetchbot_error_handler(request: Request, exc: FastFetchBotError):
+        logger.error(f"Domain error on {request.method} {request.url}: {exc}")
+        return JSONResponse(status_code=502, content={"error": str(exc)})
+
+    @fastapi_app.exception_handler(Exception)
+    async def generic_error_handler(request: Request, exc: Exception):
+        logger.exception(f"Unhandled error on {request.method} {request.url}")
+        return JSONResponse(status_code=500, content={"error": "Internal server error"})
+
     fastapi_app.add_middleware(LogMiddleware)
     fastapi_app.include_router(inoreader.router)
     fastapi_app.include_router(scraper.router)
diff --git a/apps/telegram-bot/core/services/message_sender.py b/apps/telegram-bot/core/services/message_sender.py
index 666b568..a2ccc76 100644
--- a/apps/telegram-bot/core/services/message_sender.py
+++ b/apps/telegram-bot/core/services/message_sender.py
@@ -163,9 +163,8 @@ async def send_item_message(
                 else False,
                 disable_notification=True,
             )
-    except Exception as e:
-        logger.error(e)
-        traceback.print_exc()
+    except Exception:
+        logger.exception("Failed to send item message")
         await send_debug_channel(traceback.format_exc())
 
 
@@ -234,9 +233,13 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple:
                 "https",
             ]:  # if the url is a http url, download the file
                 file_format = "mp4" if media_item["media_type"] == "video" else None
-                io_object = await download_file_by_metadata_item(
-                    media_item["url"], data=data, file_format=file_format
-                )
+                try:
+                    io_object = await download_file_by_metadata_item(
+                        media_item["url"], data=data, file_format=file_format
+                    )
+                except Exception:
+                    logger.warning(f"Skipping media download: {media_item['url']}")
+                    continue
                 filename = io_object.name
                 file_size = io_object.size
             else:  # if the url is a local file path, just add it to the media group
@@ -300,9 +303,13 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple:
                         or img_width > settings.TELEGRAM_IMAGE_DIMENSION_LIMIT
                         or img_height > settings.TELEGRAM_IMAGE_DIMENSION_LIMIT
                 ) and data["category"] not in ["xiaohongshu"]:
-                    io_object = await download_file_by_metadata_item(
-                        url=image_url, data=data
-                    )
+                    try:
+                        io_object = await download_file_by_metadata_item(
+                            url=image_url, data=data
+                        )
+                    except Exception:
+                        logger.warning(f"Skipping document download: {image_url}")
+                        continue
                     if not io_object.name.endswith(".gif"):
                         if not io_object.name.endswith(ext.lower()):
                             io_object.name = io_object.name + "." + ext.lower()
@@ -312,11 +319,15 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple:
                         )
                         file_counter += 1
             elif media_item["media_type"] == "gif":
-                io_object = await download_file_by_metadata_item(
-                    url=media_item["url"],
-                    data=data,
-                    file_name="gif_image-" + str(media_counter) + ".gif",
-                )
+                try:
+                    io_object = await download_file_by_metadata_item(
+                        url=media_item["url"],
+                        data=data,
+                        file_name="gif_image-" + str(media_counter) + ".gif",
+                    )
+                except Exception:
+                    logger.warning(f"Skipping gif download: {media_item['url']}")
+                    continue
                 io_object.name = io_object.name + ".gif"
                 media_group.append(InputMediaAnimation(io_object))
             elif media_item["media_type"] == "video":
diff --git a/apps/telegram-bot/core/webhook/server.py b/apps/telegram-bot/core/webhook/server.py
index 96ae377..8deb01a 100644
--- a/apps/telegram-bot/core/webhook/server.py
+++ b/apps/telegram-bot/core/webhook/server.py
@@ -51,24 +51,39 @@ async def lifespan(app):
         await database.shutdown()
 
 
+def _log_task_exception(task: asyncio.Task):
+    """Callback for fire-and-forget tasks to ensure exceptions are logged."""
+    if not task.cancelled() and task.exception():
+        logger.exception("Unhandled error in background task", exc_info=task.exception())
+
+
 async def telegram_webhook(request: Request):
     secret = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
     if secret != settings.TELEGRAM_BOT_SECRET_TOKEN:
         return JSONResponse({"error": "unauthorized"}, status_code=401)
-    data = await request.json()
+    try:
+        data = await request.json()
+    except Exception:
+        logger.exception("Failed to parse webhook request body")
+        return JSONResponse({"error": "invalid JSON"}, status_code=400)
     logger.debug(f"Telegram webhook update received: {data.get('update_id', 'unknown')}")
-    asyncio.create_task(process_telegram_update(data))
+    task = asyncio.create_task(process_telegram_update(data))
+    task.add_done_callback(_log_task_exception)
     return JSONResponse({"status": "ok"})
 
 
 async def send_message_endpoint(request: Request):
-    data = await request.json()
-    metadata_item = data["data"]
-    chat_id = data.get("chat_id")
-    if isinstance(chat_id, str) and chat_id.startswith("-"):
-        chat_id = int(chat_id)
-    await send_item_message(metadata_item, chat_id=chat_id)
-    return JSONResponse({"status": "ok"})
+    try:
+        data = await request.json()
+        metadata_item = data["data"]
+        chat_id = data.get("chat_id")
+        if isinstance(chat_id, str) and chat_id.startswith("-"):
+            chat_id = int(chat_id)
+        await send_item_message(metadata_item, chat_id=chat_id)
+        return JSONResponse({"status": "ok"})
+    except Exception:
+        logger.exception("Failed to handle send_message request")
+        return JSONResponse({"error": "Internal server error"}, status_code=500)
 
 
 async def health(request: Request):
diff --git a/packages/file-export/fastfetchbot_file_export/pdf_export.py b/packages/file-export/fastfetchbot_file_export/pdf_export.py
index e9ec16c..f4a60d6 100644
--- a/packages/file-export/fastfetchbot_file_export/pdf_export.py
+++ b/packages/file-export/fastfetchbot_file_export/pdf_export.py
@@ -2,6 +2,8 @@
 
 from weasyprint import HTML, CSS
 from weasyprint.text.fonts import FontConfiguration
+from loguru import logger
+from fastfetchbot_shared.exceptions import FileExportError
 
 CSS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pdf_export.css")
 
@@ -19,7 +21,7 @@ def convert_html_to_pdf(
     elif html_string:
         html_item = HTML(string=html_string)
     else:
-        raise ValueError("Either html_string or html_file must be provided")
+        raise FileExportError("Either html_string or html_file must be provided")
     html_item.write_pdf(output_filename, stylesheets=[css_item])
 
 
@@ -30,10 +32,14 @@ def export_pdf(
     download_dir: str = "/tmp",
 ) -> str:
     """Export HTML to PDF and return the output file path."""
-    output_path = os.path.join(download_dir, output_filename)
-    convert_html_to_pdf(
-        output_filename=output_path,
-        html_string=html_string,
-        html_file=html_file,
-    )
-    return output_path
+    try:
+        output_path = os.path.join(download_dir, output_filename)
+        convert_html_to_pdf(
+            output_filename=output_path,
+            html_string=html_string,
+            html_file=html_file,
+        )
+        return output_path
+    except Exception as e:
+        logger.exception(f"PDF export failed for {output_filename}")
+        raise FileExportError(f"PDF export failed for {output_filename}") from e
diff --git a/packages/file-export/fastfetchbot_file_export/transcribe.py b/packages/file-export/fastfetchbot_file_export/transcribe.py
index 54c81f4..94b6cae 100644
--- a/packages/file-export/fastfetchbot_file_export/transcribe.py
+++ b/packages/file-export/fastfetchbot_file_export/transcribe.py
@@ -3,6 +3,7 @@
 from pydub import AudioSegment
 from openai import OpenAI
 from loguru import logger
+from fastfetchbot_shared.exceptions import FileExportError
 
 TRANSCRIBE_MODEL = "whisper-1"
 SEGMENT_LENGTH = 5 * 60  # 5 minutes in seconds
@@ -74,40 +75,44 @@ def get_audio_text(audio_file: str, openai_api_key: str) -> str:
 
     Returns formatted string with summary and full transcript.
     """
-    client = OpenAI(api_key=openai_api_key)
-    transcript = ""
-    AudioSegment.converter = "ffmpeg"
-    audio_file_non_ext, audio_file_ext = os.path.splitext(audio_file)
-    ext = audio_file_ext.lstrip(".")
-    audio_item = AudioSegment.from_file(audio_file, ext)
-    start_trim = milliseconds_until_sound(audio_item)
-    audio_item = audio_item[start_trim:]
-    audio_length = int(audio_item.duration_seconds) + 1
-
-    for index, i in enumerate(range(0, audio_length * 1000, SEGMENT_LENGTH * 1000)):
-        start_time = i
-        end_time = i + SEGMENT_LENGTH * 1000
-        if end_time >= audio_length * 1000:
-            audio_segment = audio_item[start_time:]
-        else:
-            audio_segment = audio_item[start_time:end_time]
-
-        segment_path = f"{audio_file_non_ext}-{index + 1}{audio_file_ext}"
-        audio_segment.export(segment_path)
-        logger.info(f"audio_segment_path: {segment_path}")
-
-        with open(segment_path, "rb") as f:
-            result = client.audio.transcriptions.create(
-                model=TRANSCRIBE_MODEL, file=f
-            )
-            transcript += result.text
-
-        os.remove(segment_path)
-
-    transcript = punctuation_assistant(client, transcript)
-    transcript = (
-        f"全文总结：\n{summary_assistant(client, transcript)}\n原文：\n{transcript}"
-    )
-    logger.info(f"transcript: {transcript}")
-    os.remove(audio_file)
-    return transcript
+    try:
+        client = OpenAI(api_key=openai_api_key)
+        transcript = ""
+        AudioSegment.converter = "ffmpeg"
+        audio_file_non_ext, audio_file_ext = os.path.splitext(audio_file)
+        ext = audio_file_ext.lstrip(".")
+        audio_item = AudioSegment.from_file(audio_file, ext)
+        start_trim = milliseconds_until_sound(audio_item)
+        audio_item = audio_item[start_trim:]
+        audio_length = int(audio_item.duration_seconds) + 1
+
+        for index, i in enumerate(range(0, audio_length * 1000, SEGMENT_LENGTH * 1000)):
+            start_time = i
+            end_time = i + SEGMENT_LENGTH * 1000
+            if end_time >= audio_length * 1000:
+                audio_segment = audio_item[start_time:]
+            else:
+                audio_segment = audio_item[start_time:end_time]
+
+            segment_path = f"{audio_file_non_ext}-{index + 1}{audio_file_ext}"
+            audio_segment.export(segment_path)
+            logger.info(f"audio_segment_path: {segment_path}")
+
+            with open(segment_path, "rb") as f:
+                result = client.audio.transcriptions.create(
+                    model=TRANSCRIBE_MODEL, file=f
+                )
+                transcript += result.text
+
+            os.remove(segment_path)
+
+        transcript = punctuation_assistant(client, transcript)
+        transcript = (
+            f"全文总结：\n{summary_assistant(client, transcript)}\n原文：\n{transcript}"
+        )
+        logger.info(f"transcript: {transcript}")
+        os.remove(audio_file)
+        return transcript
+    except Exception as e:
+        logger.exception(f"Audio transcription failed for {audio_file}")
+        raise FileExportError(f"Audio transcription failed for {audio_file}") from e
diff --git a/packages/file-export/fastfetchbot_file_export/video_download.py b/packages/file-export/fastfetchbot_file_export/video_download.py
index e70cb73..45b4b4f 100644
--- a/packages/file-export/fastfetchbot_file_export/video_download.py
+++ b/packages/file-export/fastfetchbot_file_export/video_download.py
@@ -1,8 +1,8 @@
 import os
-import traceback
 
 from loguru import logger
 from yt_dlp import YoutubeDL
+from fastfetchbot_shared.exceptions import FileExportError
 
 
 def get_video_orientation(content_info: dict, extractor: str) -> str:
@@ -39,7 +39,7 @@ def get_format_for_orientation(
         if hd and bilibili_cookie:
             return "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"
         return "bv*[height<=480]+ba/b[height<=480] / wv*+ba/w"
-    raise ValueError("no available extractor found")
+    raise FileExportError("no available extractor found")
 
 
 def init_yt_downloader(
@@ -201,6 +201,6 @@ def download_video(
             "orientation": orientation,
             "file_path": file_path_output,
         }
-    except Exception:
-        logger.exception(f"download_video failed: url={url}\n{traceback.format_exc()}")
-        raise
+    except Exception as e:
+        logger.exception(f"download_video failed: url={url}")
+        raise FileExportError(f"download_video failed: url={url}") from e
diff --git a/packages/shared/fastfetchbot_shared/exceptions.py b/packages/shared/fastfetchbot_shared/exceptions.py
new file mode 100644
index 0000000..72777be
--- /dev/null
+++ b/packages/shared/fastfetchbot_shared/exceptions.py
@@ -0,0 +1,26 @@
+class FastFetchBotError(Exception):
+    """Base exception for all FastFetchBot domain errors."""
+
+
+class ScraperError(FastFetchBotError):
+    """Error during scraping."""
+
+
+class ScraperNetworkError(ScraperError):
+    """Network/connection error during scraping."""
+
+
+class ScraperParseError(ScraperError):
+    """Failed to parse scraped content."""
+
+
+class TelegraphPublishError(FastFetchBotError):
+    """Telegraph publishing failed."""
+
+
+class FileExportError(FastFetchBotError):
+    """File export (PDF, video, audio) failed."""
+
+
+class ExternalServiceError(FastFetchBotError):
+    """External service call failed (OpenAI, Inoreader, etc.)."""
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/bluesky/scraper.py b/packages/shared/fastfetchbot_shared/services/scrapers/bluesky/scraper.py
index 2874fe1..8c3d163 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/bluesky/scraper.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/bluesky/scraper.py
@@ -189,3 +189,4 @@ async def _request_post_data(self, bluesky_post: BlueskyPost) -> ThreadViewPost:
             return post_thread_data.thread
         except Exception as e:
             logger.error(f"Error while getting post data: {e}")
+            raise
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/common.py b/packages/shared/fastfetchbot_shared/services/scrapers/common.py
index 0e94809..f4e6e4f 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/common.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/common.py
@@ -1,6 +1,7 @@
 from typing import Optional, Any
 
 from fastfetchbot_shared.models.url_metadata import UrlMetadata
+from fastfetchbot_shared.exceptions import ScraperError
 from fastfetchbot_shared.services.scrapers import (
     twitter,
     wechat,
@@ -75,7 +76,7 @@ def _resolve_scraper_class(self, category: str):
             return self.service_classes[category]
         if category in ("youtube", "bilibili"):
             return self._get_video_downloader()
-        raise KeyError(f"No scraper registered for category: {category}")
+        raise ScraperError(f"No scraper registered for category: {category}")
 
     async def get_item(self, metadata_item: Optional[dict] = None) -> dict:
         if not metadata_item:
@@ -92,7 +93,7 @@ async def get_item(self, metadata_item: Optional[dict] = None) -> dict:
                     metadata_item = await scraper_item.get_item()
             except Exception as e:
                 logger.error(f"Error while getting item: {e}")
-                raise e
+                raise
         logger.info(f"Got metadata item")
         logger.debug(metadata_item)
         metadata_item = await self.process_item(metadata_item)
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/douban/__init__.py b/packages/shared/fastfetchbot_shared/services/scrapers/douban/__init__.py
index a273294..ee9a083 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/douban/__init__.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/douban/__init__.py
@@ -8,6 +8,7 @@
 
 from fastfetchbot_shared.utils.parse import get_html_text_length, wrap_text_into_html
 from fastfetchbot_shared.utils.network import get_selector, HEADERS
+from fastfetchbot_shared.utils.logger import logger
 from fastfetchbot_shared.models.metadata_item import MetadataItem, MediaFile, MessageType
 from fastfetchbot_shared.services.scrapers.config import JINJA2_ENV
 
@@ -220,10 +221,10 @@ def _douban_short_text_process(self) -> str:
     @staticmethod
     def raw_content_to_html(raw_content: str) -> str:
         # Split the text into paragraphs based on double newlines
-        print(raw_content)
+        logger.debug(raw_content)
         paragraphs = raw_content.split('<br>\n')
         # Wrap each paragraph with <p> tags
-        print(paragraphs)
+        logger.debug(paragraphs)
         html_paragraphs = [f'<p>{paragraph.strip()}</p>' for paragraph in paragraphs]
         # Join the paragraphs to form the final HTML string
         html_string = ''.join(html_paragraphs)
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/general/firecrawl_client.py b/packages/shared/fastfetchbot_shared/services/scrapers/general/firecrawl_client.py
index 3c7e8ea..467fe9a 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/general/firecrawl_client.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/general/firecrawl_client.py
@@ -7,6 +7,7 @@
 from firecrawl import AsyncFirecrawl
 
 from fastfetchbot_shared.services.scrapers.config import settings
+from fastfetchbot_shared.exceptions import ExternalServiceError
 
 
 @dataclass(frozen=True)
@@ -93,4 +94,4 @@ async def scrape_url(
             )
             return result.model_dump(exclude_none=True)
         except Exception as e:
-            raise RuntimeError(f"Firecrawl scrape_url failed: url={url}") from e
+            raise ExternalServiceError(f"Firecrawl scrape_url failed: url={url}") from e
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/general/zyte.py b/packages/shared/fastfetchbot_shared/services/scrapers/general/zyte.py
index cabf2c3..d011d6f 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/general/zyte.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/general/zyte.py
@@ -4,6 +4,7 @@
 from fastfetchbot_shared.services.scrapers.general.base import BaseGeneralDataProcessor, BaseGeneralScraper
 from fastfetchbot_shared.services.scrapers.scraper import DataProcessor
 from fastfetchbot_shared.utils.logger import logger
+from fastfetchbot_shared.exceptions import ExternalServiceError
 
 
 class ZyteDataProcessor(BaseGeneralDataProcessor):
@@ -17,7 +18,7 @@ def __init__(self, url: str):
 
     async def _get_page_content(self) -> None:
         if not settings.ZYTE_API_KEY:
-            raise RuntimeError("ZYTE_API_KEY is not configured")
+            raise ExternalServiceError("ZYTE_API_KEY is not configured")
 
         try:
             client = AsyncZyteAPI(api_key=settings.ZYTE_API_KEY)
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/instagram/__init__.py b/packages/shared/fastfetchbot_shared/services/scrapers/instagram/__init__.py
index 52bf256..ae99e0c 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/instagram/__init__.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/instagram/__init__.py
@@ -81,10 +81,10 @@ async def _get_post_info(self) -> dict:
                     and "status" in ins_data
                     and ins_data["status"] is False
                 ):
-                    print("get_ins_post_item error: ", self.scraper)
+                    logger.warning(f"get_ins_post_item error: {self.scraper}")
                     continue
                 elif type(ins_data) == str and "400" in ins_data:
-                    print("get_ins_post_item error: ", self.scraper, ins_data)
+                    logger.warning(f"get_ins_post_item error: {self.scraper} {ins_data}")
                     continue
             if (
                 self.scraper == "looter2"
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/scraper_manager.py b/packages/shared/fastfetchbot_shared/services/scrapers/scraper_manager.py
index b1abed5..fbc5183 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/scraper_manager.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/scraper_manager.py
@@ -1,6 +1,7 @@
 from typing import Optional
 
 from fastfetchbot_shared.utils.logger import logger
+from fastfetchbot_shared.exceptions import ScraperError
 from fastfetchbot_shared.services.scrapers.bluesky.scraper import BlueskyScraper
 from fastfetchbot_shared.services.scrapers.weibo.scraper import WeiboScraper
 from fastfetchbot_shared.services.scrapers.general.scraper import GeneralScraper
@@ -40,7 +41,7 @@ async def init_scraper(cls, category: str) -> None:
                     cls.scrapers["unknown"] = scraper
         else:
             logger.error(f"Scraper {category} is not supported")
-            raise ValueError(f"Scraper {category} is not supported")
+            raise ScraperError(f"Scraper {category} is not supported")
 
     @classmethod
     async def init_bluesky_scraper(cls) -> BlueskyScraper:
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/threads/__init__.py b/packages/shared/fastfetchbot_shared/services/scrapers/threads/__init__.py
index a56b562..69be60a 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/threads/__init__.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/threads/__init__.py
@@ -97,7 +97,7 @@ async def intercept_response(response):
             for xhr in gql_calls:
                 text = await xhr.text()
                 data = json.loads(text)
-                print(json.dumps(data, indent=4, ensure_ascii=False))
+                logger.debug(json.dumps(data, indent=4, ensure_ascii=False))
                 threads = data["data"]["data"]["containing_thread"]["thread_items"]
                 for thread in threads:
                     thread_data["threads"].append(self.parse_single_threads_data(thread["post"]))
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/twitter/__init__.py b/packages/shared/fastfetchbot_shared/services/scrapers/twitter/__init__.py
index 9a38090..4c5b280 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/twitter/__init__.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/twitter/__init__.py
@@ -1,6 +1,5 @@
 # TODO: https://rapidapi.com/Glavier/api/twitter135
 import asyncio
-import traceback
 from urllib.parse import urlparse
 from typing import Dict, List, Optional, Any, Tuple
 
@@ -9,6 +8,7 @@
 
 from fastfetchbot_shared.models.metadata_item import MetadataItem, MediaFile, MessageType
 from fastfetchbot_shared.utils.parse import get_html_text_length, wrap_text_into_html
+from fastfetchbot_shared.exceptions import ScraperError, ScraperParseError
 from twitter.scraper import Scraper
 from .config import (
     ALL_SCRAPER,
@@ -73,11 +73,10 @@ async def _get_response_tweet_data(self) -> Dict:
                 elif self.scraper == "api-client":
                     tweet_data = await self._api_client_get_response_tweet_data()
                     return tweet_data
-            except Exception as e:
-                logger.error(e)
-                traceback.print_exc()
+            except Exception:
+                logger.exception(f"Twitter scraper {self.scraper} failed")
                 continue
-        raise Exception("No valid response from all Twitter scrapers")
+        raise ScraperError("No valid response from all Twitter scrapers")
 
     async def _rapidapi_get_response_tweet_data(self) -> Dict:
         async with httpx.AsyncClient() as client:
@@ -94,11 +93,11 @@ async def _rapidapi_get_response_tweet_data(self) -> Dict:
                         type(tweet_data) == str
                         and ("400" in tweet_data or "429" in tweet_data)
                 ):
-                    raise Exception("Invalid response from Twitter API")
+                    raise ScraperParseError("Invalid response from Twitter API")
                 else:
                     return tweet_data
             else:
-                raise Exception("Invalid response from Twitter API")
+                raise ScraperParseError("Invalid response from Twitter API")
 
     async def _api_client_get_response_tweet_data(self) -> Dict:
         scraper = Scraper(
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/weibo/scraper.py b/packages/shared/fastfetchbot_shared/services/scrapers/weibo/scraper.py
index a0dd89e..acbf414 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/weibo/scraper.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/weibo/scraper.py
@@ -8,6 +8,7 @@
 from lxml import html
 
 from fastfetchbot_shared.models.metadata_item import MetadataItem, MediaFile, MessageType
+from fastfetchbot_shared.exceptions import ScraperError, ScraperNetworkError, ScraperParseError
 from fastfetchbot_shared.services.scrapers.scraper import Scraper, DataProcessor
 from fastfetchbot_shared.services.scrapers.weibo import Weibo
 from fastfetchbot_shared.utils.network import get_response_json, get_random_user_agent
@@ -61,19 +62,20 @@ async def _get_weibo(self) -> None:
         # Priority 1: API with SUB cookie and isGetLongText=true
         try:
             weibo_info = await self._get_weibo_info(method="api")
-        except ConnectionError as e:
+        except ScraperNetworkError as e:
             logger.warning(f"Failed to get weibo info by api: {e}")
         # Priority 2: webpage scraping
         if weibo_info is None:
             try:
                 weibo_info = await self._get_weibo_info(method="webpage")
-            except ConnectionError as e:
+            except ScraperNetworkError as e:
                 logger.error(f"All weibo fetch methods failed. Last error: {e}")
                 raise
         try:
             await self._process_weibo_item(weibo_info)
         except Exception as e:
             logger.error(f"Failed to process weibo item: {e}")
+            raise
 
     async def _get_weibo_info(self, method=None) -> dict:
         try:
@@ -84,11 +86,11 @@ async def _get_weibo_info(self, method=None) -> dict:
             elif method == "api":
                 weibo_info = await self._get_weibo_info_api()
             else:
-                raise ValueError("method must be webpage or api")
+                raise ScraperError("method must be webpage or api")
             weibo_info = self._parse_weibo_info(weibo_info)
             return weibo_info
-        except ConnectionError as e:
-            raise ConnectionError(f"There are some network issues: {e}")
+        except (ConnectionError, ScraperNetworkError) as e:
+            raise ScraperNetworkError(f"There are some network issues: {e}") from e
 
     async def _get_weibo_info_webpage(self) -> dict:
         url = WEIBO_WEB_HOST + self.id
@@ -125,11 +127,13 @@ async def _get_weibo_info_api(self) -> dict:
                 response.raise_for_status()
                 ajax_json = response.json()
             if not ajax_json or ajax_json.get("ok") == 0:
-                raise ConnectionError("API returned ok=0 or empty response")
+                raise ScraperParseError("Weibo API returned ok=0 or empty response")
             logger.debug(f"weibo info by api: {ajax_json}")
             return ajax_json
+        except (ScraperParseError, ScraperNetworkError):
+            raise
         except Exception as e:
-            raise ConnectionError(f"Failed to get weibo info by api: {e}")
+            raise ScraperNetworkError(f"Failed to get weibo info by api: {e}") from e
 
     async def _get_long_weibo_info_api(self) -> dict:
         ajax_json = await get_response_json(
@@ -173,7 +177,7 @@ async def _process_weibo_item(self, weibo_info: dict) -> None:
                 #     raise Exception("Still a long text weibo, should go long text api.")
                 text = longtext_info.get("text")
                 if not text:
-                    raise Exception(
+                    raise ScraperParseError(
                         "Failed to get longtext of weibo by webpage scraping."
                     )
             except Exception as e:
@@ -466,7 +470,7 @@ def _weibo_html_text_clean(text, method="bs4"):
         elif method == "lxml":
             return WeiboDataProcessor._weibo_html_text_clean_lxml(text)
         else:
-            raise ValueError("method must be bs4 or lxml")
+            raise ScraperError("method must be bs4 or lxml")
 
     @staticmethod
     def _weibo_html_text_clean_bs4(text):
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/xiaohongshu/adaptar.py b/packages/shared/fastfetchbot_shared/services/scrapers/xiaohongshu/adaptar.py
index 900bc52..3a28e61 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/xiaohongshu/adaptar.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/xiaohongshu/adaptar.py
@@ -9,6 +9,7 @@
 
 from fastfetchbot_shared.config import settings as shared_settings
 from fastfetchbot_shared.utils.logger import logger
+from fastfetchbot_shared.exceptions import ScraperError, ScraperParseError, ExternalServiceError
 
 XHS_API_URL = "https://edith.xiaohongshu.com"
 XHS_WEB_URL = "https://www.xiaohongshu.com"
@@ -22,10 +23,10 @@ def parse_xhs_note_url(note_url: str) -> Dict[str, str]:
     query = dict(parse_qsl(parsed.query, keep_blank_values=True))
     path_parts = [part for part in parsed.path.split("/") if part]
     if not path_parts:
-        raise ValueError(f"Invalid XHS note URL: {note_url}")
+        raise ScraperParseError(f"Invalid XHS note URL: {note_url}")
     note_id = path_parts[-1]
     if note_id in {"explore", "discovery", "item"}:
-        raise ValueError(f"Invalid XHS note URL path: {note_url}")
+        raise ScraperParseError(f"Invalid XHS note URL path: {note_url}")
     return {
         "note_id": note_id,
         "xsec_token": query.get("xsec_token", ""),
@@ -52,7 +53,7 @@ def __init__(
         self.cookies = cookies.strip()
         self.sign_server_endpoint = (sign_server_endpoint or shared_settings.SIGN_SERVER_URL).rstrip("/")
         if not self.sign_server_endpoint:
-            raise ValueError(
+            raise ExternalServiceError(
                 "XhsSinglePostAdapter requires a sign server URL. "
                 "Set shared_settings.SIGN_SERVER_URL in the environment or pass sign_server_endpoint explicitly."
             )
@@ -100,12 +101,12 @@ async def _sign_headers(self, uri: str, data: Optional[Any] = None) -> Dict[str,
         resp.raise_for_status()
         body = resp.json()
         if not body.get("isok"):
-            raise RuntimeError(f"XHS sign server returned error: {body}")
+            raise ExternalServiceError(f"XHS sign server returned error: {body}")
         sign = body.get("data", {}) or {}
         required = ["x_s", "x_t", "x_s_common", "x_b3_traceid"]
         missing = [key for key in required if key not in sign]
         if missing:
-            raise RuntimeError(f"XHS sign response missing fields: {missing}")
+            raise ExternalServiceError(f"XHS sign response missing fields: {missing}")
         headers = self._base_headers()
         headers.update(
             {
@@ -147,7 +148,7 @@ async def fetch_post(
                 xsec_source=url_info["xsec_source"],
             )
         if note is None:
-            raise RuntimeError(f"Cannot fetch note detail from API or HTML: {url_info['note_id']}")
+            raise ScraperError(f"Cannot fetch note detail from API or HTML: {url_info['note_id']}")
 
         result: Dict[str, Any] = {"platform": "xhs", "note": note, "comments": [], "url": get_pure_url(note_url)}
         if with_comments:
@@ -191,18 +192,18 @@ def _parse_api_response(resp: httpx.Response) -> Dict[str, Any]:
         try:
             body = resp.json()
         except json.JSONDecodeError as exc:
-            raise RuntimeError(f"XHS API returned non-JSON: status={resp.status_code}") from exc
+            raise ScraperParseError(f"XHS API returned non-JSON: status={resp.status_code}") from exc
 
         if resp.status_code in (461, 471):
             verify_type = resp.headers.get("Verifytype", "")
             verify_uuid = resp.headers.get("Verifyuuid", "")
-            raise RuntimeError(
+            raise ScraperError(
                 f"XHS blocked by captcha: verify_type={verify_type}, verify_uuid={verify_uuid}"
             )
 
         if body.get("success"):
             return body.get("data", {}) or {}
-        raise RuntimeError(f"XHS API error: status={resp.status_code}, body={body}")
+        raise ScraperParseError(f"XHS API error: status={resp.status_code}, body={body}")
 
     async def _fetch_note_by_api(
             self,
@@ -536,7 +537,7 @@ async def _get_redirection_url(self, note_url: str) -> str:
 
         final_url = str(resp.url)
         if XHS_WEB_URL not in final_url and "xiaohongshu.com" not in final_url:
-            raise RuntimeError(
+            raise ScraperError(
                 f"Short URL did not redirect to xiaohongshu.com: {note_url} -> {final_url}"
             )
         return final_url
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/zhihu/__init__.py b/packages/shared/fastfetchbot_shared/services/scrapers/zhihu/__init__.py
index 96c3da5..f616499 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/zhihu/__init__.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/zhihu/__init__.py
@@ -1,6 +1,5 @@
 import json
 import re
-import traceback
 from typing import Dict, Optional, Any
 from urllib.parse import urlparse
 
@@ -18,6 +17,7 @@
 from fastfetchbot_shared.utils.network import get_selector, get_redirect_url, get_response_json, get_random_user_agent, \
     get_content_async, get_response
 from fastfetchbot_shared.models.metadata_item import MetadataItem, MediaFile, MessageType
+from fastfetchbot_shared.exceptions import ScraperError, ScraperParseError
 from fastfetchbot_shared.services.scrapers.config import settings, JINJA2_ENV
 from .config import (
     SHORT_LIMIT,
@@ -176,12 +176,12 @@ async def _get_zhihu_item(self) -> None:
                 if self.title != "":
                     break
             except Exception as e:
-                traceback.print_exc()
+                logger.exception(f"Zhihu scraper method {method} failed")
                 if method == ALL_METHODS[-1]:
-                    print("all methods failed")
-                    raise e
+                    logger.error("All Zhihu scraper methods failed")
+                    raise
                 else:
-                    print(
+                    logger.warning(
                         f"zhihu {self.zhihu_type} {self.method} failed, try the next method"
                     )
                 continue
@@ -299,7 +299,7 @@ async def _get_zhihu_answer(self) -> None:
                     answer_data = _parse_answer_api_json_data(json_data)
                     logger.debug(f"answer data: {answer_data}")
                 except Exception as e:
-                    raise Exception("Cannot get the answer by API")
+                    raise ScraperParseError("Cannot get the answer by API") from e
             elif self.method == "fxzhihu":
                 try:
                     resp = await get_response(url=self.request_url, headers=self.headers, client=self.httpx_client)
@@ -308,7 +308,7 @@ async def _get_zhihu_answer(self) -> None:
                     answer_data = _parse_answer_api_json_data(json_data)
                     logger.debug(f"answer data: {answer_data}")
                 except Exception as e:
-                    raise Exception("Cannot get the answer by fxzhihu, error: " + str(e))
+                    raise ScraperError("Cannot get the answer by fxzhihu, error: " + str(e)) from e
             elif self.method == "json":
                 try:
                     selector = await get_selector(self.request_url, headers=self.headers)
@@ -317,9 +317,9 @@ async def _get_zhihu_answer(self) -> None:
                     json_data = json_data["initialState"]["entities"]
                     answer_data = self._parse_answer_json_data(json_data)
                 except Exception as e:
-                    raise Exception("Cannot get the selector")
+                    raise ScraperParseError("Cannot get the selector") from e
             if answer_data == {}:
-                raise Exception("Cannot get the answer")
+                raise ScraperError("Cannot get the answer")
             self._resolve_answer_json_data(answer_data)
             # Apply FxZhihu-style content processing for api method
             if self.method == "api":
@@ -350,11 +350,11 @@ async def _get_zhihu_answer(self) -> None:
                 if self.author_url == "https://www.zhihu.com/people/":
                     self.author_url = ""
             except Exception as e:
-                raise Exception("Cannot get the answer")
+                raise ScraperError("Cannot get the answer") from e
         if (
                 self.title == ""
         ):  # TODO: this is not a good way to check if the scraping is successful. To be improved.
-            raise Exception("Cannot get the answer")
+            raise ScraperError("Cannot get the answer")
 
     async def _get_zhihu_status(self):
         """
@@ -390,8 +390,8 @@ async def _get_zhihu_status(self):
         else:
             try:
                 selector = await get_selector(self.request_url, headers=self.headers)
-            except:
-                raise Exception("zhihu request failed")
+            except Exception as e:
+                raise ScraperError("zhihu request failed") from e
             if self.method == "json":
                 def _process_picture(pictures, content_attr):
                     if not hasattr(self, content_attr):
@@ -516,8 +516,8 @@ def _process_picture(pictures, content_attr):
                         self.retweet_html = str(
                             html.tostring(pic_html, pretty_print=True)
                         ).replace("b'<div", "<div")
-                        print(type(self.retweet_html))
-                        print(self.retweet_html)
+                        logger.debug(f"retweet_html type: {type(self.retweet_html)}")
+                        logger.debug(self.retweet_html)
                     else:
                         self.retweet_html = str(
                             etree.tostring(
@@ -528,7 +528,7 @@ def _process_picture(pictures, content_attr):
                             ),
                             encoding="utf-8",
                         )
-                        print(self.retweet_html)
+                        logger.debug(self.retweet_html)
                 self.author = selector.xpath(
                     'string(//div[@class="AuthorInfo"]//meta[@itemprop="name"]/@content)'
                 )
@@ -559,12 +559,12 @@ async def _get_zhihu_article(self):
                     self.raw_content = fix_images_and_links(self.raw_content)
                     self.raw_content = unmask_zhihu_links(self.raw_content)
             except Exception as e:
-                raise Exception("zhihu request failed")
+                raise ScraperError("zhihu request failed")
         else:
             try:
                 selector = await get_selector(self.request_url, headers=self.headers)
             except Exception as e:
-                raise Exception("zhihu request failed")
+                raise ScraperError("zhihu request failed")
             if self.method == "json":
                 json_data = selector.xpath('string(//script[@id="js-initialData"])')
                 json_data = json.loads(json_data)
@@ -818,7 +818,7 @@ def _parse_status_json_data(self, data: Dict) -> Dict:
                 "origin_pin_comment_count": pins."{self.status_id}".originPin.commentCount
                 }}"""
         result = jmespath.search(expression, data)
-        print(result)
+        logger.debug(result)
         author_url_token = result["author_url_token"]
         expression = f"""{{
                         "author": users."{author_url_token}".name
diff --git a/packages/shared/fastfetchbot_shared/services/telegraph/__init__.py b/packages/shared/fastfetchbot_shared/services/telegraph/__init__.py
index 6921465..be4eb12 100644
--- a/packages/shared/fastfetchbot_shared/services/telegraph/__init__.py
+++ b/packages/shared/fastfetchbot_shared/services/telegraph/__init__.py
@@ -70,5 +70,5 @@ async def get_telegraph(self, upload_images: bool = True) -> str:
             telegraph_url = telegraph_post["url"]
             return telegraph_url
         except Exception as e:
-            logger.error("Telegraph upload failed", exc_info=e)
+            logger.error("Telegraph upload failed", exc_info=True)
             return ""
diff --git a/packages/shared/fastfetchbot_shared/utils/network.py b/packages/shared/fastfetchbot_shared/utils/network.py
index e8b5e48..1e3e0e0 100644
--- a/packages/shared/fastfetchbot_shared/utils/network.py
+++ b/packages/shared/fastfetchbot_shared/utils/network.py
@@ -5,7 +5,6 @@
 
 import aiofiles
 import httpx
-import traceback
 
 from lxml import etree
 from fake_useragent import UserAgent
@@ -39,8 +38,8 @@ async def get_response_json(url: str, headers=None, client: httpx.AsyncClient =
     try:
         response = await get_response(url, headers=headers, client=client)
         json_result = response.json()
-    except Exception as e:
-        print(e, traceback.format_exc())
+    except Exception:
+        logger.exception(f"Failed to get JSON response from {url}")
         json_result = None
     return json_result
 
@@ -67,16 +66,16 @@ async def get_selector(
         if (
                 resp.history
         ):  # if there is a redirect, the request will have a response chain
-            print("Request was redirected")
+            logger.debug("Request was redirected")
             for h in resp.history:
-                print(h.status_code, h.url)
+                logger.debug(f"Redirect: {h.status_code} {h.url}")
                 # if code is 302, do not follow the redirect
                 if h.status_code == 302:
                     selector = await get_selector(
                         h.url, headers=headers, follow_redirects=False
                     )
                     return selector
-            print("Final destination:", resp.status_code, resp.url)
+            logger.debug(f"Final destination: {resp.status_code} {resp.url}")
         selector = etree.HTML(resp.text)  # the content of the final destination
         return selector
 
@@ -114,7 +113,7 @@ async def wait_for_network_idle():
             async with page.expect_response("**/api/content") as response_info:
                 response = await response_info.value
                 if response.status == 200:
-                    print("Content loaded")
+                    logger.debug("Content loaded")
 
         await page.goto(url)
         await wait_for_network_idle()
@@ -160,9 +159,9 @@ async def download_file_by_metadata_item(
             file_name = "media-" + str(uuid.uuid1())[:8] + "." + file_format
         io_object = NamedBytesIO(file_data, name=file_name)
         return io_object
-    except Exception as e:
-        await asyncio.sleep(2)
-        logger.error(f"Failed to download {url}, {e}")
+    except Exception:
+        logger.exception(f"Failed to download {url}")
+        raise
 
 
 async def download_file_to_local(

From a694ac3e49c8f24766d7eb13858f22b2bda8e05a Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Sat, 28 Mar 2026 01:16:41 -0500
Subject: [PATCH 3/6] feat: add tests for exception handlings

---
 .../file_export/test_pdf_export_exceptions.py | 52 ++++++++++
 .../file_export/test_transcribe_exceptions.py | 31 ++++++
 tests/unit/file_export/test_video_download.py | 48 +++++++++
 tests/unit/scrapers/test_bluesky.py           |  6 +-
 tests/unit/scrapers/test_common.py            | 17 ++++
 tests/unit/scrapers/test_general_firecrawl.py |  3 +-
 tests/unit/scrapers/test_general_zyte.py      |  3 +-
 tests/unit/scrapers/test_scraper_manager.py   |  5 +-
 .../unit/scrapers/test_twitter_exceptions.py  | 97 +++++++++++++++++++
 tests/unit/scrapers/test_weibo.py             | 25 ++---
 tests/unit/scrapers/test_xiaohongshu.py       | 29 +++---
 .../unit/telegram_bot/test_webhook_server.py  | 94 ++++++++++++++++++
 tests/unit/test_api_exception_handlers.py     | 95 ++++++++++++++++++
 tests/unit/test_exceptions.py                 | 82 ++++++++++++++++
 14 files changed, 554 insertions(+), 33 deletions(-)
 create mode 100644 tests/unit/file_export/test_pdf_export_exceptions.py
 create mode 100644 tests/unit/file_export/test_transcribe_exceptions.py
 create mode 100644 tests/unit/scrapers/test_twitter_exceptions.py
 create mode 100644 tests/unit/telegram_bot/test_webhook_server.py
 create mode 100644 tests/unit/test_api_exception_handlers.py
 create mode 100644 tests/unit/test_exceptions.py

diff --git a/tests/unit/file_export/test_pdf_export_exceptions.py b/tests/unit/file_export/test_pdf_export_exceptions.py
new file mode 100644
index 0000000..878e582
--- /dev/null
+++ b/tests/unit/file_export/test_pdf_export_exceptions.py
@@ -0,0 +1,52 @@
+"""Tests for exception handling in packages/file-export/fastfetchbot_file_export/pdf_export.py"""
+
+import sys
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from fastfetchbot_shared.exceptions import FileExportError
+
+
+@pytest.fixture(autouse=True)
+def mock_weasyprint(monkeypatch):
+    """Mock weasyprint to avoid native library dependency."""
+    mock_module = MagicMock()
+    monkeypatch.setitem(sys.modules, "weasyprint", mock_module)
+    monkeypatch.setitem(sys.modules, "weasyprint.text", MagicMock())
+    monkeypatch.setitem(sys.modules, "weasyprint.text.fonts", MagicMock())
+    # Force reimport of pdf_export with mocked weasyprint
+    if "fastfetchbot_file_export.pdf_export" in sys.modules:
+        del sys.modules["fastfetchbot_file_export.pdf_export"]
+
+
+class TestExportPdfExceptionHandling:
+    def test_raises_file_export_error_on_failure(self):
+        from fastfetchbot_file_export.pdf_export import export_pdf
+
+        with patch(
+            "fastfetchbot_file_export.pdf_export.convert_html_to_pdf",
+            side_effect=RuntimeError("WeasyPrint crashed"),
+        ):
+            with pytest.raises(FileExportError, match="PDF export failed"):
+                export_pdf(html_string="<p>test</p>", output_filename="test.pdf")
+
+    def test_preserves_original_cause(self):
+        from fastfetchbot_file_export.pdf_export import export_pdf
+
+        original = OSError("disk full")
+        with patch(
+            "fastfetchbot_file_export.pdf_export.convert_html_to_pdf",
+            side_effect=original,
+        ):
+            with pytest.raises(FileExportError) as exc_info:
+                export_pdf(html_string="<p>x</p>", output_filename="out.pdf")
+            assert exc_info.value.__cause__ is original
+
+
+class TestConvertHtmlToPdf:
+    def test_no_input_raises_file_export_error(self):
+        from fastfetchbot_file_export.pdf_export import convert_html_to_pdf
+
+        with pytest.raises(FileExportError, match="Either html_string or html_file"):
+            convert_html_to_pdf("output.pdf")
diff --git a/tests/unit/file_export/test_transcribe_exceptions.py b/tests/unit/file_export/test_transcribe_exceptions.py
new file mode 100644
index 0000000..533be53
--- /dev/null
+++ b/tests/unit/file_export/test_transcribe_exceptions.py
@@ -0,0 +1,31 @@
+"""Tests for exception handling in packages/file-export/fastfetchbot_file_export/transcribe.py"""
+
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from fastfetchbot_shared.exceptions import FileExportError
+
+
+class TestGetAudioTextExceptionHandling:
+    def test_raises_file_export_error_on_failure(self):
+        from fastfetchbot_file_export.transcribe import get_audio_text
+
+        with patch(
+            "fastfetchbot_file_export.transcribe.OpenAI",
+            side_effect=RuntimeError("API key invalid"),
+        ):
+            with pytest.raises(FileExportError, match="Audio transcription failed"):
+                get_audio_text("/tmp/nonexistent.mp3", "fake-key")
+
+    def test_preserves_original_cause(self):
+        from fastfetchbot_file_export.transcribe import get_audio_text
+
+        original = RuntimeError("connection refused")
+        with patch(
+            "fastfetchbot_file_export.transcribe.OpenAI",
+            side_effect=original,
+        ):
+            with pytest.raises(FileExportError) as exc_info:
+                get_audio_text("/tmp/test.mp3", "key")
+            assert exc_info.value.__cause__ is original
diff --git a/tests/unit/file_export/test_video_download.py b/tests/unit/file_export/test_video_download.py
index 92099f3..b4c0842 100644
--- a/tests/unit/file_export/test_video_download.py
+++ b/tests/unit/file_export/test_video_download.py
@@ -426,3 +426,51 @@ def test_no_media_file_when_download_false(self, mock_celery):
         }
         vd._video_info_formatting(meta_info)
         assert vd.media_files == []
+
+
+# ---------------------------------------------------------------------------
+# Exception handling in fastfetchbot_file_export.video_download
+# ---------------------------------------------------------------------------
+
+
+class TestVideoDownloadExceptions:
+    def test_unsupported_extractor_raises_file_export_error(self):
+        from fastfetchbot_file_export.video_download import get_format_for_orientation
+        from fastfetchbot_shared.exceptions import FileExportError
+
+        with pytest.raises(FileExportError, match="no available extractor found"):
+            get_format_for_orientation("tiktok", "horizontal", False)
+
+    def test_download_video_wraps_errors_in_file_export_error(self):
+        from fastfetchbot_file_export.video_download import download_video
+        from fastfetchbot_shared.exceptions import FileExportError
+
+        with patch(
+            "fastfetchbot_file_export.video_download.init_yt_downloader"
+        ) as mock_init:
+            mock_ydl = MagicMock()
+            mock_ydl.__enter__ = MagicMock(return_value=mock_ydl)
+            mock_ydl.__exit__ = MagicMock(return_value=False)
+            mock_ydl.extract_info.side_effect = RuntimeError("network error")
+            mock_init.return_value = mock_ydl
+
+            with pytest.raises(FileExportError, match="download_video failed"):
+                download_video("https://youtube.com/watch?v=test", download=True)
+
+    def test_download_video_preserves_original_cause(self):
+        from fastfetchbot_file_export.video_download import download_video
+        from fastfetchbot_shared.exceptions import FileExportError
+
+        original = RuntimeError("connection reset")
+        with patch(
+            "fastfetchbot_file_export.video_download.init_yt_downloader"
+        ) as mock_init:
+            mock_ydl = MagicMock()
+            mock_ydl.__enter__ = MagicMock(return_value=mock_ydl)
+            mock_ydl.__exit__ = MagicMock(return_value=False)
+            mock_ydl.extract_info.side_effect = original
+            mock_init.return_value = mock_ydl
+
+            with pytest.raises(FileExportError) as exc_info:
+                download_video("https://youtube.com/watch?v=x", download=True)
+            assert exc_info.value.__cause__ is original
diff --git a/tests/unit/scrapers/test_bluesky.py b/tests/unit/scrapers/test_bluesky.py
index a5cbf1c..f0081d7 100644
--- a/tests/unit/scrapers/test_bluesky.py
+++ b/tests/unit/scrapers/test_bluesky.py
@@ -635,7 +635,7 @@ async def test_request_post_data_uses_handle_when_no_did(self):
 
     @pytest.mark.asyncio
     async def test_request_post_data_exception_handling(self):
-        """_request_post_data should log error and return None on exception."""
+        """_request_post_data should log error and re-raise on exception."""
         from fastfetchbot_shared.services.scrapers.bluesky.scraper import BlueskyScraper
 
         with patch(
@@ -652,5 +652,5 @@ async def test_request_post_data_exception_handling(self):
             bluesky_post.handle = "alice"
             bluesky_post.post_rkey = "rkey123"
 
-            result = await scraper._request_post_data(bluesky_post)
-            assert result is None
+            with pytest.raises(Exception, match="network error"):
+                await scraper._request_post_data(bluesky_post)
diff --git a/tests/unit/scrapers/test_common.py b/tests/unit/scrapers/test_common.py
index 051f270..4f613c7 100644
--- a/tests/unit/scrapers/test_common.py
+++ b/tests/unit/scrapers/test_common.py
@@ -6,6 +6,7 @@
 
 from fastfetchbot_shared.models.url_metadata import UrlMetadata
 from fastfetchbot_shared.services.scrapers.common import InfoExtractService
+from fastfetchbot_shared.exceptions import ScraperError
 
 
 # ---------------------------------------------------------------------------
@@ -215,3 +216,19 @@ async def test_process_item_no_strip_needed(self, make_url_metadata):
         svc = InfoExtractService(url_metadata=make_url_metadata())
         result = await svc.process_item({"title": "clean"})
         assert result["title"] == "clean"
+
+
+# ---------------------------------------------------------------------------
+# _resolve_scraper_class
+# ---------------------------------------------------------------------------
+
+class TestResolveScraperClass:
+    def test_known_category_returns_class(self, make_url_metadata):
+        svc = InfoExtractService(url_metadata=make_url_metadata(source="twitter"))
+        cls = svc._resolve_scraper_class("twitter")
+        assert cls is not None
+
+    def test_unknown_category_raises_scraper_error(self, make_url_metadata):
+        svc = InfoExtractService(url_metadata=make_url_metadata())
+        with pytest.raises(ScraperError, match="No scraper registered"):
+            svc._resolve_scraper_class("tiktok")
diff --git a/tests/unit/scrapers/test_general_firecrawl.py b/tests/unit/scrapers/test_general_firecrawl.py
index d095519..a38d46a 100644
--- a/tests/unit/scrapers/test_general_firecrawl.py
+++ b/tests/unit/scrapers/test_general_firecrawl.py
@@ -9,6 +9,7 @@
     FirecrawlClient,
     FirecrawlSettings,
 )
+from fastfetchbot_shared.exceptions import ExternalServiceError
 
 
 # ---------------------------------------------------------------------------
@@ -166,7 +167,7 @@ async def test_scrape_url_exception(self, mock_fc_cls):
         mock_app.scrape.side_effect = Exception("network error")
 
         client = FirecrawlClient.get_instance()
-        with pytest.raises(RuntimeError, match="Firecrawl scrape_url failed"):
+        with pytest.raises(ExternalServiceError, match="Firecrawl scrape_url failed"):
             await client.scrape_url(url="https://fail.com")
 
 
diff --git a/tests/unit/scrapers/test_general_zyte.py b/tests/unit/scrapers/test_general_zyte.py
index 8695986..33f5861 100644
--- a/tests/unit/scrapers/test_general_zyte.py
+++ b/tests/unit/scrapers/test_general_zyte.py
@@ -8,6 +8,7 @@
     ZyteDataProcessor,
     ZyteScraper,
 )
+from fastfetchbot_shared.exceptions import ExternalServiceError
 
 
 # ---------------------------------------------------------------------------
@@ -36,7 +37,7 @@ class TestZyteGetPageContent:
     )
     async def test_no_api_key_raises(self):
         proc = ZyteDataProcessor("https://example.com")
-        with pytest.raises(RuntimeError, match="ZYTE_API_KEY is not configured"):
+        with pytest.raises(ExternalServiceError, match="ZYTE_API_KEY is not configured"):
             await proc._get_page_content()
 
     @pytest.mark.asyncio
diff --git a/tests/unit/scrapers/test_scraper_manager.py b/tests/unit/scrapers/test_scraper_manager.py
index 24b0cc2..ec6d34c 100644
--- a/tests/unit/scrapers/test_scraper_manager.py
+++ b/tests/unit/scrapers/test_scraper_manager.py
@@ -5,6 +5,7 @@
 import pytest
 
 from fastfetchbot_shared.services.scrapers.scraper_manager import ScraperManager
+from fastfetchbot_shared.exceptions import ScraperError
 
 
 # ---------------------------------------------------------------------------
@@ -124,8 +125,8 @@ async def test_init_unknown_when_already_initialized(self):
 
 class TestInitScraperUnsupported:
     @pytest.mark.asyncio
-    async def test_unsupported_category_raises_value_error(self):
-        with pytest.raises(ValueError, match="not supported"):
+    async def test_unsupported_category_raises_scraper_error(self):
+        with pytest.raises(ScraperError, match="not supported"):
             await ScraperManager.init_scraper("tiktok")
 
 
diff --git a/tests/unit/scrapers/test_twitter_exceptions.py b/tests/unit/scrapers/test_twitter_exceptions.py
new file mode 100644
index 0000000..6826730
--- /dev/null
+++ b/tests/unit/scrapers/test_twitter_exceptions.py
@@ -0,0 +1,97 @@
+"""Tests for exception handling in Twitter scraper."""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from fastfetchbot_shared.exceptions import ScraperError, ScraperParseError
+
+
+class TestGetResponseTweetData:
+    @pytest.mark.asyncio
+    async def test_all_scrapers_fail_raises_scraper_error(self):
+        from fastfetchbot_shared.services.scrapers.twitter import Twitter
+
+        tw = Twitter(url="https://twitter.com/user/status/123")
+
+        with patch.object(
+            tw, "_rapidapi_get_response_tweet_data",
+            new_callable=AsyncMock,
+            side_effect=ScraperParseError("API error"),
+        ), patch.object(
+            tw, "_api_client_get_response_tweet_data",
+            new_callable=AsyncMock,
+            side_effect=RuntimeError("client error"),
+        ):
+            with pytest.raises(ScraperError, match="No valid response from all Twitter scrapers"):
+                await tw._get_response_tweet_data()
+
+    @pytest.mark.asyncio
+    async def test_first_scraper_fails_falls_through(self):
+        from fastfetchbot_shared.services.scrapers.twitter import Twitter
+
+        tw = Twitter(url="https://twitter.com/user/status/456")
+        fake_data = {
+            "data": {
+                "threaded_conversation_with_injections_v2": {
+                    "instructions": [{"entries": []}]
+                }
+            }
+        }
+
+        with patch.object(
+            tw, "_rapidapi_get_response_tweet_data",
+            new_callable=AsyncMock,
+            side_effect=RuntimeError("first failed"),
+        ), patch.object(
+            tw, "_api_client_get_response_tweet_data",
+            new_callable=AsyncMock,
+            return_value=fake_data,
+        ):
+            # Should fall through to api-client and succeed
+            result = await tw._get_response_tweet_data()
+            assert result == fake_data
+
+
+class TestRapidapiResponseValidation:
+    @pytest.mark.asyncio
+    async def test_error_response_raises_scraper_parse_error(self):
+        from fastfetchbot_shared.services.scrapers.twitter import Twitter
+        import httpx
+
+        tw = Twitter(url="https://twitter.com/user/status/789")
+        tw.scraper = "Twitter135"
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"errors": [{"message": "rate limited"}]}
+
+        with patch("httpx.AsyncClient") as MockClient:
+            mock_client = AsyncMock()
+            mock_client.get = AsyncMock(return_value=mock_response)
+            MockClient.return_value.__aenter__ = AsyncMock(return_value=mock_client)
+            MockClient.return_value.__aexit__ = AsyncMock(return_value=False)
+
+            with patch.object(tw, "_get_request_headers"):
+                with pytest.raises(ScraperParseError, match="Invalid response"):
+                    await tw._rapidapi_get_response_tweet_data()
+
+    @pytest.mark.asyncio
+    async def test_non_200_raises_scraper_parse_error(self):
+        from fastfetchbot_shared.services.scrapers.twitter import Twitter
+
+        tw = Twitter(url="https://twitter.com/user/status/000")
+        tw.scraper = "Twitter135"
+
+        mock_response = MagicMock()
+        mock_response.status_code = 500
+
+        with patch("httpx.AsyncClient") as MockClient:
+            mock_client = AsyncMock()
+            mock_client.get = AsyncMock(return_value=mock_response)
+            MockClient.return_value.__aenter__ = AsyncMock(return_value=mock_client)
+            MockClient.return_value.__aexit__ = AsyncMock(return_value=False)
+
+            with patch.object(tw, "_get_request_headers"):
+                with pytest.raises(ScraperParseError, match="Invalid response"):
+                    await tw._rapidapi_get_response_tweet_data()
diff --git a/tests/unit/scrapers/test_weibo.py b/tests/unit/scrapers/test_weibo.py
index 8b2230e..d19461c 100644
--- a/tests/unit/scrapers/test_weibo.py
+++ b/tests/unit/scrapers/test_weibo.py
@@ -24,6 +24,7 @@
 from typing import Dict
 
 from fastfetchbot_shared.models.metadata_item import MediaFile, MessageType
+from fastfetchbot_shared.exceptions import ScraperError, ScraperNetworkError, ScraperParseError
 
 
 # ---------------------------------------------------------------------------
@@ -218,7 +219,7 @@ def test_lxml_method(self):
 
     def test_invalid_method_raises(self):
         from fastfetchbot_shared.services.scrapers.weibo.scraper import WeiboDataProcessor
-        with pytest.raises(ValueError, match="method must be bs4 or lxml"):
+        with pytest.raises(ScraperError, match="method must be bs4 or lxml"):
             WeiboDataProcessor._weibo_html_text_clean("<p>test</p>", method="invalid")
 
 
@@ -803,7 +804,7 @@ async def test_invalid_method_raises(self):
             mock_env.get_template.return_value = mock_template
             from fastfetchbot_shared.services.scrapers.weibo.scraper import WeiboDataProcessor
             wp = WeiboDataProcessor(url="https://m.weibo.cn/detail/123")
-        with pytest.raises(ValueError, match="method must be webpage or api"):
+        with pytest.raises(ScraperError, match="method must be webpage or api"):
             await wp._get_weibo_info(method="invalid")
 
     @pytest.mark.asyncio
@@ -827,7 +828,7 @@ async def test_connection_error_propagated(self):
             from fastfetchbot_shared.services.scrapers.weibo.scraper import WeiboDataProcessor
             wp = WeiboDataProcessor(url="https://m.weibo.cn/detail/123")
         with patch.object(wp, "_get_weibo_info_api", new_callable=AsyncMock, side_effect=ConnectionError("net fail")):
-            with pytest.raises(ConnectionError, match="network issues"):
+            with pytest.raises(ScraperNetworkError, match="network issues"):
                 await wp._get_weibo_info(method="api")
 
 
@@ -960,7 +961,7 @@ async def test_ok_zero_raises(self):
         mock_client.__aexit__ = AsyncMock(return_value=False)
 
         with patch("fastfetchbot_shared.services.scrapers.weibo.scraper.httpx.AsyncClient", return_value=mock_client):
-            with pytest.raises(ConnectionError):
+            with pytest.raises(ScraperParseError):
                 await wp._get_weibo_info_api()
 
     @pytest.mark.asyncio
@@ -983,7 +984,7 @@ async def test_empty_response_raises(self):
         mock_client.__aexit__ = AsyncMock(return_value=False)
 
         with patch("fastfetchbot_shared.services.scrapers.weibo.scraper.httpx.AsyncClient", return_value=mock_client):
-            with pytest.raises(ConnectionError):
+            with pytest.raises(ScraperParseError):
                 await wp._get_weibo_info_api()
 
     @pytest.mark.asyncio
@@ -1004,7 +1005,7 @@ async def test_http_error_raises(self):
         mock_client.__aexit__ = AsyncMock(return_value=False)
 
         with patch("fastfetchbot_shared.services.scrapers.weibo.scraper.httpx.AsyncClient", return_value=mock_client):
-            with pytest.raises(ConnectionError):
+            with pytest.raises(ScraperNetworkError):
                 await wp._get_weibo_info_api()
 
 
@@ -1083,7 +1084,7 @@ async def side_effect(method=None):
             nonlocal call_count
             call_count += 1
             if call_count == 1:
-                raise ConnectionError("api fail")
+                raise ScraperNetworkError("api fail")
             return weibo_info
 
         with patch.object(wp, "_get_weibo_info", new_callable=AsyncMock, side_effect=side_effect):
@@ -1100,12 +1101,12 @@ async def test_both_fail_raises(self):
             from fastfetchbot_shared.services.scrapers.weibo.scraper import WeiboDataProcessor
             wp = WeiboDataProcessor(url="https://m.weibo.cn/detail/123")
 
-        with patch.object(wp, "_get_weibo_info", new_callable=AsyncMock, side_effect=ConnectionError("fail")):
-            with pytest.raises(ConnectionError):
+        with patch.object(wp, "_get_weibo_info", new_callable=AsyncMock, side_effect=ScraperNetworkError("fail")):
+            with pytest.raises(ScraperNetworkError):
                 await wp._get_weibo()
 
     @pytest.mark.asyncio
-    async def test_process_item_exception_caught(self):
+    async def test_process_item_exception_reraised(self):
         with patch("fastfetchbot_shared.services.scrapers.weibo.scraper.JINJA2_ENV") as mock_env:
             mock_template = MagicMock()
             mock_template.render.return_value = "<p>rendered</p>"
@@ -1115,8 +1116,8 @@ async def test_process_item_exception_caught(self):
 
         with patch.object(wp, "_get_weibo_info", new_callable=AsyncMock, return_value={"id": "123"}):
             with patch.object(wp, "_process_weibo_item", new_callable=AsyncMock, side_effect=Exception("process fail")):
-                # Should not raise, exception is caught and logged
-                await wp._get_weibo()
+                with pytest.raises(Exception, match="process fail"):
+                    await wp._get_weibo()
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/unit/scrapers/test_xiaohongshu.py b/tests/unit/scrapers/test_xiaohongshu.py
index eca8e50..6fadc8e 100644
--- a/tests/unit/scrapers/test_xiaohongshu.py
+++ b/tests/unit/scrapers/test_xiaohongshu.py
@@ -19,6 +19,7 @@
     XHS_WEB_URL,
 )
 from fastfetchbot_shared.models.metadata_item import MessageType, MediaFile
+from fastfetchbot_shared.exceptions import ScraperError, ScraperParseError, ExternalServiceError
 
 
 # ---------------------------------------------------------------------------
@@ -48,19 +49,19 @@ def test_nested_path(self):
         assert result["note_id"] == "note456"
 
     def test_empty_path_raises(self):
-        with pytest.raises(ValueError, match="Invalid XHS note URL"):
+        with pytest.raises(ScraperParseError, match="Invalid XHS note URL"):
             parse_xhs_note_url("https://www.xiaohongshu.com/")
 
     def test_explore_only_raises(self):
-        with pytest.raises(ValueError, match="Invalid XHS note URL path"):
+        with pytest.raises(ScraperParseError, match="Invalid XHS note URL path"):
             parse_xhs_note_url("https://www.xiaohongshu.com/explore")
 
     def test_discovery_only_raises(self):
-        with pytest.raises(ValueError, match="Invalid XHS note URL path"):
+        with pytest.raises(ScraperParseError, match="Invalid XHS note URL path"):
             parse_xhs_note_url("https://www.xiaohongshu.com/discovery")
 
     def test_item_only_raises(self):
-        with pytest.raises(ValueError, match="Invalid XHS note URL path"):
+        with pytest.raises(ScraperParseError, match="Invalid XHS note URL path"):
             parse_xhs_note_url("https://www.xiaohongshu.com/item")
 
 
@@ -101,7 +102,7 @@ def test_strips_trailing_slash(self):
 
     @patch("fastfetchbot_shared.config.settings.SIGN_SERVER_URL", "")
     def test_no_sign_server_raises(self):
-        with pytest.raises(ValueError, match="sign server URL"):
+        with pytest.raises(ExternalServiceError, match="sign server URL"):
             XhsSinglePostAdapter(cookies="c=1", sign_server_endpoint="")
 
     @patch("fastfetchbot_shared.config.settings.SIGN_SERVER_URL", "http://fallback:8989")
@@ -193,7 +194,7 @@ async def test_not_ok_raises(self):
         adapter._http = AsyncMock()
         adapter._http.post = AsyncMock(return_value=mock_resp)
 
-        with pytest.raises(RuntimeError, match="sign server returned error"):
+        with pytest.raises(ExternalServiceError, match="sign server returned error"):
             await adapter._sign_headers("/api/test")
 
     @pytest.mark.asyncio
@@ -210,7 +211,7 @@ async def test_missing_fields_raises(self):
         adapter._http = AsyncMock()
         adapter._http.post = AsyncMock(return_value=mock_resp)
 
-        with pytest.raises(RuntimeError, match="missing fields"):
+        with pytest.raises(ExternalServiceError, match="missing fields"):
             await adapter._sign_headers("/api/test")
 
     @pytest.mark.asyncio
@@ -224,7 +225,7 @@ async def test_none_data_uses_empty_dict(self):
         adapter._http = AsyncMock()
         adapter._http.post = AsyncMock(return_value=mock_resp)
 
-        with pytest.raises(RuntimeError, match="missing fields"):
+        with pytest.raises(ExternalServiceError, match="missing fields"):
             await adapter._sign_headers("/api/test")
 
     @pytest.mark.asyncio
@@ -273,7 +274,7 @@ def test_non_json_raises(self):
         resp = MagicMock(spec=httpx.Response)
         resp.status_code = 200
         resp.json.side_effect = json.JSONDecodeError("err", "", 0)
-        with pytest.raises(RuntimeError, match="non-JSON"):
+        with pytest.raises(ScraperParseError, match="non-JSON"):
             XhsSinglePostAdapter._parse_api_response(resp)
 
     def test_captcha_461(self):
@@ -281,7 +282,7 @@ def test_captcha_461(self):
             461, {"success": False},
             headers={"Verifytype": "captcha", "Verifyuuid": "uuid1"},
         )
-        with pytest.raises(RuntimeError, match="captcha"):
+        with pytest.raises(ScraperError, match="captcha"):
             XhsSinglePostAdapter._parse_api_response(resp)
 
     def test_captcha_471(self):
@@ -289,12 +290,12 @@ def test_captcha_471(self):
             471, {"success": False},
             headers={"Verifytype": "sms", "Verifyuuid": "uuid2"},
         )
-        with pytest.raises(RuntimeError, match="captcha"):
+        with pytest.raises(ScraperError, match="captcha"):
             XhsSinglePostAdapter._parse_api_response(resp)
 
     def test_api_error_not_success(self):
         resp = self._make_response(200, {"success": False, "msg": "error"})
-        with pytest.raises(RuntimeError, match="XHS API error"):
+        with pytest.raises(ScraperParseError, match="XHS API error"):
             XhsSinglePostAdapter._parse_api_response(resp)
 
 
@@ -951,7 +952,7 @@ async def test_both_fail_raises(self):
         adapter._fetch_note_by_html = AsyncMock(return_value=None)
 
         url = f"{XHS_WEB_URL}/explore/n1"
-        with pytest.raises(RuntimeError, match="Cannot fetch note"):
+        with pytest.raises(ScraperError, match="Cannot fetch note"):
             await adapter.fetch_post(note_url=url)
 
     @pytest.mark.asyncio
@@ -1292,7 +1293,7 @@ async def test_not_xhs_raises(self):
             mock_client.__aexit__ = AsyncMock(return_value=None)
             MockClient.return_value = mock_client
 
-            with pytest.raises(RuntimeError, match="did not redirect to xiaohongshu.com"):
+            with pytest.raises(ScraperError, match="did not redirect to xiaohongshu.com"):
                 await adapter._get_redirection_url("https://xhslink.com/abc")
 
 
diff --git a/tests/unit/telegram_bot/test_webhook_server.py b/tests/unit/telegram_bot/test_webhook_server.py
new file mode 100644
index 0000000..db16adc
--- /dev/null
+++ b/tests/unit/telegram_bot/test_webhook_server.py
@@ -0,0 +1,94 @@
+"""Tests for apps/telegram-bot/core/webhook/server.py"""
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+
+class TestLogTaskException:
+    def test_logs_exception_from_failed_task(self):
+        from core.webhook.server import _log_task_exception
+
+        mock_task = MagicMock(spec=asyncio.Task)
+        mock_task.cancelled.return_value = False
+        test_exc = RuntimeError("task boom")
+        mock_task.exception.return_value = test_exc
+
+        with patch("core.webhook.server.logger") as mock_logger:
+            _log_task_exception(mock_task)
+            mock_logger.exception.assert_called_once()
+
+    def test_does_not_log_cancelled_task(self):
+        from core.webhook.server import _log_task_exception
+
+        mock_task = MagicMock(spec=asyncio.Task)
+        mock_task.cancelled.return_value = True
+
+        with patch("core.webhook.server.logger") as mock_logger:
+            _log_task_exception(mock_task)
+            mock_logger.exception.assert_not_called()
+
+    def test_does_not_log_successful_task(self):
+        from core.webhook.server import _log_task_exception
+
+        mock_task = MagicMock(spec=asyncio.Task)
+        mock_task.cancelled.return_value = False
+        mock_task.exception.return_value = None
+
+        with patch("core.webhook.server.logger") as mock_logger:
+            _log_task_exception(mock_task)
+            mock_logger.exception.assert_not_called()
+
+
+class TestTelegramWebhook:
+    @pytest.mark.asyncio
+    async def test_invalid_json_returns_400(self):
+        from core.webhook.server import telegram_webhook
+
+        mock_request = MagicMock()
+        mock_request.headers = {"X-Telegram-Bot-Api-Secret-Token": ""}
+        mock_request.json = AsyncMock(side_effect=ValueError("bad json"))
+
+        with patch("core.webhook.server.settings") as mock_settings:
+            mock_settings.TELEGRAM_BOT_SECRET_TOKEN = ""
+            response = await telegram_webhook(mock_request)
+            assert response.status_code == 400
+
+    @pytest.mark.asyncio
+    async def test_unauthorized_returns_401(self):
+        from core.webhook.server import telegram_webhook
+
+        mock_request = MagicMock()
+        mock_request.headers = {"X-Telegram-Bot-Api-Secret-Token": "wrong"}
+
+        with patch("core.webhook.server.settings") as mock_settings:
+            mock_settings.TELEGRAM_BOT_SECRET_TOKEN = "correct"
+            response = await telegram_webhook(mock_request)
+            assert response.status_code == 401
+
+
+class TestSendMessageEndpoint:
+    @pytest.mark.asyncio
+    async def test_exception_returns_500(self):
+        from core.webhook.server import send_message_endpoint
+
+        mock_request = MagicMock()
+        mock_request.json = AsyncMock(side_effect=RuntimeError("db error"))
+
+        response = await send_message_endpoint(mock_request)
+        assert response.status_code == 500
+
+    @pytest.mark.asyncio
+    async def test_success_returns_ok(self):
+        from core.webhook.server import send_message_endpoint
+
+        mock_request = MagicMock()
+        mock_request.json = AsyncMock(return_value={
+            "data": {"title": "test"},
+            "chat_id": "123",
+        })
+
+        with patch("core.webhook.server.send_item_message", new_callable=AsyncMock):
+            response = await send_message_endpoint(mock_request)
+            assert response.status_code == 200
diff --git a/tests/unit/test_api_exception_handlers.py b/tests/unit/test_api_exception_handlers.py
new file mode 100644
index 0000000..ef1953c
--- /dev/null
+++ b/tests/unit/test_api_exception_handlers.py
@@ -0,0 +1,95 @@
+"""Tests for FastAPI global exception handlers in apps/api/src/main.py
+
+Uses a minimal FastAPI app with the same exception handlers to avoid
+needing to fully configure the real app's dependencies.
+"""
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from fastfetchbot_shared.exceptions import (
+    FastFetchBotError,
+    ScraperError,
+    ScraperParseError,
+    FileExportError,
+)
+
+
+def _create_test_app():
+    """Create a minimal FastAPI app with the same exception handlers as main.py."""
+    from fastapi import Request
+    from fastapi.responses import JSONResponse
+    from fastfetchbot_shared.utils.logger import logger
+
+    app = FastAPI()
+
+    @app.exception_handler(FastFetchBotError)
+    async def fastfetchbot_error_handler(request: Request, exc: FastFetchBotError):
+        logger.error(f"Domain error on {request.method} {request.url}: {exc}")
+        return JSONResponse(status_code=502, content={"error": str(exc)})
+
+    @app.exception_handler(Exception)
+    async def generic_error_handler(request: Request, exc: Exception):
+        logger.exception(f"Unhandled error on {request.method} {request.url}")
+        return JSONResponse(status_code=500, content={"error": "Internal server error"})
+
+    return app
+
+
+@pytest.fixture
+def test_app():
+    return _create_test_app()
+
+
+@pytest.fixture
+def client(test_app):
+    return TestClient(test_app, raise_server_exceptions=False)
+
+
+class TestFastFetchBotErrorHandler:
+    def test_scraper_error_returns_502(self, test_app, client):
+        @test_app.get("/test")
+        async def _():
+            raise ScraperError("Twitter API failed")
+
+        response = client.get("/test")
+        assert response.status_code == 502
+        assert "Twitter API failed" in response.json()["error"]
+
+    def test_scraper_parse_error_returns_502(self, test_app, client):
+        @test_app.get("/test-parse")
+        async def _():
+            raise ScraperParseError("Invalid response")
+
+        response = client.get("/test-parse")
+        assert response.status_code == 502
+
+    def test_file_export_error_returns_502(self, test_app, client):
+        @test_app.get("/test-export")
+        async def _():
+            raise FileExportError("PDF failed")
+
+        response = client.get("/test-export")
+        assert response.status_code == 502
+        assert "PDF failed" in response.json()["error"]
+
+
+class TestGenericExceptionHandler:
+    def test_unhandled_exception_returns_500(self, test_app, client):
+        @test_app.get("/test-crash")
+        async def _():
+            raise RuntimeError("unexpected crash")
+
+        response = client.get("/test-crash")
+        assert response.status_code == 500
+        assert response.json()["error"] == "Internal server error"
+
+    def test_generic_does_not_leak_details(self, test_app, client):
+        @test_app.get("/test-sensitive")
+        async def _():
+            raise ValueError("secret database password")
+
+        response = client.get("/test-sensitive")
+        assert response.status_code == 500
+        assert "secret" not in response.json()["error"]
diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py
new file mode 100644
index 0000000..5c685f3
--- /dev/null
+++ b/tests/unit/test_exceptions.py
@@ -0,0 +1,82 @@
+"""Tests for packages/shared/fastfetchbot_shared/exceptions.py"""
+
+import pytest
+
+from fastfetchbot_shared.exceptions import (
+    FastFetchBotError,
+    ScraperError,
+    ScraperNetworkError,
+    ScraperParseError,
+    TelegraphPublishError,
+    FileExportError,
+    ExternalServiceError,
+)
+
+
+class TestExceptionHierarchy:
+    def test_scraper_error_is_fastfetchbot_error(self):
+        assert issubclass(ScraperError, FastFetchBotError)
+
+    def test_scraper_network_error_is_scraper_error(self):
+        assert issubclass(ScraperNetworkError, ScraperError)
+
+    def test_scraper_parse_error_is_scraper_error(self):
+        assert issubclass(ScraperParseError, ScraperError)
+
+    def test_telegraph_publish_error_is_fastfetchbot_error(self):
+        assert issubclass(TelegraphPublishError, FastFetchBotError)
+
+    def test_file_export_error_is_fastfetchbot_error(self):
+        assert issubclass(FileExportError, FastFetchBotError)
+
+    def test_external_service_error_is_fastfetchbot_error(self):
+        assert issubclass(ExternalServiceError, FastFetchBotError)
+
+    def test_all_are_exceptions(self):
+        for cls in (
+            FastFetchBotError,
+            ScraperError,
+            ScraperNetworkError,
+            ScraperParseError,
+            TelegraphPublishError,
+            FileExportError,
+            ExternalServiceError,
+        ):
+            assert issubclass(cls, Exception)
+
+
+class TestExceptionCatching:
+    """Verify that catching a parent also catches children."""
+
+    def test_catch_fastfetchbot_catches_scraper_error(self):
+        with pytest.raises(FastFetchBotError):
+            raise ScraperError("test")
+
+    def test_catch_scraper_catches_network_error(self):
+        with pytest.raises(ScraperError):
+            raise ScraperNetworkError("network fail")
+
+    def test_catch_scraper_catches_parse_error(self):
+        with pytest.raises(ScraperError):
+            raise ScraperParseError("parse fail")
+
+    def test_catch_fastfetchbot_catches_file_export_error(self):
+        with pytest.raises(FastFetchBotError):
+            raise FileExportError("export fail")
+
+    def test_catch_fastfetchbot_catches_external_service_error(self):
+        with pytest.raises(FastFetchBotError):
+            raise ExternalServiceError("service fail")
+
+
+class TestExceptionChaining:
+    def test_from_preserves_original_cause(self):
+        original = ValueError("original")
+        try:
+            raise ScraperError("wrapped") from original
+        except ScraperError as e:
+            assert e.__cause__ is original
+
+    def test_message_preserved(self):
+        exc = ScraperParseError("bad data")
+        assert str(exc) == "bad data"

From dcbeb3418f6b69f548a71ccd0883fc1638ea500b Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Sat, 28 Mar 2026 01:23:03 -0500
Subject: [PATCH 4/6] feat: update CLAUDE.md

---
 CLAUDE.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/CLAUDE.md b/CLAUDE.md
index de0732c..b8184e0 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -183,6 +183,23 @@ GitHub Actions (`.github/workflows/ci.yml`) builds and pushes all four images on
 6. Add platform-specific router in `apps/api/src/routers/` (if API endpoints are needed)
 7. Add any new pip dependencies to `packages/shared/pyproject.toml` under `[project.optional-dependencies] scrapers`
 
+### Exception Handling
+- **Custom exceptions** are defined in `packages/shared/fastfetchbot_shared/exceptions.py`:
+  - `FastFetchBotError` — base for all domain errors
+  - `ScraperError` / `ScraperNetworkError` / `ScraperParseError` — scraper failures
+  - `TelegraphPublishError` — Telegraph publishing failures
+  - `FileExportError` — file export (PDF, video, audio) failures
+  - `ExternalServiceError` — external service call failures (OpenAI, Firecrawl, Zyte, XHS sign server, etc.)
+- **Always use typed exceptions** instead of generic `RuntimeError`, `ValueError`, or `Exception` for domain errors. Pick the most specific subclass that fits.
+- **Use `from e` chaining** when wrapping exceptions: `raise ScraperError("message") from e`
+- **Boundary-level handlers** catch exceptions at service boundaries:
+  - FastAPI: global `@app.exception_handler(FastFetchBotError)` returns 502, generic `Exception` returns 500
+  - Telegram bot: `error_process` handler catches handler exceptions; webhook server protects endpoints
+  - Celery/ARQ workers: existing task-level try/catch with outbox error push
+- **Never use `print()` or `traceback.print_exc()`** — always use `logger.exception()` (includes traceback) or `logger.error()` (message only)
+- **Never silently swallow exceptions** — if catching an exception, either re-raise it or handle it explicitly with logging. Do not return `None` or empty data on failure.
+- **Fail fast after fallback chains** — scrapers may try multiple methods/APIs, but must raise a typed error when all fallbacks are exhausted
+
 ### Key Conventions
 - **`packages/shared/` (`fastfetchbot-shared`)** is for shared async logic — scrapers, templates, Telegraph, and async Celery task wrappers (file_export). Most code here is async and reusable across apps
 - **`packages/file-export/` (`fastfetchbot-file-export`)** is exclusively for synchronous Celery worker jobs — the heavy I/O operations that run inside the Celery worker process (yt-dlp video download, WeasyPrint PDF generation, OpenAI audio transcription). Apps never import this package directly; they use the async wrappers in `fastfetchbot_shared.services.file_export` which submit tasks to the Celery worker

From 6389f0e1ad48eac9b99188d2b3d5f60d8e1bb356 Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Sat, 28 Mar 2026 01:25:42 -0500
Subject: [PATCH 5/6] feat: add unit tests for new code

---
 .../file_export/test_transcribe_exceptions.py |  72 +++++++-
 .../unit/telegram_bot/test_message_sender.py  | 154 ++++++++++++++++++
 tests/unit/test_network.py                    | 103 ++++++++++++
 3 files changed, 328 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/telegram_bot/test_message_sender.py
 create mode 100644 tests/unit/test_network.py

diff --git a/tests/unit/file_export/test_transcribe_exceptions.py b/tests/unit/file_export/test_transcribe_exceptions.py
index 533be53..4461752 100644
--- a/tests/unit/file_export/test_transcribe_exceptions.py
+++ b/tests/unit/file_export/test_transcribe_exceptions.py
@@ -1,6 +1,6 @@
 """Tests for exception handling in packages/file-export/fastfetchbot_file_export/transcribe.py"""
 
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch, MagicMock, mock_open
 
 import pytest
 
@@ -29,3 +29,73 @@ def test_preserves_original_cause(self):
             with pytest.raises(FileExportError) as exc_info:
                 get_audio_text("/tmp/test.mp3", "key")
             assert exc_info.value.__cause__ is original
+
+
+class TestGetAudioTextHappyPath:
+    def test_successful_transcription(self):
+        from fastfetchbot_file_export.transcribe import get_audio_text
+
+        # Mock OpenAI client
+        mock_client = MagicMock()
+        mock_transcription = MagicMock()
+        mock_transcription.text = "hello world"
+        mock_client.audio.transcriptions.create.return_value = mock_transcription
+
+        mock_punctuation_response = MagicMock()
+        mock_punctuation_response.choices = [MagicMock()]
+        mock_punctuation_response.choices[0].message.content = "Hello, world."
+
+        mock_summary_response = MagicMock()
+        mock_summary_response.choices = [MagicMock()]
+        mock_summary_response.choices[0].message.content = "A greeting."
+
+        # Track which call we're on
+        call_count = [0]
+        def chat_side_effect(**kwargs):
+            call_count[0] += 1
+            if call_count[0] == 1:
+                return mock_punctuation_response
+            return mock_summary_response
+
+        mock_client.chat.completions.create.side_effect = chat_side_effect
+
+        # Mock AudioSegment
+        mock_audio = MagicMock()
+        mock_audio.__getitem__ = MagicMock(return_value=mock_audio)
+        mock_audio.duration_seconds = 10  # short audio, single segment
+        mock_audio.dBFS = -10  # above silence threshold
+        mock_audio.export = MagicMock()
+
+        mock_segment = MagicMock()
+        mock_segment.dBFS = -10
+        mock_audio.__getitem__.return_value = mock_segment
+        mock_segment.export = MagicMock()
+
+        with patch("fastfetchbot_file_export.transcribe.OpenAI", return_value=mock_client), \
+             patch("fastfetchbot_file_export.transcribe.AudioSegment") as MockAudioSegment, \
+             patch("fastfetchbot_file_export.transcribe.os.remove") as mock_remove, \
+             patch("fastfetchbot_file_export.transcribe.os.path.splitext", return_value=("/tmp/audio", ".mp3")), \
+             patch("builtins.open", mock_open(read_data=b"audio data")), \
+             patch("fastfetchbot_file_export.transcribe.milliseconds_until_sound", return_value=0):
+
+            MockAudioSegment.from_file.return_value = mock_audio
+
+            result = get_audio_text("/tmp/audio.mp3", "test-api-key")
+
+        assert "Hello, world." in result
+        assert "A greeting." in result
+        # Should have cleaned up the original file
+        mock_remove.assert_called()
+
+    def test_audio_segment_failure_wraps_in_file_export_error(self):
+        from fastfetchbot_file_export.transcribe import get_audio_text
+
+        mock_client = MagicMock()
+
+        with patch("fastfetchbot_file_export.transcribe.OpenAI", return_value=mock_client), \
+             patch("fastfetchbot_file_export.transcribe.AudioSegment") as MockAudioSegment:
+
+            MockAudioSegment.from_file.side_effect = FileNotFoundError("no such file")
+
+            with pytest.raises(FileExportError, match="Audio transcription failed"):
+                get_audio_text("/tmp/missing.mp3", "key")
diff --git a/tests/unit/telegram_bot/test_message_sender.py b/tests/unit/telegram_bot/test_message_sender.py
new file mode 100644
index 0000000..962777b
--- /dev/null
+++ b/tests/unit/telegram_bot/test_message_sender.py
@@ -0,0 +1,154 @@
+"""Tests for exception handling in apps/telegram-bot/core/services/message_sender.py"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+
+class TestMediaFilesPackagingDownloadFailure:
+    """Test that download failures in media_files_packaging are caught and skipped."""
+
+    @pytest.mark.asyncio
+    async def test_http_download_failure_skips_media(self):
+        from core.services.message_sender import media_files_packaging
+
+        media_files = [
+            {"media_type": "image", "url": "https://example.com/img.jpg", "caption": ""},
+        ]
+        data = {
+            "url": "https://example.com",
+            "category": "twitter",
+            "message_type": "short",
+        }
+
+        with patch(
+            "core.services.message_sender.download_file_by_metadata_item",
+            new_callable=AsyncMock,
+            side_effect=RuntimeError("network error"),
+        ):
+            media_group, file_group = await media_files_packaging(media_files, data)
+
+        # Media item should be skipped, not crash
+        assert media_group == []
+        assert file_group == []
+
+    @pytest.mark.asyncio
+    async def test_gif_download_failure_skips_media(self):
+        from core.services.message_sender import media_files_packaging
+
+        media_files = [
+            {"media_type": "gif", "url": "https://example.com/anim.gif", "caption": ""},
+        ]
+        data = {
+            "url": "https://example.com",
+            "category": "twitter",
+            "message_type": "short",
+        }
+
+        with patch(
+            "core.services.message_sender.download_file_by_metadata_item",
+            new_callable=AsyncMock,
+            side_effect=ConnectionError("timeout"),
+        ):
+            media_group, file_group = await media_files_packaging(media_files, data)
+
+        assert media_group == []
+        assert file_group == []
+
+    @pytest.mark.asyncio
+    async def test_image_document_download_failure_skips(self):
+        """Test the second download_file_by_metadata_item call for large image documents."""
+        from core.services.message_sender import media_files_packaging
+
+        mock_io = MagicMock()
+        mock_io.name = "media-abc.jpg"
+        mock_io.size = 15 * 1024 * 1024  # 15MB, over image size limit to trigger document path
+
+        mock_image = MagicMock()
+        mock_image.size = (5000, 5000)  # large image to trigger document download
+        mock_image.width = 5000
+        mock_image.height = 5000
+
+        call_count = [0]
+
+        async def download_side_effect(*args, **kwargs):
+            call_count[0] += 1
+            if call_count[0] == 1:
+                return mock_io
+            raise RuntimeError("second download failed")
+
+        media_files = [
+            {"media_type": "image", "url": "https://example.com/big.jpg", "caption": ""},
+        ]
+        data = {
+            "url": "https://example.com",
+            "category": "twitter",
+            "message_type": "short",
+        }
+
+        with patch(
+            "core.services.message_sender.download_file_by_metadata_item",
+            new_callable=AsyncMock,
+            side_effect=download_side_effect,
+        ), patch(
+            "core.services.message_sender.check_image_type",
+            new_callable=AsyncMock,
+            return_value="jpg",
+        ), patch(
+            "core.services.message_sender.Image"
+        ) as MockImage, patch(
+            "core.services.message_sender.image_compressing",
+            return_value=mock_image,
+        ), patch(
+            "core.services.message_sender.settings"
+        ) as mock_settings:
+            MockImage.open.return_value = mock_image
+            mock_settings.TELEBOT_API_SERVER = None
+            mock_settings.TELEGRAM_IMAGE_DIMENSION_LIMIT = 2000
+            mock_settings.TELEGRAM_IMAGE_SIZE_LIMIT = 10 * 1024 * 1024  # 10MB
+
+            media_group, file_group = await media_files_packaging(media_files, data)
+
+        # The image media_group should have the photo, but file_group should be empty
+        # because the document download failed and was skipped
+        assert file_group == []
+
+
+class TestSendItemMessageExceptionHandling:
+    @pytest.mark.asyncio
+    async def test_exception_logged_and_sent_to_debug_channel(self):
+        from core.services.message_sender import send_item_message
+
+        mock_app = MagicMock()
+        mock_bot = AsyncMock()
+        mock_app.bot = mock_bot
+
+        mock_chat = MagicMock()
+        mock_chat.type = "private"
+        mock_chat.linked_chat_id = None
+        mock_bot.get_chat = AsyncMock(return_value=mock_chat)
+        mock_bot.send_message = AsyncMock(side_effect=RuntimeError("telegram API down"))
+
+        with patch("core.services.message_sender._get_application", return_value=mock_app), \
+             patch("core.services.message_sender.send_debug_channel", new_callable=AsyncMock) as mock_debug:
+
+            # Should not raise — the exception is caught and logged
+            await send_item_message(
+                data={
+                    "media_files": [],
+                    "message_type": "short",
+                    "text": "test",
+                    "title": "Test",
+                    "author": "a",
+                    "author_url": "",
+                    "url": "https://example.com",
+                    "telegraph_url": "",
+                    "category": "twitter",
+                    "content": "",
+                },
+                chat_id=12345,
+            )
+
+            # Debug channel should have been called with the traceback
+            mock_debug.assert_awaited_once()
+            assert "telegram API down" in mock_debug.call_args[0][0]
diff --git a/tests/unit/test_network.py b/tests/unit/test_network.py
new file mode 100644
index 0000000..4cd264f
--- /dev/null
+++ b/tests/unit/test_network.py
@@ -0,0 +1,103 @@
+"""Tests for exception handling in packages/shared/fastfetchbot_shared/utils/network.py"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from fastfetchbot_shared.models.classes import NamedBytesIO
+
+
+class TestDownloadFileByMetadataItem:
+    @pytest.mark.asyncio
+    async def test_success_returns_named_bytes_io(self):
+        from fastfetchbot_shared.utils.network import download_file_by_metadata_item
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.content = b"fake image data"
+
+        mock_client = AsyncMock()
+        mock_client.get = AsyncMock(return_value=mock_response)
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=False)
+
+        with patch("fastfetchbot_shared.utils.network.httpx.AsyncClient", return_value=mock_client), \
+             patch("fastfetchbot_shared.utils.network.get_random_user_agent", return_value="TestAgent"):
+            result = await download_file_by_metadata_item(
+                url="https://example.com/image.jpg",
+                data={"url": "https://example.com", "category": "twitter"},
+            )
+
+        assert isinstance(result, NamedBytesIO)
+        assert result.name.endswith(".jpg")
+
+    @pytest.mark.asyncio
+    async def test_network_error_raises(self):
+        from fastfetchbot_shared.utils.network import download_file_by_metadata_item
+
+        mock_client = AsyncMock()
+        mock_client.get = AsyncMock(side_effect=ConnectionError("connection refused"))
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=False)
+
+        with patch("fastfetchbot_shared.utils.network.httpx.AsyncClient", return_value=mock_client), \
+             patch("fastfetchbot_shared.utils.network.get_random_user_agent", return_value="TestAgent"):
+            with pytest.raises(ConnectionError, match="connection refused"):
+                await download_file_by_metadata_item(
+                    url="https://example.com/image.jpg",
+                    data={"url": "https://example.com", "category": "twitter"},
+                )
+
+    @pytest.mark.asyncio
+    async def test_redirect_follows_location(self):
+        from fastfetchbot_shared.utils.network import download_file_by_metadata_item
+
+        mock_response = MagicMock()
+        mock_response.status_code = 302
+        mock_response.headers = {"Location": "https://cdn.example.com/real.jpg"}
+        mock_response.content = b"redirected content"
+
+        mock_client = AsyncMock()
+        mock_client.get = AsyncMock(return_value=mock_response)
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=False)
+
+        with patch("fastfetchbot_shared.utils.network.httpx.AsyncClient", return_value=mock_client), \
+             patch("fastfetchbot_shared.utils.network.get_random_user_agent", return_value="TestAgent"):
+            result = await download_file_by_metadata_item(
+                url="https://example.com/image.jpg",
+                data={"url": "https://example.com", "category": "twitter"},
+            )
+
+        assert isinstance(result, NamedBytesIO)
+
+
+class TestGetResponseJson:
+    @pytest.mark.asyncio
+    async def test_exception_returns_none_and_logs(self):
+        from fastfetchbot_shared.utils.network import get_response_json
+
+        with patch(
+            "fastfetchbot_shared.utils.network.get_response",
+            new_callable=AsyncMock,
+            side_effect=ConnectionError("timeout"),
+        ):
+            result = await get_response_json("https://example.com/api")
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_json_decode_error_returns_none(self):
+        from fastfetchbot_shared.utils.network import get_response_json
+
+        mock_response = MagicMock()
+        mock_response.json.side_effect = ValueError("invalid json")
+
+        with patch(
+            "fastfetchbot_shared.utils.network.get_response",
+            new_callable=AsyncMock,
+            return_value=mock_response,
+        ):
+            result = await get_response_json("https://example.com/api")
+
+        assert result is None

From 0697a131d2b28bc62626dd8183a1536da17941cb Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Sat, 28 Mar 2026 01:29:49 -0500
Subject: [PATCH 6/6] fix: code glitches

---
 apps/telegram-bot/core/services/message_sender.py            | 5 ++++-
 .../fastfetchbot_shared/services/scrapers/zhihu/__init__.py  | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/apps/telegram-bot/core/services/message_sender.py b/apps/telegram-bot/core/services/message_sender.py
index a2ccc76..1e6480d 100644
--- a/apps/telegram-bot/core/services/message_sender.py
+++ b/apps/telegram-bot/core/services/message_sender.py
@@ -169,11 +169,14 @@ async def send_item_message(
 
 
 async def send_debug_channel(message: str) -> None:
+    import html as html_module
     from core.config import TELEBOT_DEBUG_CHANNEL
     application = _get_application()
     if TELEBOT_DEBUG_CHANNEL is not None:
         await application.bot.send_message(
-            chat_id=TELEBOT_DEBUG_CHANNEL, text=message, parse_mode=ParseMode.HTML
+            chat_id=TELEBOT_DEBUG_CHANNEL,
+            text=html_module.escape(message),
+            parse_mode=ParseMode.HTML,
         )
 
 
diff --git a/packages/shared/fastfetchbot_shared/services/scrapers/zhihu/__init__.py b/packages/shared/fastfetchbot_shared/services/scrapers/zhihu/__init__.py
index f616499..8f8923d 100644
--- a/packages/shared/fastfetchbot_shared/services/scrapers/zhihu/__init__.py
+++ b/packages/shared/fastfetchbot_shared/services/scrapers/zhihu/__init__.py
@@ -559,12 +559,12 @@ async def _get_zhihu_article(self):
                     self.raw_content = fix_images_and_links(self.raw_content)
                     self.raw_content = unmask_zhihu_links(self.raw_content)
             except Exception as e:
-                raise ScraperError("zhihu request failed")
+                raise ScraperError("zhihu request failed") from e
         else:
             try:
                 selector = await get_selector(self.request_url, headers=self.headers)
             except Exception as e:
-                raise ScraperError("zhihu request failed")
+                raise ScraperError("zhihu request failed") from e
             if self.method == "json":
                 json_data = selector.xpath('string(//script[@id="js-initialData"])')
                 json_data = json.loads(json_data)