From aed03a138e0ceffc7023c33ffb21640e9fd5773c Mon Sep 17 00:00:00 2001 From: Nacai <111849193+B67687@users.noreply.github.com> Date: Thu, 9 Apr 2026 20:35:50 +0800 Subject: [PATCH 1/4] Fix Chinese translation workflow --- .github/workflows/translate-notebooks.yml | 49 +++-- translate_notebooks.py | 238 ++++++++++++---------- 2 files changed, 166 insertions(+), 121 deletions(-) diff --git a/.github/workflows/translate-notebooks.yml b/.github/workflows/translate-notebooks.yml index f579be9..988b263 100644 --- a/.github/workflows/translate-notebooks.yml +++ b/.github/workflows/translate-notebooks.yml @@ -4,34 +4,59 @@ on: push: branches: - main + paths: + - "**/*.ipynb" + - "translate_notebooks.py" + - ".github/workflows/translate-notebooks.yml" + pull_request: + branches: + - main + paths: + - "**/*.ipynb" + - "translate_notebooks.py" + - ".github/workflows/translate-notebooks.yml" + workflow_dispatch: + +permissions: + contents: write jobs: translate: + if: github.actor != 'github-actions[bot]' runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.x' + python-version: "3.12" - name: Install dependencies run: | - pip install nbformat requests # nbformat for reading/writing notebooks + pip install nbformat requests - name: Translate Notebooks env: QWEN_API_KEY: ${{ secrets.QWEN_API_KEY }} + DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} + QWEN_API_ENDPOINT: ${{ vars.QWEN_API_ENDPOINT }} + QWEN_MODEL: ${{ vars.QWEN_MODEL }} run: | - python translate-notebooks.py # Translation script + python translate_notebooks.py - - name: Commit and push changes + - name: Commit translated notebooks + if: github.event_name != 'pull_request' run: | - git config --global user.email "github-actions[bot]@users.noreply.github.com" - git config --global user.name "GitHub Actions Bot" - git add . - git commit -m "Translate notebooks to Chinese" - git checkout -b translated # Create a new branch called 'translated' - git push origin translated # Push to the 'translated' branch \ No newline at end of file + if [ -z "$(git status --porcelain -- translated-notebooks)" ]; then + echo "No translated notebook changes to commit." + exit 0 + fi + git config user.email "github-actions[bot]@users.noreply.github.com" + git config user.name "GitHub Actions Bot" + git add translated-notebooks + git commit -m "chore: update Chinese notebook translations" + git push origin HEAD:main diff --git a/translate_notebooks.py b/translate_notebooks.py index 2a1321e..c171a6f 100644 --- a/translate_notebooks.py +++ b/translate_notebooks.py @@ -1,125 +1,145 @@ -import nbformat +from __future__ import annotations + +import logging import os +import time +from pathlib import Path + +import nbformat import requests -import json -import time # For rate limiting -import logging # For logging errors and debugging -from ratelimit import limits, sleep_and_retry # For rate limiting - -# Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -# --- Qwen API Configuration --- -QWEN_API_ENDPOINT = "YOUR_QWEN_API_ENDPOINT" # Replace with the actual endpoint -QWEN_API_RATE_LIMIT_CALLS = 10 # Example: 10 calls -QWEN_API_RATE_LIMIT_PERIOD = 60 # Example: 60 seconds - -@sleep_and_retry -@limits(calls=QWEN_API_RATE_LIMIT_CALLS, period=QWEN_API_RATE_LIMIT_PERIOD) -def translate_text(text, api_key): - """Translates text using the Qwen API with rate limiting and error handling.""" - if not text: - return "" # Handle empty text gracefully - try: - headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json" - } - - data = { - "text": text, - "target_language": "zh" # Chinese - } - - response = requests.post(QWEN_API_ENDPOINT, headers=headers, data=json.dumps(data), timeout=10) # Add timeout - response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx) - result = response.json() - if "translated_text" in result: - return result["translated_text"] - else: - logging.warning(f"Translation API response missing 'translated_text': {result}") - return text # Return original text if translation fails - - except requests.exceptions.RequestException as e: - logging.error(f"Error during translation: {e}") - return text # Return original text on error - except json.JSONDecodeError as e: - logging.error(f"Error decoding JSON response: {e}") - return text - except Exception as e: - logging.exception(f"Unexpected error during translation: {e}") # Log full exception - return text +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") -def translate_notebook(notebook_path, api_key, output_folder): - """Translates the text content of a Jupyter Notebook and saves it to a new file in the output folder, preserving the folder structure.""" - try: - with open(notebook_path, 'r', encoding='utf-8') as f: - try: - nb = nbformat.read(f, as_version=4) - except nbformat.reader.NotJSONError as e: - logging.error(f"Error reading notebook {notebook_path}: Invalid JSON format. Skipping.") - return - except Exception as e: - logging.error(f"Error reading notebook {notebook_path}: {e}. Skipping.") - return - - for cell in nb.cells: - if cell.cell_type == 'markdown': # Only translate markdown cells - try: - cell.source = translate_text(cell.source, api_key) - except Exception as e: - logging.error(f"Error translating cell in {notebook_path}: {e}") - # Consider whether to continue translating other cells or skip the notebook - - # Create the output path, preserving the folder structure - relative_path = os.path.relpath(notebook_path, ".") # Path relative to the root - translated_path = os.path.join(output_folder, relative_path) - translated_dir = os.path.dirname(translated_path) # Directory of the translated file - - # Create the directory if it doesn't exist - try: - os.makedirs(translated_dir, exist_ok=True) - except OSError as e: - logging.error(f"Error creating directory {translated_dir}: {e}") - return +ROOT = Path(__file__).resolve().parent +OUTPUT_DIR = ROOT / "translated-notebooks" +DEFAULT_ENDPOINT = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions" +DEFAULT_MODEL = "qwen-mt-turbo" +REQUEST_TIMEOUT_SECONDS = 60 +REQUEST_DELAY_SECONDS = float(os.environ.get("QWEN_REQUEST_DELAY_SECONDS", "0.3")) +SKIP_DIRS = { + ".git", + ".github", + ".ipynb_checkpoints", + "translated-notebooks", +} - # Write the translated notebook - try: - with open(translated_path, 'w', encoding='utf-8') as f: - nbformat.write(nb, f) - logging.info(f"Translated: {notebook_path} -> {translated_path}") - except Exception as e: - logging.error(f"Error writing translated notebook to {translated_path}: {e}") +class TranslationError(RuntimeError): + pass - except FileNotFoundError: - logging.error(f"File not found: {notebook_path}") - except Exception as e: - logging.exception(f"Unexpected error processing {notebook_path}: {e}") # Log full exception +def should_skip(path: Path) -> bool: + return any(part in SKIP_DIRS for part in path.parts) -if __name__ == "__main__": - api_key = os.environ.get("QWEN_API_KEY") - if not api_key: - logging.error("Error: QWEN_API_KEY not found in environment variables.") - exit(1) - output_folder = "translated_notebooks" # Define the output folder name +def get_api_key() -> str: + return os.environ.get("QWEN_API_KEY") or os.environ.get("DASHSCOPE_API_KEY", "") + + +def get_endpoint() -> str: + return os.environ.get("QWEN_API_ENDPOINT", DEFAULT_ENDPOINT) + - # Find all .ipynb files in the repository - notebook_files = [] - for root, _, files in os.walk("."): - for file in files: - if file.endswith(".ipynb"): - notebook_files.append(os.path.join(root, file)) +def get_model() -> str: + return os.environ.get("QWEN_MODEL", DEFAULT_MODEL) - if not notebook_files: + +def translate_text(text: str, api_key: str) -> str: + if not text or not text.strip(): + return text + + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + payload = { + "model": get_model(), + "messages": [{"role": "user", "content": text}], + "translation_options": { + "source_lang": "auto", + "target_lang": "Chinese", + }, + } + + try: + response = requests.post( + get_endpoint(), + headers=headers, + json=payload, + timeout=REQUEST_TIMEOUT_SECONDS, + ) + response.raise_for_status() + result = response.json() + return result["choices"][0]["message"]["content"] + except (KeyError, IndexError, TypeError, ValueError) as exc: + raise TranslationError(f"Unexpected translation response format: {exc}") from exc + except requests.RequestException as exc: + raise TranslationError(f"Translation request failed: {exc}") from exc + finally: + if REQUEST_DELAY_SECONDS > 0: + time.sleep(REQUEST_DELAY_SECONDS) + + +def translate_notebook(notebook_path: Path, api_key: str) -> bool: + try: + with notebook_path.open("r", encoding="utf-8") as handle: + notebook = nbformat.read(handle, as_version=4) + except nbformat.reader.NotJSONError: + logging.error("Skipping invalid notebook JSON: %s", notebook_path) + return False + except OSError as exc: + logging.error("Failed to read %s: %s", notebook_path, exc) + return False + + changed = False + for cell in notebook.cells: + if cell.cell_type != "markdown": + continue + + translated = translate_text(cell.source, api_key) + if translated != cell.source: + cell.source = translated + changed = True + + output_path = OUTPUT_DIR / notebook_path.relative_to(ROOT) + output_path.parent.mkdir(parents=True, exist_ok=True) + with output_path.open("w", encoding="utf-8") as handle: + nbformat.write(notebook, handle) + + logging.info("Translated %s -> %s", notebook_path.relative_to(ROOT), output_path.relative_to(ROOT)) + return changed + + +def find_notebooks() -> list[Path]: + return sorted( + path for path in ROOT.rglob("*.ipynb") if not should_skip(path) + ) + + +def main() -> int: + api_key = get_api_key() + if not api_key: + logging.error("QWEN_API_KEY or DASHSCOPE_API_KEY must be configured.") + return 1 + + notebooks = find_notebooks() + if not notebooks: logging.info("No notebook files found.") - exit(0) + return 0 - for notebook_file in notebook_files: - translate_notebook(notebook_file, api_key, output_folder) + translated_count = 0 + for notebook_path in notebooks: + try: + if translate_notebook(notebook_path, api_key): + translated_count += 1 + except TranslationError as exc: + logging.error("Stopping after translation failure in %s: %s", notebook_path, exc) + return 1 - logging.info("Translation process completed.") \ No newline at end of file + logging.info("Translation process completed. %s notebooks updated.", translated_count) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From f22aa698185425c200ffaaff34503628c516e0da Mon Sep 17 00:00:00 2001 From: Nacai <111849193+B67687@users.noreply.github.com> Date: Thu, 9 Apr 2026 20:41:50 +0800 Subject: [PATCH 2/4] Fallback to default Qwen endpoint and model --- translate_notebooks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/translate_notebooks.py b/translate_notebooks.py index c171a6f..56fca8a 100644 --- a/translate_notebooks.py +++ b/translate_notebooks.py @@ -34,15 +34,15 @@ def should_skip(path: Path) -> bool: def get_api_key() -> str: - return os.environ.get("QWEN_API_KEY") or os.environ.get("DASHSCOPE_API_KEY", "") + return (os.environ.get("QWEN_API_KEY") or os.environ.get("DASHSCOPE_API_KEY") or "").strip() def get_endpoint() -> str: - return os.environ.get("QWEN_API_ENDPOINT", DEFAULT_ENDPOINT) + return (os.environ.get("QWEN_API_ENDPOINT") or DEFAULT_ENDPOINT).strip() def get_model() -> str: - return os.environ.get("QWEN_MODEL", DEFAULT_MODEL) + return (os.environ.get("QWEN_MODEL") or DEFAULT_MODEL).strip() def translate_text(text: str, api_key: str) -> str: From 7024d61bdb6ce724456240c26ee0435cbfa0bbce Mon Sep 17 00:00:00 2001 From: Nacai <111849193+B67687@users.noreply.github.com> Date: Thu, 9 Apr 2026 20:53:42 +0800 Subject: [PATCH 3/4] Log DashScope error responses for translation failures --- translate_notebooks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/translate_notebooks.py b/translate_notebooks.py index 56fca8a..0fd22f6 100644 --- a/translate_notebooks.py +++ b/translate_notebooks.py @@ -69,7 +69,11 @@ def translate_text(text: str, api_key: str) -> str: json=payload, timeout=REQUEST_TIMEOUT_SECONDS, ) - response.raise_for_status() + if not response.ok: + raise TranslationError( + f"Translation request failed with status {response.status_code}: " + f"{response.text.strip()}" + ) result = response.json() return result["choices"][0]["message"]["content"] except (KeyError, IndexError, TypeError, ValueError) as exc: From 3ba14789a80f861bc1fade55a4c84631d9ae6e79 Mon Sep 17 00:00:00 2001 From: Nacai <111849193+B67687@users.noreply.github.com> Date: Thu, 9 Apr 2026 21:00:45 +0800 Subject: [PATCH 4/4] Rewrite notebook translation workflow for changed files --- .github/workflows/translate-notebooks.yml | 39 +++++++- translate_notebooks.py | 107 ++++++++++++++++++---- 2 files changed, 122 insertions(+), 24 deletions(-) diff --git a/.github/workflows/translate-notebooks.yml b/.github/workflows/translate-notebooks.yml index 988b263..a752da7 100644 --- a/.github/workflows/translate-notebooks.yml +++ b/.github/workflows/translate-notebooks.yml @@ -20,9 +20,13 @@ on: permissions: contents: write +concurrency: + group: translate-notebooks-${{ github.ref }} + cancel-in-progress: true + jobs: translate: - if: github.actor != 'github-actions[bot]' + if: github.actor != 'github-actions[bot]' && (github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository) runs-on: ubuntu-latest steps: - name: Checkout code @@ -39,17 +43,44 @@ jobs: run: | pip install nbformat requests + - name: Determine notebooks to translate + id: notebooks + shell: bash + run: | + set -euo pipefail + + NOTEBOOK_LIST=.changed-notebooks.txt + : > "$NOTEBOOK_LIST" + + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + find . -type f -name '*.ipynb' -not -path './translated-notebooks/*' | sed 's|^\./||' | sort > "$NOTEBOOK_LIST" + elif [[ "${{ github.event_name }}" == "pull_request" ]]; then + git diff --name-only "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}" -- '*.ipynb' ':(exclude)translated-notebooks/**' | sort > "$NOTEBOOK_LIST" + elif [[ "${{ github.event.before }}" == "0000000000000000000000000000000000000000" ]]; then + find . -type f -name '*.ipynb' -not -path './translated-notebooks/*' | sed 's|^\./||' | sort > "$NOTEBOOK_LIST" + else + git diff --name-only "${{ github.event.before }}" "${{ github.sha }}" -- '*.ipynb' ':(exclude)translated-notebooks/**' | sort > "$NOTEBOOK_LIST" + fi + + NOTEBOOK_COUNT="$(grep -c . "$NOTEBOOK_LIST" || true)" + echo "count=$NOTEBOOK_COUNT" >> "$GITHUB_OUTPUT" + echo "file_list=$NOTEBOOK_LIST" >> "$GITHUB_OUTPUT" + echo "Selected notebooks:" + cat "$NOTEBOOK_LIST" || true + - name: Translate Notebooks + if: steps.notebooks.outputs.count != '0' env: - QWEN_API_KEY: ${{ secrets.QWEN_API_KEY }} DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} + QWEN_API_KEY: ${{ secrets.QWEN_API_KEY }} + DASHSCOPE_API_ENDPOINT: ${{ vars.DASHSCOPE_API_ENDPOINT }} QWEN_API_ENDPOINT: ${{ vars.QWEN_API_ENDPOINT }} QWEN_MODEL: ${{ vars.QWEN_MODEL }} run: | - python translate_notebooks.py + python translate_notebooks.py --file-list "${{ steps.notebooks.outputs.file_list }}" - name: Commit translated notebooks - if: github.event_name != 'pull_request' + if: github.event_name != 'pull_request' && steps.notebooks.outputs.count != '0' run: | if [ -z "$(git status --porcelain -- translated-notebooks)" ]; then echo "No translated notebook changes to commit." diff --git a/translate_notebooks.py b/translate_notebooks.py index 0fd22f6..28172be 100644 --- a/translate_notebooks.py +++ b/translate_notebooks.py @@ -1,5 +1,6 @@ from __future__ import annotations +import argparse import logging import os import time @@ -33,39 +34,42 @@ def should_skip(path: Path) -> bool: return any(part in SKIP_DIRS for part in path.parts) +def env_value(*names: str, default: str = "") -> str: + for name in names: + value = os.environ.get(name) + if value and value.strip(): + return value.strip() + return default + + def get_api_key() -> str: - return (os.environ.get("QWEN_API_KEY") or os.environ.get("DASHSCOPE_API_KEY") or "").strip() + return env_value("DASHSCOPE_API_KEY", "QWEN_API_KEY") def get_endpoint() -> str: - return (os.environ.get("QWEN_API_ENDPOINT") or DEFAULT_ENDPOINT).strip() + return env_value("DASHSCOPE_API_ENDPOINT", "QWEN_API_ENDPOINT", default=DEFAULT_ENDPOINT) def get_model() -> str: - return (os.environ.get("QWEN_MODEL") or DEFAULT_MODEL).strip() + return env_value("QWEN_MODEL", default=DEFAULT_MODEL) -def translate_text(text: str, api_key: str) -> str: +def translate_text(text: str, api_key: str, session: requests.Session) -> str: if not text or not text.strip(): return text - headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json", - } payload = { "model": get_model(), "messages": [{"role": "user", "content": text}], "translation_options": { "source_lang": "auto", - "target_lang": "Chinese", + "target_lang": "zh", }, } try: - response = requests.post( + response = session.post( get_endpoint(), - headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SECONDS, ) @@ -85,7 +89,7 @@ def translate_text(text: str, api_key: str) -> str: time.sleep(REQUEST_DELAY_SECONDS) -def translate_notebook(notebook_path: Path, api_key: str) -> bool: +def translate_notebook(notebook_path: Path, api_key: str, session: requests.Session) -> bool: try: with notebook_path.open("r", encoding="utf-8") as handle: notebook = nbformat.read(handle, as_version=4) @@ -101,7 +105,7 @@ def translate_notebook(notebook_path: Path, api_key: str) -> bool: if cell.cell_type != "markdown": continue - translated = translate_text(cell.source, api_key) + translated = translate_text(cell.source, api_key, session) if translated != cell.source: cell.source = translated changed = True @@ -115,27 +119,90 @@ def translate_notebook(notebook_path: Path, api_key: str) -> bool: return changed -def find_notebooks() -> list[Path]: - return sorted( - path for path in ROOT.rglob("*.ipynb") if not should_skip(path) +def find_all_notebooks() -> list[Path]: + return sorted(path for path in ROOT.rglob("*.ipynb") if not should_skip(path)) + + +def resolve_notebook_paths(input_paths: list[str]) -> list[Path]: + resolved_paths: list[Path] = [] + seen: set[Path] = set() + + for input_path in input_paths: + candidate = (ROOT / input_path).resolve() + try: + relative = candidate.relative_to(ROOT) + except ValueError: + logging.warning("Skipping notebook outside repository: %s", input_path) + continue + + if should_skip(relative) or candidate.suffix != ".ipynb" or not candidate.exists(): + logging.warning("Skipping invalid notebook path: %s", input_path) + continue + + if candidate not in seen: + seen.add(candidate) + resolved_paths.append(candidate) + + return sorted(resolved_paths) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Translate Jupyter notebooks into Chinese.") + parser.add_argument("notebooks", nargs="*", help="Notebook paths relative to the repository root.") + parser.add_argument("--all", action="store_true", help="Translate all notebooks in the repository.") + parser.add_argument( + "--file-list", + help="Path to a newline-delimited file containing notebook paths relative to the repository root.", ) + return parser.parse_args() + + +def load_requested_notebooks(args: argparse.Namespace) -> list[Path]: + if args.all: + return find_all_notebooks() + + requested_paths = list(args.notebooks) + if args.file_list: + requested_paths.extend( + line.strip() + for line in Path(args.file_list).read_text(encoding="utf-8").splitlines() + if line.strip() + ) + + if not requested_paths: + return [] + + return resolve_notebook_paths(requested_paths) def main() -> int: + args = parse_args() + api_key = get_api_key() if not api_key: - logging.error("QWEN_API_KEY or DASHSCOPE_API_KEY must be configured.") + logging.error("DASHSCOPE_API_KEY or QWEN_API_KEY must be configured.") return 1 - notebooks = find_notebooks() + notebooks = load_requested_notebooks(args) if not notebooks: - logging.info("No notebook files found.") + logging.info("No notebook files selected for translation.") return 0 + logging.info("Using endpoint: %s", get_endpoint()) + logging.info("Using model: %s", get_model()) + translated_count = 0 + session = requests.Session() + session.headers.update( + { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + ) + for notebook_path in notebooks: try: - if translate_notebook(notebook_path, api_key): + if translate_notebook(notebook_path, api_key, session): translated_count += 1 except TranslationError as exc: logging.error("Stopping after translation failure in %s: %s", notebook_path, exc)