From 24c75a015a47f5e779d58d528c43f9e0eb76ad43 Mon Sep 17 00:00:00 2001 From: liao yinan Date: Tue, 16 Jun 2026 03:35:43 +0800 Subject: [PATCH] fix: ruff format compliance for _registry.py and tokenizer.py A list comprehension and a generator expression in _registry.py were wrapped across several lines even though each fits within the 120-char line length, and a dict comprehension in tokenizer.py was not wrapped the way ruff format expects, causing the CI lint (3.13) job to fail. --- tinybpe/_registry.py | 8 ++------ tinybpe/tokenizer.py | 3 ++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tinybpe/_registry.py b/tinybpe/_registry.py index eb77e07..bf13d9b 100644 --- a/tinybpe/_registry.py +++ b/tinybpe/_registry.py @@ -88,15 +88,11 @@ def _load_registry() -> tuple[dict[str, ModelInfo], dict[str, str | None]]: # decoding would be ambiguous — the C tokenizer cannot tell # whether that ID means a vocab token or a special token. max_vocab_id = entry["vocab_size"] - 1 - conflicting = [ - (tok, tid) for tok, tid in raw_special.items() if tid <= max_vocab_id - ] + conflicting = [(tok, tid) for tok, tid in raw_special.items() if tid <= max_vocab_id] if conflicting: import warnings - conflicting_repr = ", ".join( - f"{tok!r}→{tid}" for tok, tid in conflicting - ) + conflicting_repr = ", ".join(f"{tok!r}→{tid}" for tok, tid in conflicting) warnings.warn( f"Model {entry['name']!r}: special tokens overlap with byte or " f"merge IDs ({conflicting_repr}). " diff --git a/tinybpe/tokenizer.py b/tinybpe/tokenizer.py index f83a10f..ce03adb 100644 --- a/tinybpe/tokenizer.py +++ b/tinybpe/tokenizer.py @@ -154,7 +154,8 @@ def __init__( self._vocab_cache: dict[int, bytes] | None = None if bytes_maps is not None: self._vocab_cache = { - k: self._inv_map(v) for k, v in self._enc.vocab.items() # type: ignore[union-attr] + k: self._inv_map(v) + for k, v in self._enc.vocab.items() # type: ignore[union-attr] } # ------------------------------------------------------------------