From a65ab5d5d5188432b7cfbc16fd32341b1b31b6f9 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Wed, 20 May 2026 16:39:18 +0200 Subject: [PATCH] imspy_predictors RT: default to Chronologer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make Chronologer the go-to retention-time predictor (it reaches ~4× tighter median residual than the imspy transformer baseline on timsTOF data, per rt_im_exploration.ipynb benchmarks). The transformer is still reachable via load_deep_retention_time_predictor(backend="transformer"). hub.py gains a per-model "url" override field; the new entry rt/chronologer_base.pt pulls Chronologer_20220601193755.pt straight from the upstream Searle Lab repo (Apache-2.0, attribution kept in the chronologer.py docstring). We deliberately don't re-host — any upstream fix or retraining propagates automatically. SHA-256 is locked (1a500c24...) so corruption surfaces immediately. Backward-compatible: existing callers passing backend=None get the new default; passing backend="transformer" preserves the previous behavior. The Chronologer branch falls back to the transformer if the upstream chronologer package isn't installed or the download is unreachable. --- .../src/imspy_predictors/pretrained/hub.py | 15 +++++++-- .../src/imspy_predictors/rt/predictors.py | 31 +++++++++++++++++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/packages/imspy-predictors/src/imspy_predictors/pretrained/hub.py b/packages/imspy-predictors/src/imspy_predictors/pretrained/hub.py index 346d7962..6b2accbd 100644 --- a/packages/imspy-predictors/src/imspy_predictors/pretrained/hub.py +++ b/packages/imspy-predictors/src/imspy_predictors/pretrained/hub.py @@ -29,7 +29,9 @@ ) # Maps the *package-relative* path (as used by callers of get_model_path) -# to its download URL and expected SHA-256 hash. 
Entries default to the +# rustims models GitHub Release; an explicit "url" overrides that for +# vendor-free upstream redistributions (e.g. Chronologer). MODELS = { "ccs/best_model.pt": { "filename": "ccs-best_model.pt", @@ -51,6 +53,15 @@ "filename": "pretrained_encoder.pt", "sha256": "43ccc2f836bf3d81943ddce353ade9628e7d036421ba5b5c182bf163e496385e", }, + # Chronologer base weights — fetched directly from upstream Searle Lab + # (Apache-2.0, attribution preserved in imspy_predictors.rt.chronologer). + # We deliberately do not re-host: pulls from the upstream repo so any + # future fix or retraining propagates automatically. + "rt/chronologer_base.pt": { + "filename": "chronologer_base.pt", + "url": "https://github.com/searlelab/chronologer/raw/main/models/Chronologer_20220601193755.pt", + "sha256": "1a500c246b49a1a23643bce7f2df86d5a107359bf0ec34365531c73431b6c0b3", + }, } @@ -153,7 +164,7 @@ def ensure_model(model_name: str) -> Path: # 3. Download into a temp file in the same filesystem, then atomic-rename. cached_path.parent.mkdir(parents=True, exist_ok=True) - url = f"{_RELEASE_BASE}/{meta['filename']}" + url = meta.get("url") or f"{_RELEASE_BASE}/{meta['filename']}" logger.info("Downloading model '%s' from %s ...", model_name, url) tmp_fd, tmp_path = tempfile.mkstemp( diff --git a/packages/imspy-predictors/src/imspy_predictors/rt/predictors.py b/packages/imspy-predictors/src/imspy_predictors/rt/predictors.py index ebaaf8f2..efe7457b 100644 --- a/packages/imspy-predictors/src/imspy_predictors/rt/predictors.py +++ b/packages/imspy-predictors/src/imspy_predictors/rt/predictors.py @@ -228,11 +228,19 @@ def load_deep_retention_time_predictor(backend: Optional[str] = None): """ Load a pretrained retention time predictor model. + Defaults to **Chronologer** (Searle Lab, Apache-2.0) — a residual-CNN that + reaches roughly 4× tighter median residual than the imspy transformer + baseline on timsTOF data. 
Pass ``backend="transformer"`` to opt back into + the legacy imspy ``UnifiedPeptideModel`` RT predictor. + Args: - backend: Kept for backward compatibility, ignored (always uses PyTorch) + backend: ``"chronologer"`` (default, or pass ``None``) loads the + Chronologer wrapper. ``"transformer"`` loads the legacy imspy + transformer RT model. Any other value is treated as the default. Returns: - Loaded PyTorch model + Either a :class:`Chronologer` wrapper or the legacy transformer + PyTorch model, depending on ``backend``. """ if not TORCH_AVAILABLE: raise ImportError( @@ -240,6 +248,25 @@ def load_deep_retention_time_predictor(backend: Optional[str] = None): "Install with: pip install torch" ) + backend = (backend or "chronologer").lower() + + if backend == "chronologer": + # Default: Chronologer is the goto RT predictor on timsTOF data. + # Falls back to the transformer path only if Chronologer can't be + # constructed (e.g. upstream `chronologer` package not installed, or + # the base-weights download is unreachable). + try: + from imspy_predictors.rt.chronologer import Chronologer + base_path = get_model_path('rt/chronologer_base.pt') + return Chronologer.from_base(str(base_path)) + except (ImportError, FileNotFoundError, RuntimeError) as e: + import logging + logging.getLogger(__name__).warning( + "Chronologer load failed (%s); falling back to transformer.", e, + ) + backend = "transformer" + + # Transformer path (legacy / fallback). # Try to load UnifiedPeptideModel first (new architecture) try: from imspy_predictors.models import UnifiedPeptideModel