diff --git a/src/sentry/preprod/snapshots/image_diff/compare.py b/src/sentry/preprod/snapshots/image_diff/compare.py index 8c28534a0fe2e2..8b8bc10213e822 100644 --- a/src/sentry/preprod/snapshots/image_diff/compare.py +++ b/src/sentry/preprod/snapshots/image_diff/compare.py @@ -8,7 +8,7 @@ from PIL import Image -from .odiff import OdiffServer +from .odiff import OdiffServer, compare_cli from .types import DiffResult logger = logging.getLogger(__name__) @@ -79,13 +79,17 @@ def compare_images_batch( tmpdir_path = Path(tmpdir) if server is not None: return _compare_pairs(pairs, server, tmpdir_path) - with OdiffServer() as new_server: - return _compare_pairs(pairs, new_server, tmpdir_path) + # Temporarily skip creating OdiffServer due to server-mode stdin + # buffer reuse bug (https://github.com/dmtrKovalenko/odiff/pull/170). + # Revert once the fix is released in odiff-bin. + # with OdiffServer() as new_server: + # return _compare_pairs(pairs, new_server, tmpdir_path) + return _compare_pairs(pairs, None, tmpdir_path) def _compare_pairs( pairs: Sequence[tuple[bytes | Image.Image, bytes | Image.Image]], - server: OdiffServer, + server: OdiffServer | None, tmpdir_path: Path, ) -> list[DiffResult | None]: return [ @@ -98,7 +102,7 @@ def _compare_single_pair( idx: int, before: bytes | Image.Image, after: bytes | Image.Image, - server: OdiffServer, + server: OdiffServer | None, tmpdir_path: Path, ) -> DiffResult | None: before_img: Image.Image | None = None @@ -123,7 +127,18 @@ def _compare_single_pair( after_padded.save(after_path, "PNG") output_path = tmpdir_path / f"diff_{idx}.png" - resp = server.compare( + # Use CLI mode instead of server mode to work around stdin buffer + # reuse bug (https://github.com/dmtrKovalenko/odiff/pull/170). + # Revert to server.compare() once the fix is released in odiff-bin. + # resp = server.compare( + # before_path, + # after_path, + # output_path, + # threshold=ODIFF_SENSITIVITY_DIFF_THRESHOLD, + # antialiasing=True, + # outputDiffMask=True, + # ) + resp = compare_cli( before_path, after_path, output_path, @@ -131,15 +146,16 @@ def _compare_single_pair( antialiasing=True, outputDiffMask=True, ) - changed_pixels = resp.diffCount or 0 total_pixels = max_w * max_h - if changed_pixels == 0: + if resp.match: + changed_pixels = 0 diff_mask = Image.new("L", (max_w, max_h), 0) elif not output_path.exists(): raise RuntimeError(f"odiff did not produce output file: {output_path}") else: diff_mask = _mask_from_diff_output(output_path) + changed_pixels = sum(diff_mask.histogram()[1:]) diff_mask_png = _encode_mask_png(diff_mask) diff --git a/src/sentry/preprod/snapshots/image_diff/odiff.py b/src/sentry/preprod/snapshots/image_diff/odiff.py index eb6d01bb91ec3d..683abc0e0127a9 100644 --- a/src/sentry/preprod/snapshots/image_diff/odiff.py +++ b/src/sentry/preprod/snapshots/image_diff/odiff.py @@ -10,6 +10,10 @@ from sentry.preprod.snapshots.image_diff.types import OdiffResponse from sentry.utils import json +_CLI_FLAG_MAP = { + "outputDiffMask": "diff-mask", +} + ODIFF_PLATFORM_SUFFIXES = { ("arm64", "Darwin"): "macos-arm64", ("aarch64", "Darwin"): "macos-arm64", @@ -38,6 +42,42 @@ def _find_odiff_binary() -> str: raise FileNotFoundError("odiff binary not found. Run 'pnpm install' to install odiff-bin.") +def compare_cli( + base_path: str | Path, + compare_path: str | Path, + output_path: str | Path, + **options: object, +) -> OdiffResponse: + """Run a single odiff comparison via CLI (no server mode). + + Workaround for server-mode stdin buffer reuse bug + (https://github.com/dmtrKovalenko/odiff/pull/170). + Remove once the fix is released in odiff-bin. + """ + binary = _find_odiff_binary() + cmd = [binary, str(base_path), str(compare_path), str(output_path)] + + for key, value in options.items(): + flag = _CLI_FLAG_MAP.get(key, key) + if isinstance(value, bool): + if value: + cmd.append(f"--{flag}") + else: + cmd.extend([f"--{flag}", str(value)]) + + result = subprocess.run(cmd, capture_output=True, timeout=30) + + # Exit codes: 0=match, 21=layout diff, 22=pixel diff + if result.returncode not in (0, 21, 22): + stderr = result.stderr.decode(errors="replace").strip() + raise RuntimeError(f"odiff CLI error (exit {result.returncode}): {stderr}") + + return OdiffResponse( + requestId=0, + match=(result.returncode == 0), + ) + + class OdiffServer: def __init__(self) -> None: self._process: subprocess.Popen[bytes] | None = None diff --git a/src/sentry/preprod/snapshots/tasks.py b/src/sentry/preprod/snapshots/tasks.py index 96589e60542eb0..673d2321f54229 100644 --- a/src/sentry/preprod/snapshots/tasks.py +++ b/src/sentry/preprod/snapshots/tasks.py @@ -15,7 +15,6 @@ from sentry.objectstore import get_preprod_session from sentry.preprod.models import PreprodArtifact, PreprodComparisonApproval from sentry.preprod.snapshots.image_diff.compare import DIFF_ALGORITHM_VERSION, compare_images_batch -from sentry.preprod.snapshots.image_diff.odiff import OdiffServer from sentry.preprod.snapshots.manifest import ( ComparisonManifest, ComparisonSummary, @@ -475,143 +474,146 @@ def compare_snapshots( # TODO: spawn N OdiffServer workers and distribute pairs across them # via a thread pool to parallelize the odiff comparison step per batch - with OdiffServer() as server: - for batch in batches: - diff_pairs: list[tuple[bytes, bytes]] = [] - batch_names: list[str] = [] - batch_hashes: list[tuple[str, str]] = [] - - unique_hashes: set[str] = set() - for candidate in batch: - unique_hashes.add(candidate.head_hash) - unique_hashes.add(candidate.base_hash) - - fetch_cache: dict[str, bytes] = {} - failed_hashes: set[str] = set() - cache_lock = threading.Lock() - - def _fetch_hash(h: str) -> None: - try: - data = session.get(f"{image_key_prefix}/{h}").payload.read() - with cache_lock: - fetch_cache[h] = data - except Exception: - with cache_lock: - failed_hashes.add(h) - - # Fetch unique hashes in parallel; session.get() is thread-safe - with ContextPropagatingThreadPoolExecutor(max_workers=8) as executor: - list(executor.map(_fetch_hash, unique_hashes)) - - for candidate in batch: - if candidate.head_hash in failed_hashes or candidate.base_hash in failed_hashes: - logger.warning( - "compare_snapshots: failed to fetch images for %s", - candidate.name, - extra={ - "head_artifact_id": head_artifact_id, - "head_hash": candidate.head_hash, - "base_hash": candidate.base_hash, - }, - ) - error_count += 1 - image_results[candidate.name] = { - "status": "errored", - "head_hash": candidate.head_hash, - "base_hash": candidate.base_hash, - "reason": "image_fetch_failed", - } - continue - head_data = fetch_cache[candidate.head_hash] - base_data = fetch_cache[candidate.base_hash] - total_fetched_bytes += len(head_data) + len(base_data) - total_fetched_count += 2 - diff_pairs.append((base_data, head_data)) - batch_names.append(candidate.name) - batch_hashes.append((candidate.head_hash, candidate.base_hash)) - - logger.info( - "compare_snapshots: running batch of %d pairs (%d unique hashes fetched)", - len(diff_pairs), - len(fetch_cache), - extra={"head_artifact_id": head_artifact_id, "names": batch_names}, - ) - diff_results = compare_images_batch(diff_pairs, server=server) - logger.info( - "compare_snapshots: batch complete, %d results", - len(diff_results), - extra={"head_artifact_id": head_artifact_id}, - ) - - for name, (head_hash, base_hash), diff_result in zip( - batch_names, batch_hashes, diff_results, strict=True - ): - if diff_result is None: - error_count += 1 - image_results[name] = { - "status": "errored", - "head_hash": head_hash, - "base_hash": base_hash, - "reason": "image_processing_failed", - } - continue - - stem = _image_name_to_path_stem(name) - diff_mask_key = ( - f"{image_key_prefix}/{head_artifact_id}/{base_artifact_id}/diff/{stem}.png" - ) - diff_mask_bytes = diff_result.diff_mask_png - logger.info( - "compare_snapshots: uploading mask for %s (%d bytes, changed_px=%d)", - name, - len(diff_mask_bytes), - diff_result.changed_pixels, + # Temporarily bypassing OdiffServer due to server-mode stdin buffer + # reuse bug (https://github.com/dmtrKovalenko/odiff/pull/170). + # Revert once the fix is released in odiff-bin. + # with OdiffServer() as server: + for batch in batches: + diff_pairs: list[tuple[bytes, bytes]] = [] + batch_names: list[str] = [] + batch_hashes: list[tuple[str, str]] = [] + + unique_hashes: set[str] = set() + for candidate in batch: + unique_hashes.add(candidate.head_hash) + unique_hashes.add(candidate.base_hash) + + fetch_cache: dict[str, bytes] = {} + failed_hashes: set[str] = set() + cache_lock = threading.Lock() + + def _fetch_hash(h: str) -> None: + try: + data = session.get(f"{image_key_prefix}/{h}").payload.read() + with cache_lock: + fetch_cache[h] = data + except Exception: + with cache_lock: + failed_hashes.add(h) + + # Fetch unique hashes in parallel; session.get() is thread-safe + with ContextPropagatingThreadPoolExecutor(max_workers=8) as executor: + list(executor.map(_fetch_hash, unique_hashes)) + + for candidate in batch: + if candidate.head_hash in failed_hashes or candidate.base_hash in failed_hashes: + logger.warning( + "compare_snapshots: failed to fetch images for %s", + candidate.name, extra={ "head_artifact_id": head_artifact_id, - "diff_mask_key": diff_mask_key, + "head_hash": candidate.head_hash, + "base_hash": candidate.base_hash, }, ) - session.put(diff_mask_bytes, key=diff_mask_key, content_type="image/png") - - diff_pct = ( - diff_result.changed_pixels / diff_result.total_pixels - if diff_result.total_pixels > 0 - else 0 - ) - effective_threshold = diff_threshold if diff_threshold is not None else 0.0 - is_changed = diff_pct > effective_threshold - if is_changed: - changed_count += 1 - else: - unchanged_count += 1 - - logger.debug( - "compare_snapshots: %s diff_pct=%.6f threshold=%s is_changed=%s pixels=%d/%d", - name, - diff_pct, - diff_threshold, - is_changed, - diff_result.changed_pixels, - diff_result.total_pixels, - extra={"head_artifact_id": head_artifact_id}, - ) + error_count += 1 + image_results[candidate.name] = { + "status": "errored", + "head_hash": candidate.head_hash, + "base_hash": candidate.base_hash, + "reason": "image_fetch_failed", + } + continue + head_data = fetch_cache[candidate.head_hash] + base_data = fetch_cache[candidate.base_hash] + total_fetched_bytes += len(head_data) + len(base_data) + total_fetched_count += 2 + diff_pairs.append((base_data, head_data)) + batch_names.append(candidate.name) + batch_hashes.append((candidate.head_hash, candidate.base_hash)) - diff_mask_image_id = f"{head_artifact_id}/{base_artifact_id}/diff/{stem}.png" + logger.info( + "compare_snapshots: running batch of %d pairs (%d unique hashes fetched)", + len(diff_pairs), + len(fetch_cache), + extra={"head_artifact_id": head_artifact_id, "names": batch_names}, + ) + diff_results = compare_images_batch(diff_pairs) + logger.info( + "compare_snapshots: batch complete, %d results", + len(diff_results), + extra={"head_artifact_id": head_artifact_id}, + ) + for name, (head_hash, base_hash), diff_result in zip( + batch_names, batch_hashes, diff_results, strict=True + ): + if diff_result is None: + error_count += 1 image_results[name] = { - "status": "changed" if is_changed else "unchanged", + "status": "errored", "head_hash": head_hash, "base_hash": base_hash, - "changed_pixels": diff_result.changed_pixels, - "total_pixels": diff_result.total_pixels, - "diff_mask_key": diff_mask_key, - "diff_mask_image_id": diff_mask_image_id, - "before_width": diff_result.before_width, - "before_height": diff_result.before_height, - "after_width": diff_result.after_width, - "after_height": diff_result.after_height, - "aligned_height": diff_result.aligned_height, + "reason": "image_processing_failed", } + continue + + stem = _image_name_to_path_stem(name) + diff_mask_key = ( + f"{image_key_prefix}/{head_artifact_id}/{base_artifact_id}/diff/{stem}.png" + ) + diff_mask_bytes = diff_result.diff_mask_png + logger.info( + "compare_snapshots: uploading mask for %s (%d bytes, changed_px=%d)", + name, + len(diff_mask_bytes), + diff_result.changed_pixels, + extra={ + "head_artifact_id": head_artifact_id, + "diff_mask_key": diff_mask_key, + }, + ) + session.put(diff_mask_bytes, key=diff_mask_key, content_type="image/png") + + diff_pct = ( + diff_result.changed_pixels / diff_result.total_pixels + if diff_result.total_pixels > 0 + else 0 + ) + effective_threshold = diff_threshold if diff_threshold is not None else 0.0 + is_changed = diff_pct > effective_threshold + if is_changed: + changed_count += 1 + else: + unchanged_count += 1 + + logger.debug( + "compare_snapshots: %s diff_pct=%.6f threshold=%s is_changed=%s pixels=%d/%d", + name, + diff_pct, + diff_threshold, + is_changed, + diff_result.changed_pixels, + diff_result.total_pixels, + extra={"head_artifact_id": head_artifact_id}, + ) + + diff_mask_image_id = f"{head_artifact_id}/{base_artifact_id}/diff/{stem}.png" + + image_results[name] = { + "status": "changed" if is_changed else "unchanged", + "head_hash": head_hash, + "base_hash": base_hash, + "changed_pixels": diff_result.changed_pixels, + "total_pixels": diff_result.total_pixels, + "diff_mask_key": diff_mask_key, + "diff_mask_image_id": diff_mask_image_id, + "before_width": diff_result.before_width, + "before_height": diff_result.before_height, + "after_width": diff_result.after_width, + "after_height": diff_result.after_height, + "aligned_height": diff_result.aligned_height, + } for name in sorted(added): image_results[name] = {"status": "added", "head_hash": head_by_name[name]}