diff --git a/patchdiff/produce.py b/patchdiff/produce.py index 8934303..a871941 100644 --- a/patchdiff/produce.py +++ b/patchdiff/produce.py @@ -3,15 +3,90 @@ This module provides an alternative to diffing by using proxy objects that monitor mutations as they are being made and emit patches immediately. This approach is inspired by Immer's proxy-based implementation. + +Proxies track their location through parent links instead of storing an +absolute path. A proxy's path is computed on demand by walking up to the +root, so paths stay correct when list indices shift, and proxies that have +been removed from the draft ("detached") stop recording patches: their data +is captured by the snapshot of whatever write re-inserts it later. """ from __future__ import annotations from copy import deepcopy -from typing import Any, Callable, Dict, List, Set, Tuple, Union +from typing import Any, Callable, Dict, Hashable, List, Tuple, Union +from weakref import ref from .pointer import Pointer +# Types that are immutable and can never contain a proxy, so they can be +# stored and snapshotted as-is. +_SCALAR_TYPES = frozenset({int, float, complex, bool, str, bytes, type(None)}) + +# Sentinel to distinguish "no default given" from falsy defaults like None or 0 +_MISSING = object() + + +def _snapshot(value: Any) -> Any: + """Deep copy a value for storage in a patch, replacing any proxies with + plain copies of their underlying data. + + Hand-rolled for the common JSON-like types (faster than copy.deepcopy); + unrecognized types fall back to deepcopy, which also unwraps third-party + proxies (like observ's) through their __deepcopy__ hooks. + """ + cls = value.__class__ + if cls in _SCALAR_TYPES: + return value + if cls is dict: + return {key: _snapshot(item) for key, item in value.items()} + if cls is list: + return [_snapshot(item) for item in value] + if cls is DictProxy or cls is ListProxy or cls is SetProxy: + return _snapshot(value._data) + if cls is set: + return {_snapshot(item) for item in value} + if cls is frozenset: + return frozenset(_snapshot(item) for item in value) + if cls is tuple: + return tuple(_snapshot(item) for item in value) + return deepcopy(value) + + +def _unwrap(value: Any) -> Any: + """Return value with any nested proxies replaced by plain deep copies of + their data, so that proxies never end up inside the draft. + + Returns the original object untouched when it contains no proxies (the + common case). Replacing a nested proxy with a copy gives value + semantics: later mutations through the original path are not shared, + which keeps the recorded patches consistent. + + Set members must be hashable and proxies are not, so sets can never + contain proxies and are returned as-is. + """ + cls = value.__class__ + if cls in _SCALAR_TYPES: + return value + if cls is DictProxy or cls is ListProxy or cls is SetProxy: + return _snapshot(value._data) + if cls is dict: + for item in value.values(): + if _unwrap(item) is not item: + return {key: _unwrap(item) for key, item in value.items()} + return value + if cls is list: + for item in value: + if _unwrap(item) is not item: + return [_unwrap(item) for item in value] + return value + if cls is tuple: + for item in value: + if _unwrap(item) is not item: + return tuple(_unwrap(item) for item in value) + return value + return value + def _add_reader_methods(proxy_class, method_names): """Add simple pass-through reader methods to a proxy class. @@ -33,12 +108,30 @@ def reader(self, *args, **kwargs): class PatchRecorder: - """Records patches as mutations happen on proxy objects.""" + """Records patches as mutations happen on proxy objects. + + Reverse patches must run in the opposite order of the forward patches. + They are appended here (O(1) instead of O(n) for inserting at the + front) and reversed once in finalize(). + + """ def __init__(self): self.patches: List[Dict] = [] self.reverse_patches: List[Dict] = [] + def finalize(self, root: "_Proxy") -> None: + """Put the reverse patches in reverse application order and + detach the root proxy. + + Call once, after all mutations have been recorded. Every attached + proxy computes its path through the root, so detaching the root + stops all recording: a proxy leaked out of the recipe can no + longer append to the already-returned patch lists. + """ + self.reverse_patches.reverse() + root._detached = True + def record_add( self, path: Pointer, value: Any, reverse_path: Pointer = None ) -> None: @@ -51,9 +144,11 @@ def record_add( If not provided, uses the same path. This is needed for sets where add uses "/-" but remove needs "/value". """ - self.patches.append({"op": "add", "path": path, "value": deepcopy(value)}) - self.reverse_patches.insert( - 0, {"op": "remove", "path": reverse_path if reverse_path else path} + if value.__class__ not in _SCALAR_TYPES: + value = _snapshot(value) + self.patches.append({"op": "add", "path": path, "value": value}) + self.reverse_patches.append( + {"op": "remove", "path": reverse_path if reverse_path else path} ) def record_remove( @@ -69,110 +164,241 @@ def record_remove( for lists where remove uses "/index" but add needs "/-" when removing from the end. """ + if old_value.__class__ not in _SCALAR_TYPES: + old_value = _snapshot(old_value) self.patches.append({"op": "remove", "path": path}) - self.reverse_patches.insert( - 0, + self.reverse_patches.append( { "op": "add", "path": reverse_path if reverse_path else path, - "value": deepcopy(old_value), - }, + "value": old_value, + } ) def record_replace(self, path: Pointer, old_value: Any, new_value: Any) -> None: """Record a replace operation, but only if the value actually changed.""" if old_value == new_value: return # Skip no-op replacements - self.patches.append( - {"op": "replace", "path": path, "value": deepcopy(new_value)} - ) - self.reverse_patches.insert( - 0, {"op": "replace", "path": path, "value": deepcopy(old_value)} - ) - - -class DictProxy: - """Proxy for dict objects that tracks mutations and generates patches.""" - - __slots__ = ("_data", "_path", "_proxies", "_recorder") - __hash__ = None # dicts are unhashable + if new_value.__class__ not in _SCALAR_TYPES: + new_value = _snapshot(new_value) + if old_value.__class__ not in _SCALAR_TYPES: + old_value = _snapshot(old_value) + self.patches.append({"op": "replace", "path": path, "value": new_value}) + self.reverse_patches.append({"op": "replace", "path": path, "value": old_value}) + + +class _Proxy: + """Common base for proxies: location tracking through parent links. + + A proxy knows its parent proxy and its key/index within that parent. + Its path is computed on demand by walking up to the root, so paths + remain correct as the tree changes. The `_proxies` cache doubles as a + registry of handed-out child proxies: structural changes update their + keys (list shifts, sort, reverse) or mark them detached (removals and + replacements). Detached proxies still mutate their data but no longer + record patches. + + The parent link is a weak reference, so proxies never form reference + cycles and are freed by reference counting (cyclic proxy garbage + caused gc latency spikes). A dead parent reference means an ancestor + was detached and dropped, so the proxy behaves as detached. + """ - def __init__(self, data: Dict, recorder: PatchRecorder, path: Pointer): + __slots__ = ( + "__weakref__", + "_data", + "_detached", + "_key", + "_parent", + "_proxies", + "_recorder", + ) + __hash__ = None # mutable containers are unhashable + + def __init__( + self, + data: Any, + recorder: PatchRecorder, + parent: "_Proxy" | None = None, + key: Hashable = None, + ): self._data = data self._recorder = recorder - self._path = path - self._proxies = {} - - def _wrap(self, key: Any, value: Any) -> Any: + self._parent = ref(parent) if parent is not None else None + self._key = key + self._detached = False + # Child-proxy registry, allocated lazily on first wrap: most + # proxies are leaves that never hand out children + self._proxies = None + + def __deepcopy__(self, memo): + # Deep copies of a proxy must yield plain data, never the proxy + return deepcopy(self._data, memo) + + def _location(self) -> tuple | None: + """Path tokens of this proxy in the draft, or None when this proxy + or any of its ancestors has been detached from the draft (a dead + parent reference counts as detached). + + Returns a tuple so callers can build a child Pointer in a single + allocation: Pointer((*tokens, key)).""" + if self._detached: + return None + parent_ref = self._parent + if parent_ref is None: + return () + node = parent_ref() + if node is None: + return None + tokens = [self._key] + while True: + if node._detached: + return None + parent_ref = node._parent + if parent_ref is None: + break + tokens.append(node._key) + node = parent_ref() + if node is None: + return None + tokens.reverse() + return tuple(tokens) + + def _wrap(self, key: Hashable, value: Any) -> Any: """Wrap nested structures in proxies using duck typing.""" # Check cache first - it's faster than hasattr() calls - if key in self._proxies: - return self._proxies[key] + proxies = self._proxies + if proxies is not None and key in proxies: + return proxies[key] # Use duck typing to support observ reactive objects and other proxies if hasattr(value, "keys"): # dict-like - proxy = DictProxy(value, self._recorder, self._path.append(key)) - self._proxies[key] = proxy - return proxy + proxy = DictProxy(value, self._recorder, self, key) elif hasattr(value, "append"): # list-like - proxy = ListProxy(value, self._recorder, self._path.append(key)) - self._proxies[key] = proxy - return proxy + proxy = ListProxy(value, self._recorder, self, key) elif hasattr(value, "add") and hasattr(value, "discard"): # set-like - proxy = SetProxy(value, self._recorder, self._path.append(key)) - self._proxies[key] = proxy - return proxy - return value + proxy = SetProxy(value, self._recorder, self, key) + else: + return value + if proxies is None: + proxies = self._proxies = {} + proxies[key] = proxy + return proxy + + def _detach(self, key: Hashable) -> None: + """Detach the handed-out child proxy for key, if any. + + Only called when self._proxies is non-empty (callers guard). + """ + proxy = self._proxies.pop(key, None) + if proxy is not None: + proxy._detached = True + + def _detach_all(self) -> None: + if not self._proxies: + return + for proxy in self._proxies.values(): + proxy._detached = True + self._proxies.clear() + + def _in_parent_chain(self, proxy: "_Proxy") -> bool: + """True when proxy is self or an ancestor of self.""" + node = self + while node is not None: + if node is proxy: + return True + parent_ref = node._parent + node = parent_ref() if parent_ref is not None else None + return False + + def _adopt(self, key: Hashable, value: Any) -> Any: + """Prepare value for storing at self._data[key], returning plain data. + + A detached proxy passed directly is re-attached at the new location + (move semantics): the held reference stays live and later mutations + through it record at the new path. A proxy that is still attached + elsewhere is copied (a value can only live in one place), as are + proxies nested inside plain containers. + """ + cls = value.__class__ + if cls is DictProxy or cls is ListProxy or cls is SetProxy: + if ( + value._detached + # only re-attach plain data; duck-typed data (e.g. observ + # proxies) is copied to plain data instead + and value._data.__class__ in (dict, list, set) + # guard against creating a cycle by adopting an ancestor + and not self._in_parent_chain(value) + ): + value._detached = False + value._parent = ref(self) + value._key = key + if self._proxies is None: + self._proxies = {} + self._proxies[key] = value + return value._data + return _snapshot(value._data) + return _unwrap(value) + + +class DictProxy(_Proxy): + """Proxy for dict objects that tracks mutations and generates patches.""" + + __slots__ = () def __getitem__(self, key: Any) -> Any: value = self._data[key] return self._wrap(key, value) def __setitem__(self, key: Any, value: Any) -> None: - path = self._path.append(key) - if key in self._data: - old_value = self._data[key] - self._recorder.record_replace(path, old_value, value) - else: - self._recorder.record_add(path, value) + # The current value for this key is being replaced, so a previously + # handed-out proxy for it no longer lives in the draft + if self._proxies: + self._detach(key) + if value.__class__ not in _SCALAR_TYPES: + value = self._adopt(key, value) + tokens = self._location() + if tokens is not None: + path = Pointer((*tokens, key)) + if key in self._data: + self._recorder.record_replace(path, self._data[key], value) + else: + self._recorder.record_add(path, value) self._data[key] = value - # Invalidate proxy cache for this key - if key in self._proxies: - del self._proxies[key] def __delitem__(self, key: Any) -> None: old_value = self._data[key] - path = self._path.append(key) - self._recorder.record_remove(path, old_value) + tokens = self._location() + if tokens is not None: + self._recorder.record_remove(Pointer((*tokens, key)), old_value) del self._data[key] - # Invalidate proxy cache for this key - if key in self._proxies: - del self._proxies[key] + if self._proxies: + self._detach(key) def get(self, key: Any, default=None): if key in self._data: return self[key] return default - def pop(self, key: Any, default=None): + def pop(self, key: Any, default=_MISSING): if key in self._data: old_value = self._data[key] - path = self._path.append(key) - self._recorder.record_remove(path, old_value) + tokens = self._location() + if tokens is not None: + self._recorder.record_remove(Pointer((*tokens, key)), old_value) result = self._data.pop(key) - # Invalidate proxy cache for this key - if key in self._proxies: - del self._proxies[key] + if self._proxies: + self._detach(key) return result - elif default: - return default - else: + if default is _MISSING: raise KeyError(key) + return default def setdefault(self, key: Any, default=None): if key not in self._data: self[key] = default - return default + # Return through __getitem__ so containers come back proxied and + # further mutations on the returned value are tracked return self[key] def update(self, *args, **kwargs): @@ -186,34 +412,25 @@ def update(self, *args, **kwargs): items.extend(other) items.extend(kwargs.items()) - # Generate patches and update data for key, value in items: - path = self._path.append(key) - if key in self._data: - old_value = self._data[key] - self._recorder.record_replace(path, old_value, value) - else: - self._recorder.record_add(path, value) - self._data[key] = value - # Invalidate proxy cache for this key - if key in self._proxies: - del self._proxies[key] + self[key] = value def clear(self): # Generate patches for all keys and clear data - for key, value in list(self._data.items()): - path = self._path.append(key) - self._recorder.record_remove(path, value) + tokens = self._location() + if tokens is not None: + for key, value in self._data.items(): + self._recorder.record_remove(Pointer((*tokens, key)), value) self._data.clear() - self._proxies.clear() + self._detach_all() def popitem(self): key, value = self._data.popitem() - path = self._path.append(key) - self._recorder.record_remove(path, value) - # Invalidate proxy cache for this key - if key in self._proxies: - del self._proxies[key] + tokens = self._location() + if tokens is not None: + self._recorder.record_remove(Pointer((*tokens, key)), value) + if self._proxies: + self._detach(key) return key, value def values(self): @@ -260,38 +477,23 @@ def __ior__(self, other): # - __lt__, __le__, __gt__, __ge__: dicts don't support ordering comparisons -class ListProxy: +class ListProxy(_Proxy): """Proxy for list objects that tracks mutations and generates patches.""" - __slots__ = ("_data", "_path", "_proxies", "_recorder") - __hash__ = None # lists are unhashable + __slots__ = () - def __init__(self, data: List, recorder: PatchRecorder, path: Pointer): - self._data = data - self._recorder = recorder - self._path = path - self._proxies = {} - - def _wrap(self, index: int, value: Any) -> Any: - """Wrap nested structures in proxies using duck typing.""" - # Check cache first - it's faster than hasattr() calls - if index in self._proxies: - return self._proxies[index] - - # Use duck typing to support observ reactive objects and other proxies - if hasattr(value, "keys"): # dict-like - proxy = DictProxy(value, self._recorder, self._path.append(index)) - self._proxies[index] = proxy - return proxy - elif hasattr(value, "append"): # list-like - proxy = ListProxy(value, self._recorder, self._path.append(index)) - self._proxies[index] = proxy - return proxy - elif hasattr(value, "add") and hasattr(value, "discard"): # set-like - proxy = SetProxy(value, self._recorder, self._path.append(index)) - self._proxies[index] = proxy - return proxy - return value + def _shift_cache(self, start: int, delta: int) -> None: + """Reindex handed-out child proxies at indices >= start by delta, + after elements shifted in the underlying list.""" + if not self._proxies: + return + shifted = {} + for index, proxy in self._proxies.items(): + if index >= start: + index += delta + proxy._key = index + shifted[index] = proxy + self._proxies = shifted def __getitem__(self, index: Union[int, slice]) -> Any: value = self._data[index] @@ -309,135 +511,180 @@ def __setitem__(self, index: Union[int, slice], value: Any) -> None: if isinstance(index, slice): # Handle slice assignment with proper patch generation start, stop, step = index.indices(len(self._data)) + tokens = self._location() if step != 1: # Step slices must have same length old_values = self._data[index] - if len(old_values) != len(value): + new_values = list(value) + if len(old_values) != len(new_values): raise ValueError( - f"attempt to assign sequence of size {len(value)} " + f"attempt to assign sequence of size {len(new_values)} " f"to extended slice of size {len(old_values)}" ) # Replace each element in the stepped slice - for i, (idx, new_val) in enumerate( - zip(range(start, stop, step), value) - ): - path = self._path.append(idx) + for idx, new_val in zip(range(start, stop, step), new_values): + if self._proxies: + self._detach(idx) + if new_val.__class__ not in _SCALAR_TYPES: + new_val = self._adopt(idx, new_val) old_val = self._data[idx] - self._recorder.record_replace(path, old_val, new_val) + if tokens is not None: + self._recorder.record_replace( + Pointer((*tokens, idx)), old_val, new_val + ) self._data[idx] = new_val else: # Contiguous slice - can change length old_values = list(self._data[start:stop]) + old_len = len(old_values) new_values = list(value) + new_len = len(new_values) + + # Replaced positions lose their proxies; the tail shifts + if self._proxies: + for i in range(start, stop): + self._detach(i) + self._shift_cache(stop, new_len - old_len) + new_values = [ + item + if item.__class__ in _SCALAR_TYPES + else self._adopt(start + i, item) + for i, item in enumerate(new_values) + ] # Perform the slice assignment self._data[start:stop] = new_values - # Generate patches for the changes - old_len = len(old_values) - new_len = len(new_values) - - # Replace common elements - for i in range(min(old_len, new_len)): - if old_values[i] != new_values[i]: - path = self._path.append(start + i) - self._recorder.record_replace( - path, old_values[i], new_values[i] - ) - - # Add new elements if new slice is longer - if new_len > old_len: - for i in range(old_len, new_len): - path = self._path.append(start + i) - self._recorder.record_add(path, new_values[i]) - - # Remove extra elements if new slice is shorter - elif new_len < old_len: - # Remove from end to start to maintain correct indices - for i in range(old_len - 1, new_len - 1, -1): - path = self._path.append(start + i) - self._recorder.record_remove(path, old_values[i]) - - # Invalidate all proxy caches as indices may have shifted - self._proxies.clear() + if tokens is not None: + # Replace common elements + for i in range(min(old_len, new_len)): + if old_values[i] != new_values[i]: + self._recorder.record_replace( + Pointer((*tokens, start + i)), + old_values[i], + new_values[i], + ) + + # Add new elements if new slice is longer + if new_len > old_len: + for i in range(old_len, new_len): + self._recorder.record_add( + Pointer((*tokens, start + i)), new_values[i] + ) + + # Remove extra elements if new slice is shorter + elif new_len < old_len: + # Remove from end to start to maintain correct indices + for i in range(old_len - 1, new_len - 1, -1): + self._recorder.record_remove( + Pointer((*tokens, start + i)), old_values[i] + ) return # Resolve negative indices to positive for correct paths if index < 0: index = len(self._data) + index - path = self._path.append(index) + if self._proxies: + self._detach(index) + if value.__class__ not in _SCALAR_TYPES: + value = self._adopt(index, value) old_value = self._data[index] - self._recorder.record_replace(path, old_value, value) + tokens = self._location() + if tokens is not None: + self._recorder.record_replace(Pointer((*tokens, index)), old_value, value) self._data[index] = value - # Invalidate proxy cache for this index - if index in self._proxies: - del self._proxies[index] def __delitem__(self, index: Union[int, slice]) -> None: if isinstance(index, slice): # Handle slice deletion with proper patch generation start, stop, step = index.indices(len(self._data)) + tokens = self._location() if step != 1: # For step slices, delete from end to start to maintain indices indices = list(range(start, stop, step)) for idx in reversed(indices): old_value = self._data[idx] - path = self._path.append(idx) - self._recorder.record_remove(path, old_value) + if tokens is not None: + self._recorder.record_remove(Pointer((*tokens, idx)), old_value) del self._data[idx] + if self._proxies: + self._detach(idx) + self._shift_cache(idx + 1, -1) else: # Contiguous slice - delete from end to start old_values = list(self._data[start:stop]) - for i in range(len(old_values) - 1, -1, -1): - old_value = old_values[i] - path = self._path.append(start + i) - self._recorder.record_remove(path, old_value) + if tokens is not None: + for i in range(len(old_values) - 1, -1, -1): + self._recorder.record_remove( + Pointer((*tokens, start + i)), old_values[i] + ) del self._data[start:stop] - - # Invalidate all proxy caches as indices shifted - self._proxies.clear() + if self._proxies: + for i in range(start, stop): + self._detach(i) + self._shift_cache(stop, start - stop) return # Resolve negative indices to positive for correct paths if index < 0: index = len(self._data) + index old_value = self._data[index] - path = self._path.append(index) - self._recorder.record_remove(path, old_value) + tokens = self._location() + if tokens is not None: + self._recorder.record_remove(Pointer((*tokens, index)), old_value) del self._data[index] - # Invalidate all proxy caches as indices shift - self._proxies.clear() + if self._proxies: + self._detach(index) + self._shift_cache(index + 1, -1) def append(self, value: Any) -> None: - # Forward patch uses "-" (append to end), reverse patch uses actual index - forward_path = self._path.append("-") - reverse_path = self._path.append(len(self._data)) - self._recorder.record_add(forward_path, value, reverse_path) + if value.__class__ not in _SCALAR_TYPES: + value = self._adopt(len(self._data), value) + tokens = self._location() + if tokens is not None: + # Forward patch uses "-" (append to end), reverse patch uses + # the actual index + forward_path = Pointer((*tokens, "-")) + reverse_path = Pointer((*tokens, len(self._data))) + self._recorder.record_add(forward_path, value, reverse_path) self._data.append(value) def insert(self, index: int, value: Any) -> None: - # Use the index for insertion - path = self._path.append(index) - self._recorder.record_add(path, value) + # Normalize the index the same way list.insert does (clamped), so + # the recorded path and the cache shift match the actual insertion + n = len(self._data) + if index < 0: + index = max(0, n + index) + else: + index = min(index, n) + if self._proxies: + self._shift_cache(index, 1) + if value.__class__ not in _SCALAR_TYPES: + value = self._adopt(index, value) + tokens = self._location() + if tokens is not None: + self._recorder.record_add(Pointer((*tokens, index)), value) self._data.insert(index, value) - # Invalidate all proxy caches as indices shift - self._proxies.clear() def pop(self, index: int = -1) -> Any: if index < 0: index = len(self._data) + index old_value = self._data[index] - path = self._path.append(index) - # If popping from the end, the reverse (add) operation should use "-" to append - # rather than a specific index, since the index may not exist when reversing - is_last = index == len(self._data) - 1 - reverse_path = self._path.append("-") if is_last else None - self._recorder.record_remove(path, old_value, reverse_path) + tokens = self._location() + if tokens is not None: + path = Pointer((*tokens, index)) + # If popping from the end, the reverse (add) operation should use + # "-" to append rather than a specific index, since the index may + # not exist when reversing + is_last = index == len(self._data) - 1 + reverse_path = Pointer((*tokens, "-")) if is_last else None + self._recorder.record_remove(path, old_value, reverse_path) result = self._data.pop(index) - # Invalidate all proxy caches as indices shift - self._proxies.clear() + if self._proxies: + self._detach(index) + self._shift_cache(index + 1, -1) return result def remove(self, value: Any) -> None: @@ -445,23 +692,34 @@ def remove(self, value: Any) -> None: del self[index] def clear(self) -> None: - # Generate patches for all elements (from end to start for correct indices) - # All reverse patches use "-" to append, since we're restoring to an empty list - reverse_path = self._path.append("-") - for i in range(len(self._data) - 1, -1, -1): - path = self._path.append(i) - self._recorder.record_remove(path, self._data[i], reverse_path) + # Generate patches for all elements (from end to start for correct + # indices). All reverse patches use "-" to append, since we're + # restoring to an empty list + tokens = self._location() + if tokens is not None: + reverse_path = Pointer((*tokens, "-")) + for i in range(len(self._data) - 1, -1, -1): + self._recorder.record_remove( + Pointer((*tokens, i)), self._data[i], reverse_path + ) self._data.clear() - self._proxies.clear() + self._detach_all() def extend(self, values): # Generate patches and extend data - values_list = list(values) start_index = len(self._data) - for i, value in enumerate(values_list): - forward_path = self._path.append("-") - reverse_path = self._path.append(start_index + i) - self._recorder.record_add(forward_path, value, reverse_path) + values_list = [ + value + if value.__class__ in _SCALAR_TYPES + else self._adopt(start_index + i, value) + for i, value in enumerate(values) + ] + tokens = self._location() + if tokens is not None: + forward_path = Pointer((*tokens, "-")) + for i, value in enumerate(values_list): + reverse_path = Pointer((*tokens, start_index + i)) + self._recorder.record_add(forward_path, value, reverse_path) self._data.extend(values_list) def reverse(self) -> None: @@ -469,16 +727,25 @@ def reverse(self) -> None: n = len(self._data) # Reverse the underlying data self._data.reverse() + # Reindex handed-out child proxies to their new positions + if self._proxies: + remapped = {} + for index, proxy in self._proxies.items(): + new_index = n - 1 - index + proxy._key = new_index + remapped[new_index] = proxy + self._proxies = remapped # Generate patches for each changed position # After reverse, element at position i came from position n-1-i - for i in range(n): - old_value = self._data[n - 1 - i] - new_value = self._data[i] - if old_value != new_value: - path = self._path.append(i) - self._recorder.record_replace(path, old_value, new_value) - # Invalidate all proxy caches as positions changed - self._proxies.clear() + tokens = self._location() + if tokens is not None: + for i in range(n): + old_value = self._data[n - 1 - i] + new_value = self._data[i] + if old_value != new_value: + self._recorder.record_replace( + Pointer((*tokens, i)), old_value, new_value + ) def sort(self, *args, **kwargs) -> None: """Sort the list in place and generate appropriate patches.""" @@ -486,13 +753,26 @@ def sort(self, *args, **kwargs) -> None: old_list = list(self._data) # Sort the underlying data self._data.sort(*args, **kwargs) + # Reindex handed-out child proxies by following their element's + # identity to its new position + if self._proxies: + positions = {} + for i, item in enumerate(self._data): + positions.setdefault(id(item), []).append(i) + remapped = {} + for _index, proxy in sorted(self._proxies.items()): + new_index = positions[id(proxy._data)].pop(0) + proxy._key = new_index + remapped[new_index] = proxy + self._proxies = remapped # Generate patches for each changed position - for i in range(len(self._data)): - if i < len(old_list) and old_list[i] != self._data[i]: - path = self._path.append(i) - self._recorder.record_replace(path, old_list[i], self._data[i]) - # Invalidate all proxy caches as positions changed - self._proxies.clear() + tokens = self._location() + if tokens is not None: + for i in range(len(self._data)): + if old_list[i] != self._data[i]: + self._recorder.record_replace( + Pointer((*tokens, i)), old_list[i], self._data[i] + ) def __iter__(self): """Iterate over list elements, wrapping nested structures in proxies.""" @@ -549,57 +829,57 @@ def __imul__(self, n): # - __class_getitem__: typing support (list[int]), not relevant for instances -class SetProxy: - """Proxy for set objects that tracks mutations and generates patches.""" +class SetProxy(_Proxy): + """Proxy for set objects that tracks mutations and generates patches. - __slots__ = ("_data", "_path", "_recorder") - __hash__ = None # sets are unhashable + Set members must be hashable, so they are never proxied and never + contain proxies; the registry of child proxies stays unused. + """ - def __init__(self, data: Set, recorder: PatchRecorder, path: Pointer): - self._data = data - self._recorder = recorder - self._path = path + __slots__ = () def add(self, value: Any) -> None: if value not in self._data: - path = self._path.append("-") - reverse_path = self._path.append(value) - self._recorder.record_add(path, value, reverse_path) + tokens = self._location() + if tokens is not None: + path = Pointer((*tokens, "-")) + reverse_path = Pointer((*tokens, value)) + self._recorder.record_add(path, value, reverse_path) self._data.add(value) def remove(self, value: Any) -> None: - path = self._path.append(value) - self._recorder.record_remove(path, value) + tokens = self._location() + if tokens is not None: + self._recorder.record_remove(Pointer((*tokens, value)), value) self._data.remove(value) def discard(self, value: Any) -> None: if value in self._data: - path = self._path.append(value) - self._recorder.record_remove(path, value) + tokens = self._location() + if tokens is not None: + self._recorder.record_remove(Pointer((*tokens, value)), value) self._data.discard(value) def pop(self) -> Any: value = self._data.pop() - path = self._path.append(value) - self._recorder.record_remove(path, value) + tokens = self._location() + if tokens is not None: + self._recorder.record_remove(Pointer((*tokens, value)), value) return value def clear(self) -> None: # Generate patches for all values and clear data - for value in list(self._data): - path = self._path.append(value) - self._recorder.record_remove(path, value) + tokens = self._location() + if tokens is not None: + for value in self._data: + self._recorder.record_remove(Pointer((*tokens, value)), value) self._data.clear() def update(self, *others): # Generate patches and update data for other in others: for value in other: - if value not in self._data: - path = self._path.append("-") - reverse_path = self._path.append(value) - self._recorder.record_add(path, value, reverse_path) - self._data.add(value) + self.add(value) def __ior__(self, other): """Implement |= operator (union update).""" @@ -742,26 +1022,34 @@ def produce( # Don't unwrap or copy - use the base object as-is draft = base else: - # Create a deep copy of the base object - draft = deepcopy(base) + # Create a deep copy of the base object. _snapshot copies the + # common JSON-like types directly (faster than copy.deepcopy); + # unknown types (e.g. observ proxies) fall back to deepcopy. + # Note: unlike deepcopy, _snapshot does not preserve shared + # references within the base; aliased subtrees become independent + # copies in the draft (which is what patches can express anyway). + draft = _snapshot(base) # Create a patch recorder recorder = PatchRecorder() # Wrap the draft in a proxy using duck typing (similar to diff()) # This allows compatibility with observ reactive objects and other proxies - path = Pointer() if hasattr(draft, "keys"): # dict-like - proxy = DictProxy(draft, recorder, path) + proxy = DictProxy(draft, recorder) elif hasattr(draft, "append"): # list-like - proxy = ListProxy(draft, recorder, path) + proxy = ListProxy(draft, recorder) elif hasattr(draft, "add"): # set-like - proxy = SetProxy(draft, recorder, path) + proxy = SetProxy(draft, recorder) else: raise TypeError(f"Unsupported type for produce: {type(draft)}") # Call the recipe function with the proxy recipe(proxy) + # Put the reverse patches in reverse application order and release + # all proxies (breaks their reference cycles) + recorder.finalize(proxy) + # Return the mutated draft and the patches return draft, recorder.patches, recorder.reverse_patches diff --git a/tests/test_produce_core.py b/tests/test_produce_core.py index 9990e78..12220b5 100644 --- a/tests/test_produce_core.py +++ b/tests/test_produce_core.py @@ -917,3 +917,59 @@ def recipe(draft): } assert apply({}, patches) == result assert apply(result, reverse) == {} + + def test_tuple_and_frozenset_values_are_snapshotted(self): + """Tuples and frozensets in patch values are copied per element.""" + + def recipe(draft): + draft["t"] = (1, [2, 3]) + draft["f"] = frozenset({1, 2}) + + result, patches, _reverse = produce({}, recipe) + assert result == {"t": (1, [2, 3]), "f": frozenset({1, 2})} + # The list inside the assigned tuple must not be shared with the + # patch value snapshot + result["t"][1].append(99) + assert apply({}, patches) == {"t": (1, [2, 3]), "f": frozenset({1, 2})} + + +class TestDeepcopyOfProxies: + """deepcopy of a proxy (e.g. taken inside a recipe) yields plain data.""" + + def test_deepcopy_of_draft_and_children_yields_plain_data(self): + import copy + + base = {"d": {"x": 1}, "l": [1, 2], "s": {1, 2}} + captured = {} + + def recipe(draft): + captured["root"] = copy.deepcopy(draft) + captured["dict"] = copy.deepcopy(draft["d"]) + captured["list"] = copy.deepcopy(draft["l"]) + captured["set"] = copy.deepcopy(draft["s"]) + + produce(base, recipe) + + assert type(captured["root"]) is dict + assert captured["root"] == base + assert type(captured["dict"]) is dict + assert type(captured["list"]) is list + assert type(captured["set"]) is set + # The copies must be independent of the draft + assert captured["root"]["d"] is not base["d"] + + +def test_snapshot_unwraps_proxy_passed_directly(): + """_snapshot must unwrap proxies itself (defense in depth: write paths + pre-unwrap values, but a future write path that forgets must not leak + proxies into patches).""" + from patchdiff.produce import PatchRecorder, _snapshot + + recorder = PatchRecorder() + proxy = DictProxy({"x": [1]}, recorder) + + snap = _snapshot(proxy) + + assert type(snap) is dict + assert snap == {"x": [1]} + assert snap["x"] is not proxy._data["x"] diff --git a/tests/test_produce_dict.py b/tests/test_produce_dict.py index f81504c..8ad468e 100644 --- a/tests/test_produce_dict.py +++ b/tests/test_produce_dict.py @@ -271,6 +271,23 @@ def recipe(draft): assert len(patches) == 0 # No mutations +def test_dict_pop_with_falsy_default(): + """Test pop() with falsy defaults (None, 0, "") returns the default + instead of raising KeyError.""" + base = {"a": 1} + + def recipe(draft): + assert draft.pop("missing", None) is None + assert draft.pop("missing", 0) == 0 + assert draft.pop("missing", "") == "" + assert draft.pop("missing", False) is False + + result, patches, _reverse = produce(base, recipe) + + assert result == {"a": 1} + assert len(patches) == 0 # No mutations + + def test_dict_pop_missing_key_no_default(): """Test pop() with missing key and no default raises KeyError.""" base = {"a": 1} diff --git a/tests/test_produce_live_paths.py b/tests/test_produce_live_paths.py new file mode 100644 index 0000000..117d9f1 --- /dev/null +++ b/tests/test_produce_live_paths.py @@ -0,0 +1,421 @@ +"""Tests for live path tracking of proxies handed out by produce(). + +Proxies track their location through parent links, so a held reference +stays correct while the tree changes around it: list shifts reindex it, +removal detaches it (mutations are no longer recorded but still applied +to its data), and assigning a detached proxy back into the draft +re-attaches it at its new location (move semantics). +""" + +import copy + +from patchdiff import apply, produce +from patchdiff.produce import DictProxy, ListProxy, SetProxy + +PROXY_TYPES = (DictProxy, ListProxy, SetProxy) + + +def assert_no_proxies(value, path="$"): + """Recursively assert that value contains no proxy objects.""" + assert not isinstance(value, PROXY_TYPES), f"proxy leaked at {path}: {value!r}" + if isinstance(value, dict): + for key, item in value.items(): + assert_no_proxies(item, f"{path}.{key}") + elif isinstance(value, (list, tuple, set)): + for i, item in enumerate(value): + assert_no_proxies(item, f"{path}[{i}]") + + +def assert_clean_roundtrip(base, recipe): + """Run produce() and assert results/patches are proxy-free and consistent.""" + base_copy = copy.deepcopy(base) + + result, patches, reverse = produce(base, recipe) + + assert_no_proxies(result, "result") + for i, patch in enumerate(patches): + if "value" in patch: + assert_no_proxies(patch["value"], f"patches[{i}].value") + for i, patch in enumerate(reverse): + if "value" in patch: + assert_no_proxies(patch["value"], f"reverse[{i}].value") + + applied = apply(base_copy, patches) + assert applied == result, f"patches do not reproduce result: {patches}" + + reverted = apply(result, reverse) + assert reverted == base_copy, f"reverse patches do not restore base: {reverse}" + + return result, patches, reverse + + +def scene_tree(): + return {"children": [{"title": "a"}, {"title": "b"}, {"title": "c"}]} + + +# -- Re-attachment (move semantics) -- + + +def test_move_via_pop_and_setitem_keeps_reference_live(): + """Assigning a detached proxy directly re-attaches it: later mutations + through the held reference are recorded at the new path.""" + + def recipe(draft): + item = draft["children"][-1] + draft["children"].pop() + draft["moved"] = item + item["title"] = "moved" # must record at /moved/title + + result, patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert result["moved"] == {"title": "moved"} + assert "/moved/title" in [str(p["path"]) for p in patches] + + +def test_move_via_append_keeps_reference_live(): + """Appending a detached proxy directly re-attaches it at its new index.""" + + def recipe(draft): + item = draft["children"][0] + del draft["children"][0] + draft["children"].append(item) + item["title"] = "first-to-last" # must record at /children/2/title + + result, patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert result["children"][-1] == {"title": "first-to-last"} + assert "/children/2/title" in [str(p["path"]) for p in patches] + + +def test_reattached_subtree_children_are_live_again(): + """Child proxies of a detached subtree become live again when the + subtree is re-attached: their paths recompute through the new location.""" + base = {"group": {"items": [{"x": 1}]}, "out": []} + + def recipe(draft): + group = draft["group"] + items = group["items"] + del draft["group"] # detaches group and, transitively, items + items.append({"x": 2}) # not recorded; carried by the re-attach + draft["out"].append(group) # re-attach at /out/0 + items[0]["x"] = 99 # must record at /out/0/items/0/x + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result["out"][0]["items"] == [{"x": 99}, {"x": 2}] + assert "/out/0/items/0/x" in [str(p["path"]) for p in patches] + + +def test_self_assignment_keeps_reference_live(): + """draft["a"] = draft["a"] is a no-op that must not orphan the proxy.""" + base = {"a": {"x": 1}} + + def recipe(draft): + p = draft["a"] + draft["a"] = p + p["x"] = 5 # must still record at /a/x + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result == {"a": {"x": 5}} + assert [str(p["path"]) for p in patches] == ["/a/x"] + + +def test_attached_proxy_assignment_copies(): + """A proxy still attached elsewhere is copied, not aliased: mutations + through the original afterwards only affect the original path.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["b"] = draft["a"] # copy: "a" is still attached + draft["a"]["x"] = 42 + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result == {"a": {"x": 42}, "b": {"x": 1}} + + +def test_detached_proxy_cannot_become_its_own_descendant(): + """Adopting a detached ancestor into its own subtree must not create a + parent-link cycle (it falls back to copying).""" + base = {"a": {"b": {}}} + + def recipe(draft): + a = draft["a"] + del draft["a"] + a["self"] = a # would create a cycle; must copy instead + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result == {} + + +# -- Detached proxies stop recording -- + + +def test_mutation_through_replaced_value_not_recorded(): + """Replacing a key detaches the old value's proxy; mutations through + the held reference no longer affect the draft or the patches.""" + base = {"a": {"x": 1}} + + def recipe(draft): + p = draft["a"] + draft["a"] = {"x": 2} + p["x"] = 99 # detached: not recorded, not in result + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result == {"a": {"x": 2}} + assert len(patches) == 1 + + +def test_mutation_through_cleared_container_not_recorded(): + base = {"items": [{"x": 1}]} + + def recipe(draft): + p = draft["items"][0] + draft["items"].clear() + p["x"] = 99 # detached: not recorded + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result == {"items": []} + assert all("99" not in str(p) for p in patches) + + +def test_dead_ancestor_reference_treated_as_detached(): + """Parent links are weak references. When a detached ancestor is + garbage collected, proxies below it must behave as detached: + mutations apply to their data but are not recorded.""" + base = {"a": {"b": {"c": {"x": 1}}}} + + def recipe(draft): + b = draft["a"]["b"] # keep the middle proxy alive + c = b["c"] + del draft["a"] # detaches the "a" proxy; nothing else holds it + c["x"] = 99 # the "a" proxy is collected: must not record + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result == {} + assert len(patches) == 1 # only the remove + + +def test_mutation_through_child_of_detached_subtree_not_recorded(): + """Detachment is transitive: children of a removed subtree must not + record either.""" + base = {"group": {"items": [{"x": 1}]}} + + def recipe(draft): + items = draft["group"]["items"] + del draft["group"] + items.append({"x": 2}) # not recorded + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result == {} + assert len(patches) == 1 # only the remove + + +# -- Held proxies survive index shifts -- + + +def test_proxy_live_after_multiple_inserts_and_dels(): + def recipe(draft): + p = draft["children"][1] # "b" + draft["children"].insert(0, {"title": "x"}) # b -> 2 + draft["children"].insert(0, {"title": "y"}) # b -> 3 + del draft["children"][1] # b -> 2 + p["title"] = "B" + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert [c["title"] for c in result["children"]] == ["y", "a", "B", "c"] + + +def test_proxy_live_after_slice_assignment_shift(): + base = {"items": [{"t": "a"}, {"t": "b"}, {"t": "c"}, {"t": "d"}]} + + def recipe(draft): + p = draft["items"][3] + draft["items"][1:3] = [{"t": "x"}] # shrink by one: p moves to 2 + p["t"] = "D" + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert [i["t"] for i in result["items"]] == ["a", "x", "D"] + + +def test_proxy_live_after_slice_deletion(): + base = {"items": [{"t": "a"}, {"t": "b"}, {"t": "c"}, {"t": "d"}]} + + def recipe(draft): + p = draft["items"][3] + del draft["items"][0:2] # p moves to 1 + p["t"] = "D" + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert [i["t"] for i in result["items"]] == ["c", "D"] + + +def test_proxy_replaced_by_slice_assignment_is_detached(): + base = {"items": [{"t": "a"}, {"t": "b"}]} + + def recipe(draft): + p = draft["items"][0] + draft["items"][0:1] = [{"t": "x"}] + p["t"] = "A" # detached: not recorded + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert [i["t"] for i in result["items"]] == ["x", "b"] + + +def test_step_slice_assignment_detaches_replaced_proxies(): + base = {"items": [{"t": "a"}, {"t": "b"}, {"t": "c"}, {"t": "d"}]} + + def recipe(draft): + p0 = draft["items"][0] + p1 = draft["items"][1] + draft["items"][::2] = [{"t": "A"}, {"t": "C"}] # replaces 0 and 2 + p0["t"] = "zzz" # detached: not recorded, not in result + p1["t"] = "B" # untouched by the slice: still live + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert [i["t"] for i in result["items"]] == ["A", "B", "C", "d"] + + +def test_step_slice_deletion_shifts_held_proxies(): + base = {"items": [{"t": "a"}, {"t": "b"}, {"t": "c"}, {"t": "d"}, {"t": "e"}]} + + def recipe(draft): + p1 = draft["items"][1] + p3 = draft["items"][3] + del draft["items"][::2] # deletes 0, 2 and 4 + p1["t"] = "B" # now at index 0 + p3["t"] = "D" # now at index 1 + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert [i["t"] for i in result["items"]] == ["B", "D"] + + +def test_proxy_live_after_reverse(): + def recipe(draft): + p = draft["children"][0] # "a" + draft["children"].reverse() # a moves to index 2 + p["title"] = "A" + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert [c["title"] for c in result["children"]] == ["c", "b", "A"] + + +def test_proxy_live_after_sort(): + base = {"items": [{"k": 3}, {"k": 1}, {"k": 2}]} + + def recipe(draft): + p = draft["items"][0] # {"k": 3}, will move to index 2 + draft["items"].sort(key=lambda item: item["k"]) + p["k"] = 30 + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert [i["k"] for i in result["items"]] == [1, 2, 30] + + +def test_proxy_live_after_pop_of_earlier_index(): + def recipe(draft): + p = draft["children"][2] + draft["children"].pop(0) # p moves to 1 + p["title"] = "C" + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert [c["title"] for c in result["children"]] == ["b", "C"] + + +def test_proxy_live_after_remove_of_earlier_element(): + def recipe(draft): + p = draft["children"][1] + draft["children"].remove(draft["children"][0]) # p moves to 0 + p["title"] = "B" + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert [c["title"] for c in result["children"]] == ["B", "c"] + + +# -- Proxy lifecycle after produce() returns -- + + +def test_produce_leaves_no_cyclic_garbage(): + """Proxies form parent/child reference cycles during the recipe; + produce() must break them on the way out so they are freed by + reference counting (cyclic garbage caused gc latency spikes).""" + import gc + + base = {"a": {"b": {"c": [{"x": 1}, {"x": 2}]}}} + + def recipe(draft): + draft["a"]["b"]["c"][0]["x"] = 99 + + gc.collect() + gc.disable() + try: + produce(base, recipe) + assert gc.collect() == 0 + finally: + gc.enable() + + +def test_proxies_are_inert_after_produce(): + """A proxy leaked out of the recipe must not record into the + already-returned patch lists.""" + captured = {} + + def recipe(draft): + captured["p"] = draft["a"] + draft["a"]["x"] = 2 + + _result, patches, reverse = produce({"a": {"x": 1}}, recipe) + + n_patches, n_reverse = len(patches), len(reverse) + captured["p"]["x"] = 99 # after produce: must not record + assert len(patches) == n_patches + assert len(reverse) == n_reverse + + +# -- Full regroup scenario from the scene-tree use case -- + + +def test_full_regroup_scenario(): + """Find items, remove them from their parents, group them under a new + node, and keep editing them afterwards through the held references.""" + base = { + "children": [ + {"title": "a", "children": []}, + {"title": "b", "children": []}, + {"title": "c", "children": []}, + ] + } + + def recipe(draft): + # Collect proxies first (iteration yields proxies) + to_group = [c for c in draft["children"] if c["title"] in ("b", "c")] + # Remove them from the root (reverse order keeps indices valid) + for child in reversed(to_group): + draft["children"].remove(child) + # Build the new group; nested detached proxies are copied with + # their current data + new_group = {"title": "Group", "children": to_group} + draft["children"].append(new_group) + # Edit through the draft afterwards + draft["children"][1]["children"][0]["title"] = "b!" + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert [c["title"] for c in result["children"]] == ["a", "Group"] + assert [c["title"] for c in result["children"][1]["children"]] == ["b!", "c"] diff --git a/tests/test_produce_proxy_leak.py b/tests/test_produce_proxy_leak.py new file mode 100644 index 0000000..e637a73 --- /dev/null +++ b/tests/test_produce_proxy_leak.py @@ -0,0 +1,402 @@ +"""Tests showing that proxies must never leak into the result or the patches. + +When a recipe reads a nested container from the draft, it receives a proxy +(DictProxy/ListProxy/SetProxy). If that proxy is then written back into the +draft (assignment, append, update, ...), the proxy object itself currently +ends up in the result data and in the recorded patch values. These tests +pin down the expected behavior: results and patches must only ever contain +plain data. +""" + +import json + +import pytest + +from patchdiff import apply, produce, to_json +from patchdiff.produce import DictProxy, ListProxy, SetProxy + +PROXY_TYPES = (DictProxy, ListProxy, SetProxy) + + +def assert_no_proxies(value, path="$"): + """Recursively assert that value contains no proxy objects.""" + assert not isinstance(value, PROXY_TYPES), f"proxy leaked at {path}: {value!r}" + if isinstance(value, dict): + for key, item in value.items(): + assert_no_proxies(item, f"{path}.{key}") + elif isinstance(value, (list, tuple, set)): + for i, item in enumerate(value): + assert_no_proxies(item, f"{path}[{i}]") + + +def assert_clean_roundtrip(base, recipe): + """Run produce() and assert results/patches are proxy-free and consistent.""" + import copy + + base_copy = copy.deepcopy(base) + + result, patches, reverse = produce(base, recipe) + + assert_no_proxies(result, "result") + for i, patch in enumerate(patches): + if "value" in patch: + assert_no_proxies(patch["value"], f"patches[{i}].value") + for i, patch in enumerate(reverse): + if "value" in patch: + assert_no_proxies(patch["value"], f"reverse[{i}].value") + + applied = apply(base_copy, patches) + assert applied == result, f"patches do not reproduce result: {patches}" + + reverted = apply(result, reverse) + assert reverted == base_copy, f"reverse patches do not restore base: {reverse}" + + return result, patches, reverse + + +def test_reassign_nested_dict_to_other_key(): + """Reading a nested dict and storing it under another key must store + plain data, not the proxy wrapper.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["b"] = draft["a"] + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["b"]) is dict + assert type(patches[0]["value"]) is dict + assert result == {"a": {"x": 1}, "b": {"x": 1}} + + +def test_reassign_nested_list_to_other_key(): + base = {"nums": [1, 2, 3]} + + def recipe(draft): + draft["backup"] = draft["nums"] + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["backup"]) is list + assert type(patches[0]["value"]) is list + + +def test_reassign_nested_set_to_other_key(): + base = {"tags": {"a", "b"}} + + def recipe(draft): + draft["tags_copy"] = draft["tags"] + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["tags_copy"]) is set + assert type(patches[0]["value"]) is set + + +def test_append_nested_dict_to_list(): + """Appending a nested proxy to a list must not leak the proxy.""" + base = {"a": {"x": 1}, "items": []} + + def recipe(draft): + draft["items"].append(draft["a"]) + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["items"][0]) is dict + assert type(patches[0]["value"]) is dict + + +def test_insert_and_extend_with_proxies(): + base = {"a": {"x": 1}, "b": [1, 2], "items": [0]} + + def recipe(draft): + draft["items"].insert(0, draft["a"]) + draft["items"].extend([draft["b"]]) + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["items"][0]) is dict + assert type(result["items"][-1]) is list + + +def test_list_setitem_with_proxy(): + base = {"a": {"x": 1}, "items": [None]} + + def recipe(draft): + draft["items"][0] = draft["a"] + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["items"][0]) is dict + + +def test_list_slice_assignment_with_proxies(): + base = {"a": {"x": 1}, "items": [1, 2, 3]} + + def recipe(draft): + draft["items"][1:2] = [draft["a"], draft["a"]] + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["items"][1]) is dict + assert type(result["items"][2]) is dict + + +def test_tuple_containing_proxy_is_unwrapped(): + """Proxies hidden inside tuples must be unwrapped as well.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["pair"] = (draft["a"], 2) + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["pair"]) is tuple + assert type(result["pair"][0]) is dict + + +def test_dict_update_with_proxy_values(): + base = {"a": {"x": 1}} + + def recipe(draft): + draft.update({"b": draft["a"]}) + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["b"]) is dict + assert type(patches[0]["value"]) is dict + + +def test_dict_setdefault_with_proxy_default(): + base = {"a": {"x": 1}} + + def recipe(draft): + draft.setdefault("b", draft["a"]) + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert type(result["b"]) is dict + + +def test_move_item_between_containers(): + """A common 'move' idiom: read from one container, push into another.""" + base = {"todo": [{"task": "write tests"}], "done": []} + + def recipe(draft): + item = draft["todo"][0] + draft["done"].append(item) + del draft["todo"][0] + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert result == {"todo": [], "done": [{"task": "write tests"}]} + assert type(result["done"][0]) is dict + + +def test_iteration_yields_must_not_leak(): + """Values obtained via items()/values()/iteration are proxies; storing + them back must not leak.""" + base = {"groups": {"a": {"n": 1}, "b": {"n": 2}}, "flat": []} + + def recipe(draft): + for _key, value in draft["groups"].items(): + draft["flat"].append(value) + + result, _patches, _reverse = assert_clean_roundtrip(base, recipe) + + assert all(type(v) is dict for v in result["flat"]) + + +def test_reverse_patch_value_is_plain_after_delete_of_leaked_value(): + """Deleting a key whose value was assigned from a proxy must record a + plain value in the reverse (add) patch.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["b"] = draft["a"] + del draft["b"] + + _result, _patches, reverse = assert_clean_roundtrip(base, recipe) + + for patch in reverse: + if "value" in patch: + assert_no_proxies(patch["value"], "reverse value") + + +def test_patches_are_json_serializable(): + """Patches containing leaked proxies cannot be serialized.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["b"] = draft["a"] + + _result, patches, reverse = produce(base, recipe) + + # Must not raise + json.loads(to_json(patches)) + json.loads(to_json(reverse)) + + +def test_alias_then_mutate_records_consistent_patches(): + """After draft["b"] = draft["a"], mutating either path must produce + patches that, when applied to base, reproduce the result exactly — + no double-recording through stale proxy paths.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["b"] = draft["a"] + draft["b"]["x"] = 99 + + result, patches, _reverse = assert_clean_roundtrip(base, recipe) + + # The mutation of "b" must be recorded exactly once + paths = [str(p["path"]) for p in patches] + assert paths.count("/b/x") <= 1 + assert "/a/x" not in paths or result["a"] == {"x": 99} + + +def test_assign_proxy_then_mutate_original_path(): + """Mutating via the original path after aliasing: patches must still + reproduce the result when applied to the base.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["b"] = draft["a"] + draft["a"]["x"] = 42 + + assert_clean_roundtrip(base, recipe) + + +def test_result_values_usable_after_produce(): + """A leaked proxy in the result keeps recording into a dead recorder and + breaks basic expectations like isinstance checks and json dumps.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["b"] = draft["a"] + + result, _patches, _reverse = produce(base, recipe) + + # The result must be plain data: serializable and isinstance-friendly + json.dumps(result) + assert isinstance(result["b"], dict) + + +# -- Scene-tree regrouping scenarios -- +# Moving items around in a hierarchical tree is a core use case. These +# patterns detach an item from one place and reattach it elsewhere, +# possibly nested inside freshly created plain containers, possibly with +# mutations while detached. + + +def scene_tree(): + return {"children": [{"title": "a"}, {"title": "b"}, {"title": "c"}]} + + +def test_regroup_pop_then_append(): + """pop() returns raw data, so this works today — guard against regression.""" + + def recipe(draft): + item = draft["children"].pop(-1) + new_group = {"title": "New group", "children": [item]} + draft["children"].append(new_group) + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert result["children"][-1] == { + "title": "New group", + "children": [{"title": "c"}], + } + + +def test_regroup_find_then_move(): + """Find an item by iterating (yields proxies), remove it, and nest it + inside a freshly built plain dict. The proxy hides inside plain + containers, so unwrapping must be recursive.""" + + def recipe(draft): + for child in draft["children"]: + if child["title"] == "c": + draft["children"].remove(child) + new_group = {"title": "New group", "children": [child]} + draft["children"].append(new_group) + break + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert result["children"][-1]["children"] == [{"title": "c"}] + assert type(result["children"][-1]["children"][0]) is dict + + +def test_regroup_getitem_then_pop_then_append(): + """Item read via __getitem__ (a proxy) before being popped, then + reattached inside a new group.""" + + def recipe(draft): + item = draft["children"][-1] + draft["children"].pop() + draft["children"].append({"title": "G", "children": [item]}) + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert result["children"][-1]["children"] == [{"title": "c"}] + + +def test_regroup_mutate_while_detached(): + """Mutating an item between detach and reattach. The mutation must not + be recorded against the stale (removed) path; the reattach snapshot + already carries the mutated state.""" + + def recipe(draft): + item = draft["children"][-1] + draft["children"].pop() + item["title"] = "moved" + draft["children"].append({"title": "G", "children": [item]}) + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert result["children"][-1]["children"] == [{"title": "moved"}] + + +def test_stale_index_path_after_insert(): + """A held proxy must not record against its old index after the list + shifted underneath it. Currently this silently corrupts patches: + replaying them renames the wrong element.""" + + def recipe(draft): + p = draft["children"][2] + draft["children"].insert(0, {"title": "new"}) + p["title"] = "renamed" + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert [c["title"] for c in result["children"]] == ["new", "a", "b", "renamed"] + + +def test_stale_index_path_after_del(): + """Same as above but shifting the other way via a deletion.""" + + def recipe(draft): + p = draft["children"][2] + del draft["children"][0] + p["title"] = "renamed" + + result, _patches, _reverse = assert_clean_roundtrip(scene_tree(), recipe) + + assert [c["title"] for c in result["children"]] == ["b", "renamed"] + + +@pytest.mark.parametrize("in_place", [False, True]) +def test_no_proxy_leak_with_in_place_modes(in_place): + """The leak must not occur in either copy or in_place mode.""" + base = {"a": {"x": 1}} + + def recipe(draft): + draft["b"] = draft["a"] + + result, patches, _reverse = produce(base, recipe, in_place=in_place) + + assert_no_proxies(result, "result") + for patch in patches: + if "value" in patch: + assert_no_proxies(patch["value"], "patch value")