diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py index 252e42cf4..784c38125 100644 --- a/confluence-mdx/bin/reverse_sync/patch_builder.py +++ b/confluence-mdx/bin/reverse_sync/patch_builder.py @@ -19,7 +19,9 @@ sha256_text, SidecarEntry, build_mdx_line_range_index, + build_list_all_anchor_entries, ) +from reverse_sync.xhtml_normalizer import extract_plain_text from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_xhtml_element, mdx_block_to_inner_xhtml from mdx_to_storage.inline import convert_inline @@ -1140,19 +1142,46 @@ def _mark_used(block_id: str, m: BlockMapping): and (roundtrip_sidecar is not None or has_any_change) and (list_sidecar is None or mapping_via_v3_fallback or has_any_change) ) + # preserved anchor list (ac:image 포함, ac:link 미포함) without sidecar reconstruction: + # collapse_ws 후 old/new가 동일한 경우(마커 공백만 변경) text-level 패치가 no-op이 되므로 + # mapping의 XHTML에서 anchor entries를 추출하여 synthetic reconstruction으로 재생성 + should_replace_preserved_anchor_list = False + synthetic_list_sidecar: Optional[SidecarBlock] = None + _marker_ws_only = has_content_change and _old_plain == _new_plain + if (mapping is not None + and _marker_ws_only + and _contains_preserved_anchor_markup(mapping.xhtml_text) + and not _contains_preserved_link_markup(mapping.xhtml_text) + and not sidecar_block_requires_reconstruction(list_sidecar)): + anchor_items = build_list_all_anchor_entries(mapping.xhtml_text) + if anchor_items: + synthetic_list_sidecar = SidecarBlock( + block_index=0, + xhtml_xpath=mapping.xhtml_xpath, + xhtml_fragment=mapping.xhtml_text, + reconstruction={ + 'kind': 'list', + 'old_plain_text': extract_plain_text(mapping.xhtml_text), + 'ordered': mapping.xhtml_xpath.startswith('ol['), + 'items': anchor_items, + }, + ) + should_replace_preserved_anchor_list = True if (mapping is not None and ( # anchor case: sidecar anchor metadata가 있으면 ac: 포함 여부 무관 sidecar_block_requires_reconstruction(list_sidecar) # clean case: preserved anchor 없는 clean list or should_replace_clean_list + # preserved anchor list (ac:image only): synthetic sidecar로 재생성 + or should_replace_preserved_anchor_list )): _mark_used(mapping.block_id, mapping) patches.append( _build_replace_fragment_patch( mapping, change.new_block, - sidecar_block=list_sidecar, + sidecar_block=synthetic_list_sidecar if should_replace_preserved_anchor_list else list_sidecar, mapping_lost_info=mapping_lost_info, ) ) diff --git a/confluence-mdx/bin/reverse_sync/sidecar.py b/confluence-mdx/bin/reverse_sync/sidecar.py index 3e037a911..a4c4b13fd 100644 --- a/confluence-mdx/bin/reverse_sync/sidecar.py +++ b/confluence-mdx/bin/reverse_sync/sidecar.py @@ -371,7 +371,41 @@ def _walk_list(list_el, path: list, entries: list) -> None: _walk_list(child, current_path, entries) -def _build_list_anchor_entries(fragment: str) -> list: +def _walk_list_all_anchors(list_el, path: list, entries: list) -> None: + """list 요소를 재귀 순회하며 li 직접 자식 ac:image도 포함하여 anchor entry를 수집한다. + + _walk_list는 p 내부 ac:image만 수집하지만, 이 함수는 li의 직접 자식 + ac:image도 수집하여 synthetic reconstruction에 사용한다. + """ + from bs4 import NavigableString, Tag + items = [c for c in list_el.children if isinstance(c, Tag) and c.name == 'li'] + for idx, li in enumerate(items): + current_path = path + [idx] + li_text_offset = 0 + for child in li.children: + if isinstance(child, NavigableString): + li_text_offset += len(str(child).strip()) + continue + if not isinstance(child, Tag): + continue + if child.name == 'p': + for a in _extract_anchors_from_p(child): + entries.append({**a, 'path': current_path}) + li_text_offset += len(extract_plain_text(str(child))) + elif child.name == 'ac:image': + entries.append({ + 'kind': 'image', + 'offset': li_text_offset, + 'raw_xhtml': str(child), + 'path': current_path, + }) + elif child.name in ('ul', 'ol'): + _walk_list_all_anchors(child, current_path, entries) + else: + li_text_offset += len(extract_plain_text(str(child))) + + +def build_list_anchor_entries(fragment: str) -> list: """list fragment 내 li > p > ac:image를 path 기반 anchor entry로 추출한다. 각 entry: @@ -390,6 +424,22 @@ def _build_list_anchor_entries(fragment: str) -> list: return entries +def build_list_all_anchor_entries(fragment: str) -> list: + """list fragment 내 모든 ac:image를 anchor entry로 추출한다. + + build_list_anchor_entries와 달리 li 직접 자식 ac:image도 포함한다. + synthetic reconstruction 생성 시 사용한다. + """ + from bs4 import BeautifulSoup + soup = BeautifulSoup(fragment, 'html.parser') + root = soup.find(['ul', 'ol']) + if root is None: + return [] + entries = [] + _walk_list_all_anchors(root, [], entries) + return entries + + def _build_reconstruction_metadata( fragment: str, mapping: BlockMapping | None, @@ -406,7 +456,7 @@ def _build_reconstruction_metadata( metadata["anchors"] = _build_anchor_entries(fragment) elif mapping.type == "list": metadata["ordered"] = mapping.xhtml_xpath.startswith("ol[") - metadata["items"] = _build_list_anchor_entries(fragment) + metadata["items"] = build_list_anchor_entries(fragment) elif mapping.children: children_meta = [] for child_id in mapping.children: @@ -430,7 +480,7 @@ def _build_reconstruction_metadata( if anchors: child_data["anchors"] = anchors elif child_m.type == "list": - items = _build_list_anchor_entries(child_m.xhtml_text) + items = build_list_anchor_entries(child_m.xhtml_text) if items: child_data["items"] = items children_meta.append(child_data) diff --git a/confluence-mdx/tests/test_reverse_sync_patch_builder.py b/confluence-mdx/tests/test_reverse_sync_patch_builder.py index ef32f8ac6..ce9ed6c4f 100644 --- a/confluence-mdx/tests/test_reverse_sync_patch_builder.py +++ b/confluence-mdx/tests/test_reverse_sync_patch_builder.py @@ -2967,6 +2967,105 @@ def test_image_anchor_list_keeps_collapsed_text_diff(self): ) assert patches[0]['new_plain_text'] == '목록 좌측 상단에서 Delete 버튼을 클릭합니다.' + def test_image_anchor_list_marker_ws_only_produces_replace_fragment(self): + """마커 뒤 공백만 변경된 이미지 preserved anchor 리스트도 replace_fragment 패치를 생성한다.""" + xhtml = ( + '
  1. 목록 좌측 상단에서 Delete 버튼을 클릭합니다.

    ' + '' + '' + '

' + ) + old_content = ( + '4. 목록 좌측 상단에서 Delete 버튼을 클릭합니다.
\n' + '
\n' + ' img\n' + '
\n' + ) + new_content = ( + '4. 목록 좌측 상단에서 Delete 버튼을 클릭합니다.
\n' + '
\n' + ' img\n' + '
\n' + ) + change = _make_change(0, old_content, new_content, type_='list') + mapping = BlockMapping( + block_id='list-image-ws-1', + type='list', + xhtml_xpath='ol[1]', + xhtml_text=xhtml, + xhtml_plain_text='목록 좌측 상단에서 Delete 버튼을 클릭합니다.', + xhtml_element_index=0, + children=[], + ) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock(0, 'ol[1]', xhtml, sha256_text(old_content), (1, 4)) + ]) + + patches, _, skipped = build_patches( + [change], [change.old_block], [change.new_block], + mappings=[mapping], + roundtrip_sidecar=roundtrip_sidecar, + ) + + assert len(patches) == 1, ( + f"마커 공백만 변경된 이미지 preserved anchor 리스트도 패치를 생성해야 합니다. " + f"skipped={skipped}" + ) + assert patches[0].get('action') == 'replace_fragment', ( + f"preserved anchor 리스트는 replace_fragment 패치여야 합니다: {patches[0]}" + ) + new_xhtml = patches[0]['new_element_xhtml'] + assert '
  • 좌측 상단에서 Delete 버튼을 클릭합니다.

    ' + '' + '' + '

  • ' + ) + old_content = ( + '4. 좌측 상단에서 Delete 버튼을 클릭합니다.
    \n' + '
    \n' + ' img\n' + '
    \n' + ) + new_content = ( + '4. 좌측 상단에서 Delete 버튼을 클릭합니다.
    \n' + '
    \n' + ' img\n' + '
    \n' + ) + change = _make_change(0, old_content, new_content, type_='list') + mapping = BlockMapping( + block_id='list-img-e2e', + type='list', + xhtml_xpath='ol[1]', + xhtml_text=xhtml, + xhtml_plain_text='좌측 상단에서 Delete 버튼을 클릭합니다.', + xhtml_element_index=0, + children=[], + ) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock(0, 'ol[1]', xhtml, sha256_text(old_content), (1, 4)) + ]) + + patches, _, skipped = build_patches( + [change], [change.old_block], [change.new_block], + mappings=[mapping], + roundtrip_sidecar=roundtrip_sidecar, + ) + + assert len(patches) == 1, f"패치가 생성되어야 합니다. skipped={skipped}" + # patch_xhtml 적용하여 XHTML이 실제로 갱신되는지 확인 + patched = patch_xhtml(xhtml, patches) + assert '