Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion confluence-mdx/bin/reverse_sync/patch_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
sha256_text,
SidecarEntry,
build_mdx_line_range_index,
build_list_all_anchor_entries,
)
from reverse_sync.xhtml_normalizer import extract_plain_text
from reverse_sync.lost_info_patcher import apply_lost_info, distribute_lost_info_to_mappings
from reverse_sync.mdx_to_xhtml_inline import mdx_block_to_xhtml_element, mdx_block_to_inner_xhtml
from mdx_to_storage.inline import convert_inline
Expand Down Expand Up @@ -1140,19 +1142,46 @@ def _mark_used(block_id: str, m: BlockMapping):
and (roundtrip_sidecar is not None or has_any_change)
and (list_sidecar is None or mapping_via_v3_fallback or has_any_change)
)
# preserved anchor list (ac:image 포함, ac:link 미포함) without sidecar reconstruction:
# collapse_ws 후 old/new가 동일한 경우(마커 공백만 변경) text-level 패치가 no-op이 되므로
# mapping의 XHTML에서 anchor entries를 추출하여 synthetic reconstruction으로 재생성
should_replace_preserved_anchor_list = False
synthetic_list_sidecar: Optional[SidecarBlock] = None
_marker_ws_only = has_content_change and _old_plain == _new_plain
if (mapping is not None
and _marker_ws_only
and _contains_preserved_anchor_markup(mapping.xhtml_text)
and not _contains_preserved_link_markup(mapping.xhtml_text)
and not sidecar_block_requires_reconstruction(list_sidecar)):
anchor_items = build_list_all_anchor_entries(mapping.xhtml_text)
if anchor_items:
synthetic_list_sidecar = SidecarBlock(
block_index=0,
xhtml_xpath=mapping.xhtml_xpath,
xhtml_fragment=mapping.xhtml_text,
reconstruction={
'kind': 'list',
'old_plain_text': extract_plain_text(mapping.xhtml_text),
'ordered': mapping.xhtml_xpath.startswith('ol['),
'items': anchor_items,
},
)
should_replace_preserved_anchor_list = True
if (mapping is not None
and (
# anchor case: sidecar anchor metadata가 있으면 ac: 포함 여부 무관
sidecar_block_requires_reconstruction(list_sidecar)
# clean case: preserved anchor 없는 clean list
or should_replace_clean_list
# preserved anchor list (ac:image only): synthetic sidecar로 재생성
or should_replace_preserved_anchor_list
)):
_mark_used(mapping.block_id, mapping)
patches.append(
_build_replace_fragment_patch(
mapping,
change.new_block,
sidecar_block=list_sidecar,
sidecar_block=synthetic_list_sidecar if should_replace_preserved_anchor_list else list_sidecar,
mapping_lost_info=mapping_lost_info,
)
)
Expand Down
56 changes: 53 additions & 3 deletions confluence-mdx/bin/reverse_sync/sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,41 @@ def _walk_list(list_el, path: list, entries: list) -> None:
_walk_list(child, current_path, entries)


def _build_list_anchor_entries(fragment: str) -> list:
def _walk_list_all_anchors(list_el, path: list, entries: list) -> None:
    """Recursively collect anchor entries from a list element, including bare ``ac:image`` tags.

    Per the original note on this function, ``_walk_list`` only gathers
    ``ac:image`` found inside ``p``; this walker additionally records
    ``ac:image`` tags that are direct children of an ``li`` so the result can
    be used for a synthetic reconstruction.

    Args:
        list_el: Parsed list element whose direct ``li`` children are visited.
        path: Indices of the enclosing ``li`` items leading to ``list_el``;
            it is not mutated (a new list is built per item).
        entries: Output list; anchor entry dicts are appended to it in place.
    """
    from bs4 import NavigableString, Tag

    li_nodes = [
        node for node in list_el.children
        if isinstance(node, Tag) and node.name == 'li'
    ]
    for position, li_node in enumerate(li_nodes):
        item_path = path + [position]
        # Running length of the text seen so far inside this li; it becomes the
        # offset of any ac:image that sits directly under the li.
        text_seen = 0
        for node in li_node.children:
            if isinstance(node, NavigableString):
                # Bare text between tags counts with surrounding whitespace removed.
                text_seen += len(str(node).strip())
            elif isinstance(node, Tag):
                tag_name = node.name
                if tag_name == 'ac:image':
                    entries.append({
                        'kind': 'image',
                        'offset': text_seen,
                        'raw_xhtml': str(node),
                        'path': item_path,
                    })
                elif tag_name in ('ul', 'ol'):
                    # Nested lists are walked with this item's path as their prefix;
                    # they do not advance the text offset.
                    _walk_list_all_anchors(node, item_path, entries)
                else:
                    if tag_name == 'p':
                        # Anchors found inside the paragraph are tagged with this item's path.
                        entries.extend(
                            {**anchor, 'path': item_path}
                            for anchor in _extract_anchors_from_p(node)
                        )
                    # Paragraphs and any other tag advance the offset by their plain text.
                    text_seen += len(extract_plain_text(str(node)))


def build_list_anchor_entries(fragment: str) -> list:
"""list fragment 내 li > p > ac:image를 path 기반 anchor entry로 추출한다.

각 entry:
Expand All @@ -390,6 +424,22 @@ def _build_list_anchor_entries(fragment: str) -> list:
return entries


def build_list_all_anchor_entries(fragment: str) -> list:
    """Extract an anchor entry for every ``ac:image`` in a list fragment.

    Per the original note, this differs from ``build_list_anchor_entries`` in
    that ``ac:image`` tags that are direct children of an ``li`` are included
    as well. It is meant for building a synthetic reconstruction.

    Args:
        fragment: Markup text that is parsed with the ``html.parser`` backend.

    Returns:
        The collected anchor entries, or an empty list when the fragment
        contains no ``ul``/``ol`` element.
    """
    from bs4 import BeautifulSoup

    collected: list = []
    # Only the first ul/ol found in the fragment is used as the walk root.
    list_root = BeautifulSoup(fragment, 'html.parser').find(['ul', 'ol'])
    if list_root is not None:
        _walk_list_all_anchors(list_root, [], collected)
    return collected


def _build_reconstruction_metadata(
fragment: str,
mapping: BlockMapping | None,
Expand All @@ -406,7 +456,7 @@ def _build_reconstruction_metadata(
metadata["anchors"] = _build_anchor_entries(fragment)
elif mapping.type == "list":
metadata["ordered"] = mapping.xhtml_xpath.startswith("ol[")
metadata["items"] = _build_list_anchor_entries(fragment)
metadata["items"] = build_list_anchor_entries(fragment)
elif mapping.children:
children_meta = []
for child_id in mapping.children:
Expand All @@ -430,7 +480,7 @@ def _build_reconstruction_metadata(
if anchors:
child_data["anchors"] = anchors
elif child_m.type == "list":
items = _build_list_anchor_entries(child_m.xhtml_text)
items = build_list_anchor_entries(child_m.xhtml_text)
if items:
child_data["items"] = items
children_meta.append(child_data)
Expand Down
99 changes: 99 additions & 0 deletions confluence-mdx/tests/test_reverse_sync_patch_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2967,6 +2967,105 @@ def test_image_anchor_list_keeps_collapsed_text_diff(self):
)
assert patches[0]['new_plain_text'] == '목록 좌측 상단에서 Delete 버튼을 클릭합니다.'

def test_image_anchor_list_marker_ws_only_produces_replace_fragment(self):
"""A preserved-anchor image list whose only change is the whitespace after the list marker must still yield a replace_fragment patch."""
# Fixture XHTML: an ordered list whose single li holds a text p, an
# ac:image that is a direct child of the li, and a whitespace-only p.
xhtml = (
'<ol><li><p>목록 좌측 상단에서 Delete 버튼을 클릭합니다.</p>'
'<ac:image ac:align="center">'
'<ri:attachment ri:filename="delete.png"></ri:attachment>'
'</ac:image><p> </p></li></ol>'
)
# NOTE(review): old_content and new_content read identically in this view;
# the marker-whitespace difference this test targets is presumably in
# spacing that is not visible here — confirm against the actual file.
old_content = (
'4. 목록 좌측 상단에서 Delete 버튼을 클릭합니다.<br/>\n'
' <figure data-layout="center" data-align="center">\n'
' <img src="/delete.png" alt="img" width="736" />\n'
' </figure>\n'
)
new_content = (
'4. 목록 좌측 상단에서 Delete 버튼을 클릭합니다.<br/>\n'
' <figure data-layout="center" data-align="center">\n'
' <img src="/delete.png" alt="img" width="736" />\n'
' </figure>\n'
)
change = _make_change(0, old_content, new_content, type_='list')
# The mapping ties the changed list block to the fixture XHTML above.
mapping = BlockMapping(
block_id='list-image-ws-1',
type='list',
xhtml_xpath='ol[1]',
xhtml_text=xhtml,
xhtml_plain_text='목록 좌측 상단에서 Delete 버튼을 클릭합니다.',
xhtml_element_index=0,
children=[],
)
# The roundtrip sidecar block carries the same XHTML and the hash of old_content.
roundtrip_sidecar = _make_roundtrip_sidecar([
SidecarBlock(0, 'ol[1]', xhtml, sha256_text(old_content), (1, 4))
])

patches, _, skipped = build_patches(
[change], [change.old_block], [change.new_block],
mappings=[mapping],
roundtrip_sidecar=roundtrip_sidecar,
)

# Exactly one patch is expected; `skipped` is included in the message to aid debugging.
assert len(patches) == 1, (
f"마커 공백만 변경된 이미지 preserved anchor 리스트도 패치를 생성해야 합니다. "
f"skipped={skipped}"
)
assert patches[0].get('action') == 'replace_fragment', (
f"preserved anchor 리스트는 replace_fragment 패치여야 합니다: {patches[0]}"
)
# The regenerated fragment must still contain the image macro and the item text.
new_xhtml = patches[0]['new_element_xhtml']
assert '<ac:image' in new_xhtml, (
f"replace_fragment에 ac:image가 보존되어야 합니다: {new_xhtml}"
)
assert 'Delete 버튼을 클릭합니다.' in new_xhtml

def test_image_anchor_list_marker_ws_patch_xhtml_e2e(self):
"""End-to-end check of the full flow: marker-whitespace change → build_patches → patch_xhtml."""
# Fixture XHTML: an ordered list whose single li holds a text p, an
# ac:image that is a direct child of the li, and a whitespace-only p.
xhtml = (
'<ol><li><p>좌측 상단에서 Delete 버튼을 클릭합니다.</p>'
'<ac:image ac:align="center">'
'<ri:attachment ri:filename="del.png"></ri:attachment>'
'</ac:image><p> </p></li></ol>'
)
# NOTE(review): old_content and new_content read identically in this view;
# the marker-whitespace difference this test targets is presumably in
# spacing that is not visible here — confirm against the actual file.
old_content = (
'4. 좌측 상단에서 Delete 버튼을 클릭합니다.<br/>\n'
' <figure data-layout="center" data-align="center">\n'
' <img src="/del.png" alt="img" width="736" />\n'
' </figure>\n'
)
new_content = (
'4. 좌측 상단에서 Delete 버튼을 클릭합니다.<br/>\n'
' <figure data-layout="center" data-align="center">\n'
' <img src="/del.png" alt="img" width="736" />\n'
' </figure>\n'
)
change = _make_change(0, old_content, new_content, type_='list')
# The mapping ties the changed list block to the fixture XHTML above.
mapping = BlockMapping(
block_id='list-img-e2e',
type='list',
xhtml_xpath='ol[1]',
xhtml_text=xhtml,
xhtml_plain_text='좌측 상단에서 Delete 버튼을 클릭합니다.',
xhtml_element_index=0,
children=[],
)
# The roundtrip sidecar block carries the same XHTML and the hash of old_content.
roundtrip_sidecar = _make_roundtrip_sidecar([
SidecarBlock(0, 'ol[1]', xhtml, sha256_text(old_content), (1, 4))
])

patches, _, skipped = build_patches(
[change], [change.old_block], [change.new_block],
mappings=[mapping],
roundtrip_sidecar=roundtrip_sidecar,
)

assert len(patches) == 1, f"패치가 생성되어야 합니다. skipped={skipped}"
# Apply the patches with patch_xhtml and check the resulting XHTML
# still contains the image macro and the item text.
patched = patch_xhtml(xhtml, patches)
assert '<ac:image' in patched, f"ac:image가 보존되어야 합니다: {patched}"
assert 'Delete 버튼을 클릭합니다.' in patched, f"텍스트가 유지되어야 합니다: {patched}"


# ── _normalize_list_for_content_compare 마커 공백 보존 테스트 ──

Expand Down
Loading