diff --git a/doorstop/core/publishers/base.py b/doorstop/core/publishers/base.py index 0acfdb88..21cb0b42 100644 --- a/doorstop/core/publishers/base.py +++ b/doorstop/core/publishers/base.py @@ -5,12 +5,12 @@ import os from abc import ABCMeta, abstractmethod from re import compile as re_compile -from typing import Any, Dict +from re import match as re_match +from typing import Any, Dict, List from markdown import markdown from doorstop import common -from doorstop.common import DoorstopError from doorstop.core.template import get_template from doorstop.core.types import is_tree @@ -211,77 +211,113 @@ def getLinkify(self): """Get the linkify flag.""" return self.linkify - def process_lists(self, line, next_line): - """Process lists in the line. Intended for LaTeX and HTML publishers.""" - # Don't process custom attributes. - if "CUSTOM-ATTRIB" in line: - return (False, "", line) - # Loop over both list types. - for temp_type in ["itemize", "enumerate"]: - matches = self.list["regexp"][temp_type].findall(line) - if matches: - list_type = temp_type - # Cannot have both types on the same line. - break - block = [] - no_paragraph = False - if matches: - indent = len(line) - len(line.lstrip()) - if not self.list["found"][list_type]: - block.append(self.list["start"][list_type]) - self.list["found"][list_type] = True - self.list["depth"][list_type] = indent - elif self.list["depth"][list_type] < indent: - block.append(self.list["start"][list_type]) - if self.list["depth"][list_type] == 0: - self.list["indent"][list_type] = indent - elif ( - self.list["depth"][list_type] + self.list["indent"][list_type] - != indent - ): - raise DoorstopError( - "Cannot change indentation depth inside a list." 
- ) - self.list["depth"][list_type] = indent - elif self.list["depth"][list_type] > indent: - while self.list["depth"][list_type] > indent: - block.append(self.list["end"][list_type]) - self.list["depth"][list_type] = ( - self.list["depth"][list_type] - self.list["indent"][list_type] - ) - # Check both list types. - for list_type in ["itemize", "enumerate"]: - if self.list["found"][list_type]: - no_paragraph = True - # Replace the list identifier. - line = ( - self.list["sub"][list_type].sub( - self.list["start_item"][list_type], line - ) - + self.list["end_item"][list_type] - ) - # Look ahead - need empty line to end itemize! - block, line = self._check_for_list_end( - line, next_line, block, list_type - ) - if len(block) > 0: - return (no_paragraph, "\n".join(block), line) - else: - return (no_paragraph, "", line) - - def _check_for_list_end(self, line, next_line, block, list_type): - """Check if the list has ended.""" - if next_line == "" or next_line.startswith("

def _normalize_list_indentation(self, text):
    """Normalize list indentation based on relative hierarchy.

    Handles inconsistent indentation by tracking relative nesting levels
    instead of absolute indent values and rewrites every list item to the
    4-space-per-level layout expected by most Markdown processors.

    A list is only considered finished when an unindented, non-blank,
    non-list line is reached. Blank lines and indented continuation lines
    keep the current hierarchy, so a nested item that follows a wrapped
    paragraph or a blank line is still nested under its parent. Lines inside
    fenced code blocks are never treated as list items and are left as-is.

    :param text: Markdown text with potentially inconsistent list indentation
    :return: Text with normalized 4-space indentation per level
    """
    lines = text.split("\n")
    result = list(lines)

    # Raw indents of the list items that are currently "open" ancestors.
    indent_stack: List[int] = []
    # Three-character marker of the fenced code block we are inside, or "".
    fence = ""

    for index, line in enumerate(lines):
        stripped = line.lstrip()

        # Never touch the content of fenced code blocks.
        if fence:
            if stripped.startswith(fence):
                fence = ""
            continue
        # A fence opener; a line that also closes the fence (inline use such
        # as ```code```) is ordinary text, not the start of a block.
        if stripped.startswith(("```", "~~~")) and stripped[:3] not in stripped[3:]:
            fence = stripped[:3]
            if stripped == line:
                # An unindented code block terminates the list.
                indent_stack.clear()
            continue

        match = re_match(r"^(\s*)([-*]|\d+\.)\s+", line)
        if not match:
            # Only unindented text ends the list; blank lines and indented
            # continuation lines belong to the current item.
            if stripped and stripped == line:
                indent_stack.clear()
            continue

        indent = len(match.group(1))

        # Drop every level that is not a strict ancestor of this item.
        while indent_stack and indent_stack[-1] >= indent:
            indent_stack.pop()

        new_indent = len(indent_stack) * 4
        indent_stack.append(indent)

        if indent != new_indent:
            result[index] = " " * new_indent + stripped

    return "\n".join(result)


def _fix_list_spacing(self, text):
    """Add blank lines around lists for proper markdown processing.

    Markdown requires blank lines before and after list blocks to
    properly recognize them as lists. Indented continuation lines that
    directly follow a list item are treated as part of the list, so no blank
    line is inserted between an item and its wrapped text. Fenced code
    blocks are never split: list-looking lines inside them are ignored.

    :param text: Markdown text
    :return: Text with proper spacing around lists
    """
    list_pattern = r"^(\s*)([-*]|\d+\.)\s+"
    lines = text.split("\n")

    # Pass 1: flag every line that belongs to a list (item or continuation).
    in_list: List[bool] = []
    fence = ""
    previous = False
    for line in lines:
        stripped = line.lstrip()
        indented = stripped != line
        if fence:
            # Code lines inherit the status of the line that opened the fence.
            current = previous
            if stripped.startswith(fence):
                fence = ""
        elif stripped.startswith(("```", "~~~")) and stripped[:3] not in stripped[3:]:
            fence = stripped[:3]
            current = previous and indented
        elif re_match(list_pattern, line):
            current = True
        else:
            # Indented, non-blank text right after a list line continues it.
            current = previous and indented and bool(stripped)
        in_list.append(current)
        previous = current

    # Pass 2: insert the separating blank lines at the list boundaries.
    result: List[str] = []
    last = len(lines) - 1
    for i, line in enumerate(lines):
        starts_list = (
            in_list[i] and i > 0 and not in_list[i - 1] and lines[i - 1].strip() != ""
        )
        if starts_list:
            result.append("")

        result.append(line)

        ends_list = (
            in_list[i] and i < last and not in_list[i + 1] and lines[i + 1].strip() != ""
        )
        if ends_list:
            result.append("")

    return "\n".join(result)
+ # Check for defined document attributes if document: doc_attributes = get_document_attributes( obj, is_html=True, extensions=self.EXTENSIONS ) - # Generate HTML + # Generate and process markdown text = "\n".join(self._lines_markdown(obj, linkify=linkify, to_html=True)) - # We need to handle escaped back-ticks before we pass the text to markdown. + + # Normalize list indentation and add proper spacing + text = self._normalize_list_indentation(text) + text = self._fix_list_spacing(text) + + # Convert to HTML text = text.replace("\\`", "##!!TEMPINLINE!!##") body_to_check = markdown.markdown(text, extensions=self.EXTENSIONS).splitlines() + + # Process HTML lines block = [] - # Check for nested lists since they are not supported by the markdown_sane_lists plugin. - for i, line in enumerate(body_to_check): - # Replace the temporary inline code blocks with the escaped back-ticks. If there are - # multiple back-ticks in a row, we need group them in a single block. + for line in body_to_check: + # Replace temporary inline code blocks with escaped back-ticks line = re.sub( r"(##!!TEMPINLINE!!##)+", lambda m: "" + "`" * int(len(m.group()) / 18) + "", line, ) - # Check if we are at the end of the body. 
def process_lists(self, line, next_line):
    """Process lists in the line. Intended for LaTeX publishers.

    Converts a Markdown list line into the configured list markup, opening
    and closing nested list environments as the indentation changes. The
    indentation may be irregular: each open environment remembers the indent
    it was opened at (``self.list["stack"]``), while ``self.list["depth"]``
    holds the indent of the most recent item.

    The outermost environment is never closed by a dedent; it is only closed
    by :meth:`_check_for_list_end`. This keeps the emitted start/end markers
    balanced even when the first item of a list is indented further than
    the items that follow it.

    :param line: Current line to process
    :param next_line: Next line (for lookahead)
    :return: tuple of (no_paragraph, processed_block, line)
    """
    # Don't process custom attributes
    if "CUSTOM-ATTRIB" in line:
        return (False, "", line)

    # A line can only start one kind of list; the first match wins.
    detected_list_type = None
    for candidate in ("itemize", "enumerate"):
        if self.list["regexp"][candidate].findall(line):
            detected_list_type = candidate
            break

    block = []
    no_paragraph = False

    if detected_list_type is not None:
        indent = len(line) - len(line.lstrip())
        stacks = self.list.setdefault("stack", {"itemize": [], "enumerate": []})
        found = self.list["found"]
        depth = self.list["depth"]

        if not found[detected_list_type]:
            # Start first list
            block.append(self.list["start"][detected_list_type])
            found[detected_list_type] = True
            depth[detected_list_type] = indent
            stacks[detected_list_type] = [indent]

        elif depth[detected_list_type] < indent:
            # Deeper nesting
            block.append(self.list["start"][detected_list_type])
            depth[detected_list_type] = indent
            stacks[detected_list_type].append(indent)

        elif depth[detected_list_type] > indent:
            # Back to a shallower level: close every nested environment that
            # was opened deeper than this item, but keep the outermost one.
            levels = stacks[detected_list_type]
            while len(levels) > 1 and levels[-1] > indent:
                block.append(self.list["end"][detected_list_type])
                levels.pop()
            # If the item sits left of the outermost level, re-base that
            # level instead of closing the whole list.
            if not levels or levels[-1] > indent:
                levels[-1:] = [indent]
            # Track the item's own indent so that siblings at the same
            # (possibly intermediate) indent stay on the same level.
            depth[detected_list_type] = indent

    # Check both list types
    for list_type in ("itemize", "enumerate"):
        if self.list["found"][list_type]:
            no_paragraph = True
            # Replace the list identifier
            line = (
                self.list["sub"][list_type].sub(
                    self.list["start_item"][list_type], line
                )
                + self.list["end_item"][list_type]
            )
            # Look ahead - need empty line to end itemize
            block, line = self._check_for_list_end(
                line, next_line, block, list_type
            )

    # Joining an empty block yields "", which is what callers expect.
    return (no_paragraph, "\n".join(block), line)


def _check_for_list_end(self, line, next_line, block, list_type):
    """Check if the list has ended.

    The list ends when the next line is empty or starts a new paragraph.
    In that case the current line is moved into ``block``, every nested
    environment except the outermost is closed inside ``block``, the list
    state is reset, and the final end marker is returned as the new line.

    :param line: Current line (already converted to LaTeX)
    :param next_line: Next line to check
    :param block: List of output lines
    :param list_type: "itemize" or "enumerate"
    :return: tuple of (block, line)
    """
    # NOTE(review): the paragraph marker was mangled in the reviewed diff;
    # "<p>" is assumed here - confirm against the repository.
    if next_line != "" and not next_line.startswith("<p>"):
        return (block, line)

    block.append(line)

    stacks = self.list.setdefault("stack", {"itemize": [], "enumerate": []})
    end_marker = self.list["end"][list_type]

    # Close all open levels except the last; the last end marker is
    # returned as the line so the caller emits it after the block.
    block.extend([end_marker] * (len(stacks[list_type]) - 1))

    # Reset the list state
    stacks[list_type] = []
    self.list["found"][list_type] = False
    self.list["depth"][list_type] = 0

    return (block, end_marker)
test_normalize_already_correct(self): + """Verify that already-correct indentation is unchanged.""" + text = """- Item 1 + - Nested with 4 spaces + - Double nested with 8 spaces +- Item 2""" + + result = self.publisher._normalize_list_indentation(text) + self.assertEqual(text, result) + + def test_normalize_no_lists(self): + """Verify that text without lists is unchanged.""" + text = """Just some text +Without any lists +At all""" + + result = self.publisher._normalize_list_indentation(text) + self.assertEqual(text, result) + + def test_normalize_ordered_lists(self): + """Verify that ordered lists are normalized correctly.""" + text = """1. First item + 2. Nested item + 3. Double nested +1. Second item""" + + expected = """1. First item + 2. Nested item + 3. Double nested +1. Second item""" + + result = self.publisher._normalize_list_indentation(text) + self.assertEqual(expected, result) + + def test_normalize_separate_list_blocks(self): + """Verify that separate list blocks are normalized independently.""" + text = """First list: +- Item 1 + - Nested + +Some text in between + +Second list: +- Item A + - Nested A""" + + expected = """First list: +- Item 1 + - Nested + +Some text in between + +Second list: +- Item A + - Nested A""" + + result = self.publisher._normalize_list_indentation(text) + self.assertEqual(expected, result) + + def test_normalize_with_headings(self): + """Verify that lists separated by headings are handled correctly.""" + text = """# Heading 1 +- Item 1 + - Nested + +## Heading 2 +- Item 2 + - Nested""" + + expected = """# Heading 1 +- Item 1 + - Nested + +## Heading 2 +- Item 2 + - Nested""" + + result = self.publisher._normalize_list_indentation(text) + self.assertEqual(expected, result) + + def test_normalize_asterisk_lists(self): + """Verify that asterisk-style lists are normalized.""" + text = """* Item 1 + * Nested with 2 spaces +* Item 2""" + + expected = """* Item 1 + * Nested with 2 spaces +* Item 2""" + + result = 
self.publisher._normalize_list_indentation(text) + self.assertEqual(expected, result) + + def test_normalize_triple_nested(self): + """Verify that three levels of nesting work correctly.""" + text = """- Level 1 + - Level 2 + - Level 3 +- Back to 1""" + + expected = """- Level 1 + - Level 2 + - Level 3 +- Back to 1""" + + result = self.publisher._normalize_list_indentation(text) + self.assertEqual(expected, result) + + +class TestListSpacing(unittest.TestCase): + """Tests for list spacing fixes.""" + + def setUp(self): + """Setup test fixtures.""" + self.publisher = MarkdownPublisher(None, ".md") + + def test_add_blank_line_before_list(self): + """Verify that blank line is added before list.""" + text = """Some text before +- List item +- Another item""" + + expected = """Some text before + +- List item +- Another item""" + + result = self.publisher._fix_list_spacing(text) + self.assertEqual(expected, result) + + def test_add_blank_line_after_list(self): + """Verify that blank line is added after list.""" + text = """- List item +- Another item +Text after""" + + expected = """- List item +- Another item + +Text after""" + + result = self.publisher._fix_list_spacing(text) + self.assertEqual(expected, result) + + def test_no_double_blank_lines(self): + """Verify that existing blank lines are not duplicated.""" + text = """Some text + +- List item +- Another item + +More text""" + + result = self.publisher._fix_list_spacing(text) + self.assertEqual(text, result) + + def test_list_at_start(self): + """Verify that list at document start has no leading blank line.""" + text = """- List item +- Another item""" + + result = self.publisher._fix_list_spacing(text) + self.assertEqual(text, result) + + def test_consecutive_lists(self): + """Verify that consecutive list items are not separated.""" + text = """- Item 1 +- Item 2 +- Item 3""" + + result = self.publisher._fix_list_spacing(text) + self.assertEqual(text, result) + + def test_nested_lists_not_separated(self): + 
"""Verify that nested list items are not separated from parent.""" + text = """- Item 1 + - Nested + - More nested +- Item 2""" + + result = self.publisher._fix_list_spacing(text) + self.assertEqual(text, result) + + def test_both_before_and_after(self): + """Verify that blank lines are added both before and after.""" + text = """Text before +- List item +Text after""" + + expected = """Text before + +- List item + +Text after""" + + result = self.publisher._fix_list_spacing(text) + self.assertEqual(expected, result) + + +class TestCombinedNormalizationAndSpacing(unittest.TestCase): + """Tests for combined normalization and spacing.""" + + def setUp(self): + """Setup test fixtures.""" + self.publisher = MarkdownPublisher(None, ".md") + + def test_full_pipeline(self): + """Verify that normalization and spacing work together.""" + text = """Text before list: +- Item 1 + - Nested with 2 spaces +- Item 2 +Text after list""" + + # First normalize + text = self.publisher._normalize_list_indentation(text) + # Then fix spacing + result = self.publisher._fix_list_spacing(text) + + expected = """Text before list: + +- Item 1 + - Nested with 2 spaces +- Item 2 + +Text after list""" + + self.assertEqual(expected, result) + + def test_complex_document_structure(self): + """Verify complex document with multiple lists.""" + text = """# Title +Some intro text +- List 1 item 1 + - Nested +- List 1 item 2 + +Middle text + +- List 2 item 1 + - Nested +- List 2 item 2 +End text""" + + # Apply both transformations + text = self.publisher._normalize_list_indentation(text) + result = self.publisher._fix_list_spacing(text) + + # Verify normalization (2 -> 4 spaces) + self.assertIn(" - Nested", result) + # Verify spacing around first list + self.assertIn("text\n\n- List 1", result) + # Verify spacing around second list + self.assertIn("item 2\n\nEnd", result) + + +if __name__ == "__main__": + unittest.main() diff --git a/doorstop/core/publishers/tests/test_html_list_handling.py 
b/doorstop/core/publishers/tests/test_html_list_handling.py new file mode 100644 index 00000000..e75bbe22 --- /dev/null +++ b/doorstop/core/publishers/tests/test_html_list_handling.py @@ -0,0 +1,214 @@ +"""Unit tests for HTML list handling.""" + +import unittest + +from doorstop.core import publisher +from doorstop.core.tests import MockItemAndVCS + + +# Helper function like in other tests +def getLines(gen): + """Get lines from a generator.""" + return "\n".join(gen) + + +class TestHtmlListHandling(unittest.TestCase): + """Tests for HTML list generation with nested lists.""" + + def test_nested_list_2_spaces(self): + """Verify that 2-space nested lists render correctly in HTML.""" + # Setup + generated_data = ( + r"text: |" + "\n" + r" Test list:" + "\n" + r" " + "\n" + r" - Item 1" + "\n" + r" - Nested" + "\n" + r" - Item 2" + ) + item = MockItemAndVCS("TEST-001.yml", _file=generated_data) + + # Act + result = getLines(publisher.publish_lines(item, ".html")) + + # Assert + self.assertIn("

", result) + + def test_multiple_nesting_levels(self): + """Verify that multiple nesting levels work correctly.""" + # Setup + generated_data = ( + r"text: |" + "\n" + r" Multi-level:" + "\n" + r" " + "\n" + r" - Level 1" + "\n" + r" - Level 2" + "\n" + r" - Level 3" + "\n" + r" - Back to 1" + ) + item = MockItemAndVCS("TEST-002.yml", _file=generated_data) + + # Act + result = getLines(publisher.publish_lines(item, ".html")) + + # Assert + self.assertGreaterEqual(result.count(""), 2) + + def test_ordered_list_nesting(self): + """Verify that ordered lists with nesting work.""" + # Setup + generated_data = ( + r"text: |" + "\n" + r" Ordered:" + "\n" + r" " + "\n" + r" 1. First" + "\n" + r" 1. Nested" + "\n" + r" 2. Second" + ) + item = MockItemAndVCS("TEST-003.yml", _file=generated_data) + + # Act + result = getLines(publisher.publish_lines(item, ".html")) + + # Assert + self.assertIn("
    ", result) + self.assertIn("
", result) + + def test_list_without_blank_line(self): + """Verify that lists without leading blank line still work.""" + # Setup + generated_data = ( + r"text: |" + "\n" + r" Text before:" + "\n" + r" - Item 1" + "\n" + r" - Nested" + "\n" + r" - Item 2" + ) + item = MockItemAndVCS("TEST-004.yml", _file=generated_data) + + # Act + result = getLines(publisher.publish_lines(item, ".html")) + + # Assert + self.assertIn("