Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 108 additions & 72 deletions doorstop/core/publishers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import os
from abc import ABCMeta, abstractmethod
from re import compile as re_compile
from typing import Any, Dict
from re import match as re_match
from typing import Any, Dict, List

from markdown import markdown

from doorstop import common
from doorstop.common import DoorstopError
from doorstop.core.template import get_template
from doorstop.core.types import is_tree

Expand Down Expand Up @@ -211,77 +211,113 @@ def getLinkify(self):
"""Get the linkify flag."""
return self.linkify

def process_lists(self, line, next_line):
    """Process lists in the line. Intended for LaTeX and HTML publishers.

    Tracks list state across calls in ``self.list`` (the ``found``,
    ``depth`` and ``indent`` entries, keyed by list type) and emits the
    start/end markers needed to open or close list levels around the
    current line.

    :param line: current line of text
    :param next_line: following line, forwarded to
        ``_check_for_list_end`` for the look-ahead

    :raises DoorstopError: when a nested level is opened with an
        indentation that differs from the recorded step

    :return: tuple ``(no_paragraph, block, line)`` where ``block`` is the
        newline-joined markers to emit before ``line`` (empty string if
        there are none)

    """
    # Don't process custom attributes.
    if "CUSTOM-ATTRIB" in line:
        return (False, "", line)
    # Loop over both list types.
    # `matches` is rebound on every iteration, so after the loop it holds
    # the first hit or the result for the last type tried; `list_type` is
    # only bound when there was a hit.
    for temp_type in ["itemize", "enumerate"]:
        matches = self.list["regexp"][temp_type].findall(line)
        if matches:
            list_type = temp_type
            # Cannot have both types on the same line.
            break
    block = []
    no_paragraph = False
    if matches:
        # Indentation is the number of leading whitespace characters.
        indent = len(line) - len(line.lstrip())
        if not self.list["found"][list_type]:
            # First item of a new list: open it and remember its indent.
            block.append(self.list["start"][list_type])
            self.list["found"][list_type] = True
            self.list["depth"][list_type] = indent
        elif self.list["depth"][list_type] < indent:
            # Deeper than the previous item: open a nested list.
            block.append(self.list["start"][list_type])
            if self.list["depth"][list_type] == 0:
                # Nesting from depth 0 defines the indentation step.
                self.list["indent"][list_type] = indent
            elif (
                self.list["depth"][list_type] + self.list["indent"][list_type]
                != indent
            ):
                # Every further level must be exactly one step deeper.
                raise DoorstopError(
                    "Cannot change indentation depth inside a list."
                )
            self.list["depth"][list_type] = indent
        elif self.list["depth"][list_type] > indent:
            # Shallower than the previous item: close one level per step
            # until the recorded depth is no longer greater than `indent`.
            while self.list["depth"][list_type] > indent:
                block.append(self.list["end"][list_type])
                self.list["depth"][list_type] = (
                    self.list["depth"][list_type] - self.list["indent"][list_type]
                )
    # Check both list types.
    # NOTE: this loop rebinds `list_type`, and it runs for every list type
    # that is currently open, not only the one matched above.
    for list_type in ["itemize", "enumerate"]:
        if self.list["found"][list_type]:
            no_paragraph = True
            # Replace the list identifier.
            line = (
                self.list["sub"][list_type].sub(
                    self.list["start_item"][list_type], line
                )
                + self.list["end_item"][list_type]
            )
            # Look ahead - need empty line to end itemize!
            block, line = self._check_for_list_end(
                line, next_line, block, list_type
            )
    if len(block) > 0:
        return (no_paragraph, "\n".join(block), line)
    else:
        return (no_paragraph, "", line)

def _check_for_list_end(self, line, next_line, block, list_type):
    """Check if the list has ended.

    The list ends when the next line is empty or starts with ``<p>``. In
    that case the current line is moved into ``block``, one end marker is
    appended per open level, and the ``found``/``depth`` state for
    ``list_type`` is reset.

    :param line: current (already converted) line
    :param next_line: following line used for the look-ahead
    :param block: list of output lines collected so far (mutated)
    :param list_type: ``"itemize"`` or ``"enumerate"``

    :return: tuple ``(block, line)``; ``line`` is the end marker when the
        list was closed, otherwise the unchanged input line

    """
    if next_line == "" or next_line.startswith("<p>"):
        block.append(line)
        # NOTE(review): relies on the recorded indent step being positive;
        # with a step of 0 and a depth above 0 this loop never terminates.
        while self.list["depth"][list_type] > 0:
            block.append(self.list["end"][list_type])
            self.list["depth"][list_type] = (
                self.list["depth"][list_type] - self.list["indent"][list_type]
            )
        line = self.list["end"][list_type]
        self.list["found"][list_type] = False
        self.list["depth"][list_type] = 0
    return (block, line)

def _normalize_list_indentation(self, text):
    """Normalize list indentation based on relative hierarchy.

    Handles inconsistent indentation by tracking relative levels instead
    of absolute indent values: within a run of consecutive list lines an
    item that is indented further than the closest shallower item above it
    becomes one level deeper. Every item is then rewritten with four
    spaces per level.

    A run ends at the first line that is not a list item (including blank
    lines), so the first item of every run is level 0.

    Lines inside a closed fenced code block (```` ``` ```` or ``~~~``) are
    never touched, so list-looking code such as YAML keeps its original
    indentation. A fence without a matching closing fence is ignored.

    :param text: Markdown text with potentially inconsistent list indentation
    :return: Text with normalized 4-space indentation per level
    """
    item_pattern = re_compile(r"^(\s*)([-*]|\d+\.)\s+")
    fence_pattern = re_compile(r"^\s*(`{3,}|~{3,})")
    lines = text.split("\n")

    # Mark every line that belongs to a closed fenced code block. The
    # region is only committed once the closing fence is seen, so a stray
    # opening fence cannot switch off normalization for the rest of the text.
    in_code = [False] * len(lines)
    opened_at = -1
    opening = ""
    for index, line in enumerate(lines):
        fence = fence_pattern.match(line)
        if not fence:
            continue
        marker = fence.group(1)
        if opened_at < 0:
            opened_at, opening = index, marker
        elif marker.startswith(opening) and line.strip() == marker:
            # Closing fence: same character, at least as long, nothing else.
            for inside in range(opened_at, index + 1):
                in_code[inside] = True
            opened_at = -1

    result = list(lines)
    indent_stack: List[int] = []
    previous_item = -2  # index of the last list line seen

    for index, line in enumerate(lines):
        match = None if in_code[index] else item_pattern.match(line)
        if not match:
            continue

        # A gap since the previous list line starts a new, independent run.
        if index != previous_item + 1:
            indent_stack = []
        previous_item = index

        indent = len(match.group(1))

        # Drop every level that is not strictly shallower than this item;
        # what remains on the stack are this item's ancestors.
        while indent_stack and indent_stack[-1] >= indent:
            indent_stack.pop()

        new_indent = len(indent_stack) * 4
        indent_stack.append(indent)

        if indent != new_indent:
            result[index] = " " * new_indent + line.lstrip()

    return "\n".join(result)

def _fix_list_spacing(self, text):
    """Add blank lines around lists for proper markdown processing.

    Markdown requires blank lines before and after list blocks to
    properly recognize them as lists. A blank line is inserted before a
    list item whose previous line is neither a list item nor blank, and
    after a list item whose next line is neither a list item nor blank.

    Lines inside a closed fenced code block (```` ``` ```` or ``~~~``) are
    not treated as list items, so no blank lines are injected into code
    samples. A fence without a matching closing fence is ignored.

    :param text: Markdown text
    :return: Text with proper spacing around lists
    """
    list_pattern = re_compile(r"^(\s*)([-*]|\d+\.)\s+")
    fence_pattern = re_compile(r"^\s*(`{3,}|~{3,})")
    lines = text.split("\n")

    # Mark every line that belongs to a closed fenced code block. The
    # region is only committed once the closing fence is seen, so a stray
    # opening fence cannot disable the spacing fix for the rest of the text.
    in_code = [False] * len(lines)
    opened_at = -1
    opening = ""
    for i, line in enumerate(lines):
        fence = fence_pattern.match(line)
        if not fence:
            continue
        marker = fence.group(1)
        if opened_at < 0:
            opened_at, opening = i, marker
        elif marker.startswith(opening) and line.strip() == marker:
            # Closing fence: same character, at least as long, nothing else.
            for inside in range(opened_at, i + 1):
                in_code[inside] = True
            opened_at = -1

    # Classify each line once instead of re-matching its neighbours.
    is_list = [
        not in_code[i] and list_pattern.match(line) is not None
        for i, line in enumerate(lines)
    ]

    result: List[str] = []
    last = len(lines) - 1

    for i, line in enumerate(lines):
        # Add blank line before first list item
        if is_list[i] and i > 0 and not is_list[i - 1] and lines[i - 1].strip():
            result.append("")

        result.append(line)

        # Add blank line after last list item
        if is_list[i] and i < last and not is_list[i + 1] and lines[i + 1].strip():
            result.append("")

    return "\n".join(result)


def extract_prefix(document):
Expand Down
36 changes: 17 additions & 19 deletions doorstop/core/publishers/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,56 +275,54 @@ def lines(self, obj, **kwargs):
:param linkify: turn links into hyperlinks

:return: iterator of lines of text

"""
linkify = kwargs.get("linkify", False)
toc = kwargs.get("toc", False)

# Determine if a full HTML document should be generated
try:
iter(obj)
document = True
except TypeError:
document = False
else:
document = True

# Check for defined document attributes.
# Check for defined document attributes
if document:
doc_attributes = get_document_attributes(
obj, is_html=True, extensions=self.EXTENSIONS
)

# Generate HTML
# Generate and process markdown
text = "\n".join(self._lines_markdown(obj, linkify=linkify, to_html=True))
# We need to handle escaped back-ticks before we pass the text to markdown.

# Normalize list indentation and add proper spacing
text = self._normalize_list_indentation(text)
text = self._fix_list_spacing(text)

# Convert to HTML
text = text.replace("\\`", "##!!TEMPINLINE!!##")
body_to_check = markdown.markdown(text, extensions=self.EXTENSIONS).splitlines()

# Process HTML lines
block = []
# Check for nested lists since they are not supported by the markdown_sane_lists plugin.
for i, line in enumerate(body_to_check):
# Replace the temporary inline code blocks with the escaped back-ticks. If there are
# multiple back-ticks in a row, we need group them in a single <code> block.
for line in body_to_check:
# Replace temporary inline code blocks with escaped back-ticks
line = re.sub(
r"(##!!TEMPINLINE!!##)+",
lambda m: "<code>" + "&#96;" * int(len(m.group()) / 18) + "</code>",
line,
)
# Check if we are at the end of the body.
if i == len(body_to_check) - 1:
next_line = ""
else:
next_line = body_to_check[i + 1]
_, processed_block, processed_line = self.process_lists(line, next_line)
if processed_block != "":
block.append(processed_block)
block.append(processed_line)
block.append(line)

body = "\n".join(block)

# Generate table of contents if requested
if toc:
toc_html = self.table_of_contents(True, obj)
else:
toc_html = ""

# Generate full document or just body
if document:
if self.template == "":
self.template = HTMLTEMPLATE
Expand Down
118 changes: 118 additions & 0 deletions doorstop/core/publishers/latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,124 @@ def create_index(self, directory, index=None, extensions=(".tex",), tree=None):
def table_of_contents(self, linkify=None, obj=None):
    """No table of contents for LaTeX.

    Both arguments are accepted but ignored. The method has no body
    besides this docstring, so it always returns ``None``.
    """

def process_lists(self, line, next_line):
    """Process lists in the line. Intended for LaTeX publishers.

    This method handles list processing specific to LaTeX output,
    including nested lists with flexible indentation. Open levels are
    tracked per list type in ``self.list["stack"]`` (the indentation at
    which each level was opened) and ``self.list["depth"]`` (the
    indentation of the most recent item).

    Dedenting never closes the outermost level: an item that is indented
    less than the first item of the list stays in the outermost list, so
    start and end markers remain balanced. The list is only closed by
    ``_check_for_list_end``.

    :param line: Current line to process
    :param next_line: Next line (for lookahead)
    :return: tuple of (no_paragraph, processed_block, line)
    """
    # Don't process custom attributes
    if "CUSTOM-ATTRIB" in line:
        return (False, "", line)

    # A line cannot hold both list types; the first type that matches wins.
    detected_list_type = None
    for temp_type in ["itemize", "enumerate"]:
        if self.list["regexp"][temp_type].findall(line):
            detected_list_type = temp_type
            break

    block = []
    no_paragraph = False

    if detected_list_type is not None:
        indent = len(line) - len(line.lstrip())

        # Initialize stack if not present
        if "stack" not in self.list:
            self.list["stack"] = {"itemize": [], "enumerate": []}

        if not self.list["found"][detected_list_type]:
            # Start first list
            block.append(self.list["start"][detected_list_type])
            self.list["found"][detected_list_type] = True
            self.list["depth"][detected_list_type] = indent
            self.list["stack"][detected_list_type] = [indent]

        elif self.list["depth"][detected_list_type] < indent:
            # Deeper nesting
            block.append(self.list["start"][detected_list_type])
            self.list["depth"][detected_list_type] = indent
            self.list["stack"][detected_list_type].append(indent)

        elif self.list["depth"][detected_list_type] > indent:
            # Back to shallower level: close nested levels that were
            # opened deeper than this item, but keep the outermost one.
            stack = self.list["stack"][detected_list_type]
            while len(stack) > 1 and stack[-1] > indent:
                block.append(self.list["end"][detected_list_type])
                stack.pop()

            # Item is shallower than the list's first item: re-anchor the
            # outermost level instead of closing the whole list.
            if stack and stack[0] > indent:
                stack[0] = indent

            # Remember this item's own indentation so that a sibling at
            # the same indentation is not mistaken for a nested item.
            self.list["depth"][detected_list_type] = indent

    # Check both list types
    for list_type in ["itemize", "enumerate"]:
        if self.list["found"][list_type]:
            no_paragraph = True
            # Replace the list identifier
            line = (
                self.list["sub"][list_type].sub(
                    self.list["start_item"][list_type], line
                )
                + self.list["end_item"][list_type]
            )
            # Look ahead - need empty line to end itemize
            block, line = self._check_for_list_end(
                line, next_line, block, list_type
            )

    # Joining an empty block yields "", so no special case is needed.
    return (no_paragraph, "\n".join(block), line)

def _check_for_list_end(self, line, next_line, block, list_type):
    """Check if the list has ended.

    The list ends when the next line is empty or starts with ``<p>``. In
    that case the current line is moved into ``block``, an end marker is
    appended for every open level except the last, and the final end
    marker is returned as the new ``line``. The ``found``, ``depth`` and
    ``stack`` state for ``list_type`` is reset.

    :param line: Current line (already converted to LaTeX)
    :param next_line: Next line to check
    :param block: List of output lines
    :param list_type: "itemize" or "enumerate"
    :return: tuple of (block, line)
    """
    if next_line != "" and not next_line.startswith("<p>"):
        # The list continues: nothing to close.
        return (block, line)

    block.append(line)

    # The stack is created lazily by process_lists; tolerate its absence
    # so that closing a list never fails with a KeyError.
    if "stack" not in self.list:
        self.list["stack"] = {"itemize": [], "enumerate": []}

    end_tag = self.list["end"][list_type]

    # Close all open levels except the last; that one is returned as the
    # line. A non-positive count yields no extra end markers.
    num_levels = len(self.list["stack"][list_type])
    block.extend([end_tag] * (num_levels - 1))

    # Clear the state for this list type
    self.list["stack"][list_type] = []
    self.list["found"][list_type] = False
    self.list["depth"][list_type] = 0

    return (block, end_tag)

def lines(self, obj, **kwargs):
"""Yield lines for a LaTeX report.

Expand Down
Loading
Loading