Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,16 @@


def extract_and_combine_codeblocks(text: str) -> str:
"""Extract all ```python codeblocks from text and combine them.

If fenced blocks are present they are returned joined by a blank line
with no further filtering. Otherwise the raw text is treated as code
only if it contains ``print(`` and compiles (``await`` is stripped for
the compile check only; the original text is returned).
"""
"""Extract all ```python codeblocks from text and combine them."""
code_blocks = re.findall(BACKTICK_PATTERN, text, re.DOTALL)

if code_blocks:
return "\n\n".join(block.strip() for block in code_blocks)

recovered = _recover_non_closing_python_fence(text)
if recovered:
return recovered

stripped_text = text.strip()

if "print(" not in stripped_text:
Expand Down Expand Up @@ -87,3 +85,28 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
return result

return async_wrapper


def _recover_non_closing_python_fence(text: str) -> str:
"""Recover code from an unclosed ```python fence (#204); compile-guarded."""
# ``\s*`` (not ``\s*\n``) tolerates same-line fences like ```python print("x").
unclosed = re.search(r"```python\s*(.*)", text, re.DOTALL)
if not unclosed:
return ""
# Strip a trailing full OR partial markdown fence (1–3 backticks).
candidate = re.sub(r"\n?`{1,3}\s*$", "", unclosed.group(1)).strip()
if not candidate:
return ""
# Walk back line-by-line so trailing prose after otherwise-valid code
# is salvageable: ``print("x")\nhope this helps`` returns ``print("x")``.
lines = candidate.split("\n")
for end in range(len(lines), 0, -1):
attempt = "\n".join(lines[:end]).rstrip()
if not attempt:
continue
try:
compile(attempt.replace("await ", ""), "<string>", "exec")
return attempt
except SyntaxError:
continue
return ""
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,48 @@ def test_markdown_without_python_tag(self):
result = extract_and_combine_codeblocks('```\nprint("test")\n```')
assert result == ''

def test_incomplete_markdown_block(self):
"""Incomplete markdown block should not match."""
def test_incomplete_markdown_block_with_valid_code_recovered(self):
    """A ```python fence that is never closed still yields its valid code."""
    unclosed_block = '```python\nprint("test")'
    assert extract_and_combine_codeblocks(unclosed_block) == 'print("test")'

def test_incomplete_markdown_block_recovers_multiline_code(self):
    """Prose followed by an unclosed fence holding multi-line code yields the code."""
    # Leading prose, then an opening fence that is never closed.
    unclosed_response = "".join([
        "Config is available, so the scan can proceed. ",
        "I'll list all Gmail messages...\n",
        "```python\n",
        "import json, re\n",
        "from datetime import datetime, timedelta\n",
        "print('starting scan')\n",
    ])
    extracted = extract_and_combine_codeblocks(unclosed_response)
    assert extracted.startswith("import json, re")
    assert "print('starting scan')" in extracted

def test_incomplete_markdown_block_with_invalid_code_returns_empty(self):
    """An unclosed fence whose contents do not compile yields an empty string."""
    not_python = '```python\nthis is not python at all $$$'
    assert extract_and_combine_codeblocks(not_python) == ''

def test_incomplete_markdown_block_strips_partial_trailing_fence(self):
    """One or two stray trailing backticks are dropped and the code is returned."""
    with_one_tick = '```python\nprint("test")\n`'
    with_two_ticks = '```python\nprint("test")\n``'
    recovered_one = extract_and_combine_codeblocks(with_one_tick)
    recovered_two = extract_and_combine_codeblocks(with_two_ticks)
    assert recovered_one == 'print("test")'
    assert recovered_two == 'print("test")'

def test_incomplete_markdown_block_recovers_valid_code_with_trailing_prose(self):
    """Prose trailing the code inside an unclosed fence is discarded."""
    code_then_prose = '```python\nprint("test")\n\nHope that helps!\n'
    assert extract_and_combine_codeblocks(code_then_prose) == 'print("test")'

def test_incomplete_same_line_python_fence_recovered(self):
    """Code sharing a line with the opening ```python marker is still recovered."""
    same_line_fence = '```python print("x")'
    assert extract_and_combine_codeblocks(same_line_fence) == 'print("x")'

def test_nested_code_structures(self):
"""Nested code structures should be preserved."""
code = '''def outer():
Expand Down
Loading