diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml new file mode 100644 index 0000000..41c7552 --- /dev/null +++ b/.github/workflows/pull_request.yaml @@ -0,0 +1,48 @@ +name: pull_request + +on: + pull_request: + branches: + - main + +permissions: + contents: read + +jobs: + quality: + name: Quality Checks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version-file: "pyproject.toml" + + - name: Install dependencies + run: uv sync --all-extras + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "lts/*" + + - name: Prettier format check + run: npx prettier --check . + + - name: Lint and format check + run: | + uvx ruff check . + uvx ruff format --check . + + - name: Type check + run: uv run mypy testdown + + - name: Run tests with coverage + run: uv run pytest diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..26af393 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,37 @@ +name: release + +on: + push: + tags: + - "v[0-9]+.[0-9]+.[0-9]+" + +jobs: + release: + name: Build and Publish + runs-on: ubuntu-latest + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version-file: "pyproject.toml" + + - name: Set version from tag + run: | + TAG="${GITHUB_REF_NAME}" + VERSION="${TAG#v}" + uv version "$VERSION" + + - name: Build wheel and sdist + run: uv build + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index b7faf40..9774f03 100644 --- a/.gitignore +++ b/.gitignore @@ -98,7 +98,7 @@ ipython_config.py # Similar to Pipfile.lock, it 
is generally recommended to include uv.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. -#uv.lock +uv.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. @@ -182,9 +182,9 @@ cython_debug/ .abstra/ # Visual Studio Code -# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. However, if you prefer, +# and can be added to the global gitignore or merged into this file. However, if you prefer, # you could uncomment the following to ignore the entire vscode folder # .vscode/ @@ -196,8 +196,8 @@ cython_debug/ # Cursor # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to -# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data -# refer to https://docs.cursor.com/context/ignore-files +# exclude from AI features like autocomplete and code analysis. 
Recommended for +# sensitive data refer to https://docs.cursor.com/context/ignore-files .cursorignore .cursorindexingignore @@ -205,3 +205,5 @@ cython_debug/ marimo/_static/ marimo/_lsp/ __marimo__/ +.claude/ +*.local.* diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..7ad4120 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,3 @@ +.venv/ +uv.lock +.claude/ diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..6b95aec --- /dev/null +++ b/.prettierrc @@ -0,0 +1,12 @@ +{ + "proseWrap": "always", + "overrides": [ + { + "files": ["*.yaml", "*.yml"], + "options": { + "printWidth": 88, + "tabWidth": 2 + } + } + ] +} diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..f1d3915 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,6 @@ +- Format code with `uvx ruff format` +- Always validate changes using `uvx ruff check` +- Test using `uv run pytest ...` +- Project must always have 100% code coverage results. +- Format non-python files with `npx prettier --write ...` +- Lint non-python (md, yaml) files with `npx prettier --check` diff --git a/README.md b/README.md index f7925b9..5470d80 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,251 @@ # testdown -Markdown driven testing in Python + +Markdown-driven testing in Python. Write test scenarios as readable `.md` files +with named fenced code blocks, then extract and exercise those blocks in your +pytest suite. + +## Overview + +`testdown` lets you embed structured test data directly in Markdown files. Each +fenced code block carries a **language** tag and a **name**, and +`extract_blocks()` returns a collection you can query, iterate, and convert in +your test code. + +This approach keeps test inputs, expected outputs, and documentation together in +a single human-readable file — making scenario-based test suites easy to read, +review, and extend. 
+ +## Installation + +```bash +pip install testdown +``` + +For DataFrame conversion support (pandas, polars, dftxt): + +```bash +pip install "testdown[data]" +``` + +## Block Naming Convention + +Named fenced code blocks use the format `<language> <name>` on the opening fence +line: + + +```` +```python setup +x = 1 + 1 +``` + +```json expected_result +{ "status": "ok" } +``` + +```csv sample_data +name,score +Alice,95 +Bob,87 +``` + +```` + +Names can be any whitespace-free string. A common convention for scenario files +is `expected_<category>_<name>` so blocks can be discovered with +`find_all("expected_*_*")` — mirroring how parametrized test suites verify +multiple output categories per scenario. + +## Quick Start + +Given a Markdown scenario file `tests/scenarios/my_feature.md` with the named +blocks above, extract and use them in a test: + +```python +import testdown + +blocks = testdown.extract_blocks("tests/scenarios/my_feature.md") + +# Run Python setup code and access its module namespace +setup = blocks["setup"].exec_python_code() +assert setup.threshold == 0.5 + +# Convert a JSON block to a dict +result = blocks["expected_result"].to_dict() +assert result["status"] == "ok" + +# Check which blocks are present +assert "sample_data" in blocks + +# Find all blocks matching a wildcard pattern +expected_blocks = blocks.find_all("expected_*") +``` + +## API Reference + +### `extract_blocks(markdown_contents)` + +Parses a Markdown string or file path and returns a `MarkdownBlocks` collection. + +```python +import pathlib +import testdown + +# From a file path +blocks = testdown.extract_blocks(pathlib.Path("scenarios/my_test.md")) + +# From an inline string +blocks = testdown.extract_blocks(markdown_string) +``` + +### `MarkdownBlocks` + +A dict-like collection of extracted blocks. 
+ +| Method / Operation | Description | +| ------------------------------ | ----------------------------- | +| `blocks["name"]` | Get a block by name | +| `"name" in blocks` | Check if a block exists | +| `del blocks["name"]` | Remove a block | +| `len(blocks)` | Number of blocks | +| `iter(blocks)` | Iterate over block names | +| `blocks.keys()` | All block names | +| `blocks.values()` | All `MarkdownBlock` instances | +| `blocks.items()` | Name/block pairs | +| `blocks.get("name", default)` | Get with optional default | +| `blocks.find_all("pattern_*")` | Wildcard search (fnmatch) | + +### `MarkdownBlock` + +Represents a single extracted code block with attributes `name`, `language`, +`index`, and `contents`. + +| Method | Description | +| ------------------------------------ | ---------------------------------------------- | +| `block.to_dict()` | Parse `json`, `yaml`, or `yml` block to `dict` | +| `block.to_dict(safe_load=False)` | Parse YAML with `yaml.full_load` | +| `block.exec_python_code(**kwargs)` | Execute `python` block, returns a module | +| `block.to_pandas_frame(csv_options)` | Convert `csv` or `df` block to `pd.DataFrame` | +| `block.to_frame(csv_options)` | Convert `csv` or `df` block to `pl.DataFrame` | +| `block.to_polars_frame(csv_options)` | Alias for `to_frame()` | + +`to_pandas_frame` and `to_frame`/`to_polars_frame` require the `data` extras. +`df` blocks use the [dftxt](https://github.com/rocketboosters/dftxt) +column-typed text format. 
+ +## Usage Patterns + +### Parametrized scenario tests + +The most common pattern — mirror what's shown in `example/`: + +```python +import pathlib +import pytest +import testdown + +_SCENARIOS_DIR = pathlib.Path(__file__).parent / "scenarios" +_SCENARIOS = [f.name for f in _SCENARIOS_DIR.glob("*.md")] + + +@pytest.mark.parametrize("scenario_name", _SCENARIOS) +def test_my_feature(scenario_name): + blocks = testdown.extract_blocks(_SCENARIOS_DIR / scenario_name) + + # Run setup code defined in the scenario + setup = blocks["setup"].exec_python_code() + + # Verify each expected_* block + for block in blocks.find_all("expected_*"): + expected = block.to_dict() + observed = run_my_feature(setup) + assert observed == expected +``` + +### Executable setup blocks + +```python +blocks = testdown.extract_blocks("scenario.md") + +# Pass variables into the execution context +module = blocks["setup"].exec_python_code(env="staging") +config = module.configuration +``` + +### DataFrame assertions (requires `data` extras) + +```python +blocks = testdown.extract_blocks("scenario.md") + +# polars +expected_df = blocks["expected_output"].to_frame() + +# pandas +expected_df = blocks["expected_output"].to_pandas_frame() + +# Pass options to the underlying CSV reader +df = blocks["data"].to_pandas_frame(csv_options={"sep": "|"}) +df = blocks["data"].to_frame(csv_options={"separator": "|"}) +``` + +### Wildcard block discovery + +```python +blocks = testdown.extract_blocks("scenario.md") + +# Find all blocks whose names match a pattern +for block in blocks.find_all("expected_actual_*"): + category = block.name.split("_", 2)[2] + assert run_actual(category) == block.to_dict() +``` + +--- + +## Development + +### Setup + +```bash +# Install all dependencies including dev and data extras +uv sync --all-extras +``` + +### Linting and formatting + +```bash +# Check for lint errors +uvx ruff check . + +# Auto-fix lint errors where possible +uvx ruff check --fix . 
+ +# Check formatting +uvx ruff format --check . + +# Apply formatting +uvx ruff format . + +# Check non-Python file formatting (JSON, YAML, Markdown, etc.) +npx prettier --check . + +# Apply Prettier formatting +npx prettier --write . +``` + +### Type checking + +```bash +uv run mypy testdown +``` + +### Tests and coverage + +```bash +# Run tests with coverage report (fails below 100%) +uv run pytest + +# Run a specific test file +uv run pytest tests/test_testdown.py + +# Run a specific test by name +uv run pytest -k test_extract_blocks_from_path +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a73c283 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,142 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "testdown" +version = "1.0.0" +authors = [ + {name = "Scott Ernst", email = "swernst@gmail.com"}, +] +description = "Markdown-driven testing in Python" +readme = "README.md" +license = { file = "LICENSE" } +requires-python = ">=3.11" +dependencies = [ + "PyYAML>=6.0.3", +] + +[project.optional-dependencies] +data = [ + "pandas>=3.0.1", + "polars>=1.38.1", + "dftxt>=1.0", +] + +[dependency-groups] +dev = [ + "mypy>=1.0", + "pytest>=8.0", + "pytest-cov>=5.0", + "types-PyYAML", + "pandas-stubs", +] + +[tool.uv] +package = true + +[tool.ruff.lint.isort] +force-single-line = true + +[tool.ruff] +target-version = "py311" +line-length = 88 +exclude = ["example"] + +[tool.ruff.lint] +select = [ + "A", # flake8-builtins + "ANN", # flake8-annotations + "ARG", # flake8-unused-arguments + "B", # flake8-bugbear + "BLE", # flake8-blind-except + "C4", # flake8-comprehensions + "C90", # mccabe complexity + "D", # pydocstyle + "E", # pycodestyle errors + "ERA", # eradicate (commented-out code) + "F", # pyflakes + "FA", # flake8-future-annotations + "FBT", # flake8-boolean-trap + "FLY", # flynt + "FURB", # refurb + "G", # flake8-logging-format + "I", # isort + "ICN", # flake8-import-conventions + 
"INP", # flake8-no-pep420 + "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging + "N", # pep8-naming + "PERF", # perflint + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "PYI", # flake8-pyi + "Q", # flake8-quotes + "RET", # flake8-return + "RSE", # flake8-raise + "RUF", # ruff-specific rules + "S", # flake8-bandit + "SIM", # flake8-simplify + "SLF", # flake8-self + "SLOT", # flake8-slots + "T10", # flake8-debugger + "T20", # flake8-print + "TCH", # flake8-type-checking + "TID", # flake8-tidy-imports + "TRY", # tryceratops + "UP", # pyupgrade + "W", # pycodestyle warnings + "YTT", # flake8-2020 +] +ignore = [ + "D203", # one-blank-line-before-class (conflicts with D211) + "D213", # multi-line-summary-second-line (conflicts with D212) +] + +[tool.ruff.lint.per-file-ignores] +"tests/**/*.py" = [ + "ANN", # type annotations not required in tests + "D", # docstrings not required in tests + "S", # security checks relaxed in tests + "SLF001", # private member access allowed in tests +] +"testdown/_types.py" = [ + "PLC0415", # inline imports are intentional for optional runtime dependencies + "TRY002", # built-in exception types are appropriate here + "TRY003", # descriptive error messages are intentional for usability +] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" + +[tool.mypy] +python_version = "3.11" +strict = true +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +disallow_any_generics = true + +[[tool.mypy.overrides]] +module = "dftxt.*" +ignore_missing_imports = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "--cov=testdown --cov-report=term-missing --cov-fail-under=100" + +[tool.coverage.run] +source = ["testdown"] +branch = true + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "if TYPE_CHECKING:", +] diff --git 
a/testdown/__init__.py b/testdown/__init__.py new file mode 100644 index 0000000..37a48c7 --- /dev/null +++ b/testdown/__init__.py @@ -0,0 +1,11 @@ +"""Markdown driven testing in Python.""" + +from ._extracting import extract_blocks +from ._types import MarkdownBlock +from ._types import MarkdownBlocks + +__all__ = [ + "MarkdownBlock", + "MarkdownBlocks", + "extract_blocks", +] diff --git a/testdown/_extracting.py b/testdown/_extracting.py new file mode 100644 index 0000000..4aa4ca1 --- /dev/null +++ b/testdown/_extracting.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import pathlib +import re + +from testdown import _types + + +def extract_blocks( + markdown_contents: str | pathlib.Path, +) -> _types.MarkdownBlocks: + """Extract named blocks and return them as a collection.""" + source = ( + markdown_contents.read_text("utf-8") + if isinstance(markdown_contents, pathlib.Path) + else markdown_contents + ) + pattern = r"```(?P<lang>[^\s]+)\s+(?P<name>[^\s]+)" + matched = re.search(pattern, source) + extracted = _types.MarkdownBlocks() + while matched is not None: + name: str = matched.group("name") + language: str = matched.group("lang") + contents, source = source[matched.end() :].split("```", 1) + extracted[name] = _types.MarkdownBlock( + name=name, + language=language, + contents=contents.strip(), + index=len(extracted), + ) + + matched = re.search(pattern, source) + + return extracted diff --git a/testdown/_types.py b/testdown/_types.py new file mode 100644 index 0000000..3d82aba --- /dev/null +++ b/testdown/_types.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +import dataclasses +import fnmatch +import io +import json +import types +from typing import TYPE_CHECKING +from typing import Any +from typing import cast + +import yaml + +if TYPE_CHECKING: + from collections.abc import ItemsView + from collections.abc import Iterator + from collections.abc import KeysView + from collections.abc import ValuesView + + import pandas as pd + import polars as 
pl + +_DATA_EXTRAS_HINT = "Install the data extras: pip install 'testdown[data]'" + + +class MarkdownBlocks: + """Encapsulation of loaded Markdown blocks.""" + + def __init__(self) -> None: + """Construct an extracted markdown blocks instance.""" + self._blocks: dict[str, MarkdownBlock] = {} + + def __getitem__(self, key: str) -> MarkdownBlock: + """Get a markdown block by name.""" + return self._blocks[key] + + def __setitem__(self, key: str, value: MarkdownBlock) -> None: + """Set a markdown block by name.""" + self._blocks[key] = value + + def __delitem__(self, key: str) -> None: + """Delete a markdown block by name.""" + del self._blocks[key] + + def __contains__(self, key: object) -> bool: + """Check if a markdown block exists by name.""" + return key in self._blocks + + def __iter__(self) -> Iterator[str]: + """Iterate over markdown block names.""" + return iter(self._blocks) + + def __len__(self) -> int: + """Get the number of markdown blocks.""" + return len(self._blocks) + + def __repr__(self) -> str: + """Return a string representation of the markdown blocks.""" + return f"ExtractedMarkdownBlocks({self._blocks!r})" + + def keys(self) -> KeysView[str]: + """Return the names of all markdown blocks.""" + return self._blocks.keys() + + def values(self) -> ValuesView[MarkdownBlock]: + """Return all markdown blocks.""" + return self._blocks.values() + + def items(self) -> ItemsView[str, MarkdownBlock]: + """Return name-block pairs for all markdown blocks.""" + return self._blocks.items() + + def get( + self, key: str, default: MarkdownBlock | None = None + ) -> MarkdownBlock | None: + """Get a markdown block by name, with optional default.""" + return self._blocks.get(key, default) + + def find_all(self, pattern: str) -> tuple[MarkdownBlock, ...]: + """Find all blocks that match the specified wildcard pattern argument.""" + return tuple( + b + for b in self._blocks.values() + if fnmatch.fnmatch(b.name.lower(), pattern.lower()) + ) + + +@dataclasses.dataclass() 
+class MarkdownBlock: + """Data structure for an extracted Markdown blocks.""" + + name: str + index: int + language: str + contents: str + + def to_pandas_frame( + self, csv_options: dict[str, Any] | None = None + ) -> pd.DataFrame: + """Convert to a Pandas DataFrame.""" + try: + import pandas as pd + except ImportError: + raise ImportError( + f"pandas is required for this operation. {_DATA_EXTRAS_HINT}" + ) from None + + if self.language == "df": + try: + import dftxt as _dftxt + except ImportError: + raise ImportError( + f"dftxt is required for 'df' blocks. {_DATA_EXTRAS_HINT}" + ) from None + return cast("pd.DataFrame", _dftxt.reads(self.contents, kind="pandas")) + + if self.language == "csv": + return cast( + "pd.DataFrame", + pd.read_csv(io.StringIO(self.contents), **(csv_options or {})), + ) + + raise ValueError( + f"Block type {self.language!r} cannot be converted to a pandas DataFrame." + ) + + def to_frame(self, csv_options: dict[str, Any] | None = None) -> pl.DataFrame: + """Convert frame to a Polars DataFrame.""" + try: + import polars as pl + except ImportError: + raise ImportError( + f"polars is required for this operation. {_DATA_EXTRAS_HINT}" + ) from None + + if self.language == "df": + try: + import dftxt as _dftxt + except ImportError: + raise ImportError( + f"dftxt is required for 'df' blocks. {_DATA_EXTRAS_HINT}" + ) from None + return cast("pl.DataFrame", _dftxt.reads(self.contents, kind="polars")) + + if self.language == "csv": + return pl.read_csv(io.StringIO(self.contents), **(csv_options or {})) + + raise ValueError( + f"Block type {self.language!r} cannot be converted to a Polars DataFrame." 
+ ) + + def to_polars_frame( + self, csv_options: dict[str, Any] | None = None + ) -> pl.DataFrame: + """Convert to a Polars DataFrame as an alias for the to_frame method.""" + return self.to_frame(csv_options) + + def to_dict(self, *, safe_load: bool = True) -> dict[str, Any]: + """Convert block to a Python dictionary if possible.""" + if self.language == "json": + return cast("dict[str, Any]", json.loads(self.contents)) + + if self.language in ("yaml", "yml"): + if safe_load: + return cast("dict[str, Any]", yaml.safe_load(self.contents)) + return cast("dict[str, Any]", yaml.full_load(self.contents)) + + raise ValueError(f"Block type {self.language!r} cannot be converted to a dict.") + + def exec_python_code(self, **kwargs: object) -> types.ModuleType: + """Execute the code block as a python script module. + + @return The ModuleType populated by the execution. + """ + temp_module = types.ModuleType( + f"markdown_code_block_{self.language}_{self.name}_{self.index}" + ) + if kwargs: + temp_module.__dict__.update(**kwargs) + exec(self.contents, temp_module.__dict__) # noqa: S102 + return temp_module diff --git a/testdown/py.typed b/testdown/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..dde37d7 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for the testdown package.""" diff --git a/tests/scenarios/extraction.md b/tests/scenarios/extraction.md new file mode 100644 index 0000000..53430be --- /dev/null +++ b/tests/scenarios/extraction.md @@ -0,0 +1,59 @@ +# Extraction Scenario + +A scenario that demonstrates named code block extraction across multiple common +block types, as used in real-world markdown-driven test suites like those in the +`example/` directory. 
+ +## Setup + +Executable setup code block defining the test configuration: + +```python setup +environment = "test" +version = 1 +greeting = "hello" +``` + +## Configuration + +JSON-encoded configuration data for comparison assertions: + +```json config +{ "environment": "test", "version": 1 } +``` + +## Metadata + +YAML-encoded metadata block: + +```yaml metadata +name: extraction +enabled: true +tags: + - integration + - scenario +``` + +## Source Data + +Tabular CSV data for frame conversion testing: + +```csv population +name,score +Alice,95 +Bob,87 +Carol,72 +``` + +## Expected Results + +Blocks following the `expected__` naming convention mirror the +real-world pattern used in parametrized population tests: + +```json expected_config_environment +{ "environment": "test" } +``` + +```json expected_config_version +{ "version": 1 } +``` diff --git a/tests/test_testdown.py b/tests/test_testdown.py new file mode 100644 index 0000000..494312e --- /dev/null +++ b/tests/test_testdown.py @@ -0,0 +1,334 @@ +"""Tests for the testdown package.""" + +import pathlib +import sys +import types +from unittest import mock + +import pytest + +import testdown + +_SCENARIOS_DIR = pathlib.Path(__file__).parent / "scenarios" + +_SCENARIOS = [ + f.name for f in _SCENARIOS_DIR.iterdir() if f.is_file() and f.name.endswith(".md") +] + +_CSV_CONTENTS = "name,age\nAlice,30\nBob,25" +_DF_CONTENTS = "name age\n&&str &&int\nAlice 30" + + +def _make_block(name="test", language="python", contents="pass", index=0): + return testdown.MarkdownBlock( + name=name, + index=index, + language=language, + contents=contents, + ) + + +# --------------------------------------------------------------------------- +# extract_blocks +# --------------------------------------------------------------------------- + + +def test_extract_blocks_from_string(): + md = "```python setup\nx = 1\n```" + blocks = testdown.extract_blocks(md) + assert "setup" in blocks + assert blocks["setup"].language == "python" + 
assert blocks["setup"].contents == "x = 1" + + +def test_extract_blocks_from_path(tmp_path): + md_file = tmp_path / "test.md" + md_file.write_text('```json config\n{"key": "value"}\n```', encoding="utf-8") + blocks = testdown.extract_blocks(md_file) + assert "config" in blocks + assert blocks["config"].language == "json" + + +def test_extract_blocks_empty(): + blocks = testdown.extract_blocks("# Just a heading\nSome text.") + assert len(blocks) == 0 + + +def test_extract_blocks_multiple(): + md = ( + "```python setup\nx = 1\n```\n\n" + '```json config\n{"a": 1}\n```\n\n' + "```yaml meta\nkey: val\n```" + ) + blocks = testdown.extract_blocks(md) + assert set(blocks.keys()) == {"setup", "config", "meta"} + assert blocks["setup"].index < blocks["config"].index < blocks["meta"].index + + +# --------------------------------------------------------------------------- +# MarkdownBlocks +# --------------------------------------------------------------------------- + + +def test_markdown_blocks_setitem_getitem(): + blocks = testdown.MarkdownBlocks() + block = _make_block("foo") + blocks["foo"] = block + assert blocks["foo"] is block + + +def test_markdown_blocks_delitem(): + blocks = testdown.MarkdownBlocks() + blocks["foo"] = _make_block("foo") + del blocks["foo"] + assert "foo" not in blocks + + +def test_markdown_blocks_contains_true_and_false(): + blocks = testdown.MarkdownBlocks() + blocks["foo"] = _make_block("foo") + assert "foo" in blocks + assert "bar" not in blocks + + +def test_markdown_blocks_iter(): + blocks = testdown.MarkdownBlocks() + blocks["a"] = _make_block("a") + blocks["b"] = _make_block("b") + assert list(blocks) == ["a", "b"] + + +def test_markdown_blocks_len(): + blocks = testdown.MarkdownBlocks() + assert len(blocks) == 0 + blocks["x"] = _make_block("x") + assert len(blocks) == 1 + + +def test_markdown_blocks_repr(): + blocks = testdown.MarkdownBlocks() + blocks["a"] = _make_block("a") + assert "ExtractedMarkdownBlocks(" in repr(blocks) + + +def 
test_markdown_blocks_keys(): + blocks = testdown.MarkdownBlocks() + blocks["a"] = _make_block("a") + blocks["b"] = _make_block("b") + assert set(blocks.keys()) == {"a", "b"} + + +def test_markdown_blocks_values(): + blocks = testdown.MarkdownBlocks() + block = _make_block("a") + blocks["a"] = block + assert block in blocks.values() + + +def test_markdown_blocks_items(): + blocks = testdown.MarkdownBlocks() + block = _make_block("a") + blocks["a"] = block + assert ("a", block) in blocks.items() + + +def test_markdown_blocks_get_present(): + blocks = testdown.MarkdownBlocks() + block = _make_block("a") + blocks["a"] = block + assert blocks.get("a") is block + + +def test_markdown_blocks_get_missing_returns_none(): + blocks = testdown.MarkdownBlocks() + assert blocks.get("missing") is None + + +def test_markdown_blocks_get_custom_default(): + blocks = testdown.MarkdownBlocks() + fallback = _make_block("fallback") + assert blocks.get("missing", fallback) is fallback + + +def test_markdown_blocks_find_all_matches_and_skips(): + blocks = testdown.MarkdownBlocks() + blocks["expected_actual_foo"] = _make_block("expected_actual_foo") + blocks["expected_aggregate_foo"] = _make_block("expected_aggregate_foo") + blocks["setup"] = _make_block("setup") + results = blocks.find_all("expected_*_*") + expected_names = {"expected_actual_foo", "expected_aggregate_foo"} + assert {b.name for b in results} == expected_names + + +def test_markdown_blocks_find_all_no_match(): + blocks = testdown.MarkdownBlocks() + blocks["setup"] = _make_block("setup") + assert blocks.find_all("expected_*") == () + + +# --------------------------------------------------------------------------- +# MarkdownBlock.to_dict +# --------------------------------------------------------------------------- + + +def test_to_dict_json(): + block = _make_block(language="json", contents='{"key": "value"}') + assert block.to_dict() == {"key": "value"} + + +def test_to_dict_yaml_safe_load(): + block = 
_make_block(language="yaml", contents="key: value\nnumber: 42") + assert block.to_dict() == {"key": "value", "number": 42} + + +def test_to_dict_yaml_full_load(): + block = _make_block(language="yaml", contents="key: value") + assert block.to_dict(safe_load=False) == {"key": "value"} + + +def test_to_dict_yml(): + block = _make_block(language="yml", contents="name: test") + assert block.to_dict() == {"name": "test"} + + +def test_to_dict_invalid_language(): + block = _make_block(language="python", contents="x = 1") + with pytest.raises(ValueError, match="cannot be converted to a dict"): + block.to_dict() + + +# --------------------------------------------------------------------------- +# MarkdownBlock.exec_python_code +# --------------------------------------------------------------------------- + + +def test_exec_python_code_basic(): + block = _make_block(language="python", contents="result = 1 + 1") + module = block.exec_python_code() + assert isinstance(module, types.ModuleType) + assert module.result == 1 + 1 + + +def test_exec_python_code_with_kwargs(): + block = _make_block(language="python", contents="doubled = x + x") + module = block.exec_python_code(x=5) + assert module.doubled == 5 + 5 + + +# --------------------------------------------------------------------------- +# MarkdownBlock.to_pandas_frame +# --------------------------------------------------------------------------- + + +def test_to_pandas_frame_csv(): + block = _make_block(language="csv", contents=_CSV_CONTENTS) + df = block.to_pandas_frame() + assert set(df.columns) == {"name", "age"} + assert set(df["name"].tolist()) == {"Alice", "Bob"} + + +def test_to_pandas_frame_csv_with_options(): + block = _make_block(language="csv", contents="name|age\nAlice|30", name="data") + df = block.to_pandas_frame(csv_options={"sep": "|"}) + assert set(df.columns) == {"name", "age"} + + +def test_to_pandas_frame_df(): + block = _make_block(language="df", contents=_DF_CONTENTS) + df = block.to_pandas_frame() + 
assert set(df.columns) == {"name", "age"} + + +def test_to_pandas_frame_invalid_language(): + block = _make_block(language="python", contents="x = 1") + with pytest.raises(ValueError, match="cannot be converted to a pandas DataFrame"): + block.to_pandas_frame() + + +def test_to_pandas_frame_pandas_not_installed(): + block = _make_block(language="csv", contents=_CSV_CONTENTS) + with ( + mock.patch.dict(sys.modules, {"pandas": None}), + pytest.raises(ImportError, match="pandas is required"), + ): + block.to_pandas_frame() + + +def test_to_pandas_frame_dftxt_not_installed(): + block = _make_block(language="df", contents=_DF_CONTENTS) + with ( + mock.patch.dict(sys.modules, {"dftxt": None}), + pytest.raises(ImportError, match="dftxt is required"), + ): + block.to_pandas_frame() + + +# --------------------------------------------------------------------------- +# MarkdownBlock.to_frame / to_polars_frame +# --------------------------------------------------------------------------- + + +def test_to_frame_csv(): + block = _make_block(language="csv", contents=_CSV_CONTENTS) + df = block.to_frame() + assert set(df.columns) == {"name", "age"} + assert set(df["name"].to_list()) == {"Alice", "Bob"} + + +def test_to_frame_csv_with_options(): + block = _make_block(language="csv", contents="name|age\nAlice|30", name="data") + df = block.to_frame(csv_options={"separator": "|"}) + assert set(df.columns) == {"name", "age"} + + +def test_to_frame_df(): + block = _make_block(language="df", contents=_DF_CONTENTS) + df = block.to_frame() + assert set(df.columns) == {"name", "age"} + + +def test_to_frame_invalid_language(): + block = _make_block(language="python", contents="x = 1") + with pytest.raises(ValueError, match="cannot be converted to a Polars DataFrame"): + block.to_frame() + + +def test_to_frame_polars_not_installed(): + block = _make_block(language="csv", contents=_CSV_CONTENTS) + with ( + mock.patch.dict(sys.modules, {"polars": None}), + pytest.raises(ImportError, 
match="polars is required"), + ): + block.to_frame() + + +def test_to_frame_dftxt_not_installed(): + block = _make_block(language="df", contents=_DF_CONTENTS) + with ( + mock.patch.dict(sys.modules, {"dftxt": None}), + pytest.raises(ImportError, match="dftxt is required"), + ): + block.to_frame() + + +def test_to_polars_frame_delegates_to_frame(): + block = _make_block(language="csv", contents="name,age\nAlice,30") + df = block.to_polars_frame() + assert "name" in df.columns + + +# --------------------------------------------------------------------------- +# Scenario-based integration tests (example-inspired pattern) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("scenario_name", _SCENARIOS) +def test_scenario_population(scenario_name): + blocks = testdown.extract_blocks(_SCENARIOS_DIR / scenario_name) + assert "setup" in blocks + setup_module = blocks["setup"].exec_python_code() + assert isinstance(setup_module, types.ModuleType) + expected_blocks = blocks.find_all("expected_*_*") + for block in expected_blocks: + result = block.to_dict() + assert isinstance(result, dict)