From 300c731dd1492ac207899812a88784a5bc8a961f Mon Sep 17 00:00:00 2001 From: Samuel Collins Date: Thu, 19 Feb 2026 15:56:45 +0000 Subject: [PATCH] Fix doc attribute parsing to properly handle block comments The existing parsing logic is flawed, as it concatenates consecutive lines, e.g. this: /** # Heading This is the first paragraph. */ ...becomes this: /** * # Heading This is the first paragraph. */ Lists are also affected. --- src/bindgen/utilities.rs | 48 +++++++++++++--- .../documentation_block.c.sym | 5 ++ tests/expectations/documentation_attr.c | 11 ++-- .../expectations/documentation_attr.compat.c | 11 ++-- tests/expectations/documentation_attr.cpp | 11 ++-- tests/expectations/documentation_attr.pyx | 11 ++-- tests/expectations/documentation_block.c | 48 ++++++++++++++++ .../expectations/documentation_block.compat.c | 56 +++++++++++++++++++ tests/expectations/documentation_block.cpp | 47 ++++++++++++++++ tests/expectations/documentation_block.pyx | 45 +++++++++++++++ tests/rust/documentation_attr.rs | 22 ++++---- tests/rust/documentation_block.rs | 48 ++++++++++++++++ 12 files changed, 323 insertions(+), 40 deletions(-) create mode 100644 tests/expectations-symbols/documentation_block.c.sym create mode 100644 tests/expectations/documentation_block.c create mode 100644 tests/expectations/documentation_block.compat.c create mode 100644 tests/expectations/documentation_block.cpp create mode 100644 tests/expectations/documentation_block.pyx create mode 100644 tests/rust/documentation_block.rs diff --git a/src/bindgen/utilities.rs b/src/bindgen/utilities.rs index 8f0763b8..5b63cd7f 100644 --- a/src/bindgen/utilities.rs +++ b/src/bindgen/utilities.rs @@ -425,13 +425,43 @@ impl SynAttributeHelpers for [syn::Attribute] { } fn split_doc_attr(input: &str) -> Vec { - input - // Convert two newline (indicate "new paragraph") into two line break. - .replace("\n\n", " \n \n") - // Convert newline after two spaces (indicate "line break") into line break.
- .split(" \n") - // Convert single newline (indicate hard-wrapped) into space. - .map(|s| s.replace('\n', " ")) - .map(|s| s.trim_end().to_string()) - .collect() + if !input.contains('\n') { + // This is a special case for single-line doc comments, which normally already contain a leading space + // if it is desired. + return vec![input.to_owned()]; + } + + // Calculate the common leading whitespace across all non-empty lines, so we can trim it from all lines while + // preserving relative indentation. This is important for items nested (esp. in modules) where the doc comment + // is usually indented to the same level as the item, leaving whitespace at the beginning of each line. + // We want to trim that, but preserve relative indentation. + // Note: we assume you aren't using mixed tabs and spaces, but that is probably safe to assume for rust code + // which is usually indented with spaces. + let common_indent = input + .lines() + .filter(|line| !line.trim().is_empty()) + .map(|line| line.chars().take_while(|c| c.is_whitespace()).count()) + .min() + .unwrap_or(0); + + let mut lines: Vec = input + .lines() + // Trim leading empty/whitespace lines + .skip_while(|line| line.trim().is_empty()) + // Add a leading space to non-empty lines to prevent misinterpreting leading symbols and + // mirror the behaviour of single-line doc comments, which already have a leading space. 
+ .map(|s| { + if s.trim().is_empty() { + String::new() + } else { + format!(" {}", s.chars().skip(common_indent).collect::()) + } + }) + .collect(); + // Remove trailing empty/whitespace lines + while lines.last().is_some_and(|line| line.trim().is_empty()) { + lines.pop(); + } + + lines } diff --git a/tests/expectations-symbols/documentation_block.c.sym b/tests/expectations-symbols/documentation_block.c.sym new file mode 100644 index 00000000..9c89b979 --- /dev/null +++ b/tests/expectations-symbols/documentation_block.c.sym @@ -0,0 +1,5 @@ +{ +root; +block_function; +FOO; +}; \ No newline at end of file diff --git a/tests/expectations/documentation_attr.c b/tests/expectations/documentation_attr.c index d9f6eff1..e1dafbe2 100644 --- a/tests/expectations/documentation_attr.c +++ b/tests/expectations/documentation_attr.c @@ -8,13 +8,14 @@ *like this one with a new line character at its end *and this one as well. So they are in the same paragraph * - *Line ends with one new line should not break + *We treat empty doc comments as empty lines, so they break to the next paragraph. * - *Line ends with two spaces and a new line - *should break to next line + * Newlines are preserved with leading spaces added + * to prettify and avoid misinterpreting leading symbols. + *like headings and lists. * - *Line ends with two new lines + * Line ends with two new lines * - *Should break to next paragraph + * Should break to next paragraph */ void root(void); diff --git a/tests/expectations/documentation_attr.compat.c b/tests/expectations/documentation_attr.compat.c index 7c31c661..f81700c3 100644 --- a/tests/expectations/documentation_attr.compat.c +++ b/tests/expectations/documentation_attr.compat.c @@ -12,14 +12,15 @@ extern "C" { *like this one with a new line character at its end *and this one as well. So they are in the same paragraph * - *Line ends with one new line should not break + *We treat empty doc comments as empty lines, so they break to the next paragraph. 
* - *Line ends with two spaces and a new line - *should break to next line + * Newlines are preserved with leading spaces added + * to prettify and avoid misinterpreting leading symbols. + *like headings and lists. * - *Line ends with two new lines + * Line ends with two new lines * - *Should break to next paragraph + * Should break to next paragraph */ void root(void); diff --git a/tests/expectations/documentation_attr.cpp b/tests/expectations/documentation_attr.cpp index 5daa98c6..6c48b6b6 100644 --- a/tests/expectations/documentation_attr.cpp +++ b/tests/expectations/documentation_attr.cpp @@ -10,14 +10,15 @@ extern "C" { ///like this one with a new line character at its end ///and this one as well. So they are in the same paragraph /// -///Line ends with one new line should not break +///We treat empty doc comments as empty lines, so they break to the next paragraph. /// -///Line ends with two spaces and a new line -///should break to next line +/// Newlines are preserved with leading spaces added +/// to prettify and avoid misinterpreting leading symbols. +///like headings and lists. /// -///Line ends with two new lines +/// Line ends with two new lines /// -///Should break to next paragraph +/// Should break to next paragraph void root(); } // extern "C" diff --git a/tests/expectations/documentation_attr.pyx b/tests/expectations/documentation_attr.pyx index 88070eb4..4db84316 100644 --- a/tests/expectations/documentation_attr.pyx +++ b/tests/expectations/documentation_attr.pyx @@ -10,12 +10,13 @@ cdef extern from *: #like this one with a new line character at its end #and this one as well. So they are in the same paragraph # - #Line ends with one new line should not break + #We treat empty doc comments as empty lines, so they break to the next paragraph. # - #Line ends with two spaces and a new line - #should break to next line + # Newlines are preserved with leading spaces added + # to prettify and avoid misinterpreting leading symbols. 
+ #like headings and lists. # - #Line ends with two new lines + # Line ends with two new lines # - #Should break to next paragraph + # Should break to next paragraph void root(); diff --git a/tests/expectations/documentation_block.c b/tests/expectations/documentation_block.c new file mode 100644 index 00000000..07b5896f --- /dev/null +++ b/tests/expectations/documentation_block.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +/** + * Some docs. + */ +extern const uint32_t FOO; + +/** + * The root of all evil. + * + * But at least it contains some more documentation as someone would expect + * from a simple test case like this. + * + * # Hint + * Always ensure that everything is properly documented, even if you feel lazy. + * **Sometimes** it is also helpful to include some markdown formatting. + * + * //////////////////////////////////////////////////////////////////////////// + * + * Attention: + * + * This is an indentation test. + * The indentation should be preserved in the generated documentation. + * + * ...and here is my shopping list to check that we do not mess with line breaks and indentation: + * - Bread + * - Brown + * - White + * - Milk + * - Eggs + */ +void root(void); + +/** + * In this block, we're testing indentation handling. + * Since all of these lines are equally indented, we want to discard the common leading whitespace, + * but preserve the relative indentation and line breaks. + * + * Including between paragraphs, + * + * - And + * - within + * - Lists + */ +void block_function(void); diff --git a/tests/expectations/documentation_block.compat.c b/tests/expectations/documentation_block.compat.c new file mode 100644 index 00000000..f08c5661 --- /dev/null +++ b/tests/expectations/documentation_block.compat.c @@ -0,0 +1,56 @@ +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * Some docs. + */ +extern const uint32_t FOO; + +/** + * The root of all evil. 
+ * + * But at least it contains some more documentation as someone would expect + * from a simple test case like this. + * + * # Hint + * Always ensure that everything is properly documented, even if you feel lazy. + * **Sometimes** it is also helpful to include some markdown formatting. + * + * //////////////////////////////////////////////////////////////////////////// + * + * Attention: + * + * This is an indentation test. + * The indentation should be preserved in the generated documentation. + * + * ...and here is my shopping list to check that we do not mess with line breaks and indentation: + * - Bread + * - Brown + * - White + * - Milk + * - Eggs + */ +void root(void); + +/** + * In this block, we're testing indentation handling. + * Since all of these lines are equally indented, we want to discard the common leading whitespace, + * but preserve the relative indentation and line breaks. + * + * Including between paragraphs, + * + * - And + * - within + * - Lists + */ +void block_function(void); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus diff --git a/tests/expectations/documentation_block.cpp b/tests/expectations/documentation_block.cpp new file mode 100644 index 00000000..aff9cc12 --- /dev/null +++ b/tests/expectations/documentation_block.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + +extern "C" { + +/// Some docs. +extern const uint32_t FOO; + +/// The root of all evil. +/// +/// But at least it contains some more documentation as someone would expect +/// from a simple test case like this. +/// +/// # Hint +/// Always ensure that everything is properly documented, even if you feel lazy. +/// **Sometimes** it is also helpful to include some markdown formatting. +/// +/// //////////////////////////////////////////////////////////////////////////// +/// +/// Attention: +/// +/// This is an indentation test. +/// The indentation should be preserved in the generated documentation. 
+/// +/// ...and here is my shopping list to check that we do not mess with line breaks and indentation: +/// - Bread +/// - Brown +/// - White +/// - Milk +/// - Eggs +void root(); + +/// In this block, we're testing indentation handling. +/// Since all of these lines are equally indented, we want to discard the common leading whitespace, +/// but preserve the relative indentation and line breaks. +/// +/// Including between paragraphs, +/// +/// - And +/// - within +/// - Lists +void block_function(); + +} // extern "C" diff --git a/tests/expectations/documentation_block.pyx b/tests/expectations/documentation_block.pyx new file mode 100644 index 00000000..d571bcee --- /dev/null +++ b/tests/expectations/documentation_block.pyx @@ -0,0 +1,45 @@ +from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, intptr_t +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t, uintptr_t +cdef extern from *: + ctypedef bint bool + ctypedef struct va_list + +cdef extern from *: + + # Some docs. + extern const uint32_t FOO; + + # The root of all evil. + # + # But at least it contains some more documentation as someone would expect + # from a simple test case like this. + # + # # Hint + # Always ensure that everything is properly documented, even if you feel lazy. + # **Sometimes** it is also helpful to include some markdown formatting. + # + # //////////////////////////////////////////////////////////////////////////// + # + # Attention: + # + # This is an indentation test. + # The indentation should be preserved in the generated documentation. + # + # ...and here is my shopping list to check that we do not mess with line breaks and indentation: + # - Bread + # - Brown + # - White + # - Milk + # - Eggs + void root(); + + # In this block, we're testing indentation handling. + # Since all of these lines are equally indented, we want to discard the common leading whitespace, + # but preserve the relative indentation and line breaks. 
+ # + # Including between paragraphs, + # + # - And + # - within + # - Lists + void block_function(); diff --git a/tests/rust/documentation_attr.rs b/tests/rust/documentation_attr.rs index d88ce806..955bbef8 100644 --- a/tests/rust/documentation_attr.rs +++ b/tests/rust/documentation_attr.rs @@ -1,12 +1,12 @@ -#[doc="With doc attr, each attr contribute to one line of document"] -#[doc="like this one with a new line character at its end"] -#[doc="and this one as well. So they are in the same paragraph"] -#[doc=""] -#[doc="Line ends with one new line\nshould not break"] -#[doc=""] -#[doc="Line ends with two spaces and a new line \nshould break to next line"] -#[doc=""] -#[doc="Line ends with two new lines\n\nShould break to next paragraph"] +#[doc = "With doc attr, each attr contribute to one line of document"] +#[doc = "like this one with a new line character at its end"] +#[doc = "and this one as well. So they are in the same paragraph"] +#[doc = ""] +#[doc = "We treat empty doc comments as empty lines, so they break to the next paragraph."] +#[doc = ""] +#[doc = "Newlines are preserved with leading spaces added\nto prettify and avoid misinterpreting leading symbols."] +#[doc = "like headings and lists."] +#[doc = ""] +#[doc = "Line ends with two new lines\n\nShould break to next paragraph"] #[no_mangle] -pub extern "C" fn root() { -} +pub extern "C" fn root() {} diff --git a/tests/rust/documentation_block.rs b/tests/rust/documentation_block.rs new file mode 100644 index 00000000..0422c451 --- /dev/null +++ b/tests/rust/documentation_block.rs @@ -0,0 +1,48 @@ +/** +The root of all evil. + +But at least it contains some more documentation as someone would expect +from a simple test case like this. + +# Hint +Always ensure that everything is properly documented, even if you feel lazy. +**Sometimes** it is also helpful to include some markdown formatting. 
+ +//////////////////////////////////////////////////////////////////////////// + +Attention: + + This is an indentation test. + The indentation should be preserved in the generated documentation. + +...and here is my shopping list to check that we do not mess with line breaks and indentation: +- Bread + - Brown + - White +- Milk +- Eggs +*/ +#[no_mangle] +pub extern "C" fn root() {} + +/** +Some docs. +*/ +#[no_mangle] +pub static FOO: u32 = 4; + +mod abc { + /** + In this block, we're testing indentation handling. + Since all of these lines are equally indented, we want to discard the common leading whitespace, + but preserve the relative indentation and line breaks. + + Including between paragraphs, + + - And + - within + - Lists + */ + #[no_mangle] + pub extern "C" fn block_function() {} +}