Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 68 additions & 2 deletions EMBEDDING.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,74 @@ The tool supports an extended glob syntax for matching lines:
By default, patterns imply a wildcard (`*`) at both the start and end.
Use `^` and `$` to disable this behavior and match the exact line start or end.

If you need to match a literal `^` at the start of a line, use `^^`.
Similarly, use `$$` to match a literal `$` at the end of a line.
#### Multi-line patterns

Use `\n` inside a `start`, `end`, or `line` pattern to match consecutive source lines.
Spaces around `\n` are ignored, and each pattern line uses the same glob syntax as a
regular one-line pattern.

````markdown
<embed-code
file="src/test/java/example/CalculatorTest.java"
start="Test \n adds two values"
end="assertEquals(2, value); \n }"></embed-code>
```java
```
````

This matches a source range like:

```java
@Test
@DisplayName("adds two values")
void addsTwoValues() {
int value = 1 + 1;

assertEquals(2, value);
}
```

The `start` pattern above is interpreted as two consecutive line patterns:
`Test` and `adds two values`. Because ordinary patterns imply `*` at both ends,
these match `@Test` and `@DisplayName("adds two values")`.

Use `^` and `$` on each pattern line when you need exact line matching:

````markdown
<embed-code
file="src/test/java/example/CalculatorTest.java"
start="^ @Test$ \n ^ @DisplayName(\"adds two values\")$"
end="^ assertEquals(2, value);$ \n ^ }$"></embed-code>
```java
```
````

Without `\n`, a `start`, `end`, or `line` pattern matches only one source line.

#### Escaping

Use a backslash to match glob control characters literally. For example:

- `\*` matches a literal `*`.
- `\?` matches a literal `?`.
- `\[` matches a literal `[`.

Since `^` is only special at the start of a pattern, use `^^` to match a literal
`^` there. Since `$` is only special at the end of a pattern, use `$$` to match a
literal `$` there.

To match literal `\n` text in a source line, write it as `\\n` in the pattern.

````markdown
<embed-code
file="src/test/java/example/LineSeparator.java"
line="LINE_SEPARATOR = \"\\n\""></embed-code>
```java
```
````

It is possible to write quote characters in patterns as `\"` instead of the XML entity `&quot;`.
For example, `line="println(\"Hello\")"` is equivalent to `line="println(&quot;Hello&quot;)"`.

## Comment filtering

Expand Down
59 changes: 59 additions & 0 deletions embedding/embedding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,65 @@ var _ = Describe("Embedding", func() {
Expect(processor.IsUpToDate()).Should(BeTrue())
})

It("should embed a method with escaped newline patterns", func() {
	config.DocIncludes = []string{"escaped-newline-pattern.md"}
	markdownPath := fmt.Sprintf("%s/escaped-newline-pattern.md", config.DocumentationRoot)

	// Embedding must finish without an error before the document is inspected.
	Expect(embedding.NewProcessor(markdownPath, config).Embed()).Error().ShouldNot(HaveOccurred())

	rawContent, readErr := os.ReadFile(markdownPath)
	Expect(readErr).ShouldNot(HaveOccurred())

	// The processed document must contain the two consecutive annotation lines
	// and the closing lines of the method, but nothing from the other test method.
	embedded := string(rawContent)
	Expect(embedded).Should(ContainSubstring("@Test\n" +
		"@DisplayName(\"adds two values\")"))
	Expect(embedded).Should(ContainSubstring("assertEquals(2, value);\n}"))
	Expect(embedded).ShouldNot(ContainSubstring("subtractsTwoValues"))
})

It("should embed a method with exact escaped newline patterns", func() {
	config.DocIncludes = []string{"escaped-newline-exact-pattern.md"}
	markdownPath := fmt.Sprintf("%s/escaped-newline-exact-pattern.md", config.DocumentationRoot)

	// Embedding must finish without an error before the document is inspected.
	Expect(embedding.NewProcessor(markdownPath, config).Embed()).Error().ShouldNot(HaveOccurred())

	rawContent, readErr := os.ReadFile(markdownPath)
	Expect(readErr).ShouldNot(HaveOccurred())

	// Same expectations as for the non-exact variant: both annotation lines and
	// the method's closing lines are present, the other test method is absent.
	embedded := string(rawContent)
	Expect(embedded).Should(ContainSubstring("@Test\n" +
		"@DisplayName(\"adds two values\")"))
	Expect(embedded).Should(ContainSubstring("assertEquals(2, value);\n}"))
	Expect(embedded).ShouldNot(ContainSubstring("subtractsTwoValues"))
})

It("should embed matching lines with an escaped newline line pattern", func() {
	config.DocIncludes = []string{"escaped-newline-line-pattern.md"}
	markdownPath := fmt.Sprintf("%s/escaped-newline-line-pattern.md", config.DocumentationRoot)

	// Embedding must finish without an error before the document is inspected.
	Expect(embedding.NewProcessor(markdownPath, config).Embed()).Error().ShouldNot(HaveOccurred())

	rawContent, readErr := os.ReadFile(markdownPath)
	Expect(readErr).ShouldNot(HaveOccurred())

	// Only the two annotation lines are expected; the method signature and the
	// other test method must not appear in the processed document.
	embedded := string(rawContent)
	Expect(embedded).Should(ContainSubstring("@Test\n" +
		"@DisplayName(\"adds two values\")"))
	Expect(embedded).ShouldNot(ContainSubstring("void addsTwoValues"))
	Expect(embedded).ShouldNot(ContainSubstring("subtractsTwoValues"))
})

It("should embed a line with an escaped newline literal pattern", func() {
	config.DocIncludes = []string{"escaped-newline-literal-pattern.md"}
	markdownPath := fmt.Sprintf("%s/escaped-newline-literal-pattern.md", config.DocumentationRoot)

	// Embedding must finish without an error before the document is inspected.
	Expect(embedding.NewProcessor(markdownPath, config).Embed()).Error().ShouldNot(HaveOccurred())

	rawContent, readErr := os.ReadFile(markdownPath)
	Expect(readErr).ShouldNot(HaveOccurred())

	// The processed document must contain the source line holding a literal
	// backslash-n inside its string constant.
	embedded := string(rawContent)
	Expect(embedded).Should(ContainSubstring(
		"private static final String LINE_SEPARATOR = \"\\n\";",
	))
})

It("should report a missing closing tag", func() {
docPath := fmt.Sprintf("%s/missing-closing-tag.md", config.DocumentationRoot)
processor := embedding.NewProcessor(docPath, config)
Expand Down
68 changes: 66 additions & 2 deletions embedding/parsing/instruction.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,18 @@ func (e Instruction) String() string {
// lines — a list of strings representing the input lines.
func (e Instruction) matchingLines(lines []string, codeFileReference string) ([]string, error) {
if e.LinePattern != nil {
if e.LinePattern.HasLineSeparator() {
startPosition, endPosition, err := e.matchLineSequence(
e.LinePattern, lines, 0, "line", codeFileReference,
)
if err != nil {
return nil, err
}
requiredLines := lines[startPosition : endPosition+1]
indentation := indent.MaxCommonIndentation(requiredLines)

return indent.CutIndent(requiredLines, indentation), nil
}
linePosition, err := e.matchGlob(
e.LinePattern, lines, 0, "line", codeFileReference,
)
Expand Down Expand Up @@ -238,19 +250,71 @@ func (e Instruction) matchingLines(lines []string, codeFileReference string) ([]
// startFrom — an index from which to start searching.
func (e Instruction) matchGlob(pattern *Pattern, lines []string, startFrom int,
	kind string, codeFileReference string) (int, error) {
	// A pattern without a line separator is matched against one line at a time.
	if !pattern.HasLineSeparator() {
		position, matched := matchSingleLine(pattern, lines, startFrom)
		if matched {
			return position, nil
		}

		return 0, PatternNotFoundError{
			Line:              e.DocumentationLine,
			CodeFileReference: codeFileReference,
			Kind:              kind,
			Pattern:           pattern,
		}
	}

	// A multi-line pattern matches a range of lines; an error from the range
	// search is returned unchanged.
	first, last, err := e.matchLineSequence(pattern, lines, startFrom, kind, codeFileReference)
	switch {
	case err != nil:
		return 0, err
	case kind == "end":
		// An "end" pattern resolves to the last line of the matched range.
		return last, nil
	default:
		// Every other kind resolves to the first line of the matched range.
		return first, nil
	}
}

// matchSingleLine returns the first source line matching the pattern.
func matchSingleLine(pattern *Pattern, lines []string, startFrom int) (int, bool) {
lineCount := len(lines)
resultLine := startFrom
for resultLine < lineCount {
line := lines[resultLine]
if pattern.Match(line) {
return resultLine, nil
return resultLine, true
}
resultLine++
}
return 0, PatternNotFoundError{

return 0, false
}

// matchLineSequence looks up the first range of consecutive lines matching the pattern.
//
// It delegates the search to the package-level matchLineSequence and, when nothing
// matches, reports a PatternNotFoundError carrying the documentation line, the code file
// reference, the pattern kind, and the pattern itself.
//
// Returns the first and the last index of the matched range.
func (e Instruction) matchLineSequence(pattern *Pattern, lines []string, startFrom int,
	kind string, codeFileReference string) (int, int, error) {
	first, last, matched := matchLineSequence(pattern, lines, startFrom)
	if !matched {
		return 0, 0, PatternNotFoundError{
			Line:              e.DocumentationLine,
			CodeFileReference: codeFileReference,
			Kind:              kind,
			Pattern:           pattern,
		}
	}

	return first, last, nil
}

// matchLineSequence returns the first source-line range matching an escaped-line pattern.
func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool) {
patternLines, _ := pattern.linePatterns()
lineCount := len(patternLines)
lastStart := len(lines) - lineCount
for start := startFrom; start <= lastStart; start++ {
end := start + lineCount
if pattern.MatchLineSequence(lines[start:end]) {
return start, end - 1, true
Comment on lines +308 to +315
}
}

return 0, 0, false
}
48 changes: 48 additions & 0 deletions embedding/parsing/instruction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ var _ = Describe("Instruction", func() {
Expect(parsing.FromXML(xmlString, config)).Error().ShouldNot(HaveOccurred())
})

It("should parse backslash-escaped quotes in XML attributes", func() {
	// The `line` attribute value contains quotes written as \" rather than &quot;.
	tag := `<embed-code file="org/example/Hello.java" line="println(\"Hello world\")"/>`

	parsed, parseErr := parsing.ParseXMLLine(tag)
	Expect(parseErr).ShouldNot(HaveOccurred())

	// The parsed attribute must hold plain quotes with the backslashes removed.
	Expect(parsed["line"]).Should(Equal(`println("Hello world")`))
})

It("should have an error for unsupported comments mode", func() {
instructionParams := TestInstructionParams{
comments: "summary",
Expand Down Expand Up @@ -301,6 +310,45 @@ var _ = Describe("Instruction", func() {
}))
})

It("should embed a line with an escaped asterisk pattern", func() {
	// `\*` in the glob stands for a literal asterisk in the source line.
	params := TestInstructionParams{lineGlob: `Use \* to multiply`}
	expected := []string{
		"Use * to multiply",
	}

	embedded := getXMLExtractionContent("literal-patterns.txt", params, config)

	Expect(embedded).Should(Equal(expected))
})

It("should embed a line starting with a literal caret pattern", func() {
	// A doubled caret at the start of the glob stands for a literal `^`.
	params := TestInstructionParams{lineGlob: "^^ starts with caret"}
	expected := []string{
		"^ starts with caret",
	}

	embedded := getXMLExtractionContent("literal-patterns.txt", params, config)

	Expect(embedded).Should(Equal(expected))
})

It("should embed a line ending with a literal dollar pattern", func() {
	// A doubled dollar sign at the end of the glob stands for a literal `$`.
	params := TestInstructionParams{lineGlob: "The value ends with $$"}
	expected := []string{
		"The value ends with $",
	}

	embedded := getXMLExtractionContent("literal-patterns.txt", params, config)

	Expect(embedded).Should(Equal(expected))
})

It("should successfully parse XML by only end glob", func() {
instructionParams := TestInstructionParams{
endGlob: "package*",
Expand Down
57 changes: 57 additions & 0 deletions embedding/parsing/pattern.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ const (
anyCharacterSequence = "*"
lineStart = "^"
lineEnd = "$"
lineSeparator = `\n`
escapedLineSeparator = `\\n`
)

// NewPattern creates a new Pattern based on provided glob string.
Expand All @@ -51,6 +53,12 @@ const (
// The modified pattern is the original one, but enclosed with the "*" wildcards,
// unless start of the line or end of the line wildcards were specified.
//
// A multi-line pattern uses "\n" as a separator between consecutive source-line
// patterns. For example, "Test \n adds two values" matches a line matching "Test"
// followed by a line matching "adds two values". Each part separated by "\n" is
// converted to Pattern separately and follows the same wildcard rules.
// Use "\\n" to match literal "\n" text instead of starting the next pattern line.
//
// glob — a string that represents a pattern that can include such wildcards:
// - "*" — matches any sequence of characters;
// - "^" — matches the start of the line;
Expand Down Expand Up @@ -100,6 +108,55 @@ func (p Pattern) Match(line string) bool {
return g.Match(line)
}

// HasLineSeparator reports whether the source glob is a multi-line pattern, that is,
// whether linePatterns found at least one line separator in it.
func (p Pattern) HasLineSeparator() bool {
	_, multiLine := p.linePatterns()

	return multiLine
}

// MatchLineSequence reports whether the given source lines match this multi-line pattern.
//
// The pattern is split into line patterns; the i-th line pattern is turned into its own
// Pattern and matched against the i-th source line. The result is false when the number
// of source lines differs from the number of line patterns or when any line fails to match.
func (p Pattern) MatchLineSequence(lines []string) bool {
	lineGlobs, _ := p.linePatterns()
	if len(lineGlobs) != len(lines) {
		return false
	}
	for i := range lineGlobs {
		if linePattern := NewPattern(lineGlobs[i]); !linePattern.Match(lines[i]) {
			return false
		}
	}

	return true
}

// linePatterns returns trimmed pattern lines separated by an escaped newline.
func (p Pattern) linePatterns() ([]string, bool) {
var patternLines []string
var line strings.Builder
hasSeparator := false
for i := 0; i < len(p.sourceGlob); {
remaining := p.sourceGlob[i:]
switch {
case strings.HasPrefix(remaining, escapedLineSeparator):
line.WriteString(escapedLineSeparator)
i += len(escapedLineSeparator)
case strings.HasPrefix(remaining, lineSeparator):
patternLines = append(patternLines, strings.TrimSpace(line.String()))
line.Reset()
hasSeparator = true
i += len(lineSeparator)
default:
line.WriteByte(p.sourceGlob[i])
i++
}
}
patternLines = append(patternLines, strings.TrimSpace(line.String()))

Comment on lines +134 to +156
return patternLines, hasSeparator
}

// Returns string representation of Pattern.
func (p Pattern) String() string {
return fmt.Sprintf("Pattern %s", p.sourceGlob)
Expand Down
Loading