From d6a20464d92e53b20fc3680d777a12c245c52da3 Mon Sep 17 00:00:00 2001 From: Doria Keung Date: Thu, 19 Mar 2026 12:08:07 -0400 Subject: [PATCH 1/2] Fix parser handling of message field values and group-in-oneof legalization Parse message field values with parseExprPrefix instead of parseExprInfix so that infix keywords like "to" are not consumed as part of the value. Allow groups inside oneof definitions, and remove the IsLeaf guard that prevented colon-less fields with angle-bracket message expressions. --- experimental/parser/legalize_def.go | 2 +- experimental/parser/parse_expr.go | 31 +++---- .../parser/testdata/parser/field/group.proto | 4 + .../parser/field/group.proto.stderr.txt | 9 +- .../testdata/parser/field/group.proto.yaml | 10 +++ .../testdata/parser/option/values.proto | 8 ++ .../parser/option/values.proto.stderr.txt | 84 +++++++++++++------ .../testdata/parser/option/values.proto.yaml | 21 ++++- 8 files changed, 126 insertions(+), 43 deletions(-) diff --git a/experimental/parser/legalize_def.go b/experimental/parser/legalize_def.go index a0915bfdd..308884a91 100644 --- a/experimental/parser/legalize_def.go +++ b/experimental/parser/legalize_def.go @@ -37,7 +37,7 @@ var validDefParents = [...]taxa.Set{ ast.DefKindExtend: taxa.NewSet(taxa.TopLevel, taxa.Message, taxa.Group), ast.DefKindField: taxa.NewSet(taxa.Message, taxa.Group, taxa.Extend, taxa.Oneof), ast.DefKindOneof: taxa.NewSet(taxa.Message, taxa.Group), - ast.DefKindGroup: taxa.NewSet(taxa.Message, taxa.Group, taxa.Extend), + ast.DefKindGroup: taxa.NewSet(taxa.Message, taxa.Group, taxa.Extend, taxa.Oneof), ast.DefKindEnumValue: taxa.NewSet(taxa.Enum), ast.DefKindMethod: taxa.NewSet(taxa.Service), ast.DefKindOption: taxa.NewSet( diff --git a/experimental/parser/parse_expr.go b/experimental/parser/parse_expr.go index a10eec3fa..578092b8c 100644 --- a/experimental/parser/parse_expr.go +++ b/experimental/parser/parse_expr.go @@ -61,17 +61,26 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn ) fallthrough case keyword.Colon: + // Field values in a message literal are never infix + // expressions. In protoc's grammar, a field value is a + // scalar, a message literal, or a list literal -- never a + // range or any other infix production. We use + // parseExprPrefix (not parseExprInfix) so that infix + // keywords like "to" are not consumed as part of the value. + // + // Without this, a field named "to" on the following line + // gets swallowed into the previous field's value as a range: + // + // reserved: true + // to: true <- would be parsed as "true to true" return p.NewExprField(ast.ExprFieldArgs{ Key: lhs, Colon: c.Next(), - Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1), + Value: parseExprPrefix(p, c, where), }).AsAny() case keyword.Braces, keyword.Lt, keyword.Brackets: // This is for colon-less, array or dict-valued fields. - if next.IsLeaf() { - break - } // The previous expression cannot also be a key-value pair, since // this messes with parsing of dicts, which are not comma-separated. @@ -91,17 +100,11 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn break } + // Same rationale as the colon case above: field values are + // not infix expressions, so use parseExprPrefix. return p.NewExprField(ast.ExprFieldArgs{ - Key: lhs, - // Why not call parseExprSolo? Suppose the following - // (invalid) production: - // - // foo { ... } to { ... } - // - // Calling parseExprInfix will cause this to be parsed - // as a range expression, which will be diagnosed when - // we legalize. - Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1), + Key: lhs, + Value: parseExprPrefix(p, c, where), }).AsAny() } } diff --git a/experimental/parser/testdata/parser/field/group.proto b/experimental/parser/testdata/parser/field/group.proto index 2a26eabff..98d72bf74 100644 --- a/experimental/parser/testdata/parser/field/group.proto +++ b/experimental/parser/testdata/parser/field/group.proto @@ -31,4 +31,8 @@ message Foo { } required group lowercase = 4 {} + + oneof O { + group Baz = 5 {} + } } \ No newline at end of file diff --git a/experimental/parser/testdata/parser/field/group.proto.stderr.txt b/experimental/parser/testdata/parser/field/group.proto.stderr.txt index 8fc6c1b2d..bff1b4793 100644 --- a/experimental/parser/testdata/parser/field/group.proto.stderr.txt +++ b/experimental/parser/testdata/parser/field/group.proto.stderr.txt @@ -40,4 +40,11 @@ error: group names must start with an uppercase letter 33 | required group lowercase = 4 {} | ^^^^^^^^^ -encountered 1 error and 5 warnings +warning: group syntax is deprecated + --> testdata/parser/field/group.proto:36:9 + | +36 | group Baz = 5 {} + | ^^^^^ + = note: group syntax is not available in proto3 or editions + +encountered 1 error and 6 warnings diff --git a/experimental/parser/testdata/parser/field/group.proto.yaml b/experimental/parser/testdata/parser/field/group.proto.yaml index 70f6ca622..8fe609bc8 100644 --- a/experimental/parser/testdata/parser/field/group.proto.yaml +++ b/experimental/parser/testdata/parser/field/group.proto.yaml @@ -64,3 +64,13 @@ decls: type.path.components: [{ ident: "group" }] value.literal.int_value: 4 body: {} + - def: + kind: KIND_ONEOF + name.components: [{ ident: "O" }] + body.decls: + - def: + kind: KIND_GROUP + name.components: [{ ident: "Baz" }] + type.path.components: [{ ident: "group" }] + value.literal.int_value: 5 + body: {} diff --git a/experimental/parser/testdata/parser/option/values.proto b/experimental/parser/testdata/parser/option/values.proto index 538fb5867..c93514f8c 100644 --- a/experimental/parser/testdata/parser/option/values.proto +++ b/experimental/parser/testdata/parser/option/values.proto @@ -70,6 +70,14 @@ option x = { x: x: > + x {} + x {a: 42} + x + x + + reserved: true + to: true + "ident": 42 "???": 42 42: 42 diff --git a/experimental/parser/testdata/parser/option/values.proto.stderr.txt b/experimental/parser/testdata/parser/option/values.proto.stderr.txt index cc409ef4a..335105bd0 100644 --- a/experimental/parser/testdata/parser/option/values.proto.stderr.txt +++ b/experimental/parser/testdata/parser/option/values.proto.stderr.txt @@ -81,11 +81,17 @@ error: cannot use `<...>` for message expression here = help: `<...>` message expressions are an obscure feature and not recommended -error: unexpected range expression in option setting value - --> testdata/parser/option/values.proto:57:8 +error: unexpected `to` in message expression + --> testdata/parser/option/values.proto:57:10 | 57 | x: x to y - | ^^^^^^ + | ^^ expected message field value + +error: unexpected identifier in message expression + --> testdata/parser/option/values.proto:57:13 + | +57 | x: x to y + | ^ expected message field value error: unexpected identifier in message expression --> testdata/parser/option/values.proto:59:5 @@ -193,56 +199,86 @@ warning: using `<...>` for message expression is not recommended = help: `<...>` message expressions are an obscure feature and not recommended +warning: using `<...>` for message expression is not recommended + --> testdata/parser/option/values.proto:75:7 + | +75 | x + | ^^^^^^^ + help: use `{...}` instead + | +75 | - x +75 | + x {a: 42} + | + = note: `<...>` are only permitted for sub-messages within a message + expression, but as top-level option values + = help: `<...>` message expressions are an obscure feature and not + recommended + +warning: using `<...>` for message expression is not recommended + --> testdata/parser/option/values.proto:76:7 + | +76 | x + | ^^^^^^^^^^^^^^ + help: use `{...}` instead + | +76 | - x +76 | + x {a: 42, b: 43} + | + = note: `<...>` are only permitted for sub-messages within a message + expression, but as top-level option values + = help: `<...>` message expressions are an obscure feature and not + recommended + error: cannot name extension field using `(...)` in message expression - --> testdata/parser/option/values.proto:77:5 + --> testdata/parser/option/values.proto:85:5 | -77 | (x.y): 42 +85 | (x.y): 42 | ^^^^^ expected this to be wrapped in `[...]` instead | help: replace the `(...)` with `[...]` | -77 | - (x.y): 42 -77 | + [x.y]: 42 +85 | - (x.y): 42 +85 | + [x.y]: 42 | error: unexpected absolute path in extension name - --> testdata/parser/option/values.proto:82:6 + --> testdata/parser/option/values.proto:90:6 | -82 | [.x.y]: 42 +90 | [.x.y]: 42 | ^^^^ expected a path without a leading `.` | help: remove the leading `.` | -82 | - [.x.y]: 42 -82 | + [x.y]: 42 +90 | - [.x.y]: 42 +90 | + [x.y]: 42 | error: unexpected array expression in message field value - --> testdata/parser/option/values.proto:83:5 + --> testdata/parser/option/values.proto:91:5 | -83 | [x, y, z]: 42 +91 | [x, y, z]: 42 | ^^^^^^^^^ | | | expected message field name, extension name, or `Any` type URL error: unexpected array expression in message field value - --> testdata/parser/option/values.proto:84:5 + --> testdata/parser/option/values.proto:92:5 | -84 | []: 42 +92 | []: 42 | ^^ expected message field name, extension name, or `Any` type URL error: type URL can only contain a single `/` - --> testdata/parser/option/values.proto:86:17 + --> testdata/parser/option/values.proto:94:17 | -86 | [buf.build/x/y]: 42 +94 | [buf.build/x/y]: 42 | - ^ | | | first one is here error: unexpected integer literal in array expression - --> testdata/parser/option/values.proto:88:16 + --> testdata/parser/option/values.proto:96:16 | -88 | x [{x: 5}, 1, , 2, 3], +96 | x [{x: 5}, 1, , 2, 3], | - ^ expected message expression | | | because this message field value is missing a `:` @@ -251,18 +287,18 @@ error: unexpected integer literal in array expression value is a message expression or a array expression of them warning: using `<...>` for message expression is not recommended - --> testdata/parser/option/values.proto:88:19 + --> testdata/parser/option/values.proto:96:19 | -88 | x [{x: 5}, 1, , 2, 3], +96 | x [{x: 5}, 1, , 2, 3], | ^^^^^^ help: use `{...}` instead | -88 | - x [{x: 5}, 1, , 2, 3], -88 | + x [{x: 5}, 1, {x: 5}, 2, 3], +96 | - x [{x: 5}, 1, , 2, 3], +96 | + x [{x: 5}, 1, {x: 5}, 2, 3], | = note: `<...>` are only permitted for sub-messages within a message expression, but as top-level option values = help: `<...>` message expressions are an obscure feature and not recommended -encountered 19 errors and 6 warnings +encountered 20 errors and 8 warnings diff --git a/experimental/parser/testdata/parser/option/values.proto.yaml b/experimental/parser/testdata/parser/option/values.proto.yaml index 4283bbc2b..af7947506 100644 --- a/experimental/parser/testdata/parser/option/values.proto.yaml +++ b/experimental/parser/testdata/parser/option/values.proto.yaml @@ -133,9 +133,9 @@ decls: - key.path.components: [{ ident: "x" }] value.path.components: [{ ident: "foo", separator: SEPARATOR_DOT }] - key.path.components: [{ ident: "x" }] - value.range: - start.path.components: [{ ident: "x" }] - end.path.components: [{ ident: "y" }] + value.path.components: [{ ident: "x" }] + - value.path.components: [{ ident: "to" }] + - value.path.components: [{ ident: "y" }] - value.path.components: [{ ident: "x" }] - { key.path.components: [{ ident: "x" }], value.array: {} } - key.path.components: [{ ident: "x" }] @@ -162,6 +162,21 @@ decls: - key.path.components: [{ ident: "a" }] value.dict.entries: - { key.path.components: [{ ident: "a" }], value.literal.int_value: 42 } + - { key.path.components: [{ ident: "x" }], value.dict: {} } + - key.path.components: [{ ident: "x" }] + value.dict.entries: + - { key.path.components: [{ ident: "a" }], value.literal.int_value: 42 } + - key.path.components: [{ ident: "x" }] + value.dict.entries: + - { key.path.components: [{ ident: "a" }], value.literal.int_value: 42 } + - key.path.components: [{ ident: "x" }] + value.dict.entries: + - { key.path.components: [{ ident: "a" }], value.literal.int_value: 42 } + - { key.path.components: [{ ident: "b" }], value.literal.int_value: 43 } + - key.path.components: [{ ident: "reserved" }] + value.path.components: [{ ident: "true" }] + - key.path.components: [{ ident: "to" }] + value.path.components: [{ ident: "true" }] - { key.literal.string_value: "ident", value.literal.int_value: 42 } - { key.literal.string_value: "???", value.literal.int_value: 42 } - { key.literal.int_value: 42, value.literal.int_value: 42 } From d2909addd2431ac21732603fef01f71b4a35c542 Mon Sep 17 00:00:00 2001 From: Doria Keung Date: Thu, 19 Mar 2026 15:57:02 -0400 Subject: [PATCH 2/2] Fix infix expression parsing correctly checking for colon-less message assignments --- experimental/parser/parse_expr.go | 30 ++--- .../testdata/parser/option/values.proto | 6 + .../parser/option/values.proto.stderr.txt | 112 ++++++++++-------- .../testdata/parser/option/values.proto.yaml | 17 ++- 4 files changed, 98 insertions(+), 67 deletions(-) diff --git a/experimental/parser/parse_expr.go b/experimental/parser/parse_expr.go index 578092b8c..c1a90e1fd 100644 --- a/experimental/parser/parse_expr.go +++ b/experimental/parser/parse_expr.go @@ -46,6 +46,20 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn } next := peekTokenExpr(p, c) + // If the next token is an identifier, we check two tokens ahead for validaton that this + // is an infix expression, either using ":" and "=" or colon-less assignments. + // + // If it is valid, then we return the left-hand side. Otherwise, we continue to parse + // the infix expression. + if next.Kind() == token.Ident { + clone := c.Clone() + clone.Next() + switch clone.Peek().Keyword() { + case keyword.Assign, keyword.Colon, keyword.Braces, keyword.Lt, keyword.Brackets: + return lhs + } + } + switch prec { case 0: if where.Subject() == taxa.Array || where.Subject() == taxa.Dict { @@ -61,22 +75,10 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn ) fallthrough case keyword.Colon: - // Field values in a message literal are never infix - // expressions. In protoc's grammar, a field value is a - // scalar, a message literal, or a list literal -- never a - // range or any other infix production. We use - // parseExprPrefix (not parseExprInfix) so that infix - // keywords like "to" are not consumed as part of the value. - // - // Without this, a field named "to" on the following line - // gets swallowed into the previous field's value as a range: - // - // reserved: true - // to: true <- would be parsed as "true to true" return p.NewExprField(ast.ExprFieldArgs{ Key: lhs, Colon: c.Next(), - Value: parseExprPrefix(p, c, where), + Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1), }).AsAny() case keyword.Braces, keyword.Lt, keyword.Brackets: @@ -104,7 +106,7 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn // not infix expressions, so use parseExprPrefix. return p.NewExprField(ast.ExprFieldArgs{ Key: lhs, - Value: parseExprPrefix(p, c, where), + Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1), }).AsAny() } } diff --git a/experimental/parser/testdata/parser/option/values.proto b/experimental/parser/testdata/parser/option/values.proto index c93514f8c..108770a32 100644 --- a/experimental/parser/testdata/parser/option/values.proto +++ b/experimental/parser/testdata/parser/option/values.proto @@ -75,6 +75,12 @@ option x = { x x + x1: 55 to {} + x2: {} to {} + x3 17 to {} + x4 {} to 91 + x1: to + reserved: true to: true diff --git a/experimental/parser/testdata/parser/option/values.proto.stderr.txt b/experimental/parser/testdata/parser/option/values.proto.stderr.txt index 335105bd0..8838a7f62 100644 --- a/experimental/parser/testdata/parser/option/values.proto.stderr.txt +++ b/experimental/parser/testdata/parser/option/values.proto.stderr.txt @@ -81,17 +81,11 @@ error: cannot use `<...>` for message expression here = help: `<...>` message expressions are an obscure feature and not recommended -error: unexpected `to` in message expression - --> testdata/parser/option/values.proto:57:10 - | -57 | x: x to y - | ^^ expected message field value - -error: unexpected identifier in message expression - --> testdata/parser/option/values.proto:57:13 +error: unexpected range expression in option setting value + --> testdata/parser/option/values.proto:57:8 | 57 | x: x to y - | ^ expected message field value + | ^^^^^^ error: unexpected identifier in message expression --> testdata/parser/option/values.proto:59:5 @@ -229,76 +223,94 @@ warning: using `<...>` for message expression is not recommended = help: `<...>` message expressions are an obscure feature and not recommended +error: unexpected identifier in message expression + --> testdata/parser/option/values.proto:80:5 + | +80 | x3 17 to {} + | ^^ expected message field value + +error: unexpected integer literal in message expression + --> testdata/parser/option/values.proto:80:8 + | +80 | x3 17 to {} + | ^^ expected message field value + +error: unexpected range expression in option setting value + --> testdata/parser/option/values.proto:81:8 + | +81 | x4 {} to 91 + | ^^^^^^^^ + error: cannot name extension field using `(...)` in message expression - --> testdata/parser/option/values.proto:85:5 + --> testdata/parser/option/values.proto:91:5 | -85 | (x.y): 42 +91 | (x.y): 42 | ^^^^^ expected this to be wrapped in `[...]` instead | help: replace the `(...)` with `[...]` | -85 | - (x.y): 42 -85 | + [x.y]: 42 +91 | - (x.y): 42 +91 | + [x.y]: 42 | error: unexpected absolute path in extension name - --> testdata/parser/option/values.proto:90:6 + --> testdata/parser/option/values.proto:96:6 | -90 | [.x.y]: 42 +96 | [.x.y]: 42 | ^^^^ expected a path without a leading `.` | help: remove the leading `.` | -90 | - [.x.y]: 42 -90 | + [x.y]: 42 +96 | - [.x.y]: 42 +96 | + [x.y]: 42 | error: unexpected array expression in message field value - --> testdata/parser/option/values.proto:91:5 + --> testdata/parser/option/values.proto:97:5 | -91 | [x, y, z]: 42 +97 | [x, y, z]: 42 | ^^^^^^^^^ | | | expected message field name, extension name, or `Any` type URL error: unexpected array expression in message field value - --> testdata/parser/option/values.proto:92:5 + --> testdata/parser/option/values.proto:98:5 | -92 | []: 42 +98 | []: 42 | ^^ expected message field name, extension name, or `Any` type URL error: type URL can only contain a single `/` - --> testdata/parser/option/values.proto:94:17 - | -94 | [buf.build/x/y]: 42 - | - ^ - | | - | first one is here + --> testdata/parser/option/values.proto:100:17 + | +100 | [buf.build/x/y]: 42 + | - ^ + | | + | first one is here error: unexpected integer literal in array expression - --> testdata/parser/option/values.proto:96:16 - | -96 | x [{x: 5}, 1, , 2, 3], - | - ^ expected message expression - | | - | because this message field value is missing a `:` - | - = note: the `:` can be omitted in a message field value, but only if the - value is a message expression or a array expression of them + --> testdata/parser/option/values.proto:102:16 + | +102 | x [{x: 5}, 1, , 2, 3], + | - ^ expected message expression + | | + | because this message field value is missing a `:` + | + = note: the `:` can be omitted in a message field value, but only if the + value is a message expression or a array expression of them warning: using `<...>` for message expression is not recommended - --> testdata/parser/option/values.proto:96:19 - | -96 | x [{x: 5}, 1, , 2, 3], - | ^^^^^^ - help: use `{...}` instead - | -96 | - x [{x: 5}, 1, , 2, 3], -96 | + x [{x: 5}, 1, {x: 5}, 2, 3], - | - = note: `<...>` are only permitted for sub-messages within a message - expression, but as top-level option values - = help: `<...>` message expressions are an obscure feature and not - recommended + --> testdata/parser/option/values.proto:102:19 + | +102 | x [{x: 5}, 1, , 2, 3], + | ^^^^^^ + help: use `{...}` instead + | +102 | - x [{x: 5}, 1, , 2, 3], +102 | + x [{x: 5}, 1, {x: 5}, 2, 3], + | + = note: `<...>` are only permitted for sub-messages within a message + expression, but as top-level option values + = help: `<...>` message expressions are an obscure feature and not + recommended -encountered 20 errors and 8 warnings +encountered 22 errors and 8 warnings diff --git a/experimental/parser/testdata/parser/option/values.proto.yaml b/experimental/parser/testdata/parser/option/values.proto.yaml index af7947506..0158c008e 100644 --- a/experimental/parser/testdata/parser/option/values.proto.yaml +++ b/experimental/parser/testdata/parser/option/values.proto.yaml @@ -133,9 +133,9 @@ decls: - key.path.components: [{ ident: "x" }] value.path.components: [{ ident: "foo", separator: SEPARATOR_DOT }] - key.path.components: [{ ident: "x" }] - value.path.components: [{ ident: "x" }] - - value.path.components: [{ ident: "to" }] - - value.path.components: [{ ident: "y" }] + value.range: + start.path.components: [{ ident: "x" }] + end.path.components: [{ ident: "y" }] - value.path.components: [{ ident: "x" }] - { key.path.components: [{ ident: "x" }], value.array: {} } - key.path.components: [{ ident: "x" }] @@ -173,6 +173,17 @@ decls: value.dict.entries: - { key.path.components: [{ ident: "a" }], value.literal.int_value: 42 } - { key.path.components: [{ ident: "b" }], value.literal.int_value: 43 } + - { key.path.components: [{ ident: "x1" }], value.literal.int_value: 55 } + - { key.path.components: [{ ident: "to" }], value.dict: {} } + - { key.path.components: [{ ident: "x2" }], value.dict: {} } + - { key.path.components: [{ ident: "to" }], value.dict: {} } + - value.path.components: [{ ident: "x3" }] + - value.literal.int_value: 17 + - { key.path.components: [{ ident: "to" }], value.dict: {} } + - key.path.components: [{ ident: "x4" }] + value.range: { start.dict: {}, end.literal.int_value: 91 } + - key.path.components: [{ ident: "x1" }] + value.path.components: [{ ident: "to" }] - key.path.components: [{ ident: "reserved" }] value.path.components: [{ ident: "true" }] - key.path.components: [{ ident: "to" }]