-
-
Notifications
You must be signed in to change notification settings - Fork 165
Generalize ScriptWhitespace #337
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,8 +59,8 @@ object MultiLineWhitespace { | |
| * Whitespace syntax that supports # line-comments, as in the case in | ||
| * programming languages such as Bash, Ruby, or Python | ||
| */ | ||
| object ScriptWhitespace{ | ||
| implicit object whitespace extends Whitespace { | ||
| object ScriptWhitespace { | ||
| def custom(delim: Char): Whitespace = new Whitespace { | ||
| def apply(ctx: ParsingRun[_]) = { | ||
| val input = ctx.input | ||
| @tailrec def rec(current: Int, state: Int): ParsingRun[Unit] = { | ||
|
|
@@ -74,10 +74,12 @@ object ScriptWhitespace{ | |
| case 0 => | ||
| (currentChar: @switch) match{ | ||
| case ' ' | '\t' | '\n' | '\r' => rec(current + 1, state) | ||
| case '#' => rec(current + 1, state = 1) | ||
| case _ => | ||
| if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) | ||
| ctx.freshSuccessUnit(current) | ||
| if (currentChar == delim) rec(current + 1, state = 1) | ||
| else { | ||
| if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) | ||
| ctx.freshSuccessUnit(current) | ||
| } | ||
| } | ||
| case 1 => rec(current + 1, state = if (currentChar == '\n') 0 else state) | ||
| } | ||
|
|
@@ -86,6 +88,7 @@ object ScriptWhitespace{ | |
| rec(current = ctx.index, state = 0) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Performance concern: This is minor since users typically call |
||
| } | ||
| } | ||
| implicit val whitespace: Whitespace = custom('#') | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ object WhitespaceTests extends TestSuite{ | |
| def checkCommon(p0: Whitespace) = { | ||
| val p = p0.apply(_) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The function tests for line comments and block comments (lines 16-24), which are NOT supported by — it only supports a single configurable delimiter for line comments. These tests will fail when called with in the test (line 46). Either should be split into separate helpers for different whitespace types, or should not call . |
||
| val Parsed.Success((), 0) = parse("", p) | ||
| val Parsed.Success((), 4) = parse(" \t\r\n", p) | ||
| val Parsed.Success((), 0) = parse("/", p) | ||
| val Parsed.Success((), 1) = parse(" /", p) | ||
| val Parsed.Success((), 1) = parse(" / ", p) | ||
|
|
@@ -22,13 +23,32 @@ object WhitespaceTests extends TestSuite{ | |
| val Parsed.Success((), 6) = parse("/****/", p) | ||
| val Parsed.Success((), 9) = parse("/** * **/", p) | ||
| val Parsed.Success((), 15) = parse("/** // * // **/", p) | ||
| val Parsed.Success((), 0) = parse("code", p) | ||
| val Parsed.Success((), 2) = parse(" code", p) | ||
| } | ||
| def checkScriptCommon(p0: Whitespace, delim: Char) = { | ||
| val p = p0.apply(_) | ||
| val Parsed.Success((), 0) = parse("", p) | ||
| val Parsed.Success((), 4) = parse(" \t\r\n", p) | ||
| val Parsed.Success((), 9) = parse(s"$delim comment", p) | ||
| val Parsed.Success((), 10) = parse(s"$delim$delim comment", p) | ||
| val Parsed.Success((), 10) = parse(s" $delim comment", p) | ||
| val Parsed.Success((), 19) = parse(s"$delim comment $delim comment", p) | ||
| val Parsed.Success((), 0) = parse("code", p) | ||
| val Parsed.Success((), 2) = parse(" code", p) | ||
| } | ||
| test("scala"){ | ||
| checkCommon(ScalaWhitespace.whitespace) | ||
| // allow nested comments | ||
| val Parsed.Failure(_, 11, _) = parse("/** /* /**/", ScalaWhitespace.whitespace.apply(_)) | ||
| val Parsed.Success((), 8) = parse("/*/**/*/", ScalaWhitespace.whitespace.apply(_)) | ||
| } | ||
| test("standard_script"){ | ||
| checkScriptCommon(ScriptWhitespace.whitespace, '#') | ||
| } | ||
| test("custom_script"){ | ||
| checkScriptCommon(ScriptWhitespace.custom('%'), '%') | ||
| } | ||
| test("java"){ | ||
| checkCommon(JavaWhitespace.whitespace) | ||
| // no nested comments | ||
|
|
@@ -43,4 +63,4 @@ object WhitespaceTests extends TestSuite{ | |
| } | ||
| } | ||
|
|
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Performance issue: The inner
(currentChar: @switch) matchnow has a problem. Originally it had 3 cases: whitespace chars,#, andcase _. The@switchannotation compiles this to atableswitchJVM instruction (O(1) dispatch).After this change,
case #is removed from the match and moved intocase _as a runtimeif (currentChar == delim)check. This means:@switchannotation on the inner match is still valid (2 cases + default), BUTifcheck fordelimbefore returning success. Previouslycase _went directly to success. Now it doesif (currentChar == delim) ... else success.For the default
ScriptWhitespace.whitespace(delim=#), this adds one extra character comparison per non-whitespace character encountered. In a hot path that parses whitespace frequently (between every token in a source file), this is a measurable regression.Fix: Keep the hardcoded
case # =>in the switch AND still support custom delimiters. One approach: makecustomreturn a different anonymousWhitespacesubclass that has its ownapplymethod with the delim hardcoded via avalcaptured at construction time, but the@switchmatch should still have the delim as a literal case. However, sincedelimis a runtime parameter, you cannot have it as acaseliteral.Alternative: accept that custom delimiters incur a tiny overhead but document this, or use a macro/inline to generate the literal case at compile time.