From 7d88c5a1f9f290d09f37523e9edefc17927ccc7b Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Tue, 2 Jun 2026 13:02:24 +0200 Subject: [PATCH 1/3] Add ASSIGN_NOPUSH optimization --- src/main/java/io/jawk/backend/AVM.java | 22 +++++--- .../java/io/jawk/intermediate/AwkTuples.java | 39 +++++++++++++ .../java/io/jawk/intermediate/Opcode.java | 15 ++++- .../io/jawk/AwkTupleOptimizationTest.java | 56 +++++++++++++++++++ 4 files changed, 122 insertions(+), 10 deletions(-) diff --git a/src/main/java/io/jawk/backend/AVM.java b/src/main/java/io/jawk/backend/AVM.java index 0479568f..089f7e72 100644 --- a/src/main/java/io/jawk/backend/AVM.java +++ b/src/main/java/io/jawk/backend/AVM.java @@ -1177,13 +1177,19 @@ private void executeTuples(PositionTracker position) position.next(); break; } - case ASSIGN: { + case ASSIGN: + case ASSIGN_NOPUSH: { // arg[0] = offset // arg[1] = isGlobal // stack[0] = value VariableTuple variableTuple = (VariableTuple) tuple; Object value = pop(); - assign(variableTuple.getVariableOffset(), value, variableTuple.isGlobal(), position); + assign( + variableTuple.getVariableOffset(), + value, + variableTuple.isGlobal(), + position, + opcode == Opcode.ASSIGN); position.next(); break; } @@ -2040,8 +2046,7 @@ private void executeTuples(PositionTracker position) argcOffset = offsetTuple.getValue(); // assign(argcOffset, arguments.size(), true, position); // true = global // +1 to include the "jawk" program name (ARGV[0]) - assign(argcOffset, arguments.size() + 1, true, position); // true = global - pop(); // clean up the stack after the assignment + assign(argcOffset, arguments.size() + 1, true, position, false); // true = global position.next(); break; } @@ -2691,8 +2696,7 @@ private void execSubForDollarReference(BooleanTuple tuple) { private void execSubForVariable(SubstitutionVariableTuple tuple, PositionTracker position) { String newString = execSubOrGSub(tuple.isGlobalSubstitution()); - assign(tuple.getVariableOffset(), newString, tuple.isGlobal(), position); - pop(); + assign(tuple.getVariableOffset(), newString, tuple.isGlobal(), position, false); } private void execSubForArrayReference(SubstitutionVariableTuple tuple) { @@ -2984,12 +2988,14 @@ private String replaceAll(String orig, String ere, String repl) { /** * Awk variable assignment functionality. */ - private void assign(long l, Object value, boolean isGlobal, PositionTracker position) { + private void assign(long l, Object value, boolean isGlobal, PositionTracker position, boolean push) { // check if curr value already refers to an array if (runtimeStack.getVariable(l, isGlobal) instanceof Map) { throw new AwkRuntimeException(position.lineNumber(), "cannot assign anything to an unindexed associative array"); } - push(value); + if (push) { + push(value); + } runtimeStack.setVariable(l, value, isGlobal); // When specials are compiled correctly, they use ASSIGN_* and skip this path. } diff --git a/src/main/java/io/jawk/intermediate/AwkTuples.java b/src/main/java/io/jawk/intermediate/AwkTuples.java index a4f8ce0e..692b0698 100644 --- a/src/main/java/io/jawk/intermediate/AwkTuples.java +++ b/src/main/java/io/jawk/intermediate/AwkTuples.java @@ -1860,6 +1860,9 @@ private boolean removeRedundantEvalSetNumGlobals() { } private boolean peepholeOptimize() { + // Keep running the local rewrite pass because one fold can expose another. + // Example: PUSH 1, PUSH 2, ADD, NEGATE first becomes PUSH 3, NEGATE and + // only the next pass can fold it to PUSH -3. boolean modified = false; boolean passModified; do { @@ -1885,11 +1888,31 @@ private boolean peepholeOptimizePass() { int newIndex = 0; while (oldIndex < originalSize) { Tuple tuple = original.get(oldIndex); + if (tuple.getOpcode() == Opcode.ASSIGN && (oldIndex + 1) < originalSize) { + Tuple nextTuple = original.get(oldIndex + 1); + if (nextTuple.getOpcode() == Opcode.POP) { + // Statement assignments compile as ASSIGN followed by POP because + // ASSIGN normally leaves the assigned value on the stack for + // expression contexts such as print (a = 1). When the result is + // discarded immediately, replace both opcodes with ASSIGN_NOPUSH. + Tuple replacement = createAssignNoPush(tuple); + optimizedQueue.add(replacement); + mapFoldedRange(indexMapping, oldIndex, 2, newIndex); + oldIndex += 2; + newIndex++; + modified = true; + continue; + } + } + Object literal = literalValue(tuple); if (literal != null) { if ((oldIndex + 1) < originalSize) { Tuple nextTuple = original.get(oldIndex + 1); if (nextTuple.getOpcode() == Opcode.GET_INPUT_FIELD) { + // Replace PUSH literal + GET_INPUT_FIELD with the constant-field + // opcode so $1, $2, etc. do not need a stack round trip for the + // field index. long fieldIndex = JRT.toLong(literal); Tuple replacement = createGetInputFieldConst( fieldIndex, @@ -1909,6 +1932,9 @@ private boolean peepholeOptimizePass() { if (secondLiteral != null) { Object folded = foldBinary(literal, secondLiteral, opTuple); if (folded != null) { + // Fold two literal pushes followed by a pure binary operator + // into a single literal push, e.g. PUSH 1, PUSH 2, ADD -> + // PUSH 3. Tuple replacement = createLiteralPush(folded, tuple.getLineNumber()); optimizedQueue.add(replacement); mapFoldedRange(indexMapping, oldIndex, 3, newIndex); @@ -1923,6 +1949,8 @@ private boolean peepholeOptimizePass() { Tuple opTuple = original.get(oldIndex + 1); Object folded = foldUnary(literal, opTuple); if (folded != null) { + // Fold one literal push followed by a pure unary operator into a + // single literal push, e.g. PUSH 5, NEGATE -> PUSH -5. Tuple replacement = createLiteralPush(folded, tuple.getLineNumber()); optimizedQueue.add(replacement); mapFoldedRange(indexMapping, oldIndex, 2, newIndex); @@ -2100,6 +2128,16 @@ private Tuple createLiteralPush(Object value, int lineNumber) { return tuple; } + private Tuple createAssignNoPush(Tuple tuple) { + Tuple.VariableTuple variableTuple = (Tuple.VariableTuple) tuple; + Tuple replacement = new Tuple.VariableTuple( + Opcode.ASSIGN_NOPUSH, + variableTuple.getVariableOffset(), + variableTuple.isGlobal()); + replacement.setLineNumber(tuple.getLineNumber()); + return replacement; + } + private Tuple createGetInputFieldConst(long fieldIndex, int lineNumber) { Tuple tuple = new Tuple.InputFieldTuple(fieldIndex); tuple.setLineNumber(lineNumber); @@ -2548,6 +2586,7 @@ private static Set freezeSet(Set set) { private boolean requiresEvalGlobalFrame(Opcode opcode) { switch (opcode) { case ASSIGN: + case ASSIGN_NOPUSH: case ASSIGN_ARRAY: case DEREFERENCE: case PLUS_EQ: diff --git a/src/main/java/io/jawk/intermediate/Opcode.java b/src/main/java/io/jawk/intermediate/Opcode.java index c6133b86..b9af6d64 100644 --- a/src/main/java/io/jawk/intermediate/Opcode.java +++ b/src/main/java/io/jawk/intermediate/Opcode.java @@ -197,8 +197,8 @@ public enum Opcode { */ CONCAT, /** - * Assigns the top-of-stack to a variable. The contents of the stack - * are unaffected. + * Assigns the top-of-stack to a variable and pushes the assigned value back + * onto the stack. *

* Argument 1: offset of the particular variable into the variable manager
* Argument 2: whether the variable is global or local @@ -207,6 +207,17 @@ public enum Opcode { * Stack after: x ... */ ASSIGN, + /** + * Assigns the top-of-stack to a variable without pushing the assigned value + * back onto the stack. + *

+ * Argument 1: offset of the particular variable into the variable manager
+ * Argument 2: whether the variable is global or local + *

+ * Stack before: x ...
+ * Stack after: ... + */ + ASSIGN_NOPUSH, /** * Assigns an item to an array element. The item remains on the stack. *

diff --git a/src/test/java/io/jawk/AwkTupleOptimizationTest.java b/src/test/java/io/jawk/AwkTupleOptimizationTest.java index a10883ca..54760c44 100644 --- a/src/test/java/io/jawk/AwkTupleOptimizationTest.java +++ b/src/test/java/io/jawk/AwkTupleOptimizationTest.java @@ -139,6 +139,36 @@ public void foldsLiteralStringConcatenation() throws Exception { assertTrue("Expected folded literal push of foobar", hasLiteralPush(tuples, "foobar")); } + @Test + public void foldsScalarAssignmentPopIntoNonPushingAssignment() throws Exception { + String script = "BEGIN { a = -2; b = 2; c = 4; print a + b + c }\n"; + AwkTestSupport + .awkTest("folds scalar assignment pop") + .script(script) + .expect("4\n") + .runAndAssert(); + + AwkProgram tuples = new Awk().compile(script); + assertFalse( + "ASSIGN followed by POP should be folded", + hasAdjacentOpcodes(tuples, Opcode.ASSIGN, Opcode.POP)); + assertEquals("Expected one non-pushing assignment per statement", 3, countOpcode(tuples, Opcode.ASSIGN_NOPUSH)); + } + + @Test + public void keepsScalarAssignmentPushWhenResultIsUsed() throws Exception { + String script = "BEGIN { print (a = 7) + 1 }\n"; + AwkTestSupport + .awkTest("assignment expression pushes result") + .script(script) + .expect("8\n") + .runAndAssert(); + + AwkProgram tuples = new Awk().compile(script); + assertEquals("Assignment expression should still push its result", 1, countOpcode(tuples, Opcode.ASSIGN)); + assertEquals("Expression assignment should not use ASSIGN_NOPUSH", 0, countOpcode(tuples, Opcode.ASSIGN_NOPUSH)); + } + @Test public void compilesGetlineIntoVariableWithDedicatedTargetOpcode() throws Exception { String script = "{ getline line; print line; exit }\n"; @@ -474,6 +504,32 @@ private static List collectOpcodes(AwkProgram tuples) { return opcodes; } + private static boolean hasAdjacentOpcodes(AwkProgram tuples, Opcode first, Opcode second) { + Opcode previous = null; + PositionTracker tracker = rawTuples(tuples).top(); + while (!tracker.isEOF()) { + Opcode current = tracker.opcode(); + if (previous == first && current == second) { + return true; + } + previous = current; + tracker.next(); + } + return false; + } + + private static int countOpcode(AwkProgram tuples, Opcode opcode) { + int count = 0; + PositionTracker tracker = rawTuples(tuples).top(); + while (!tracker.isEOF()) { + if (tracker.opcode() == opcode) { + count++; + } + tracker.next(); + } + return count; + } + private static String dumpTuples(AwkProgram tuples) throws Exception { ByteArrayOutputStream out = new ByteArrayOutputStream(); try (PrintStream ps = new PrintStream(out, true, StandardCharsets.UTF_8.name())) { From 8fd355594424c2ea69f44c29e10db485e08502d7 Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Tue, 2 Jun 2026 14:54:06 +0200 Subject: [PATCH 2/3] Guard assignment peephole rewrite on branch targets --- .../java/io/jawk/intermediate/AwkTuples.java | 28 +++++++++++++++---- .../io/jawk/AwkTupleOptimizationTest.java | 15 ++++++++++ 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/main/java/io/jawk/intermediate/AwkTuples.java b/src/main/java/io/jawk/intermediate/AwkTuples.java index 692b0698..3e430390 100644 --- a/src/main/java/io/jawk/intermediate/AwkTuples.java +++ b/src/main/java/io/jawk/intermediate/AwkTuples.java @@ -1882,6 +1882,7 @@ private boolean peepholeOptimizePass() { int[] indexMapping = new int[originalSize]; Arrays.fill(indexMapping, -1); java.util.List optimizedQueue = new ArrayList(originalSize); + boolean[] addressTargets = addressTargets(original, originalSize); boolean modified = false; int oldIndex = 0; @@ -1890,11 +1891,14 @@ private boolean peepholeOptimizePass() { Tuple tuple = original.get(oldIndex); if (tuple.getOpcode() == Opcode.ASSIGN && (oldIndex + 1) < originalSize) { Tuple nextTuple = original.get(oldIndex + 1); - if (nextTuple.getOpcode() == Opcode.POP) { - // Statement assignments compile as ASSIGN followed by POP because - // ASSIGN normally leaves the assigned value on the stack for - // expression contexts such as print (a = 1). When the result is - // discarded immediately, replace both opcodes with ASSIGN_NOPUSH. + // Statement assignments compile as ASSIGN followed by POP because + // ASSIGN normally leaves the assigned value on the stack for + // expression contexts such as print (a = 1). When the result is + // discarded immediately, replace both opcodes with ASSIGN_NOPUSH + // unless the POP itself is a branch target. Branches that land on + // the POP must continue to skip the assignment and only discard the + // already-computed expression result. + if (nextTuple.getOpcode() == Opcode.POP && !addressTargets[oldIndex + 1]) { Tuple replacement = createAssignNoPush(tuple); optimizedQueue.add(replacement); mapFoldedRange(indexMapping, oldIndex, 2, newIndex); @@ -1983,6 +1987,20 @@ private boolean peepholeOptimizePass() { return true; } + private boolean[] addressTargets(java.util.List tuples, int tupleCount) { + boolean[] targets = new boolean[tupleCount]; + for (Tuple tuple : tuples) { + Address address = tuple.getAddress(); + if (address != null) { + int index = address.index(); + if (index >= 0 && index < tupleCount) { + targets[index] = true; + } + } + } + return targets; + } + private void mapFoldedRange(int[] indexMapping, int startIndex, int length, int newIndex) { for (int idx = 0; idx < length; idx++) { indexMapping[startIndex + idx] = newIndex; diff --git a/src/test/java/io/jawk/AwkTupleOptimizationTest.java b/src/test/java/io/jawk/AwkTupleOptimizationTest.java index 54760c44..46ec01c3 100644 --- a/src/test/java/io/jawk/AwkTupleOptimizationTest.java +++ b/src/test/java/io/jawk/AwkTupleOptimizationTest.java @@ -169,6 +169,21 @@ public void keepsScalarAssignmentPushWhenResultIsUsed() throws Exception { assertEquals("Expression assignment should not use ASSIGN_NOPUSH", 0, countOpcode(tuples, Opcode.ASSIGN_NOPUSH)); } + @Test + public void keepsAssignmentPopWhenPopIsBranchTarget() throws Exception { + String script = "BEGIN { cond = 1; cond ? (a = 1) : (b = 2); print a + 0, b + 0 }\n"; + AwkTestSupport + .awkTest("keeps branch-target assignment pop") + .script(script) + .expect("1 0\n") + .runAndAssert(); + + AwkProgram tuples = new Awk().compile(script); + assertTrue( + "ASSIGN followed by targeted POP should not be folded", + hasAdjacentOpcodes(tuples, Opcode.ASSIGN, Opcode.POP)); + } + @Test public void compilesGetlineIntoVariableWithDedicatedTargetOpcode() throws Exception { String script = "{ getline line; print line; exit }\n"; From b020f511f565b8c58daf4e826b1e63cd7717c989 Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Tue, 2 Jun 2026 18:21:38 +0200 Subject: [PATCH 3/3] Clarify address target optimization test --- .../java/io/jawk/intermediate/AwkTuples.java | 4 +-- .../io/jawk/AwkTupleOptimizationTest.java | 27 ++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/main/java/io/jawk/intermediate/AwkTuples.java b/src/main/java/io/jawk/intermediate/AwkTuples.java index 3e430390..851e69f1 100644 --- a/src/main/java/io/jawk/intermediate/AwkTuples.java +++ b/src/main/java/io/jawk/intermediate/AwkTuples.java @@ -1882,7 +1882,7 @@ private boolean peepholeOptimizePass() { int[] indexMapping = new int[originalSize]; Arrays.fill(indexMapping, -1); java.util.List optimizedQueue = new ArrayList(originalSize); - boolean[] addressTargets = addressTargets(original, originalSize); + boolean[] isAddressTarget = addressTargets(original, originalSize); boolean modified = false; int oldIndex = 0; @@ -1898,7 +1898,7 @@ private boolean peepholeOptimizePass() { // unless the POP itself is a branch target. Branches that land on // the POP must continue to skip the assignment and only discard the // already-computed expression result. - if (nextTuple.getOpcode() == Opcode.POP && !addressTargets[oldIndex + 1]) { + if (nextTuple.getOpcode() == Opcode.POP && !isAddressTarget[oldIndex + 1]) { Tuple replacement = createAssignNoPush(tuple); optimizedQueue.add(replacement); mapFoldedRange(indexMapping, oldIndex, 2, newIndex); diff --git a/src/test/java/io/jawk/AwkTupleOptimizationTest.java b/src/test/java/io/jawk/AwkTupleOptimizationTest.java index 46ec01c3..a4a2ccd2 100644 --- a/src/test/java/io/jawk/AwkTupleOptimizationTest.java +++ b/src/test/java/io/jawk/AwkTupleOptimizationTest.java @@ -181,7 +181,7 @@ public void keepsAssignmentPopWhenPopIsBranchTarget() throws Exception { AwkProgram tuples = new Awk().compile(script); assertTrue( "ASSIGN followed by targeted POP should not be folded", - hasAdjacentOpcodes(tuples, Opcode.ASSIGN, Opcode.POP)); + hasAddressTargetWithPredecessor(tuples, Opcode.ASSIGN, Opcode.POP)); } @Test @@ -533,6 +533,31 @@ private static boolean hasAdjacentOpcodes(AwkProgram tuples, Opcode first, Opcod return false; } + private static boolean hasAddressTargetWithPredecessor(AwkProgram tuples, Opcode predecessor, Opcode target) { + Set targetIndexes = new HashSet<>(); + PositionTracker tracker = rawTuples(tuples).top(); + while (!tracker.isEOF()) { + Address address = tracker.current().getAddress(); + if (address != null) { + targetIndexes.add(Integer.valueOf(address.index())); + } + tracker.next(); + } + + tracker = rawTuples(tuples).top(); + Opcode previous = null; + while (!tracker.isEOF()) { + if (targetIndexes.contains(Integer.valueOf(tracker.currentIndex())) + && previous == predecessor + && tracker.opcode() == target) { + return true; + } + previous = tracker.opcode(); + tracker.next(); + } + return false; + } + private static int countOpcode(AwkProgram tuples, Opcode opcode) { int count = 0; PositionTracker tracker = rawTuples(tuples).top();