From 316eb8d016ecd6fa68d7fa269a6434fafdf6e5ae Mon Sep 17 00:00:00 2001
From: Bertrand Martin
Date: Tue, 19 May 2026 00:22:18 +0200
Subject: [PATCH 1/2] Implement strnum-aware AWK comparison semantics
---
.../io/jawk/jrt/JRTCompare2Benchmark.java | 82 ++++--
.../java/io/jawk/jrt/JRTHotPathBenchmark.java | 48 ++++
src/main/java/io/jawk/backend/AVM.java | 6 +-
src/main/java/io/jawk/jrt/AssocArray.java | 14 +-
src/main/java/io/jawk/jrt/JRT.java | 237 +++++++++---------
src/main/java/io/jawk/jrt/StrNum.java | 69 +++++
src/test/java/io/jawk/JRTTest.java | 16 +-
.../java/io/jawk/PosixConformanceTest.java | 2 +-
.../java/io/jawk/StrNumSemanticsTest.java | 143 +++++++++++
.../io/jawk/jrt/JRTComparisonNumberTest.java | 95 ++++---
10 files changed, 535 insertions(+), 177 deletions(-)
create mode 100644 src/main/java/io/jawk/jrt/StrNum.java
create mode 100644 src/test/java/io/jawk/StrNumSemanticsTest.java
diff --git a/src/jmh/java/io/jawk/jrt/JRTCompare2Benchmark.java b/src/jmh/java/io/jawk/jrt/JRTCompare2Benchmark.java
index 0a3327ea..bcc6770b 100644
--- a/src/jmh/java/io/jawk/jrt/JRTCompare2Benchmark.java
+++ b/src/jmh/java/io/jawk/jrt/JRTCompare2Benchmark.java
@@ -57,10 +57,14 @@ public class JRTCompare2Benchmark {
private Object stringLeft;
private Object stringRightEqual;
private Object stringRightGreater;
- private Object numericStringLeft;
- private Object numericStringRightEqual;
- private Object numericStringRightGreater;
+ private Object plainNumericStringLeft;
+ private Object plainNumericStringRightEqual;
+ private Object plainNumericStringRightGreater;
private Object nonNumericString;
+ private Object strNumLeft;
+ private Object strNumRightEqual;
+ private Object strNumRightGreater;
+ private Object nonNumericStrNum;
/**
* Initializes benchmark operands as mutable state fields so the benchmark body
@@ -78,10 +82,14 @@ public void setup() {
this.stringLeft = "alpha";
this.stringRightEqual = "alpha";
this.stringRightGreater = "bravo";
- this.numericStringLeft = "123";
- this.numericStringRightEqual = "123.0";
- this.numericStringRightGreater = "456";
+ this.plainNumericStringLeft = "123";
+ this.plainNumericStringRightEqual = "123.0";
+ this.plainNumericStringRightGreater = "456";
this.nonNumericString = "2x";
+ this.strNumLeft = new StrNum("123");
+ this.strNumRightEqual = new StrNum("123.0");
+ this.strNumRightGreater = new StrNum("456");
+ this.nonNumericStrNum = new StrNum("2x");
}
/**
@@ -155,33 +163,66 @@ public boolean stringLessThan() {
}
/**
- * Measures equality for two numeric string operands.
+ * Measures equality for two plain numeric-looking {@link String} operands.
*
* @return the comparison result
*/
@Benchmark
- public boolean numericStringEquals() {
- return JRT.compare2(this.numericStringLeft, this.numericStringRightEqual, 0);
+ public boolean plainNumericStringEquals() {
+ return JRT.compare2(this.plainNumericStringLeft, this.plainNumericStringRightEqual, 0);
}
/**
- * Measures less-than comparison for two numeric string operands.
+ * Measures less-than comparison for two plain numeric-looking {@link String}
+ * operands.
*
* @return the comparison result
*/
@Benchmark
- public boolean numericStringLessThan() {
- return JRT.compare2(this.numericStringLeft, this.numericStringRightGreater, -1);
+ public boolean plainNumericStringLessThan() {
+ return JRT.compare2(this.plainNumericStringLeft, this.plainNumericStringRightGreater, -1);
}
/**
- * Measures equality for a boxed {@link Long} and a numeric string operand.
+ * Measures equality for a boxed {@link Long} and a plain numeric-looking
+ * {@link String} operand.
*
* @return the comparison result
*/
@Benchmark
- public boolean mixedLongNumericStringEquals() {
- return JRT.compare2(this.longLeft, this.numericStringRightEqual, 0);
+ public boolean mixedLongPlainNumericStringEquals() {
+ return JRT.compare2(this.longLeft, this.plainNumericStringRightEqual, 0);
+ }
+
+ /**
+ * Measures equality for two input-derived numeric string operands.
+ *
+ * @return the comparison result
+ */
+ @Benchmark
+ public boolean strNumEquals() {
+ return JRT.compare2(this.strNumLeft, this.strNumRightEqual, 0);
+ }
+
+ /**
+ * Measures less-than comparison for two input-derived numeric string operands.
+ *
+ * @return the comparison result
+ */
+ @Benchmark
+ public boolean strNumLessThan() {
+ return JRT.compare2(this.strNumLeft, this.strNumRightGreater, -1);
+ }
+
+ /**
+ * Measures equality for a boxed {@link Long} and an input-derived numeric
+ * string operand.
+ *
+ * @return the comparison result
+ */
+ @Benchmark
+ public boolean mixedLongStrNumEquals() {
+ return JRT.compare2(this.longLeft, this.strNumRightEqual, 0);
}
/**
@@ -194,4 +235,15 @@ public boolean mixedLongNumericStringEquals() {
public boolean mixedLongNonNumericStringLessThan() {
return JRT.compare2(this.longLeft, this.nonNumericString, -1);
}
+
+ /**
+ * Measures fallback string comparison for a numeric operand and a nonnumeric
+ * input-derived string.
+ *
+ * @return the comparison result
+ */
+ @Benchmark
+ public boolean mixedLongNonNumericStrNumLessThan() {
+ return JRT.compare2(this.longLeft, this.nonNumericStrNum, -1);
+ }
}
diff --git a/src/jmh/java/io/jawk/jrt/JRTHotPathBenchmark.java b/src/jmh/java/io/jawk/jrt/JRTHotPathBenchmark.java
index 61d542bb..8a902235 100644
--- a/src/jmh/java/io/jawk/jrt/JRTHotPathBenchmark.java
+++ b/src/jmh/java/io/jawk/jrt/JRTHotPathBenchmark.java
@@ -63,6 +63,9 @@ public class JRTHotPathBenchmark {
private Object zeroLong;
private Object zeroDouble;
private Object zeroString;
+ private Object zeroStrNum;
+ private Object nonZeroStrNum;
+ private Object nonNumericStrNum;
private Object uninitialized;
private double integralDouble;
private double fractionalDouble;
@@ -87,6 +90,9 @@ public void setup() {
this.zeroLong = Long.valueOf(0L);
this.zeroDouble = Double.valueOf(0.0D);
this.zeroString = "0";
+ this.zeroStrNum = new StrNum("0");
+ this.nonZeroStrNum = new StrNum("123");
+ this.nonNumericStrNum = new StrNum("2x");
this.uninitialized = new UninitializedObject();
this.integralDouble = 123456789D;
this.fractionalDouble = 123456.75D;
@@ -166,6 +172,17 @@ public double toDoubleNonNumericString() {
return JRT.toDouble(this.nonNumericString);
}
+ /**
+ * Measures {@link JRT#toDouble(Object)} for an input-derived numeric-prefix
+ * string.
+ *
+ * @return converted value
+ */
+ @Benchmark
+ public double toDoubleStrNumNumericPrefix() {
+ return JRT.toDouble(this.nonNumericStrNum);
+ }
+
/**
* Measures {@link JRT#toDouble(Object)} for an empty string.
*
@@ -336,6 +353,37 @@ public boolean toBooleanStringZero() {
return this.jrt.toBoolean(this.zeroString);
}
+ /**
+ * Measures {@link JRT#toBoolean(Object)} for an input-derived zero string.
+ *
+ * @return converted value
+ */
+ @Benchmark
+ public boolean toBooleanStrNumZero() {
+ return this.jrt.toBoolean(this.zeroStrNum);
+ }
+
+ /**
+ * Measures {@link JRT#toBoolean(Object)} for an input-derived non-zero string.
+ *
+ * @return converted value
+ */
+ @Benchmark
+ public boolean toBooleanStrNumNonZero() {
+ return this.jrt.toBoolean(this.nonZeroStrNum);
+ }
+
+ /**
+ * Measures {@link JRT#toBoolean(Object)} for an input-derived nonnumeric
+ * string.
+ *
+ * @return converted value
+ */
+ @Benchmark
+ public boolean toBooleanStrNumNonNumeric() {
+ return this.jrt.toBoolean(this.nonNumericStrNum);
+ }
+
/**
* Measures {@link JRT#toBoolean(Object)} for an uninitialized runtime value.
*
diff --git a/src/main/java/io/jawk/backend/AVM.java b/src/main/java/io/jawk/backend/AVM.java
index 9897f387..f65c5ecc 100644
--- a/src/main/java/io/jawk/backend/AVM.java
+++ b/src/main/java/io/jawk/backend/AVM.java
@@ -300,7 +300,7 @@ public Object eval(AwkExpression expression) throws IOException {
exitCode = 0;
throw new IllegalStateException("eval(AwkExpression) cannot execute EXIT opcodes.", e);
}
- return operandStack.isEmpty() ? null : pop();
+ return operandStack.isEmpty() ? null : JRT.toJavaScalar(pop());
}
/**
@@ -1371,7 +1371,7 @@ private void executeTuples(PositionTracker position)
// stack[1] = value
Object fieldNumObj = pop();
long fieldNum = JRT.parseFieldNumber(fieldNumObj);
- String value = pop().toString();
+ Object value = pop();
push(value); // leave the result on the stack
if (fieldNum == 0) {
jrt.setInputLine(value);
@@ -2072,7 +2072,7 @@ private void executeTuples(PositionTracker position)
}
case GETLINE_INPUT_TO_TARGET: {
applyInputSourceFilelistAssignmentsIfNeeded();
- String input = jrt.consumeInputToTarget(resolvedInputSource);
+ Object input = jrt.consumeInputToTarget(resolvedInputSource);
if (input != null) {
push(1);
push(input);
diff --git a/src/main/java/io/jawk/jrt/AssocArray.java b/src/main/java/io/jawk/jrt/AssocArray.java
index 78ec158e..fb03f6bb 100644
--- a/src/main/java/io/jawk/jrt/AssocArray.java
+++ b/src/main/java/io/jawk/jrt/AssocArray.java
@@ -69,13 +69,17 @@ public interface AssocArray extends Map
*
- * @return a {@link java.lang.String} object
+ * @return the current input line scalar value, or {@code null}
*/
- public String getInputLine() {
- if (inputLine != null) {
- return inputLine;
- }
- if (recordState == null) {
- return null;
+ public Object getInputLine() {
+ if (recordState != null) {
+ return recordState.getField(0);
}
- return recordState.getRecordText();
+ return inputLine == null ? null : new StrNum(inputLine, decimalSeparator);
}
/**
@@ -1294,11 +1301,12 @@ public void setARGC(Object value) {
* Setter for the field inputLine.
*
*
- * @param inputLine a {@link java.lang.String} object
+ * @param inputLine input value
*/
- public void setInputLine(String inputLine) {
- this.inputLine = inputLine;
- recordState = newRecordStateFromText(inputLine);
+ public void setInputLine(Object inputLine) {
+ String inputText = inputLine == null ? "" : inputLine.toString();
+ this.inputLine = inputText;
+ recordState = new RecordState(inputText, null, false);
}
/**
@@ -1309,7 +1317,7 @@ public void setInputLine(String inputLine) {
public void assignInputLineFromGetline(Object value) {
String inputValue = value == null ? "" : value.toString();
inputLine = inputValue;
- recordState = newRecordStateFromText(inputValue);
+ recordState = new RecordState(inputValue, null, true);
}
/**
@@ -1330,7 +1338,7 @@ public boolean consumeInput(final InputSource source) throws IOException {
}
inputLine = null;
- recordState = newRecordStateFromSource(source);
+ recordState = new RecordState(source);
this.nr++;
if (source.isFromFilenameList()) {
@@ -1341,16 +1349,16 @@ public boolean consumeInput(final InputSource source) throws IOException {
/**
* Attempt to consume one record from a structured input source for
- * {@code getline target}, returning only the input text and leaving the
+ * {@code getline target}, returning the input value and leaving the
* current input record state untouched.
*
* @param source source strategy that provides records and optional
* pre-split fields
- * @return the consumed input text, or {@code null} when the source is
+ * @return the consumed input value, or {@code null} when the source is
* exhausted
* @throws IOException if the source raises an I/O error
*/
- public String consumeInputToTarget(final InputSource source) throws IOException {
+ public Object consumeInputToTarget(final InputSource source) throws IOException {
Objects.requireNonNull(source, "source");
activeSource = source;
materializeCurrentRecord();
@@ -1358,12 +1366,12 @@ public String consumeInputToTarget(final InputSource source) throws IOException
return null;
}
- String input = newRecordStateFromSource(source).getRecordText();
+ RecordState inputState = new RecordState(source);
this.nr++;
if (source.isFromFilenameList()) {
this.fnr++;
}
- return input;
+ return new StrNum(inputState.getRecordText(), decimalSeparator);
}
/**
@@ -1386,7 +1394,7 @@ public boolean consumeInputForEval(InputSource source) throws IOException {
* @param preFields current fields where index {@code 0} is {@code $1}
*/
protected void initializeInputFields(String record, List preFields) {
- recordState = newRecordStateFromSource(record, preFields);
+ recordState = new RecordState(record, preFields, true);
}
/**
@@ -1473,7 +1481,7 @@ public String jrtSetInputField(Object valueObj, long fieldNum) {
if (fieldNum > Integer.MAX_VALUE) {
throw new AwkRuntimeException("Field $(" + Long.valueOf(fieldNum) + ") is incorrect.");
}
- String value = valueObj.toString();
+ String value = valueObj == null ? "" : valueObj.toString();
int fieldIndex = (int) fieldNum;
RecordState state = ensureRecordStateForFieldMutation();
if (valueObj instanceof UninitializedObject) {
@@ -1484,7 +1492,7 @@ public String jrtSetInputField(Object valueObj, long fieldNum) {
while (state.getNF() < fieldIndex) {
state.addField("");
}
- state.setField(fieldIndex - 1, value);
+ state.setField(fieldIndex - 1, valueObj);
}
state.markRecordTextDirty();
return value;
@@ -1505,7 +1513,7 @@ private void materializeCurrentRecord() {
private RecordState ensureRecordStateForTextMutation() {
if (recordState == null) {
- recordState = newRecordStateFromText(inputLine == null ? "" : inputLine);
+ recordState = new RecordState(inputLine == null ? "" : inputLine, null, false);
}
return recordState;
}
@@ -1516,16 +1524,17 @@ private RecordState ensureRecordStateForFieldMutation() {
return state;
}
- private static List sanitizeFields(List rawFields) {
- List copy = new ArrayList(rawFields.size());
+ private List