diff --git a/src/jmh/java/io/jawk/jrt/JRTCompare2Benchmark.java b/src/jmh/java/io/jawk/jrt/JRTCompare2Benchmark.java index 0a3327ea..bcc6770b 100644 --- a/src/jmh/java/io/jawk/jrt/JRTCompare2Benchmark.java +++ b/src/jmh/java/io/jawk/jrt/JRTCompare2Benchmark.java @@ -57,10 +57,14 @@ public class JRTCompare2Benchmark { private Object stringLeft; private Object stringRightEqual; private Object stringRightGreater; - private Object numericStringLeft; - private Object numericStringRightEqual; - private Object numericStringRightGreater; + private Object plainNumericStringLeft; + private Object plainNumericStringRightEqual; + private Object plainNumericStringRightGreater; private Object nonNumericString; + private Object strNumLeft; + private Object strNumRightEqual; + private Object strNumRightGreater; + private Object nonNumericStrNum; /** * Initializes benchmark operands as mutable state fields so the benchmark body @@ -78,10 +82,14 @@ public void setup() { this.stringLeft = "alpha"; this.stringRightEqual = "alpha"; this.stringRightGreater = "bravo"; - this.numericStringLeft = "123"; - this.numericStringRightEqual = "123.0"; - this.numericStringRightGreater = "456"; + this.plainNumericStringLeft = "123"; + this.plainNumericStringRightEqual = "123.0"; + this.plainNumericStringRightGreater = "456"; this.nonNumericString = "2x"; + this.strNumLeft = new StrNum("123"); + this.strNumRightEqual = new StrNum("123.0"); + this.strNumRightGreater = new StrNum("456"); + this.nonNumericStrNum = new StrNum("2x"); } /** @@ -155,33 +163,66 @@ public boolean stringLessThan() { } /** - * Measures equality for two numeric string operands. + * Measures equality for two plain numeric-looking {@link String} operands. * * @return the comparison result */ @Benchmark - public boolean numericStringEquals() { - return JRT.compare2(this.numericStringLeft, this.numericStringRightEqual, 0); + public boolean plainNumericStringEquals() { + return JRT.compare2(this.plainNumericStringLeft, this.plainNumericStringRightEqual, 0); } /** - * Measures less-than comparison for two numeric string operands. + * Measures less-than comparison for two plain numeric-looking {@link String} + * operands. * * @return the comparison result */ @Benchmark - public boolean numericStringLessThan() { - return JRT.compare2(this.numericStringLeft, this.numericStringRightGreater, -1); + public boolean plainNumericStringLessThan() { + return JRT.compare2(this.plainNumericStringLeft, this.plainNumericStringRightGreater, -1); } /** - * Measures equality for a boxed {@link Long} and a numeric string operand. + * Measures equality for a boxed {@link Long} and a plain numeric-looking + * {@link String} operand. * * @return the comparison result */ @Benchmark - public boolean mixedLongNumericStringEquals() { - return JRT.compare2(this.longLeft, this.numericStringRightEqual, 0); + public boolean mixedLongPlainNumericStringEquals() { + return JRT.compare2(this.longLeft, this.plainNumericStringRightEqual, 0); + } + + /** + * Measures equality for two input-derived numeric string operands. + * + * @return the comparison result + */ + @Benchmark + public boolean strNumEquals() { + return JRT.compare2(this.strNumLeft, this.strNumRightEqual, 0); + } + + /** + * Measures less-than comparison for two input-derived numeric string operands. + * + * @return the comparison result + */ + @Benchmark + public boolean strNumLessThan() { + return JRT.compare2(this.strNumLeft, this.strNumRightGreater, -1); + } + + /** + * Measures equality for a boxed {@link Long} and an input-derived numeric + * string operand. + * + * @return the comparison result + */ + @Benchmark + public boolean mixedLongStrNumEquals() { + return JRT.compare2(this.longLeft, this.strNumRightEqual, 0); } /** @@ -194,4 +235,15 @@ public boolean mixedLongNumericStringEquals() { public boolean mixedLongNonNumericStringLessThan() { return JRT.compare2(this.longLeft, this.nonNumericString, -1); } + + /** + * Measures fallback string comparison for a numeric operand and a nonnumeric + * input-derived string. + * + * @return the comparison result + */ + @Benchmark + public boolean mixedLongNonNumericStrNumLessThan() { + return JRT.compare2(this.longLeft, this.nonNumericStrNum, -1); + } } diff --git a/src/jmh/java/io/jawk/jrt/JRTHotPathBenchmark.java b/src/jmh/java/io/jawk/jrt/JRTHotPathBenchmark.java index 61d542bb..0c91696c 100644 --- a/src/jmh/java/io/jawk/jrt/JRTHotPathBenchmark.java +++ b/src/jmh/java/io/jawk/jrt/JRTHotPathBenchmark.java @@ -63,6 +63,9 @@ public class JRTHotPathBenchmark { private Object zeroLong; private Object zeroDouble; private Object zeroString; + private Object zeroStrNum; + private Object nonZeroStrNum; + private Object nonNumericStrNum; private Object uninitialized; private double integralDouble; private double fractionalDouble; @@ -87,6 +90,9 @@ public void setup() { this.zeroLong = Long.valueOf(0L); this.zeroDouble = Double.valueOf(0.0D); this.zeroString = "0"; + this.zeroStrNum = new StrNum("0"); + this.nonZeroStrNum = new StrNum("123"); + this.nonNumericStrNum = new StrNum("2x"); this.uninitialized = new UninitializedObject(); this.integralDouble = 123456789D; this.fractionalDouble = 123456.75D; @@ -166,6 +172,28 @@ public double toDoubleNonNumericString() { return JRT.toDouble(this.nonNumericString); } + /** + * Measures {@link JRT#toDouble(Object)} for an input-derived fully numeric + * string. + * + * @return converted value + */ + @Benchmark + public double toDoubleStrNumNumeric() { + return JRT.toDouble(this.nonZeroStrNum); + } + + /** + * Measures {@link JRT#toDouble(Object)} for an input-derived numeric-prefix + * string. + * + * @return converted value + */ + @Benchmark + public double toDoubleStrNumNumericPrefix() { + return JRT.toDouble(this.nonNumericStrNum); + } + /** * Measures {@link JRT#toDouble(Object)} for an empty string. * @@ -336,6 +364,37 @@ public boolean toBooleanStringZero() { return this.jrt.toBoolean(this.zeroString); } + /** + * Measures {@link JRT#toBoolean(Object)} for an input-derived zero string. + * + * @return converted value + */ + @Benchmark + public boolean toBooleanStrNumZero() { + return this.jrt.toBoolean(this.zeroStrNum); + } + + /** + * Measures {@link JRT#toBoolean(Object)} for an input-derived non-zero string. + * + * @return converted value + */ + @Benchmark + public boolean toBooleanStrNumNonZero() { + return this.jrt.toBoolean(this.nonZeroStrNum); + } + + /** + * Measures {@link JRT#toBoolean(Object)} for an input-derived nonnumeric + * string. + * + * @return converted value + */ + @Benchmark + public boolean toBooleanStrNumNonNumeric() { + return this.jrt.toBoolean(this.nonNumericStrNum); + } + /** * Measures {@link JRT#toBoolean(Object)} for an uninitialized runtime value. * diff --git a/src/main/java/io/jawk/Cli.java b/src/main/java/io/jawk/Cli.java index e186f884..8c6f8a25 100644 --- a/src/main/java/io/jawk/Cli.java +++ b/src/main/java/io/jawk/Cli.java @@ -382,17 +382,7 @@ private static void addVariable(AwkSettings settings, String keyValue) { } String name = m.group(1); String valueString = m.group(2); - Object value; - try { - value = Integer.parseInt(valueString); - } catch (NumberFormatException nfe) { - try { - value = Double.parseDouble(valueString); - } catch (NumberFormatException nfe2) { - value = valueString; - } - } - settings.putVariable(name, value); + settings.putVariable(name, valueString); } /** diff --git a/src/main/java/io/jawk/backend/AVM.java b/src/main/java/io/jawk/backend/AVM.java index 9897f387..0479568f 100644 --- a/src/main/java/io/jawk/backend/AVM.java +++ b/src/main/java/io/jawk/backend/AVM.java @@ -300,7 +300,7 @@ public Object eval(AwkExpression expression) throws IOException { exitCode = 0; throw new IllegalStateException("eval(AwkExpression) cannot execute EXIT opcodes.", e); } - return operandStack.isEmpty() ? null : pop(); + return operandStack.isEmpty() ? null : JRT.toJavaScalar(pop()); } /** @@ -773,7 +773,7 @@ private void applyExecutionInitialVariablesToGlobalSlots(boolean skipPersistentE Integer offsetObj = globalVariableOffsets.get(key); Boolean arrayObj = globalVariableArrays.get(key); if (offsetObj != null) { - Object obj = normalizeVariableValue(entry.getValue()); + Object obj = normalizeExternalVariableValue(entry.getValue()); if (arrayObj.booleanValue()) { if (obj instanceof Map) { runtimeStack.setFilelistVariable(offsetObj.intValue(), obj); @@ -857,7 +857,7 @@ private Map collectBasePersistentGlobalSeeds() { String name = entry.getKey(); if (isPersistentEligibleGlobal(name)) { validateSeededGlobalName(name); - Object value = normalizeVariableValue(entry.getValue()); + Object value = normalizeExternalVariableValue(entry.getValue()); validateSeededGlobalValue(name, value); basePersistentSeeds.put(name, value); } @@ -882,7 +882,7 @@ private Map collectExecutionUserGlobalSeeds(Map String name = entry.getKey(); if (isPersistentEligibleGlobal(name)) { validateSeededGlobalName(name); - Object value = normalizeVariableValue(entry.getValue()); + Object value = normalizeExternalVariableValue(entry.getValue()); validateSeededGlobalValue(name, value); executionUserSeeds.put(name, value); } @@ -986,26 +986,7 @@ private NameValueAssignment parseNameValueAssignment(String nameValue) { } String name = nameValue.substring(0, eqIdx); String value = nameValue.substring(eqIdx + 1); - return new NameValueAssignment(name, coerceVariableAssignmentValue(value)); - } - - /** - * Coerces a runtime assignment value using the same scalar rules as the - * existing command-line handling: integer first, then double, then string. - * - * @param value raw text to coerce - * @return coerced scalar value - */ - private Object coerceVariableAssignmentValue(String value) { - try { - return Integer.parseInt(value); - } catch (NumberFormatException nfe) { - try { - return Double.parseDouble(value); - } catch (NumberFormatException nfe2) { - return value; - } - } + return new NameValueAssignment(name, jrt.toInputScalar(value)); } /** @@ -1164,22 +1145,14 @@ private void executeTuples(PositionTracker position) // stack[0] = item to numerically negate double d = JRT.toDouble(pop()); - if (JRT.isActuallyLong(d)) { - push((long) -Math.rint(d)); - } else { - push(-d); - } + push(-d); position.next(); break; } case UNARY_PLUS: { // stack[0] = item to convert to a number double d = JRT.toDouble(pop()); - if (JRT.isActuallyLong(d)) { - push((long) Math.rint(d)); - } else { - push(d); - } + push(d); position.next(); break; } @@ -1296,11 +1269,7 @@ private void executeTuples(PositionTracker position) throw new Error("Invalid op code here: " + opcode); } - if (JRT.isActuallyLong(newVal)) { - assignArray(offset, arrIdx, (long) Math.rint(newVal), isGlobal); - } else { - assignArray(offset, arrIdx, newVal, isGlobal); - } + assignArray(offset, arrIdx, newVal, isGlobal); position.next(); break; } @@ -1349,18 +1318,14 @@ private void executeTuples(PositionTracker position) throw new Error("Invalid op code here: " + opcode); } - if (JRT.isActuallyLong(newVal)) { - assignMapElement(array, arrIdx, (long) Math.rint(newVal)); - } else { - assignMapElement(array, arrIdx, newVal); - } + assignMapElement(array, arrIdx, newVal); position.next(); break; } case ASSIGN_AS_INPUT: { // stack[0] = value - jrt.assignInputLineFromGetline(pop()); + jrt.setInputLine(pop()); push(jrt.getInputLine()); position.next(); break; @@ -1371,7 +1336,7 @@ private void executeTuples(PositionTracker position) // stack[1] = value Object fieldNumObj = pop(); long fieldNum = JRT.parseFieldNumber(fieldNumObj); - String value = pop().toString(); + Object value = pop(); push(value); // leave the result on the stack if (fieldNum == 0) { jrt.setInputLine(value); @@ -1424,14 +1389,8 @@ private void executeTuples(PositionTracker position) default: throw new Error("Invalid opcode here: " + opcode); } - if (JRT.isActuallyLong(ans)) { - long integral = (long) Math.rint(ans); - push(integral); - runtimeStack.setVariable(offset, integral, isGlobal); - } else { - push(ans); - runtimeStack.setVariable(offset, ans, isGlobal); - } + push(ans); + runtimeStack.setVariable(offset, ans, isGlobal); position.next(); break; } @@ -1526,11 +1485,7 @@ private void executeTuples(PositionTracker position) checkScalar(key); Object o = aa.get(key); double ans = JRT.toDouble(o) + 1; - if (JRT.isActuallyLong(ans)) { - aa.put(key, (long) Math.rint(ans)); - } else { - aa.put(key, ans); - } + aa.put(key, ans); position.next(); break; } @@ -1545,11 +1500,7 @@ private void executeTuples(PositionTracker position) checkScalar(key); Object o = aa.get(key); double ans = JRT.toDouble(o) - 1; - if (JRT.isActuallyLong(ans)) { - aa.put(key, (long) Math.rint(ans)); - } else { - aa.put(key, ans); - } + aa.put(key, ans); position.next(); break; } @@ -1561,11 +1512,7 @@ private void executeTuples(PositionTracker position) Map aa = toMap(pop()); Object o = aa.get(key); double ans = JRT.toDouble(o) + 1; - if (JRT.isActuallyLong(ans)) { - aa.put(key, (long) Math.rint(ans)); - } else { - aa.put(key, ans); - } + aa.put(key, ans); position.next(); break; } @@ -1577,11 +1524,7 @@ private void executeTuples(PositionTracker position) Map aa = toMap(pop()); Object o = aa.get(key); double ans = JRT.toDouble(o) - 1; - if (JRT.isActuallyLong(ans)) { - aa.put(key, (long) Math.rint(ans)); - } else { - aa.put(key, ans); - } + aa.put(key, ans); position.next(); break; } @@ -1594,11 +1537,7 @@ private void executeTuples(PositionTracker position) double num = original + 1; setNumOnJRT(fieldnum, num); - if (JRT.isActuallyLong(original)) { - push((long) Math.rint(original)); - } else { - push(Double.valueOf(original)); - } + push(Double.valueOf(original)); position.next(); break; @@ -1613,11 +1552,7 @@ private void executeTuples(PositionTracker position) double num = original - 1; setNumOnJRT(fieldnum, num); - if (JRT.isActuallyLong(original)) { - push((long) Math.rint(original)); - } else { - push(Double.valueOf(original)); - } + push(Double.valueOf(original)); position.next(); break; @@ -1892,11 +1827,7 @@ private void executeTuples(PositionTracker position) double d1 = JRT.toDouble(o1); double d2 = JRT.toDouble(o2); double ans = d1 + d2; - if (JRT.isActuallyLong(ans)) { - push((long) Math.rint(ans)); - } else { - push(ans); - } + push(ans); position.next(); break; } @@ -1908,11 +1839,7 @@ private void executeTuples(PositionTracker position) double d1 = JRT.toDouble(o1); double d2 = JRT.toDouble(o2); double ans = d1 - d2; - if (JRT.isActuallyLong(ans)) { - push((long) Math.rint(ans)); - } else { - push(ans); - } + push(ans); position.next(); break; } @@ -1924,11 +1851,7 @@ private void executeTuples(PositionTracker position) double d1 = JRT.toDouble(o1); double d2 = JRT.toDouble(o2); double ans = d1 * d2; - if (JRT.isActuallyLong(ans)) { - push((long) Math.rint(ans)); - } else { - push(ans); - } + push(ans); position.next(); break; } @@ -1940,11 +1863,7 @@ private void executeTuples(PositionTracker position) double d1 = JRT.toDouble(o1); double d2 = JRT.toDouble(o2); double ans = d1 / d2; - if (JRT.isActuallyLong(ans)) { - push((long) Math.rint(ans)); - } else { - push(ans); - } + push(ans); position.next(); break; } @@ -1956,11 +1875,7 @@ private void executeTuples(PositionTracker position) double d1 = JRT.toDouble(o1); double d2 = JRT.toDouble(o2); double ans = d1 % d2; - if (JRT.isActuallyLong(ans)) { - push((long) Math.rint(ans)); - } else { - push(ans); - } + push(ans); position.next(); break; } @@ -1972,11 +1887,7 @@ private void executeTuples(PositionTracker position) double d1 = JRT.toDouble(o1); double d2 = JRT.toDouble(o2); double ans = Math.pow(d1, d2); - if (JRT.isActuallyLong(ans)) { - push((long) Math.rint(ans)); - } else { - push(ans); - } + push(ans); position.next(); break; } @@ -2072,7 +1983,7 @@ private void executeTuples(PositionTracker position) } case GETLINE_INPUT_TO_TARGET: { applyInputSourceFilelistAssignmentsIfNeeded(); - String input = jrt.consumeInputToTarget(resolvedInputSource); + Object input = jrt.consumeInputToTarget(resolvedInputSource); if (input != null) { push(1); push(input); @@ -2117,7 +2028,7 @@ private void executeTuples(PositionTracker position) // set the initial variables Map env = System.getenv(); for (Map.Entry var : env.entrySet()) { - assignArray(environOffset, var.getKey(), var.getValue(), true); + assignArray(environOffset, var.getKey(), jrt.toInputScalar(var.getValue()), true); pop(); // clean up the stack after the assignment } position.next(); @@ -2144,7 +2055,7 @@ private void executeTuples(PositionTracker position) pop(); for (int i = 1; i < argc; i++) { // assignArray(argvOffset, i+1, arguments.get(i), true); - assignArray(argvOffset, i, arguments.get(i - 1), true); + assignArray(argvOffset, i, jrt.toInputScalar(arguments.get(i - 1)), true); pop(); // clean up the stack after the assignment } position.next(); @@ -2537,8 +2448,8 @@ private void executeTuples(PositionTracker position) } case ASSIGN_FILENAME: { Object v = pop(); - jrt.setFILENAMEViaJrt(v == null ? "" : v.toString()); - push(v == null ? "" : v.toString()); + jrt.setFILENAMEViaJrt(v); + push(v == null ? "" : v); position.next(); break; } @@ -2830,7 +2741,8 @@ private void execSplit(CountTuple tuple, PositionTracker position) { assocArray.clear(); long cnt = 0; while (tokenizer.hasMoreElements()) { - assocArray.put(++cnt, tokenizer.nextElement()); + Object value = tokenizer.nextElement(); + assocArray.put(++cnt, jrt.toInputScalar(value)); } push(cnt); } @@ -3020,16 +2932,11 @@ private String sprintfFunction(long numArgs) { } private void setNumOnJRT(long fieldNum, double num) { - String numString; - if (JRT.isActuallyLong(num)) { - numString = Long.toString((long) Math.rint(num)); - } else { - numString = Double.toString(num); - } + String numString = jrt.toAwkString(Double.valueOf(num)); // same code as ASSIGN_AS_INPUT_FIELD if (fieldNum == 0) { - jrt.setInputLine(numString.toString()); + jrt.setInputLine(numString); jrt.jrtParseFields(); } else { jrt.jrtSetInputField(numString, fieldNum); @@ -3190,7 +3097,7 @@ public final void assignVariable(String name, Object obj) { // When offsets are not available yet, treat the assignment as part of this // AVM's baseline initial-variable snapshot. if (globalVariableOffsets == null || globalVariableArrays == null) { - Object normalized = normalizeVariableValue(obj); + Object normalized = normalizeExternalVariableValue(obj); baseInitialVariables.put(name, normalized); if (JRT.isJrtManagedSpecialVariable(name)) { baseSpecialVariables.put(name, normalized); @@ -3207,7 +3114,7 @@ public final void assignVariable(String name, Object obj) { Boolean arrayObj = globalVariableArrays.get(name); if (offsetObj != null) { - Object normalized = normalizeVariableValue(obj); + Object normalized = normalizeExternalVariableValue(obj); if (arrayObj.booleanValue()) { if (normalized instanceof Map) { runtimeStack.setFilelistVariable(offsetObj.intValue(), normalized); @@ -3219,7 +3126,7 @@ public final void assignVariable(String name, Object obj) { runtimeStack.setFilelistVariable(offsetObj.intValue(), normalized); } } else if (runtimeStack.hasGlobalVariable(name)) { - Object normalized = normalizeVariableValue(obj); + Object normalized = normalizeExternalVariableValue(obj); runtimeStack.setGlobalVariable(name, normalized); } } @@ -3277,7 +3184,7 @@ public void setNF(Integer newNf) { /** {@inheritDoc} */ @Override public void setFILENAME(String filename) { - jrt.setFILENAMEViaJrt(filename); + jrt.setFILENAMEViaJrt(jrt.toInputScalar(filename)); } /** {@inheritDoc} */ @@ -3287,7 +3194,7 @@ public Object getARGV() { Map argv = newAwkArray(); argv.put(0L, "jawk"); for (int i = 0; i < arguments.size(); i++) { - argv.put(Long.valueOf(i + 1L), arguments.get(i)); + argv.put(Long.valueOf(i + 1L), jrt.toInputScalar(arguments.get(i))); } return argv; } @@ -3384,7 +3291,13 @@ private Map ensureArrayInArray(Map map, Object k return nested; } - private Object normalizeVariableValue(Object value) { + private Object normalizeExternalVariableValue(Object value) { + if (value instanceof String) { + return jrt.toInputScalar(value); + } + if (!(value instanceof Map) && !(value instanceof List)) { + return value; + } return AssocArray.normalizeValue(value, sortedArrayKeys); } diff --git a/src/main/java/io/jawk/ext/StdinExtension.java b/src/main/java/io/jawk/ext/StdinExtension.java index b6f2a7f5..b1ef2b60 100644 --- a/src/main/java/io/jawk/ext/StdinExtension.java +++ b/src/main/java/io/jawk/ext/StdinExtension.java @@ -258,7 +258,7 @@ private Object stdInGetLine() { isEof = true; return 0; } - getJrt().setInputLine((String) lineObj); + getJrt().setInputLine(getJrt().toInputScalar((String) lineObj)); getJrt().jrtParseFields(); return 1; } catch (InterruptedException ie) { diff --git a/src/main/java/io/jawk/jrt/AssocArray.java b/src/main/java/io/jawk/jrt/AssocArray.java index 78ec158e..1f123acb 100644 --- a/src/main/java/io/jawk/jrt/AssocArray.java +++ b/src/main/java/io/jawk/jrt/AssocArray.java @@ -69,13 +69,26 @@ public interface AssocArray extends Map { /** * Converts a key to the canonical form expected by AWK: {@code null} and - * {@link UninitializedObject} map to the empty string. + * {@link UninitializedObject} map to the empty string, and internal input + * strings map to their string value. * * @param key the raw key * @return the normalized key, never {@code null} */ static Object normalizeKey(Object key) { - return (key == null || key instanceof UninitializedObject) ? "" : key; + if (key == null || key instanceof UninitializedObject) { + return ""; + } + if (key instanceof StrNum) { + return key.toString(); + } + if (key instanceof Double || key instanceof Float) { + double numericKey = ((Number) key).doubleValue(); + if (JRT.isActuallyLong(numericKey)) { + return Long.valueOf((long) Math.rint(numericKey)); + } + } + return key; } /** @@ -109,11 +122,7 @@ static Long toLongKey(Object key) { * @return {@code true} if the key (or its numeric equivalent) is present */ default boolean isIn(Object key) { - if (key == null || key instanceof UninitializedObject) { -// According to AWK semantics, an uninitialized index -// evaluates to the empty string, not numeric zero - key = ""; - } + key = normalizeKey(key); if (containsKey(key)) { return true; } diff --git a/src/main/java/io/jawk/jrt/JRT.java b/src/main/java/io/jawk/jrt/JRT.java index 90980e33..8ad1b025 100644 --- a/src/main/java/io/jawk/jrt/JRT.java +++ b/src/main/java/io/jawk/jrt/JRT.java @@ -28,6 +28,7 @@ import java.io.InputStreamReader; import java.io.PrintStream; import java.nio.charset.StandardCharsets; +import java.text.DecimalFormatSymbols; import java.util.ArrayList; import java.util.Date; import java.util.Enumeration; @@ -42,7 +43,6 @@ import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.math.BigDecimal; import io.jawk.Awk; import io.jawk.intermediate.UninitializedObject; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; @@ -93,7 +93,7 @@ public class JRT { /** PrintStream used for command error output */ private PrintStream error; // Last input line consumed for getline-style transport. - private String inputLine = null; + private Object inputLine = null; // Current record state ($0, $1, $2, ...). private RecordState recordState; // The currently active InputSource (set during consumeInput calls). @@ -110,7 +110,7 @@ public class JRT { private long fnr; // file record number private int rstart; // last match start (1-based) private int rlength; // last match length - private String filename; // current input filename (or empty for stdin/pipe) + private Object filename; // current input filename scalar (or empty for stdin/pipe) private String fs; // field separator private String rs; // record separator (regexp) private String ofs; // output field separator @@ -119,6 +119,7 @@ public class JRT { private String ofmt; // number-to-string for output private String subsep; // subscript separator private final Locale locale; // locale for number formatting + private final char decimalSeparator; // locale decimal separator for strnum recognition private static final class FileOutputState { @@ -184,6 +185,7 @@ private static final class IoState { public JRT(VariableManager vm, Locale locale, AwkSink awkSink, PrintStream error) { this.vm = vm; this.locale = locale == null ? Locale.US : locale; + this.decimalSeparator = DecimalFormatSymbols.getInstance(this.locale).getDecimalSeparator(); this.awkSink = Objects.requireNonNull(awkSink, "awkSink"); this.error = error == null ? System.err : error; this.nr = 0L; @@ -371,7 +373,7 @@ public final void assignInitialVariables(Map initialVarMap) { continue; } if ("FILENAME".equals(name)) { - setFILENAMEViaJrt(value == null ? "" : value.toString()); + setFILENAMEViaJrt(value); continue; } if ("NF".equals(name)) { @@ -425,7 +427,7 @@ public final void applySpecialVariables(Map variableMap) { } else if ("SUBSEP".equals(name)) { setSUBSEP(value); } else if ("FILENAME".equals(name)) { - setFILENAMEViaJrt(value == null ? "" : value.toString()); + setFILENAMEViaJrt(value); } else if ("NF".equals(name)) { setNF(value); } else if ("NR".equals(name)) { @@ -452,7 +454,7 @@ public final void applySpecialVariables(Map variableMap) { public static void assignEnvironmentVariables(AssocArray aa) { Map env = System.getenv(); for (Map.Entry var : env.entrySet()) { - aa.put(var.getKey(), var.getValue()); + aa.put(var.getKey(), new StrNum(var.getValue())); } } @@ -532,6 +534,13 @@ public static double toDouble(final Object o) { return (double) ((Character) o).charValue(); } + if (o instanceof StrNum) { + StrNum strNum = (StrNum) o; + if (strNum.isNumber()) { + return strNum.doubleValue(); + } + } + // Try to convert the string to a number. String s = o.toString(); int length = s.length(); @@ -646,89 +655,95 @@ public static long parseFieldNumber(Object obj) { * @return a boolean */ public static boolean compare2(Object o1, Object o2, int mode) { - boolean o1Numeric = o1 instanceof Number; - boolean o2Numeric = o2 instanceof Number; - double o1Number; - double o2Number; - - if (o1Numeric && o2Numeric) { - o1Number = ((Number) o1).doubleValue(); - o2Number = ((Number) o2).doubleValue(); - if (mode < 0) { - return o1Number < o2Number; - } else if (mode == 0) { - return o1Number == o2Number; - } else { - return o1Number > o2Number; - } + if (o1 instanceof Number && o2 instanceof Number) { + return compareNumbers(((Number) o1).doubleValue(), ((Number) o2).doubleValue(), mode); } - String o1String = o1.toString(); - String o2String = o2.toString(); + + String o1String = o1 == null ? "" : o1.toString(); + String o2String = o2 == null ? "" : o2.toString(); if (o1 instanceof UninitializedObject) { - if (o2 instanceof UninitializedObject || "".equals(o2String) || "0".equals(o2String)) { + if (isBlankOrZero(o2, o2String)) { return mode == 0; } else { return mode < 0; } } if (o2 instanceof UninitializedObject) { - if ("".equals(o1String) || "0".equals(o1String)) { + if (isBlankOrZero(o1, o1String)) { return mode == 0; } else { return mode > 0; } } - if (o1String.equals(o2String)) { - return mode == 0; + if (isNumericComparisonOperand(o1) && isNumericComparisonOperand(o2)) { + return compareNumbers(getDoubleForComparison(o1), getDoubleForComparison(o2), mode); } - if (o1Numeric) { - o1Number = ((Number) o1).doubleValue(); - } else if (isComparisonNumber(o1String)) { - try { - o1Number = new BigDecimal(o1String).doubleValue(); - o1Numeric = true; - } catch (NumberFormatException nfe) { // NOPMD - ignore invalid number - o1Number = 0.0; - } + if (mode == 0) { + return o1String.equals(o2String); + } else if (mode < 0) { + return o1String.compareTo(o2String) < 0; } else { - o1Number = 0.0; + return o1String.compareTo(o2String) > 0; } - if (o2Numeric) { - o2Number = ((Number) o2).doubleValue(); - } else if (isComparisonNumber(o2String)) { - try { - o2Number = new BigDecimal(o2String).doubleValue(); - o2Numeric = true; - } catch (NumberFormatException nfe) { // NOPMD - ignore invalid number - o2Number = 0.0; - } - } else { - o2Number = 0.0; + } + + private static boolean isBlankOrZero(Object value, String stringValue) { + if (value instanceof UninitializedObject) { + return true; + } + if (value instanceof Number) { + return ((Number) value).doubleValue() == 0.0D; + } + if (value instanceof StrNum && ((StrNum) value).isNumber()) { + return ((StrNum) value).doubleValue() == 0.0D; } + return "".equals(stringValue) || "0".equals(stringValue); + } - if (o1Numeric && o2Numeric) { - if (mode < 0) { - return o1Number < o2Number; - } else if (mode == 0) { - return o1Number == o2Number; - } else { - return o1Number > o2Number; - } + private static boolean isNumericComparisonOperand(Object value) { + return value instanceof Number || value instanceof StrNum && ((StrNum) value).isNumber(); + } + + private static double getDoubleForComparison(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); } + return ((StrNum) value).doubleValue(); + } - if (mode == 0) { - return o1String.equals(o2String); - } else if (mode < 0) { - return o1String.compareTo(o2String) < 0; + private static boolean compareNumbers(double o1Number, double o2Number, int mode) { + if (mode < 0) { + return o1Number < o2Number; + } else if (mode == 0) { + return o1Number == o2Number; } else { - return o1String.compareTo(o2String) > 0; + return o1Number > o2Number; } } - static boolean isComparisonNumber(String value) { + /** + * Converts an internal runtime scalar to the value exposed through Java APIs. + * + * @param value internal scalar value + * @return plain Java scalar value + */ + public static Object toJavaScalar(Object value) { + if (value instanceof StrNum) { + return value.toString(); + } + if (value instanceof Double || value instanceof Float) { + double number = ((Number) value).doubleValue(); + if (isActuallyLong(number)) { + return Long.valueOf((long) Math.rint(number)); + } + } + return value; + } + + static boolean isParseableNumber(String value, char decimalSeparator) { int index = 0; int length = value.length(); @@ -750,7 +765,7 @@ static boolean isComparisonNumber(String value) { digitFound = true; } - if (index < length && value.charAt(index) == '.') { + if (index < length && value.charAt(index) == decimalSeparator) { index++; while (index < length && value.charAt(index) >= '0' && value.charAt(index) <= '9') { index++; @@ -781,36 +796,23 @@ static boolean isComparisonNumber(String value) { return index == length; } + static String normalizeNumberForComparison(String value, char decimalSeparator) { + return decimalSeparator == '.' ? value : value.replace(decimalSeparator, '.'); + } + /** * Return an object which is numerically equivalent to * one plus a given object. For Integers and Doubles, * this is similar to o+1. For Strings, attempts are * made to convert it to a double first. If the - * String does not represent a valid Double, 1 is returned. + * String does not contain a numeric prefix, 1 is returned. * * @param o The object to increase. - * @return o+1 if o is an Integer or Double object, or - * if o is a String object and represents a double. - * Otherwise, 1 is returned. If the return value - * is an integer, an Integer object is returned. - * Otherwise, a Double object is returned. + * @return {@code o + 1} if o is numeric or contains a numeric prefix; + * otherwise, {@code 1.0} */ public static Object inc(Object o) { - double ans; - if (o instanceof Number) { - ans = ((Number) o).doubleValue() + 1; - } else { - try { - ans = Double.parseDouble(o.toString()) + 1; - } catch (NumberFormatException nfe) { - ans = 1; - } - } - if (isActuallyLong(ans)) { - return (long) Math.rint(ans); - } else { - return ans; - } + return toDouble(o) + 1; } /** @@ -818,31 +820,14 @@ public static Object inc(Object o) { * one minus a given object. For Integers and Doubles, * this is similar to o-1. For Strings, attempts are * made to convert it to a double first. If the - * String does not represent a valid Double, -1 is returned. + * String does not contain a numeric prefix, -1 is returned. * * @param o The object to increase. - * @return o-1 if o is an Integer or Double object, or - * if o is a String object and represents a double. - * Otherwise, -1 is returned. If the return value - * is an integer, an Integer object is returned. - * Otherwise, a Double object is returned. + * @return {@code o - 1} if o is numeric or contains a numeric prefix; + * otherwise, {@code -1.0} */ public static Object dec(Object o) { - double ans; - if (o instanceof Number) { - ans = ((Number) o).doubleValue() - 1; - } else { - try { - ans = Double.parseDouble(o.toString()) - 1; - } catch (NumberFormatException nfe) { - ans = 1; - } - } - if (isActuallyLong(ans)) { - return (long) Math.rint(ans); - } else { - return ans; - } + return toDouble(o) - 1; } // non-static to reference "inputLine" @@ -870,6 +855,9 @@ public final boolean toBoolean(Object o) { val = ((Long) o).longValue() != 0; } else if (o instanceof Double) { val = ((Double) o).doubleValue() != 0; + } else if (o instanceof StrNum) { + StrNum strNum = (StrNum) o; + val = strNum.isNumber() ? strNum.doubleValue() != 0 : strNum.toString().length() > 0; } else if (o instanceof String) { val = (o.toString().length() > 0); } else if (o instanceof UninitializedObject) { @@ -937,11 +925,12 @@ private static Map toArrayMap(Object array) { return arrayMap; } - private static int splitWorker(Enumeration e, Map array) { + private int splitWorker(Enumeration e, Map array) { int cnt = 0; array.clear(); while (e.hasMoreElements()) { - array.put(Long.valueOf(++cnt), e.nextElement()); + Object value = e.nextElement(); + array.put(Long.valueOf(++cnt), toInputScalar(value)); } array.put(0L, Long.valueOf(cnt)); return cnt; @@ -966,16 +955,13 @@ public PartitioningReader getPartitioningReader() { * Getter for the field inputLine. *

* - * @return a {@link java.lang.String} object + * @return the current input line scalar value, or {@code null} */ - public String getInputLine() { - if (inputLine != null) { - return inputLine; - } - if (recordState == null) { - return null; + public Object getInputLine() { + if (recordState != null) { + return recordState.getField(0); } - return recordState.getRecordText(); + return inputLine; } /** @@ -1186,7 +1172,7 @@ public void setRLENGTH(Object value) { * * @return current FILENAME (empty string for stdin/pipe) */ - public String getFILENAME() { + public Object getFILENAME() { return filename == null ? "" : filename; } @@ -1195,8 +1181,8 @@ public String getFILENAME() { * * @param name file name to set */ - public void setFILENAMEViaJrt(String name) { - this.filename = name == null ? "" : name; + public void setFILENAMEViaJrt(Object name) { + this.filename = normalizeRecordValue(name); } /** @@ -1294,22 +1280,38 @@ public void setARGC(Object value) { * Setter for the field inputLine. *

* - * @param inputLine a {@link java.lang.String} object + * @param inputLineParam input value */ - public void setInputLine(String inputLine) { - this.inputLine = inputLine; - recordState = newRecordStateFromText(inputLine); + public void setInputLine(Object inputLineParam) { + Object inputValue = normalizeRecordValue(inputLineParam); + this.inputLine = inputValue; + recordState = new RecordState(inputValue, null); } /** - * Assigns {@code $0} from a getline result and initializes {@code $1..$NF}. + * Creates an input-derived AWK scalar value. * - * @param value getline result assigned to {@code $0} + * @param value input text + * @return input-derived scalar value */ - public void assignInputLineFromGetline(Object value) { - String inputValue = value == null ? "" : value.toString(); - inputLine = inputValue; - recordState = newRecordStateFromText(inputValue); + public Object toInputScalar(Object value) { + if (value instanceof String) { + return new StrNum((String) value, decimalSeparator); + } + if (value instanceof StrNum) { + return value; + } + if (value == null || value instanceof UninitializedObject) { + return new StrNum("", decimalSeparator); + } + return new StrNum(value.toString(), decimalSeparator); + } + + private static Object normalizeRecordValue(Object value) { + if (value == null || value instanceof UninitializedObject) { + return ""; + } + return value; } /** @@ -1330,7 +1332,7 @@ public boolean consumeInput(final InputSource source) throws IOException { } inputLine = null; - recordState = newRecordStateFromSource(source); + recordState = new RecordState(source); this.nr++; if (source.isFromFilenameList()) { @@ -1341,16 +1343,16 @@ public boolean consumeInput(final InputSource source) throws IOException { /** * Attempt to consume one record from a structured input source for - * {@code getline target}, returning only the input text and leaving the + * {@code getline target}, returning the input value and leaving the * current input record state untouched. * * @param source source strategy that provides records and optional * pre-split fields - * @return the consumed input text, or {@code null} when the source is + * @return the consumed input value, or {@code null} when the source is * exhausted * @throws IOException if the source raises an I/O error */ - public String consumeInputToTarget(final InputSource source) throws IOException { + public Object consumeInputToTarget(final InputSource source) throws IOException { Objects.requireNonNull(source, "source"); activeSource = source; materializeCurrentRecord(); @@ -1358,12 +1360,12 @@ public String consumeInputToTarget(final InputSource source) throws IOException return null; } - String input = newRecordStateFromSource(source).getRecordText(); + RecordState inputState = new RecordState(source); this.nr++; if (source.isFromFilenameList()) { this.fnr++; } - return input; + return new StrNum(inputState.getRecordText(), decimalSeparator); } /** @@ -1386,7 +1388,7 @@ public boolean consumeInputForEval(InputSource source) throws IOException { * @param preFields current fields where index {@code 0} is {@code $1} */ protected void initializeInputFields(String record, List preFields) { - recordState = newRecordStateFromSource(record, preFields); + recordState = new RecordState(toInputScalar(record), preFields); } /** @@ -1473,7 +1475,7 @@ public String jrtSetInputField(Object valueObj, long fieldNum) { if (fieldNum > Integer.MAX_VALUE) { throw new AwkRuntimeException("Field $(" + Long.valueOf(fieldNum) + ") is incorrect."); } - String value = valueObj.toString(); + String value = valueObj == null ? "" : valueObj.toString(); int fieldIndex = (int) fieldNum; RecordState state = ensureRecordStateForFieldMutation(); if (valueObj instanceof UninitializedObject) { @@ -1484,7 +1486,7 @@ public String jrtSetInputField(Object valueObj, long fieldNum) { while (state.getNF() < fieldIndex) { state.addField(""); } - state.setField(fieldIndex - 1, value); + state.setField(fieldIndex - 1, valueObj); } state.markRecordTextDirty(); return value; @@ -1493,7 +1495,7 @@ public String jrtSetInputField(Object valueObj, long fieldNum) { protected void rebuildDollarZeroFromFields() { if (recordState != null) { recordState.markRecordTextDirty(); - inputLine = recordState.getRecordText(); + inputLine = recordState.getField(0); } } @@ -1505,7 +1507,7 @@ private void materializeCurrentRecord() { private RecordState ensureRecordStateForTextMutation() { if (recordState == null) { - recordState = newRecordStateFromText(inputLine == null ? "" : inputLine); + recordState = new RecordState(inputLine, null); } return recordState; } @@ -1516,16 +1518,17 @@ private RecordState ensureRecordStateForFieldMutation() { return state; } - private static List sanitizeFields(List rawFields) { - List copy = new ArrayList(rawFields.size()); + private List sanitizeFields(List rawFields) { + List copy = new ArrayList(rawFields.size()); for (String field : rawFields) { - copy.add(field == null ? "" : field); + String value = field == null ? "" : field; + copy.add(new StrNum(value, decimalSeparator)); } return copy; } - private List splitRecordText(String recordText, String fieldSeparator) { - List fields = new ArrayList(); + private List splitRecordText(String recordText, String fieldSeparator) { + List fields = new ArrayList(); if (recordText == null || recordText.isEmpty()) { return fields; } @@ -1542,44 +1545,34 @@ private List splitRecordText(String recordText, String fieldSeparator) { } while (tokenizer.hasMoreElements()) { - fields.add((String) tokenizer.nextElement()); + fields.add(new StrNum((String) tokenizer.nextElement(), decimalSeparator)); } return fields; } - private static String joinFieldsWithLiteralSeparator(List fields, String separator) { + private static String joinFieldsWithLiteralSeparator(List fields, String separator) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < fields.size(); i++) { if (i > 0) { sb.append(separator); } - sb.append(fields.get(i)); + Object field = fields.get(i); + sb.append(field == null ? "" : field.toString()); } return sb.toString(); } - private String rebuildRecordTextFromFields(List fields) { + private String rebuildRecordTextFromFields(List fields) { return joinFieldsWithLiteralSeparator(fields, ofs); } - private RecordState newRecordStateFromText(String recordText) { - return new RecordState(recordText, null); - } - - private RecordState newRecordStateFromSource(InputSource source) { - return new RecordState(source); - } - - private RecordState newRecordStateFromSource(String recordText, List rawFields) { - return new RecordState(recordText, rawFields); - } - private final class RecordState { private final String fieldSeparatorAtRead; private final InputSource source; private String recordText; - private List fields; + private Object recordScalar; + private List fields; private boolean recordTextAvailable; private boolean fieldsAvailable; private boolean recordTextDirty; @@ -1591,17 +1584,19 @@ private RecordState(InputSource source) { this(null, null, source); } - private RecordState(String recordText, List rawFields) { - this(recordText, rawFields, null); + private RecordState(Object recordValue, List rawFields) { + this(recordValue, rawFields, null); } - private RecordState(String recordText, List rawFields, InputSource source) { + private RecordState(Object recordValue, List rawFields, InputSource source) { this.fieldSeparatorAtRead = fs; this.source = source; - if (recordText != null) { - this.recordText = recordText; + if (recordValue != null) { + this.recordScalar = normalizeRecordValue(recordValue); + this.recordText = this.recordScalar.toString(); this.recordTextAvailable = true; } else if (rawFields == null && source == null) { + this.recordScalar = ""; this.recordText = ""; this.recordTextAvailable = true; } @@ -1635,6 +1630,7 @@ private String getRecordText() { if (!recordTextAvailable || recordTextDirty) { if (recordTextDirty) { recordText = rebuildRecordTextFromFields(fields); + recordScalar = recordText; } else { loadRecordTextFromSource(); if (!recordTextAvailable) { @@ -1644,6 +1640,7 @@ private String getRecordText() { "InputSource must provide record text, fields, or both after nextRecord()"); } recordText = joinFieldsWithLiteralSeparator(fields, fieldSeparatorAtRead); + recordScalar = new StrNum(recordText, decimalSeparator); } } recordTextAvailable = true; @@ -1659,7 +1656,11 @@ private int getNF() { private Object getField(int fieldIndex) { if (fieldIndex == 0) { - return getRecordText(); + String value = getRecordText(); + if (recordScalar == null) { + recordScalar = value; + } + return recordScalar; } ensureFieldsMaterialized(); int zeroBasedIndex = fieldIndex - 1; @@ -1669,18 +1670,25 @@ private Object getField(int fieldIndex) { return fields.get(zeroBasedIndex); } - private void setField(int zeroBasedIndex, String value) { + private void setField(int zeroBasedIndex, Object value) { ensureFieldsMaterialized(); - fields.set(zeroBasedIndex, value); + fields.set(zeroBasedIndex, normalizeFieldValue(value)); markRecordTextDirty(); } - private void addField(String value) { + private void addField(Object value) { ensureFieldsMaterialized(); - fields.add(value); + fields.add(normalizeFieldValue(value)); markRecordTextDirty(); } + private Object normalizeFieldValue(Object value) { + if (value == null || value instanceof UninitializedObject) { + return ""; + } + return value; + } + private void removeField(int zeroBasedIndex) { ensureFieldsMaterialized(); fields.remove(zeroBasedIndex); @@ -1690,6 +1698,7 @@ private void removeField(int zeroBasedIndex) { private void markRecordTextDirty() { recordTextDirty = true; recordTextAvailable = fieldsAvailable; + recordScalar = null; } private void materialize() { @@ -1703,6 +1712,9 @@ private void loadRecordTextFromSource() { } recordText = source.getRecordText(); recordTextAvailable = recordText != null; + if (recordTextAvailable) { + recordScalar = new StrNum(recordText, decimalSeparator); + } recordTextLoadedFromSource = true; } @@ -1921,11 +1933,13 @@ public boolean jrtConsumeFileInput(String fileNameParam) throws IOException { } } - inputLine = pr.readRecord(); - if (inputLine == null) { + String recordText = pr.readRecord(); + if (recordText == null) { return false; } else { - jrtInputString = inputLine; + jrtInputString = recordText; + inputLine = toInputScalar(recordText); + recordState = new RecordState(inputLine, null); this.nr++; return true; } @@ -1958,11 +1972,13 @@ private static Process spawnProcess(String cmd) throws IOException { */ public boolean jrtConsumeCommandInput(String cmd) throws IOException { CommandInputState commandInput = getOrCreateCommandInputState(cmd); - inputLine = commandInput.reader.readRecord(); - if (inputLine == null) { + String recordText = commandInput.reader.readRecord(); + if (recordText == null) { return false; } else { - jrtInputString = inputLine; + jrtInputString = recordText; + inputLine = toInputScalar(recordText); + recordState = new RecordState(inputLine, null); this.nr++; return true; } diff --git a/src/main/java/io/jawk/jrt/StrNum.java b/src/main/java/io/jawk/jrt/StrNum.java new file mode 100644 index 00000000..40f38b44 --- /dev/null +++ b/src/main/java/io/jawk/jrt/StrNum.java @@ -0,0 +1,70 @@ +package io.jawk.jrt; + +/*- + * ╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲ + * Jawk + * ჻჻჻჻჻჻ + * Copyright (C) 2006 - 2026 MetricsHub + * ჻჻჻჻჻჻ + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Lesser Public License for more details. + * + * You should have received a copy of the GNU General Lesser Public + * License along with this program. If not, see + * . + * ╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱ + */ + +import java.math.BigDecimal; + +final class StrNum { + + private final String value; + private final char decimalSeparator; + private Boolean numeric; + private Double numericValue; + + StrNum(String value) { + this(value, '.'); + } + + StrNum(String value, char decimalSeparator) { + this.value = value == null ? "" : value; + this.decimalSeparator = decimalSeparator; + } + + boolean isNumber() { + if (numeric == null) { + numeric = Boolean.valueOf(JRT.isParseableNumber(value, decimalSeparator)); + } + return numeric.booleanValue(); + } + + double doubleValue() { + if (numericValue == null) { + numericValue = Double.valueOf(parseDoubleValue()); + } + return numericValue.doubleValue(); + } + + private double parseDoubleValue() { + String normalizedValue = JRT.normalizeNumberForComparison(value, decimalSeparator); + try { + return Double.parseDouble(normalizedValue); + } catch (NumberFormatException nfe) { + return new BigDecimal(normalizedValue).doubleValue(); + } + } + + @Override + public String toString() { + return value; + } +} diff --git a/src/main/java/io/jawk/jrt/StreamInputSource.java b/src/main/java/io/jawk/jrt/StreamInputSource.java index 4df4a585..bb0366f9 100644 --- a/src/main/java/io/jawk/jrt/StreamInputSource.java +++ b/src/main/java/io/jawk/jrt/StreamInputSource.java @@ -229,7 +229,7 @@ private boolean detectFilenames() { * @return {@code ARGC} converted to an {@code int} */ private int getArgCount() { - long raw = JRT.toLong(vm.getARGC()); + double raw = JRT.toDouble(vm.getARGC()); if (raw <= 0) { return 0; } @@ -326,7 +326,7 @@ private boolean prepareNextReader() throws IOException { partitioningReader = new PartitioningReader( new InputStreamReader(defaultInput, StandardCharsets.UTF_8), jrt.getRSString()); - jrt.setFILENAMEViaJrt(""); + jrt.setFILENAMEViaJrt(jrt.toInputScalar("")); return true; } closeCurrentReaderIfFileStream(); @@ -341,7 +341,7 @@ private boolean prepareNextReader() throws IOException { partitioningReader = new PartitioningReader( new InputStreamReader(defaultInput, StandardCharsets.UTF_8), jrt.getRSString()); - jrt.setFILENAMEViaJrt(""); + jrt.setFILENAMEViaJrt(jrt.toInputScalar("")); return true; } if (partitioningReader != null) { @@ -353,7 +353,7 @@ private boolean prepareNextReader() throws IOException { new InputStreamReader(new FileInputStream(arg), StandardCharsets.UTF_8), jrt.getRSString(), true); - jrt.setFILENAMEViaJrt(arg); + jrt.setFILENAMEViaJrt(jrt.toInputScalar(arg)); jrt.setFNR(0L); ready = true; } @@ -404,16 +404,6 @@ private void setFilelistVariable(String nameValue) { } String name = nameValue.substring(0, eqIdx); String value = nameValue.substring(eqIdx + 1); - Object obj; - try { - obj = Integer.parseInt(value); - } catch (NumberFormatException nfe) { - try { - obj = Double.parseDouble(value); - } catch (NumberFormatException nfe2) { - obj = value; - } - } - vm.assignVariable(name, obj); + vm.assignVariable(name, jrt.toInputScalar(value)); } } diff --git a/src/main/java/io/jawk/util/AwkSettings.java b/src/main/java/io/jawk/util/AwkSettings.java index 9d25fb2e..442df6f4 100644 --- a/src/main/java/io/jawk/util/AwkSettings.java +++ b/src/main/java/io/jawk/util/AwkSettings.java @@ -162,19 +162,8 @@ private void addInitialVariable(String keyValue) { int equalsIdx = keyValue.indexOf('='); String name = keyValue.substring(0, equalsIdx); String valueString = keyValue.substring(equalsIdx + 1); - Object value; - // deduce type - try { - value = Integer.parseInt(valueString); - } catch (NumberFormatException nfe) { - try { - value = Double.parseDouble(valueString); - } catch (NumberFormatException nfe2) { - value = valueString; - } - } // note: can overwrite previously defined variables - putVariable(name, value); + putVariable(name, valueString); } /** diff --git a/src/test/java/io/jawk/AssocArrayTest.java b/src/test/java/io/jawk/AssocArrayTest.java index 548b53a0..3d3a195a 100644 --- a/src/test/java/io/jawk/AssocArrayTest.java +++ b/src/test/java/io/jawk/AssocArrayTest.java @@ -298,8 +298,8 @@ public void testSplitMutatesInjectedMapDirectly() throws Exception { assertFalse(data.containsKey("old")); assertFalse(data.containsKey(1)); assertFalse(data.containsKey(2)); - assertEquals("alpha", data.get(1L)); - assertEquals("beta", data.get(2L)); + assertEquals("alpha", data.get(1L).toString()); + assertEquals("beta", data.get(2L).toString()); } @Test diff --git a/src/test/java/io/jawk/AwkTest.java b/src/test/java/io/jawk/AwkTest.java index 0b3bba3e..55ab6b62 100644 --- a/src/test/java/io/jawk/AwkTest.java +++ b/src/test/java/io/jawk/AwkTest.java @@ -478,7 +478,11 @@ public void testFunctionArgumentsLeftAssociativity() throws Exception { @Test public void testAtan2ArgumentsLeftAssociativity() throws Exception { - assertEquals("atan2 arguments must be eval'ed from left to right", 0.0, AWK.eval("atan2(a++, a++)")); + AwkTestSupport + .awkTest("atan2 arguments left to right") + .script("BEGIN { print atan2(a++, a++) }") + .expect("0\n") + .runAndAssert(); } @Test diff --git a/src/test/java/io/jawk/JRTTest.java b/src/test/java/io/jawk/JRTTest.java index 744e6dd5..91a9a2c4 100644 --- a/src/test/java/io/jawk/JRTTest.java +++ b/src/test/java/io/jawk/JRTTest.java @@ -127,22 +127,22 @@ public void testCompare2NumericOperands() { } @Test - public void testCompare2NumericStrings() { - assertTrue(JRT.compare2("3", "3.0", 0)); + public void testCompare2PlainStrings() { + assertFalse(JRT.compare2("3", "3.0", 0)); assertTrue(JRT.compare2("3", "4.0", -1)); assertTrue(JRT.compare2("4.0", "3", 1)); - assertTrue(JRT.compare2("1e2", "100", 0)); - assertTrue(JRT.compare2("+.5", "0.5", 0)); - assertTrue(JRT.compare2("5.", "5.0", 0)); - assertTrue(JRT.compare2("-1E+2", "-100", 0)); + assertFalse(JRT.compare2("1e2", "100", 0)); + assertFalse(JRT.compare2("+.5", "0.5", 0)); + assertFalse(JRT.compare2("5.", "5.0", 0)); + assertFalse(JRT.compare2("-1E+2", "-100", 0)); assertFalse(JRT.compare2("1e2147483649", "2", 0)); assertTrue(JRT.compare2("1e2147483649", "2", -1)); } @Test public void testCompare2MixedNumberAndString() { - assertTrue(JRT.compare2(3L, "3.0", 0)); - assertTrue(JRT.compare2("3.0", 3L, 0)); + assertFalse(JRT.compare2(3L, "3.0", 0)); + assertFalse(JRT.compare2("3.0", 3L, 0)); assertTrue(JRT.compare2(3L, "4", -1)); assertTrue(JRT.compare2("4", 3L, 1)); } @@ -221,8 +221,8 @@ public void testSplitUsesLongIndexesForPlainMap() { int n = jrt.split(map, "a b"); assertEquals(2, n); assertEquals(2L, map.get(0L)); - assertEquals("a", map.get(1L)); - assertEquals("b", map.get(2L)); + assertEquals("a", map.get(1L).toString()); + assertEquals("b", map.get(2L).toString()); assertFalse(map.containsKey(1)); } @@ -232,9 +232,37 @@ public void testSplitRegexWhitespace() { JRT jrt = new JRT(null, Locale.US, AwkSink.from(System.out, Locale.US), System.err); int n = jrt.split("[ \t]+", aa, " 9853 shen"); assertEquals(3, n); - assertEquals("", aa.get(1)); - assertEquals("9853", aa.get(2)); - assertEquals("shen", aa.get(3)); + assertEquals("", aa.get(1).toString()); + assertEquals("9853", aa.get(2).toString()); + assertEquals("shen", aa.get(3).toString()); + } + + @Test + public void testInputDerivedDollarZeroScalarIsCachedUntilRecordChanges() { + JRT jrt = new JRT(null, Locale.US, AwkSink.from(System.out, Locale.US), System.err); + jrt.setFS(" "); + jrt.setInputLine(jrt.toInputScalar("9 10")); + + Object firstRead = jrt.getInputLine(); + assertSame(firstRead, jrt.getInputLine()); + + jrt.jrtSetInputField("8", 1); + + Object changedRead = jrt.getInputLine(); + assertNotSame(firstRead, changedRead); + assertSame(changedRead, jrt.getInputLine()); + assertEquals("8 10", changedRead.toString()); + } + + @Test + public void testFilenamePreservesScalarAttribute() { + JRT jrt = new JRT(null, Locale.US, AwkSink.from(System.out, Locale.US), System.err); + + jrt.setFILENAMEViaJrt(jrt.toInputScalar("9")); + assertTrue(JRT.compare2(jrt.getFILENAME(), Long.valueOf(10L), -1)); + + jrt.setFILENAMEViaJrt("9"); + assertFalse(JRT.compare2(jrt.getFILENAME(), Long.valueOf(10L), -1)); } @Test diff --git a/src/test/java/io/jawk/PosixConformanceTest.java b/src/test/java/io/jawk/PosixConformanceTest.java index 9a49f8fb..88a1def1 100644 --- a/src/test/java/io/jawk/PosixConformanceTest.java +++ b/src/test/java/io/jawk/PosixConformanceTest.java @@ -342,7 +342,7 @@ public void posix48NumericVsStringComparison() throws Exception { AwkTestSupport .awkTest("POSIX 4.8 numeric vs string comparison") .script("BEGIN{print \"10\" < \"2\", 10 < \"2\", \"10\"==\"10\", \"10\"==10}") - .expectLines("0 0 1 1") + .expectLines("1 1 1 1") .runAndAssert(); } diff --git a/src/test/java/io/jawk/StrNumSemanticsTest.java b/src/test/java/io/jawk/StrNumSemanticsTest.java new file mode 100644 index 00000000..faab2e3c --- /dev/null +++ b/src/test/java/io/jawk/StrNumSemanticsTest.java @@ -0,0 +1,212 @@ +package io.jawk; + +/*- + * ╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲ + * Jawk + * ჻჻჻჻჻჻ + * Copyright (C) 2006 - 2026 MetricsHub + * ჻჻჻჻჻჻ + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Lesser Public License for more details. + * + * You should have received a copy of the GNU General Lesser Public + * License along with this program. If not, see + * . + * ╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱ + */ + +import static io.jawk.AwkTestSupport.awkTest; +import static io.jawk.AwkTestSupport.cliTest; + +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; +import java.util.Locale; +import io.jawk.ext.StdinExtension; +import io.jawk.util.AwkSettings; +import org.junit.Test; + +public class StrNumSemanticsTest { + + @Test + public void testArithmeticKeepsNumericPrefixConversion() throws Exception { + awkTest("arithmetic parses numeric prefixes") + .script("{ print($1 + 1) }") + .stdin("2x\n2.3x\n2x.3x\n2e+02\n0x10\n") + .expectLines("3", "3.3", "3", "201", "1") + .runAndAssert(); + } + + @Test + public void testInputComparisonsUseStrNumAttribute() throws Exception { + awkTest("input-derived values compare as strnum only when fully numeric") + .script("{ print($1 < 10) }") + .stdin("2x\n2x.3x\n2e01\n9\n0x10\n") + .expectLines("0", "0", "0", "1", "1") + .runAndAssert(); + } + + @Test + public void testAssignmentPreservesStrNumAttribute() throws Exception { + awkTest("assignment preserves strnum attribute") + .script("{ x = $1; print(x < 10) }") + .stdin("9\n") + .expectLines("1") + .runAndAssert(); + } + + @Test + public void testStringOperationProducesPlainString() throws Exception { + awkTest("concatenation produces plain string") + .script("{ x = $1 \"\"; print(x < 10) }") + .stdin("9\n") + .expectLines("0") + .runAndAssert(); + } + + @Test + public void testStringLiteralsArePlainStrings() throws Exception { + awkTest("string literals force string comparison") + .script("BEGIN { print(\"9\" < 10); print(9 < \"10\") }") + .expectLines("0", "0") + .runAndAssert(); + } + + @Test + public void testNumericOperationProducesNumber() throws Exception { + awkTest("numeric operation produces numeric value") + .script("{ x = $1 + 0; print(x < 10) }") + .stdin("9\n") + .expectLines("1") + .runAndAssert(); + } + + @Test + public void testUninitializedEqualsNumericZeroStrNum() throws Exception { + awkTest("uninitialized equals numeric zero strnum") + .script("{ print($1 == undefined) }") + .stdin("0.000\n") + .expectLines("1") + .runAndAssert(); + } + + @Test + public void testFieldAssignmentPreservesAssignedAttribute() throws Exception { + awkTest("field assignment preserves assigned attribute") + .script("{ $1 = $2; print($1 < 10); $1 = \"3.00\"; print($1 < 10); $1 = 3.00; print($1 < 10) }") + .stdin("2.00 3.00\n") + .expectLines("1", "0", "1") + .runAndAssert(); + } + + @Test + public void testAssigningDollarZeroCreatesNumericStringFields() throws Exception { + awkTest("assigning dollar zero creates numeric string fields") + .script("{ $0 = \"2.00 3.00\"; print($1 < 10) }") + .stdin("ignored\n") + .expectLines("1") + .runAndAssert(); + } + + @Test + public void testAssignedDollarZeroRemainsPlainString() throws Exception { + awkTest("assigned dollar zero remains plain string") + .script("{ $0 = \"2.00 3.00\"; print($0 < 10); print($1 < 10) }") + .stdin("ignored\n") + .expectLines("0", "1") + .runAndAssert(); + } + + @Test + public void testAssignedDollarZeroPreservesAssignedAttribute() throws Exception { + awkTest("assigned dollar zero preserves assigned attribute") + .script("{ $0 = $1; print($0 < 10); $0 = 3.00; print($0 < 10); $0 = \"3.00\"; print($0 < 10) }") + .stdin("9\n") + .expectLines("1", "1", "0") + .runAndAssert(); + } + + @Test + public void testArgvValuesAreInputDerived() throws Exception { + awkTest("ARGV values are input-derived") + .script("BEGIN { $0 = ARGV[1]; print($0 < 10); print($1 < 10); exit }") + .operand("9") + .expectLines("1", "1") + .runAndAssert(); + } + + @Test + public void testSplitCreatesNumericStringElements() throws Exception { + awkTest("split array elements are numeric strings") + .script("BEGIN { split(\"9 9a\", a); print(a[1] < 10); print(a[2] < 10) }") + .expectLines("1", "0") + .runAndAssert(); + } + + @Test + public void testCommandLineVariableAssignmentsAreInputDerived() throws Exception { + cliTest("CLI variable assignments are numeric strings") + .preassign("x", "9") + .script("BEGIN { print(x < 10) }") + .expectLines("1") + .runAndAssert(); + } + + @Test + public void testFilelistVariableAssignmentsAreInputDerived() throws Exception { + awkTest("filelist variable assignments are numeric strings") + .script("{ print(x < 10); exit }") + .operand("x=9") + .stdin("ignored\n") + .expectLines("1") + .runAndAssert(); + } + + @Test + public void testProgrammaticStringPreassignmentIsInputDerived() throws Exception { + awkTest("programmatic string preassignments are numeric strings") + .preassign("x", "9") + .script("BEGIN { print(x < 10) }") + .expectLines("1") + .runAndAssert(); + } + + @Test + public void testStrNumComparisonUsesRuntimeLocale() throws Exception { + AwkSettings settings = new AwkSettings(); + settings.setLocale(Locale.FRANCE); + + awkTest("strnum comparison uses runtime locale") + .withAwk(new Awk(settings)) + .script("{ print($1 < 10) }") + .stdin("3,14\n") + .expectLines("1") + .runAndAssert(); + } + + @Test + public void testNumericStrNumTruthinessUsesNumericValue() throws Exception { + awkTest("input-derived numeric string truthiness uses numeric value") + .script("{ print($1 ? \"true\" : \"false\") }") + .stdin("0\n2\n2a\n") + .expectLines("false", "true", "true") + .runAndAssert(); + } + + @Test + public void testStdinExtensionInputUsesStrNumAttribute() throws Exception { + StdinExtension stdin = new StdinExtension(new ByteArrayInputStream("9\n0\n".getBytes(StandardCharsets.UTF_8))); + + awkTest("stdin extension records are input-derived") + .withExtensions(stdin) + .script("BEGIN { StdinGetline(); print($0 < 10); StdinGetline(); print($0 ? \"true\" : \"false\") }") + .expectLines("1", "false") + .runAndAssert(); + } +} diff --git a/src/test/java/io/jawk/jrt/JRTComparisonNumberTest.java b/src/test/java/io/jawk/jrt/JRTComparisonNumberTest.java index b3e3c6c2..ba17ed4b 100644 --- a/src/test/java/io/jawk/jrt/JRTComparisonNumberTest.java +++ b/src/test/java/io/jawk/jrt/JRTComparisonNumberTest.java @@ -23,47 +23,84 @@ */ import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import io.jawk.intermediate.UninitializedObject; import org.junit.Test; public class JRTComparisonNumberTest { @Test - public void testIsComparisonNumberAcceptsDecimalForms() { - assertTrue(JRT.isComparisonNumber("0")); - assertTrue(JRT.isComparisonNumber("123")); - assertTrue(JRT.isComparisonNumber("+123")); - assertTrue(JRT.isComparisonNumber("-123")); - assertTrue(JRT.isComparisonNumber("123.45")); - assertTrue(JRT.isComparisonNumber("+.5")); - assertTrue(JRT.isComparisonNumber("5.")); - assertTrue(JRT.isComparisonNumber("1e2")); - assertTrue(JRT.isComparisonNumber("1E2")); - assertTrue(JRT.isComparisonNumber("-1E+2")); - assertTrue(JRT.isComparisonNumber("+1e-2")); + public void testIsParseableNumberAcceptsDecimalForms() { + assertTrue(JRT.isParseableNumber("0", '.')); + assertTrue(JRT.isParseableNumber("123", '.')); + assertTrue(JRT.isParseableNumber("+123", '.')); + assertTrue(JRT.isParseableNumber("-123", '.')); + assertTrue(JRT.isParseableNumber("123.45", '.')); + assertTrue(JRT.isParseableNumber("+.5", '.')); + assertTrue(JRT.isParseableNumber("5.", '.')); + assertTrue(JRT.isParseableNumber("1e2", '.')); + assertTrue(JRT.isParseableNumber("1E2", '.')); + assertTrue(JRT.isParseableNumber("-1E+2", '.')); + assertTrue(JRT.isParseableNumber("+1e-2", '.')); } @Test - public void testIsComparisonNumberRejectsInvalidDecimalForms() { - assertFalse(JRT.isComparisonNumber("")); - assertFalse(JRT.isComparisonNumber("+")); - assertFalse(JRT.isComparisonNumber("-")); - assertFalse(JRT.isComparisonNumber(".")); - assertFalse(JRT.isComparisonNumber("e1")); - assertFalse(JRT.isComparisonNumber("1e")); - assertFalse(JRT.isComparisonNumber("1e+")); - assertFalse(JRT.isComparisonNumber("1e-")); - assertFalse(JRT.isComparisonNumber("1.2.3")); - assertFalse(JRT.isComparisonNumber("123abc")); - assertFalse(JRT.isComparisonNumber("abc123")); + public void testIsParseableNumberRejectsInvalidDecimalForms() { + assertFalse(JRT.isParseableNumber("", '.')); + assertFalse(JRT.isParseableNumber("+", '.')); + assertFalse(JRT.isParseableNumber("-", '.')); + assertFalse(JRT.isParseableNumber(".", '.')); + assertFalse(JRT.isParseableNumber("e1", '.')); + assertFalse(JRT.isParseableNumber("1e", '.')); + assertFalse(JRT.isParseableNumber("1e+", '.')); + assertFalse(JRT.isParseableNumber("1e-", '.')); + assertFalse(JRT.isParseableNumber("1.2.3", '.')); + assertFalse(JRT.isParseableNumber("123abc", '.')); + assertFalse(JRT.isParseableNumber("abc123", '.')); } @Test - public void testIsComparisonNumberRejectsHexadecimal() { - assertFalse(JRT.isComparisonNumber("0x0")); - assertFalse(JRT.isComparisonNumber("0x10")); - assertFalse(JRT.isComparisonNumber("-0x10")); - assertFalse(JRT.isComparisonNumber("+0XFF")); + public void testIsParseableNumberRejectsHexadecimal() { + assertFalse(JRT.isParseableNumber("0x0", '.')); + assertFalse(JRT.isParseableNumber("0x10", '.')); + assertFalse(JRT.isParseableNumber("-0x10", '.')); + assertFalse(JRT.isParseableNumber("+0XFF", '.')); + } + + @Test + public void testIsParseableNumberUsesLocaleDecimalSeparator() { + assertTrue(JRT.isParseableNumber("3,14", ',')); + assertFalse(JRT.isParseableNumber("3.14", ',')); + } + + @Test + public void testStrNumComparesNumericallyAgainstNumber() { + assertTrue(JRT.compare2(new StrNum("9"), 10L, -1)); + assertTrue(JRT.compare2(10L, new StrNum("9"), 1)); + assertTrue(JRT.compare2(new StrNum("3.0"), 3L, 0)); + } + + @Test + public void testPlainStringForcesStringComparison() { + assertFalse(JRT.compare2("9", 10L, -1)); + assertFalse(JRT.compare2(9L, "10", -1)); + assertFalse(JRT.compare2(new StrNum("9"), "10", -1)); + } + + @Test + public void testNonNumericStrNumFallsBackToStringComparison() { + Object value = new StrNum("2x"); + assertTrue(value instanceof StrNum); + assertFalse(JRT.compare2(value, 10L, -1)); + assertEquals(2.0D, JRT.toDouble(value), 0.0D); + } + + @Test + public void testUninitializedEqualsNumericZeroStrNum() { + assertTrue(JRT.compare2(new UninitializedObject(), new StrNum("0.000"), 0)); + assertTrue(JRT.compare2(new StrNum("0.000"), new UninitializedObject(), 0)); + assertFalse(JRT.compare2(new UninitializedObject(), new StrNum("0.000"), -1)); } }