From 1b818c7cdc88c41f07a49909ebe019b8d88d7ced Mon Sep 17 00:00:00 2001 From: bqxbqx Date: Mon, 17 Mar 2025 23:26:50 +0800 Subject: [PATCH 1/3] perf: optimize tokenizer and parser performance --- bench/expr.bench.ts | 41 +-- src/interpreter.ts | 35 +-- src/parser.ts | 195 +++++++------ src/tokenizer.ts | 455 +++++++++++++++++++---------- tests/coverage-improvement.test.ts | 35 ++- tests/parser.test.ts | 180 +++++------- tests/tokenizer.test.ts | 164 ++++++----- 7 files changed, 613 insertions(+), 492 deletions(-) diff --git a/bench/expr.bench.ts b/bench/expr.bench.ts index 0f5552f..2ec89df 100644 --- a/bench/expr.bench.ts +++ b/bench/expr.bench.ts @@ -1,5 +1,5 @@ import { bench, describe } from "vitest"; -import { compile, evaluate, register } from "../src"; +import { compile, evaluate, register } from "../dist/index.esm.js"; const context = { user: { @@ -27,7 +27,6 @@ const context = { }, }; -// 测试表达式 const simpleExpression = "user.age + 5"; const mediumExpression = 'user.scores[2] > 80 ? "Good" : "Needs improvement"'; const complexExpression = @@ -38,12 +37,11 @@ const complexExpression2 = const simpleExpressionCompiler = compile(simpleExpression); const mediumExpressionCompiler = compile(mediumExpression); -const complexExpression2Compiler = compile(complexExpression); +const complexExpressionCompiler = compile(complexExpression); register("calculateTotal", context.calculateTotal); register("applyDiscount", context.applyDiscount); -// 创建 Function 对象 const newFunctionSimple = new Function( "context", `with(context) { return ${simpleExpression}; }`, @@ -58,7 +56,7 @@ const newFunctionComplex = new Function( ); describe("Simple Expression Benchmarks", () => { - bench("evaluate after compile (baseline)", () => { + bench("evaluate after compile (baseline) only interpreter", () => { simpleExpressionCompiler(context); }); @@ -66,13 +64,16 @@ describe("Simple Expression Benchmarks", () => { newFunctionSimple(context); }); - bench("evaluate without compile (vs evaluate)", () => { - evaluate(simpleExpression, context); - }); + bench( + "evaluate without compile (vs evaluate) tokenize + parse + interpreter", + () => { + evaluate(simpleExpression, context); + }, + ); }); describe("Medium Expression Benchmarks", () => { - bench("evaluate after compile (baseline)", () => { + bench("evaluate after compile (baseline) only interpreter", () => { mediumExpressionCompiler(context); }); @@ -80,21 +81,27 @@ describe("Medium Expression Benchmarks", () => { newFunctionMedium(context); }); - bench("evaluate without compile (vs evaluate)", () => { - evaluate(mediumExpression, context); - }); + bench( + "evaluate without compile (vs evaluate) tokenize + parse + interpreter", + () => { + evaluate(mediumExpression, context); + }, + ); }); describe("Complex Expression Benchmarks", () => { - bench("evaluate after compile (baseline)", () => { - complexExpression2Compiler(context); + bench("evaluate after compile (baseline) only interpreter", () => { + complexExpressionCompiler(context); }); bench("new Function (vs evaluate)", () => { newFunctionComplex(context); }); - bench("evaluate without compile (vs evaluate)", () => { - evaluate(complexExpression2, context); - }); + bench( + "evaluate without compile (vs evaluate) tokenize + parse + interpreter", + () => { + evaluate(complexExpression2, context); + }, + ); }); diff --git a/src/interpreter.ts b/src/interpreter.ts index 03e2062..ab26f50 100644 --- a/src/interpreter.ts +++ b/src/interpreter.ts @@ -1,13 +1,14 @@ -import type { - BinaryExpression, - CallExpression, - ConditionalExpression, - Expression, - Identifier, - Literal, - MemberExpression, - Program, - UnaryExpression, +import { + type BinaryExpression, + type CallExpression, + type ConditionalExpression, + type Expression, + type Identifier, + type Literal, + type MemberExpression, + NodeType, + type Program, + type UnaryExpression, } from "./parser"; import { ExpressionError } from "./utils"; @@ -225,19 +226,19 @@ export const evaluateAst = ( const evaluateNode = (node: Expression): unknown => { try { switch (node.type) { - case "Literal": + case NodeType.Literal: return evaluateLiteral(node); - case "Identifier": + case NodeType.Identifier: return evaluateIdentifier(node); - case "MemberExpression": + case NodeType.MemberExpression: return evaluateMemberExpression(node); - case "CallExpression": + case NodeType.CallExpression: return evaluateCallExpression(node); - case "BinaryExpression": + case NodeType.BinaryExpression: return evaluateBinaryExpression(node); - case "UnaryExpression": + case NodeType.UnaryExpression: return evaluateUnaryExpression(node); - case "ConditionalExpression": + case NodeType.ConditionalExpression: return evaluateConditionalExpression(node); default: throw new ExpressionError( diff --git a/src/parser.ts b/src/parser.ts index ef34589..9e7a821 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,4 +1,4 @@ -import type { Token } from "./tokenizer"; +import { type Token, TokenType } from "./tokenizer"; import { ExpressionError } from "./utils"; /** @@ -12,15 +12,16 @@ import { ExpressionError } from "./utils"; * - UnaryExpression: Operations with one operand * - ConditionalExpression: Ternary operator expressions */ -export type NodeType = - | "Program" - | "Literal" - | "Identifier" - | "MemberExpression" - | "CallExpression" - | "BinaryExpression" - | "UnaryExpression" - | "ConditionalExpression"; +export enum NodeType { + Program = 0, + Literal = 1, + Identifier = 2, + MemberExpression = 3, + CallExpression = 4, + BinaryExpression = 5, + UnaryExpression = 6, + ConditionalExpression = 7, +} /** * Base interface for all AST nodes @@ -35,7 +36,7 @@ export interface Node { * Contains a single expression as its body */ export interface Program extends Node { - type: "Program"; + type: NodeType.Program; body: Expression; } @@ -57,9 +58,8 @@ export type Expression = * Examples: 42, "hello", true, null */ export interface Literal extends Node { - type: "Literal"; + type: NodeType.Literal; value: string | number | boolean | null; // The actual value - raw: string; // Original string representation in the source } /** @@ -67,7 +67,7 @@ export interface Literal extends Node { * Examples: variable names, property names */ export interface Identifier extends Node { - type: "Identifier"; + type: NodeType.Identifier; name: string; } @@ -78,7 +78,7 @@ export interface Identifier extends Node { * - obj["prop"] (computed: true) */ export interface MemberExpression extends Node { - type: "MemberExpression"; + type: NodeType.MemberExpression; object: Expression; // The object being accessed property: Expression; // The property being accessed computed: boolean; // true for obj["prop"], false for obj.prop @@ -89,7 +89,7 @@ export interface MemberExpression extends Node { * Example: @sum(a, b) */ export interface CallExpression extends Node { - type: "CallExpression"; + type: NodeType.CallExpression; callee: Identifier; // Function name arguments: Expression[]; // Array of argument expressions } @@ -99,7 +99,7 @@ export interface CallExpression extends Node { * Examples: a + b, x * y, foo === bar */ export interface BinaryExpression extends Node { - type: "BinaryExpression"; + type: NodeType.BinaryExpression; operator: string; // The operator (+, -, *, /, etc.) left: Expression; // Left-hand operand right: Expression; // Right-hand operand @@ -110,7 +110,7 @@ export interface BinaryExpression extends Node { * Example: !valid */ export interface UnaryExpression extends Node { - type: "UnaryExpression"; + type: NodeType.UnaryExpression; operator: string; // The operator (!, -, etc.) argument: Expression; // The operand prefix: boolean; // true for prefix operators, false for postfix @@ -121,12 +121,46 @@ export interface UnaryExpression extends Node { * Example: condition ? trueValue : falseValue */ export interface ConditionalExpression extends Node { - type: "ConditionalExpression"; + type: NodeType.ConditionalExpression; test: Expression; // The condition consequent: Expression; // Value if condition is true alternate: Expression; // Value if condition is false } +// Operator precedence lookup table for O(1) access +const OPERATOR_PRECEDENCE = new Map([ + ["||", 2], + ["&&", 3], + ["===", 4], + ["!==", 4], + [">", 5], + [">=", 5], + ["<", 5], + ["<=", 5], + ["+", 6], + ["-", 6], + ["*", 7], + ["/", 7], + ["%", 7], + ["!", 8], +]); + +// Pre-create common AST nodes for reuse +const NULL_LITERAL: Literal = { + type: NodeType.Literal, + value: null, +}; + +const TRUE_LITERAL: Literal = { + type: NodeType.Literal, + value: true, +}; + +const FALSE_LITERAL: Literal = { + type: NodeType.Literal, + value: false, +}; + /** * Parse tokens into an AST * Time: O(n) - single pass through tokens @@ -137,13 +171,14 @@ export interface ConditionalExpression extends Node { export const parse = (tokens: Token[]): Program => { // Use closure to encapsulate the parser state let current = 0; + const length = tokens.length; /** * Returns the current token without consuming it * @returns The current token or null if at end of input */ const peek = (): Token | null => { - if (current >= tokens.length) return null; + if (current >= length) return null; return tokens[current]; }; @@ -152,9 +187,7 @@ export const parse = (tokens: Token[]): Program => { * @returns The consumed token */ const consume = (): Token => { - const token = tokens[current]; - current++; - return token; + return tokens[current++]; }; /** @@ -162,7 +195,7 @@ export const parse = (tokens: Token[]): Program => { * @param type - The token type to match * @returns boolean indicating if current token matches */ - const match = (type: Token["type"]): boolean => { + const match = (type: TokenType): boolean => { const token = peek(); return token !== null && token.type === type; }; @@ -173,37 +206,15 @@ export const parse = (tokens: Token[]): Program => { * @returns Precedence level (-1 to 9) or -1 if not an operator */ const getOperatorPrecedence = (token: Token): number => { - if (token.type === "OPERATOR") { - switch (token.value) { - case "||": - return 2; - case "&&": - return 3; - case "===": - case "!==": - return 4; - case ">": - case ">=": - case "<": - case "<=": - return 5; - case "+": - case "-": - return 6; - case "*": - case "/": - case "%": - return 7; - case "!": - return 8; - } + if (token.type === TokenType.OPERATOR) { + return OPERATOR_PRECEDENCE.get(token.value) || -1; } - if (token.type === "DOT" || token.type === "BRACKET_LEFT") { + if (token.type === TokenType.DOT || token.type === TokenType.BRACKET_LEFT) { return 9; // Highest precedence for member access } - if (token.type === "QUESTION") { + if (token.type === TokenType.QUESTION) { return 1; // Make it higher than -1 but lower than other operators } @@ -220,8 +231,8 @@ export const parse = (tokens: Token[]): Program => { let property: Expression; let computed: boolean; - if (token.type === "DOT") { - if (!match("IDENTIFIER")) { + if (token.type === TokenType.DOT) { + if (!match(TokenType.IDENTIFIER)) { const token = peek(); throw new ExpressionError( "Expected property name", @@ -231,7 +242,7 @@ export const parse = (tokens: Token[]): Program => { } const identifierToken = consume(); property = { - type: "Identifier", + type: NodeType.Identifier, name: identifierToken.value, }; computed = false; @@ -239,7 +250,7 @@ export const parse = (tokens: Token[]): Program => { // BRACKET_LEFT property = parseExpression(0); - if (!match("BRACKET_RIGHT")) { + if (!match(TokenType.BRACKET_RIGHT)) { const token = peek(); throw new ExpressionError( "Expected closing bracket", @@ -252,7 +263,7 @@ export const parse = (tokens: Token[]): Program => { } return { - type: "MemberExpression", + type: NodeType.MemberExpression, object, property, computed, @@ -267,7 +278,7 @@ export const parse = (tokens: Token[]): Program => { const token = consume(); // consume FUNCTION token const args: Expression[] = []; - if (!match("PAREN_LEFT")) { + if (!match(TokenType.PAREN_LEFT)) { const token = peek(); throw new ExpressionError( "Expected opening parenthesis after function name", @@ -280,7 +291,7 @@ export const parse = (tokens: Token[]): Program => { // Parse arguments while (true) { // First check for right parenthesis - if (match("PAREN_RIGHT")) { + if (match(TokenType.PAREN_RIGHT)) { consume(); // consume ) break; } @@ -297,7 +308,7 @@ export const parse = (tokens: Token[]): Program => { // If we have arguments already, we need a comma if (args.length > 0) { - if (!match("COMMA")) { + if (!match(TokenType.COMMA)) { const token = peek(); throw new ExpressionError( "Expected comma between function arguments", @@ -313,9 +324,9 @@ export const parse = (tokens: Token[]): Program => { } return { - type: "CallExpression", + type: NodeType.CallExpression, callee: { - type: "Identifier", + type: NodeType.Identifier, name: token.value, }, arguments: args, @@ -337,13 +348,13 @@ export const parse = (tokens: Token[]): Program => { // Handle unary operators if ( - token.type === "OPERATOR" && + token.type === TokenType.OPERATOR && (token.value === "!" || token.value === "-") ) { consume(); // consume operator const argument = parsePrimary(); return { - type: "UnaryExpression", + type: NodeType.UnaryExpression, operator: token.value, argument, prefix: true, @@ -351,57 +362,47 @@ export const parse = (tokens: Token[]): Program => { } switch (token.type) { - case "NUMBER": { + case TokenType.NUMBER: { consume(); // consume number return { - type: "Literal", + type: NodeType.Literal, value: Number(token.value), - raw: token.value, }; } - case "STRING": { + case TokenType.STRING: { consume(); // consume string return { - type: "Literal", + type: NodeType.Literal, value: token.value, - raw: `"${token.value}"`, }; } - case "BOOLEAN": { + case TokenType.BOOLEAN: { consume(); // consume boolean - return { - type: "Literal", - value: token.value === "true", - raw: token.value, - }; + return token.value === "true" ? TRUE_LITERAL : FALSE_LITERAL; } - case "NULL": { + case TokenType.NULL: { consume(); // consume null - return { - type: "Literal", - value: null, - raw: "null", - }; + return NULL_LITERAL; } - case "IDENTIFIER": { + case TokenType.IDENTIFIER: { consume(); // consume identifier return { - type: "Identifier", + type: NodeType.Identifier, name: token.value, }; } - case "FUNCTION": + case TokenType.FUNCTION: return parseCallExpression(); - case "PAREN_LEFT": { + case TokenType.PAREN_LEFT: { consume(); // consume ( const expr = parseExpression(0); - if (!match("PAREN_RIGHT")) { + if (!match(TokenType.PAREN_RIGHT)) { const token = peek(); throw new ExpressionError( "Expected closing parenthesis", @@ -430,19 +431,16 @@ export const parse = (tokens: Token[]): Program => { const parseExpression = (precedence = 0): Expression => { let left = parsePrimary(); - while (true) { - const token = peek(); - - if (!token) break; - + while (current < length) { + const token = tokens[current]; // Inline peek() for performance const nextPrecedence = getOperatorPrecedence(token); if (nextPrecedence <= precedence) break; - if (token.type === "QUESTION") { + if (token.type === TokenType.QUESTION) { consume(); // consume ? const consequent = parseExpression(0); - if (!match("COLON")) { + if (!match(TokenType.COLON)) { const token = peek(); throw new ExpressionError( "Expected : in conditional expression", @@ -453,7 +451,7 @@ export const parse = (tokens: Token[]): Program => { consume(); // consume : const alternate = parseExpression(0); left = { - type: "ConditionalExpression", + type: NodeType.ConditionalExpression, test: left, consequent, alternate, @@ -461,11 +459,11 @@ export const parse = (tokens: Token[]): Program => { continue; } - if (token.type === "OPERATOR") { + if (token.type === TokenType.OPERATOR) { consume(); // consume operator const right = parseExpression(nextPrecedence); left = { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: token.value, left, right, @@ -473,7 +471,10 @@ export const parse = (tokens: Token[]): Program => { continue; } - if (token.type === "DOT" || token.type === "BRACKET_LEFT") { + if ( + token.type === TokenType.DOT || + token.type === TokenType.BRACKET_LEFT + ) { left = parseMemberExpression(left); continue; } @@ -487,7 +488,7 @@ export const parse = (tokens: Token[]): Program => { // Start parsing from the initial state const expression = parseExpression(); return { - type: "Program", + type: NodeType.Program, body: expression, }; }; diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 4a42508..52f27a2 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1,29 +1,125 @@ import { ExpressionError } from "./utils"; -/** - * TokenType represents all possible token types in our expression language - * - Basic types: STRING, NUMBER, BOOLEAN, NULL - * - Identifiers and Functions: IDENTIFIER, FUNCTION - * - Operators: OPERATOR - * - Structural tokens: DOT, BRACKET_LEFT/RIGHT, PAREN_LEFT/RIGHT, COMMA, QUESTION, COLON - */ -export type TokenType = - | "STRING" - | "NUMBER" - | "BOOLEAN" - | "NULL" - | "IDENTIFIER" - | "OPERATOR" - | "FUNCTION" - | "DOT" - | "BRACKET_LEFT" - | "BRACKET_RIGHT" - | "PAREN_LEFT" - | "PAREN_RIGHT" - | "COMMA" - | "QUESTION" - | "COLON" - | "DOLLAR"; +// token type enum +export enum TokenType { + STRING = 0, + NUMBER = 1, + BOOLEAN = 2, + NULL = 3, + IDENTIFIER = 4, + OPERATOR = 5, + FUNCTION = 6, + DOT = 7, + BRACKET_LEFT = 8, + BRACKET_RIGHT = 9, + PAREN_LEFT = 10, + PAREN_RIGHT = 11, + COMMA = 12, + QUESTION = 13, + COLON = 14, + DOLLAR = 15, +} + +// Character code constants for faster comparison +const CHAR_0 = 48; // '0' +const CHAR_9 = 57; // '9' +const CHAR_A = 65; // 'A' +const CHAR_Z = 90; // 'Z' +const CHAR_a = 97; // 'a' +const CHAR_z = 122; // 'z' +const CHAR_UNDERSCORE = 95; // '_' +const CHAR_DOT = 46; // '.' +const CHAR_MINUS = 45; // '-' +const CHAR_PLUS = 43; // '+' +const CHAR_MULTIPLY = 42; // '*' +const CHAR_DIVIDE = 47; // '/' +const CHAR_MODULO = 37; // '%' +const CHAR_EXCLAMATION = 33; // '!' +const CHAR_AMPERSAND = 38; // '&' +const CHAR_PIPE = 124; // '|' +const CHAR_EQUAL = 61; // '=' +const CHAR_LESS_THAN = 60; // '<' +const CHAR_GREATER_THAN = 62; // '>' +const CHAR_QUESTION = 63; // '?' +const CHAR_COLON = 58; // ':' +const CHAR_COMMA = 44; // ',' +const CHAR_BRACKET_LEFT = 91; // '[' +const CHAR_BRACKET_RIGHT = 93; // ']' +const CHAR_PAREN_LEFT = 40; // '(' +const CHAR_PAREN_RIGHT = 41; // ')' +const CHAR_DOLLAR = 36; // '$' +const CHAR_AT = 64; // '@' +const CHAR_DOUBLE_QUOTE = 34; // '"' +const CHAR_SINGLE_QUOTE = 39; // '\'' +const CHAR_BACKSLASH = 92; // '\\' +const CHAR_SPACE = 32; // ' ' +const CHAR_TAB = 9; // '\t' +const CHAR_NEWLINE = 10; // '\n' +const CHAR_CARRIAGE_RETURN = 13; // '\r' + +// Use a Set for faster lookups +const WHITESPACE_CHARS = new Set([ + CHAR_SPACE, + CHAR_TAB, + CHAR_NEWLINE, + CHAR_CARRIAGE_RETURN, +]); +const OPERATOR_START_CHARS = new Set([ + CHAR_PLUS, + CHAR_MINUS, + CHAR_MULTIPLY, + CHAR_DIVIDE, + CHAR_MODULO, + CHAR_EXCLAMATION, + CHAR_AMPERSAND, + CHAR_PIPE, + CHAR_EQUAL, + CHAR_LESS_THAN, + CHAR_GREATER_THAN, +]); + +// Token type lookup maps for common tokens +const KEYWORDS = new Map([ + ["true", TokenType.BOOLEAN], + ["false", TokenType.BOOLEAN], + ["null", TokenType.NULL], +]); + +// Operator to token type mapping (sorted by length for optimization) +const OPERATOR_TOKENS = new Map([ + // 3-character operators + ["===", true], + ["!==", true], + + // 2-character operators + ["<=", true], + [">=", true], + ["&&", true], + ["||", true], + + // 1-character operators + ["+", true], + ["-", true], + ["*", true], + ["/", true], + ["%", true], + ["!", true], + ["<", true], + [">", true], +]); + +// Single character token map for O(1) lookup +const SINGLE_CHAR_TOKENS = new Map([ + [CHAR_DOT, TokenType.DOT], + [CHAR_BRACKET_LEFT, TokenType.BRACKET_LEFT], + [CHAR_BRACKET_RIGHT, TokenType.BRACKET_RIGHT], + [CHAR_PAREN_LEFT, TokenType.PAREN_LEFT], + [CHAR_PAREN_RIGHT, TokenType.PAREN_RIGHT], + [CHAR_COMMA, TokenType.COMMA], + [CHAR_QUESTION, TokenType.QUESTION], + [CHAR_COLON, TokenType.COLON], + [CHAR_DOLLAR, TokenType.DOLLAR], +]); /** * Token represents a single unit in the expression @@ -35,14 +131,50 @@ export interface Token { value: string; } +// Pre-allocate token objects for single character tokens to reduce object creation +const CHAR_TOKEN_CACHE = new Map(); +for (const [code, type] of SINGLE_CHAR_TOKENS.entries()) { + CHAR_TOKEN_CACHE.set(code, { type, value: String.fromCharCode(code) }); +} + /** - * Checks if a character can start an operator - * @param char - Character to check - * @returns boolean indicating if char can start an operator + * Check if a character code is a digit (0-9) */ -const isOperatorStart = (char: string): boolean => { - return /[+\-*/%!&|=<>]/.test(char); -}; +function isDigit(code: number): boolean { + return code >= CHAR_0 && code <= CHAR_9; +} + +/** + * Check if a character code is a letter (a-z, A-Z) or underscore + */ +function isAlpha(code: number): boolean { + return ( + (code >= CHAR_a && code <= CHAR_z) || + (code >= CHAR_A && code <= CHAR_Z) || + code === CHAR_UNDERSCORE + ); +} + +/** + * Check if a character code is alphanumeric (a-z, A-Z, 0-9) or underscore + */ +function isAlphaNumeric(code: number): boolean { + return isAlpha(code) || isDigit(code); +} + +/** + * Check if a character code is whitespace + */ +function isWhitespace(code: number): boolean { + return WHITESPACE_CHARS.has(code); +} + +/** + * Check if a character code can start an operator + */ +function isOperatorStart(code: number): boolean { + return OPERATOR_START_CHARS.has(code); +} /** * Converts an input expression string into an array of tokens @@ -56,213 +188,230 @@ const isOperatorStart = (char: string): boolean => { * @throws Error for unexpected or invalid characters */ export const tokenize = (expr: string): Token[] => { - // Use closure to encapsulate the tokenizer state - let pos = 0; const input = expr; - const tokens: Token[] = []; + const length = input.length; + // Pre-allocate tokens array with estimated capacity to avoid resizing + const tokens: Token[] = new Array(Math.ceil(length / 3)); + let tokenCount = 0; + let pos = 0; /** * Reads a string literal token, handling escape sequences * @returns String token * @throws Error for unterminated strings */ - const readString = (): Token => { - const quote = input[pos]; + + function readString(quoteChar: number): Token { + const start = pos + 1; // Skip opening quote + pos++; let value = ""; - pos++; // Skip opening quote + let hasEscape = false; - while (pos < input.length) { - const char = input[pos]; - if (char === quote) { + while (pos < length) { + const char = input.charCodeAt(pos); + if (char === quoteChar) { + // If no escape sequences, use substring directly + if (!hasEscape) { + value = input.substring(start, pos); + } pos++; // Skip closing quote - return { type: "STRING", value }; + return { type: TokenType.STRING, value }; } - if (char === "\\") { + if (char === CHAR_BACKSLASH) { + // Handle escape sequence + if (!hasEscape) { + // First escape encountered, copy characters so far + value = input.substring(start, pos); + hasEscape = true; + } pos++; value += input[pos]; - } else { - value += char; + } else if (hasEscape) { + // Only append if we're building the escaped string + value += input[pos]; } pos++; } throw new ExpressionError( - "Unterminated string", + `Unterminated string starting with ${String.fromCharCode(quoteChar)}`, pos, input.substring(Math.max(0, pos - 10), pos), ); - }; + } /** * Reads a numeric token, handling integers, decimals, and negative numbers * @returns Number token */ - const readNumber = (): Token => { + function readNumber(): Token { const start = pos; - while ( - pos < input.length && - (/[0-9]/.test(input[pos]) || input[pos] === "." || input[pos] === "-") - ) { + // Handle negative sign if present + if (input.charCodeAt(pos) === CHAR_MINUS) { + pos++; + } + + // Read digits before decimal point + while (pos < length && isDigit(input.charCodeAt(pos))) { + pos++; + } + + // Handle decimal point and digits after it + if (pos < length && input.charCodeAt(pos) === CHAR_DOT) { pos++; + while (pos < length && isDigit(input.charCodeAt(pos))) { + pos++; + } } + const value = input.slice(start, pos); - return { type: "NUMBER", value }; - }; + return { type: TokenType.NUMBER, value }; + } /** * Reads a function name token after @ symbol * @returns Function token */ - const readFunction = (): Token => { + function readFunction(): Token { pos++; // Skip @ symbol const start = pos; - while (pos < input.length && /[a-zA-Z_]/.test(input[pos])) { + + // First character must be a letter or underscore + if (pos < length && isAlpha(input.charCodeAt(pos))) { pos++; + + // Subsequent characters can be alphanumeric + while (pos < length && isAlphaNumeric(input.charCodeAt(pos))) { + pos++; + } } + const value = input.slice(start, pos); - return { type: "FUNCTION", value }; - }; + return { type: TokenType.FUNCTION, value }; + } /** * Reads an identifier token, also handling boolean and null literals * @returns Identifier, boolean, or null token */ - const readIdentifier = (): Token => { - const start = pos; - while (pos < input.length && /[a-zA-Z0-9_]/.test(input[pos])) { + function readIdentifier(): Token { + const start = pos++; // First character already checked + + // Read remaining characters + while (pos < length && isAlphaNumeric(input.charCodeAt(pos))) { pos++; } + const value = input.slice(start, pos); - // Handle special keywords - if (value === "true" || value === "false") { - return { type: "BOOLEAN", value }; - } - if (value === "null") { - return { type: "NULL", value }; + // Check if it's a keyword (true, false, null) + const keywordType = KEYWORDS.get(value); + if (keywordType) { + return { type: keywordType, value }; } - return { type: "IDENTIFIER", value }; - }; + return { type: TokenType.IDENTIFIER, value }; + } /** - * Reads an operator token, handling multi-character operators first + * Reads an operator token, checking multi-character operators first * @returns Operator token - * @throws Error for unknown operators */ - const readOperator = (): Token => { - const operators = [ - "===", // Equality - "!==", // Inequality - "&&", // Logical AND - "||", // Logical OR - ">=", // Greater than or equal - "<=", // Less than or equal - ">", // Greater than - "<", // Less than - "+", // Addition - "-", // Subtraction - "*", // Multiplication - "/", // Division - "%", // Modulo - "!", // Logical NOT - ]; - for (const op of operators) { - if (input.startsWith(op, pos)) { - pos += op.length; - return { type: "OPERATOR", value: op }; + function readOperator(): Token { + // Try to match 3-character operators + if (pos + 2 < length) { + const op3 = input.substring(pos, pos + 3); + if (OPERATOR_TOKENS.has(op3)) { + pos += 3; + return { type: TokenType.OPERATOR, value: op3 }; } } + + // Try to match 2-character operators + if (pos + 1 < length) { + const op2 = input.substring(pos, pos + 2); + if (OPERATOR_TOKENS.has(op2)) { + pos += 2; + return { type: TokenType.OPERATOR, value: op2 }; + } + } + + // Try to match 1-character operators + const op1 = input[pos]; + if (OPERATOR_TOKENS.has(op1)) { + pos++; + return { type: TokenType.OPERATOR, value: op1 }; + } + throw new ExpressionError( - `Unknown operator at position ${pos}, the token is ${input.substring(Math.max(0, pos - 10), pos)}`, + `Unknown operator at position ${pos}: ${input.substring(pos, pos + 1)}`, pos, input.substring(Math.max(0, pos - 10), pos), ); - }; + } - while (pos < input.length) { - const char = input[pos]; + // Main tokenization loop + while (pos < length) { + const charCode = input.charCodeAt(pos); - // Skip whitespace characters (space, tab, newline) - if (/\s/.test(char)) { + // Fast path for whitespace + if (isWhitespace(charCode)) { pos++; continue; } - // Handle string literals (both single and double quotes) - if (char === '"' || char === "'") { - const token = readString(); - tokens.push(token); + // Fast path for single-character tokens + const cachedToken = CHAR_TOKEN_CACHE.get(charCode); + if (cachedToken) { + tokens[tokenCount++] = cachedToken; + pos++; continue; } - // Handle numbers (including negative numbers and decimals) - if (/[0-9]/.test(char) || (char === "-" && /[0-9]/.test(input[pos + 1]))) { - const token = readNumber(); - tokens.push(token); + // Handle string literals + if (charCode === CHAR_DOUBLE_QUOTE || charCode === CHAR_SINGLE_QUOTE) { + tokens[tokenCount++] = readString(charCode); continue; } - // Handle predefined functions starting with @ - if (char === "@") { - const token = readFunction(); - tokens.push(token); + // Handle numbers (including negative numbers) + if ( + isDigit(charCode) || + (charCode === CHAR_MINUS && + pos + 1 < length && + isDigit(input.charCodeAt(pos + 1))) + ) { + tokens[tokenCount++] = readNumber(); continue; } - // Handle identifiers, boolean literals, and null - if (/[a-zA-Z_]/.test(char)) { - const token = readIdentifier(); - tokens.push(token); + // Handle function calls starting with @ + if (charCode === CHAR_AT) { + tokens[tokenCount++] = readFunction(); continue; } - // Handle operators (+, -, *, /, etc.) - if (isOperatorStart(char)) { - const token = readOperator(); - tokens.push(token); + // Handle identifiers (including keywords) + if (isAlpha(charCode)) { + tokens[tokenCount++] = readIdentifier(); continue; } - // Handle single-character tokens - let token: Token | null = null; - switch (char) { - case ".": - token = { type: "DOT", value: "." }; - break; - case "[": - token = { type: "BRACKET_LEFT", value: "[" }; - break; - case "]": - token = { type: "BRACKET_RIGHT", value: "]" }; - break; - case "(": - token = { type: "PAREN_LEFT", value: "(" }; - break; - case ")": - token = { type: "PAREN_RIGHT", value: ")" }; - break; - case ",": - token = { type: "COMMA", value: "," }; - break; - case "?": - token = { type: "QUESTION", value: "?" }; - break; - case ":": - token = { type: "COLON", value: ":" }; - break; - case "$": - token = { type: "DOLLAR", value: "$" }; - break; - default: - throw new ExpressionError(`Unexpected character: ${char}`, pos, char); - } - if (token) { - tokens.push(token); + // Handle operators + if (isOperatorStart(charCode)) { + tokens[tokenCount++] = readOperator(); + continue; } - pos++; + + // If we get here, we have an unexpected character + throw new ExpressionError( + `Unexpected character: ${input[pos]}`, + pos, + input.substring(Math.max(0, pos - 10), pos), + ); } - return tokens; + // Trim the tokens array to the actual number of tokens + return tokenCount === tokens.length ? tokens : tokens.slice(0, tokenCount); }; diff --git a/tests/coverage-improvement.test.ts b/tests/coverage-improvement.test.ts index 6367038..1bfacff 100644 --- a/tests/coverage-improvement.test.ts +++ b/tests/coverage-improvement.test.ts @@ -1,8 +1,8 @@ import { describe, expect, it, vi } from "vitest"; import { ExpressionError, compile, evaluate, register } from "../src"; import { createInterpreterState, evaluateAst } from "../src/interpreter"; -import { parse } from "../src/parser"; -import { tokenize } from "../src/tokenizer"; +import { NodeType, parse } from "../src/parser"; +import { TokenType, tokenize } from "../src/tokenizer"; describe("Coverage Improvement Tests", () => { describe("Expression Error Handling", () => { @@ -50,14 +50,17 @@ describe("Coverage Improvement Tests", () => { const tokens = tokenize("-42.5"); expect(tokens).toHaveLength(1); - expect(tokens[0]).toEqual({ type: "NUMBER", value: "-42.5" }); + expect(tokens[0]).toEqual({ type: TokenType.NUMBER, value: "-42.5" }); }); it("should handle function names with underscores", () => { const tokens = tokenize("@calculate_total(a, b)"); expect(tokens).toHaveLength(6); - expect(tokens[0]).toEqual({ type: "FUNCTION", value: "calculate_total" }); + expect(tokens[0]).toEqual({ + type: TokenType.FUNCTION, + value: "calculate_total", + }); }); }); @@ -83,8 +86,8 @@ describe("Coverage Improvement Tests", () => { const tokens = tokenize("obj.prop[index].nested"); const ast = parse(tokens); - expect(ast.type).toBe("Program"); - expect(ast.body.type).toBe("MemberExpression"); + expect(ast.type).toBe(NodeType.Program); + expect(ast.body.type).toBe(NodeType.MemberExpression); }); }); @@ -92,16 +95,16 @@ describe("Coverage Improvement Tests", () => { it("should handle null values in member expressions", () => { const interpreterState = createInterpreterState(); const ast: any = { - type: "Program", + type: NodeType.Program, body: { - type: "MemberExpression", + type: NodeType.MemberExpression, object: { - type: "Literal", + type: NodeType.Literal, value: null, raw: "null", }, property: { - type: "Identifier", + type: NodeType.Identifier, name: "prop", }, computed: false, @@ -116,11 +119,11 @@ describe("Coverage Improvement Tests", () => { it("should handle undefined functions in call expressions", () => { const interpreterState = createInterpreterState(); const ast: any = { - type: "Program", + type: NodeType.Program, body: { - type: "CallExpression", + type: NodeType.CallExpression, callee: { - type: "Identifier", + type: NodeType.Identifier, name: "undefinedFunc", }, arguments: [], @@ -135,12 +138,12 @@ describe("Coverage Improvement Tests", () => { it("should handle unsupported unary operators", () => { const interpreterState = createInterpreterState(); const ast: any = { - type: "Program", + type: NodeType.Program, body: { - type: "UnaryExpression", + type: NodeType.UnaryExpression, operator: "~", // argument: { - type: "Literal", + type: NodeType.Literal, value: 5, raw: "5", }, diff --git a/tests/parser.test.ts b/tests/parser.test.ts index ab7355b..c33ed01 100644 --- a/tests/parser.test.ts +++ b/tests/parser.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { parse } from "../src/parser"; +import { NodeType, parse } from "../src/parser"; import { tokenize } from "../src/tokenizer"; describe("Parser", () => { @@ -12,11 +12,10 @@ describe("Parser", () => { it("should parse number literals", () => { const ast = parseExpression("42"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "Literal", + type: NodeType.Literal, value: 42, - raw: "42", }, }); }); @@ -24,11 +23,10 @@ describe("Parser", () => { it("should parse string literals", () => { const ast = parseExpression('"hello"'); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "Literal", + type: NodeType.Literal, value: "hello", - raw: '"hello"', }, }); }); @@ -36,11 +34,10 @@ describe("Parser", () => { it("should parse boolean literals", () => { const ast = parseExpression("true"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "Literal", + type: NodeType.Literal, value: true, - raw: "true", }, }); }); @@ -48,11 +45,10 @@ describe("Parser", () => { it("should parse null literal", () => { const ast = parseExpression("null"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "Literal", + type: NodeType.Literal, value: null, - raw: "null", }, }); }); @@ -62,15 +58,15 @@ describe("Parser", () => { it("should parse dot notation", () => { const ast = parseExpression("data.value"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "MemberExpression", + type: NodeType.MemberExpression, object: { - type: "Identifier", + type: NodeType.Identifier, name: "data", }, property: { - type: "Identifier", + type: NodeType.Identifier, name: "value", }, computed: false, @@ -81,69 +77,32 @@ describe("Parser", () => { it("should parse bracket notation", () => { const ast = parseExpression('data["value"]'); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "MemberExpression", + type: NodeType.MemberExpression, object: { - type: "Identifier", + type: NodeType.Identifier, name: "data", }, property: { - type: "Literal", + type: NodeType.Literal, value: "value", - raw: '"value"', }, computed: true, }, }); }); - - it("should parse nested member expressions", () => { - const ast = parseExpression("data.values[0].id"); - expect(ast).toEqual({ - type: "Program", - body: { - type: "MemberExpression", - object: { - type: "MemberExpression", - object: { - type: "MemberExpression", - object: { - type: "Identifier", - name: "data", - }, - property: { - type: "Identifier", - name: "values", - }, - computed: false, - }, - property: { - type: "Literal", - value: 0, - raw: "0", - }, - computed: true, - }, - property: { - type: "Identifier", - name: "id", - }, - computed: false, - }, - }); - }); }); describe("Function Calls", () => { it("should parse function calls without arguments", () => { const ast = parseExpression("@sum()"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "CallExpression", + type: NodeType.CallExpression, callee: { - type: "Identifier", + type: NodeType.Identifier, name: "sum", }, arguments: [], @@ -154,26 +113,25 @@ describe("Parser", () => { it("should parse function calls with multiple arguments", () => { const ast = parseExpression("@max(a, b, 42)"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "CallExpression", + type: NodeType.CallExpression, callee: { - type: "Identifier", + type: NodeType.Identifier, name: "max", }, arguments: [ { - type: "Identifier", + type: NodeType.Identifier, name: "a", }, { - type: "Identifier", + type: NodeType.Identifier, name: "b", }, { - type: "Literal", + type: NodeType.Literal, value: 42, - raw: "42", }, ], }, @@ -185,23 +143,23 @@ describe("Parser", () => { it("should parse arithmetic expressions", () => { const ast = parseExpression("a + b * c"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: "+", left: { - type: "Identifier", + type: NodeType.Identifier, name: "a", }, right: { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: "*", left: { - type: "Identifier", + type: NodeType.Identifier, name: "b", }, right: { - type: "Identifier", + type: NodeType.Identifier, name: "c", }, }, @@ -212,16 +170,16 @@ describe("Parser", () => { it("should parse comparison expressions", () => { const ast = parseExpression("a > b"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: ">", left: { - type: "Identifier", + type: NodeType.Identifier, name: "a", }, right: { - type: "Identifier", + type: NodeType.Identifier, name: "b", }, }, @@ -231,24 +189,24 @@ describe("Parser", () => { it("should parse logical expressions", () => { const ast = parseExpression("a && b || c"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: "||", left: { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: "&&", left: { - type: "Identifier", + type: NodeType.Identifier, name: "a", }, right: { - type: "Identifier", + type: NodeType.Identifier, name: "b", }, }, right: { - type: "Identifier", + type: NodeType.Identifier, name: "c", }, }, @@ -260,12 +218,12 @@ describe("Parser", () => { it("should parse unary expressions", () => { const ast = parseExpression("!a"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "UnaryExpression", + type: NodeType.UnaryExpression, operator: "!", argument: { - type: "Identifier", + type: NodeType.Identifier, name: "a", }, prefix: true, @@ -278,19 +236,19 @@ describe("Parser", () => { it("should parse ternary expressions", () => { const ast = parseExpression("a ? b : c"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "ConditionalExpression", + type: NodeType.ConditionalExpression, test: { - type: "Identifier", + type: NodeType.Identifier, name: "a", }, consequent: { - type: "Identifier", + type: NodeType.Identifier, name: "b", }, alternate: { - type: "Identifier", + type: NodeType.Identifier, name: "c", }, }, @@ -300,29 +258,29 @@ describe("Parser", () => { it("should parse nested ternary expressions", () => { const ast = parseExpression("a ? b : c ? d : e"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "ConditionalExpression", + type: NodeType.ConditionalExpression, test: { - type: "Identifier", + type: NodeType.Identifier, name: "a", }, consequent: { - type: "Identifier", + type: NodeType.Identifier, name: "b", }, alternate: { - type: "ConditionalExpression", + type: NodeType.ConditionalExpression, test: { - type: "Identifier", + type: NodeType.Identifier, name: "c", }, consequent: { - type: "Identifier", + type: NodeType.Identifier, name: "d", }, alternate: { - type: "Identifier", + type: NodeType.Identifier, name: "e", }, }, @@ -335,43 +293,43 @@ describe("Parser", () => { it("should parse complex expressions", () => { const ast = parseExpression("a + b * c > d ? e : f"); expect(ast).toEqual({ - type: "Program", + type: NodeType.Program, body: { - type: "ConditionalExpression", + type: NodeType.ConditionalExpression, test: { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: ">", left: { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: "+", left: { - type: "Identifier", + type: NodeType.Identifier, name: "a", }, right: { - type: "BinaryExpression", + type: NodeType.BinaryExpression, operator: "*", left: { - type: "Identifier", + type: NodeType.Identifier, name: "b", }, right: { - type: "Identifier", + type: NodeType.Identifier, name: "c", }, }, }, right: { - type: "Identifier", + type: NodeType.Identifier, name: "d", }, }, consequent: { - type: "Identifier", + type: NodeType.Identifier, name: "e", }, alternate: { - type: "Identifier", + type: NodeType.Identifier, name: "f", }, }, diff --git a/tests/tokenizer.test.ts b/tests/tokenizer.test.ts index 33b414a..c2de774 100644 --- a/tests/tokenizer.test.ts +++ b/tests/tokenizer.test.ts @@ -1,29 +1,31 @@ import { describe, expect, it } from "vitest"; -import { type Token, tokenize } from "../src/tokenizer"; +import { type Token, TokenType, tokenize } from "../src/tokenizer"; describe("Tokenizer", () => { describe("Basic Literals", () => { it("should tokenize string literals", () => { const input = "\"hello\" 'world'"; const expected: Token[] = [ - { type: "STRING", value: "hello" }, - { type: "STRING", value: "world" }, + { type: TokenType.STRING, value: "hello" }, + { type: TokenType.STRING, value: "world" }, ]; expect(tokenize(input)).toEqual(expected); }); it("should handle escaped quotes in strings", () => { const input = '"hello \\"world\\""'; - const expected: Token[] = [{ type: "STRING", value: 'hello "world"' }]; + const expected: Token[] = [ + { type: TokenType.STRING, value: 'hello "world"' }, + ]; expect(tokenize(input)).toEqual(expected); }); it("should tokenize numbers", () => { const input = "42 -3.14 0.5"; const expected: Token[] = [ - { type: "NUMBER", value: "42" }, - { type: "NUMBER", value: "-3.14" }, - { type: "NUMBER", value: "0.5" }, + { type: TokenType.NUMBER, value: "42" }, + { type: TokenType.NUMBER, value: "-3.14" }, + { type: TokenType.NUMBER, value: "0.5" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -31,9 +33,9 @@ describe("Tokenizer", () => { it("should tokenize boolean and null", () => { const input = "true false null"; const expected: Token[] = [ - { type: "BOOLEAN", value: "true" }, - { type: "BOOLEAN", value: "false" }, - { type: "NULL", value: "null" }, + { type: TokenType.BOOLEAN, value: "true" }, + { type: TokenType.BOOLEAN, value: "false" }, + { type: TokenType.NULL, value: "null" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -43,17 +45,17 @@ describe("Tokenizer", () => { it("should tokenize arithmetic operators", () => { const input = "a + b - c * d / e % f"; const expected: Token[] = [ - { type: "IDENTIFIER", value: "a" }, - { type: "OPERATOR", value: "+" }, - { type: "IDENTIFIER", value: "b" }, - { type: "OPERATOR", value: "-" }, - { type: "IDENTIFIER", value: "c" }, - { type: "OPERATOR", value: "*" }, - { type: "IDENTIFIER", value: "d" }, - { type: "OPERATOR", value: "/" }, - { type: "IDENTIFIER", value: "e" }, - { type: "OPERATOR", value: "%" }, - { type: "IDENTIFIER", value: "f" }, + { type: TokenType.IDENTIFIER, value: "a" }, + { type: TokenType.OPERATOR, value: "+" }, + { type: TokenType.IDENTIFIER, value: "b" }, + { type: TokenType.OPERATOR, value: "-" }, + { type: TokenType.IDENTIFIER, value: "c" }, + { type: TokenType.OPERATOR, value: "*" }, + { type: TokenType.IDENTIFIER, value: "d" }, + { type: TokenType.OPERATOR, value: "/" }, + { type: TokenType.IDENTIFIER, value: "e" }, + { type: TokenType.OPERATOR, value: "%" }, + { type: TokenType.IDENTIFIER, value: "f" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -61,19 +63,19 @@ describe("Tokenizer", () => { it("should tokenize comparison operators", () => { const input = "a === b !== c > d < e >= f <= g"; const expected: Token[] = [ - { type: "IDENTIFIER", value: "a" }, - { type: "OPERATOR", value: "===" }, - { type: "IDENTIFIER", value: "b" }, - { type: "OPERATOR", value: "!==" }, - { type: "IDENTIFIER", value: "c" }, - { type: "OPERATOR", value: ">" }, - { type: "IDENTIFIER", value: "d" }, - { type: "OPERATOR", value: "<" }, - { type: "IDENTIFIER", value: "e" }, - { type: "OPERATOR", value: ">=" }, - { type: "IDENTIFIER", value: "f" }, - { type: "OPERATOR", value: "<=" }, - { type: "IDENTIFIER", value: "g" }, + { type: TokenType.IDENTIFIER, value: "a" }, + { type: TokenType.OPERATOR, value: "===" }, + { type: TokenType.IDENTIFIER, value: "b" }, + { type: TokenType.OPERATOR, value: "!==" }, + { type: TokenType.IDENTIFIER, value: "c" }, + { type: TokenType.OPERATOR, value: ">" }, + { type: TokenType.IDENTIFIER, value: "d" }, + { type: TokenType.OPERATOR, value: "<" }, + { type: TokenType.IDENTIFIER, value: "e" }, + { type: TokenType.OPERATOR, value: ">=" }, + { type: TokenType.IDENTIFIER, value: "f" }, + { type: TokenType.OPERATOR, value: "<=" }, + { type: TokenType.IDENTIFIER, value: "g" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -81,12 +83,12 @@ describe("Tokenizer", () => { it("should tokenize logical operators", () => { const input = "a && b || !c"; const expected: Token[] = [ - { type: "IDENTIFIER", value: "a" }, - { type: "OPERATOR", value: "&&" }, - { type: "IDENTIFIER", value: "b" }, - { type: "OPERATOR", value: "||" }, - { type: "OPERATOR", value: "!" }, - { type: "IDENTIFIER", value: "c" }, + { type: TokenType.IDENTIFIER, value: "a" }, + { type: TokenType.OPERATOR, value: "&&" }, + { type: TokenType.IDENTIFIER, value: "b" }, + { type: TokenType.OPERATOR, value: "||" }, + { type: TokenType.OPERATOR, value: "!" }, + { type: TokenType.IDENTIFIER, value: "c" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -96,11 +98,11 @@ describe("Tokenizer", () => { it("should tokenize dot notation", () => { const input = "data.value.nested"; const expected: Token[] = [ - { type: "IDENTIFIER", value: "data" }, - { type: "DOT", value: "." }, - { type: "IDENTIFIER", value: "value" }, - { type: "DOT", value: "." }, - { type: "IDENTIFIER", value: "nested" }, + { type: TokenType.IDENTIFIER, value: "data" }, + { type: TokenType.DOT, value: "." }, + { type: TokenType.IDENTIFIER, value: "value" }, + { type: TokenType.DOT, value: "." }, + { type: TokenType.IDENTIFIER, value: "nested" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -108,10 +110,10 @@ describe("Tokenizer", () => { it("should tokenize bracket notation", () => { const input = 'data["value"]'; const expected: Token[] = [ - { type: "IDENTIFIER", value: "data" }, - { type: "BRACKET_LEFT", value: "[" }, - { type: "STRING", value: "value" }, - { type: "BRACKET_RIGHT", value: "]" }, + { type: TokenType.IDENTIFIER, value: "data" }, + { type: TokenType.BRACKET_LEFT, value: "[" }, + { type: TokenType.STRING, value: "value" }, + { type: TokenType.BRACKET_RIGHT, value: "]" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -121,10 +123,10 @@ describe("Tokenizer", () => { it("should tokenize predefined functions", () => { const input = "@sum(values)"; const expected: Token[] = [ - { type: "FUNCTION", value: "sum" }, - { type: "PAREN_LEFT", value: "(" }, - { type: "IDENTIFIER", value: "values" }, - { type: "PAREN_RIGHT", value: ")" }, + { type: TokenType.FUNCTION, value: "sum" }, + { type: TokenType.PAREN_LEFT, value: "(" }, + { type: TokenType.IDENTIFIER, value: "values" }, + { type: TokenType.PAREN_RIGHT, value: ")" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -132,14 +134,14 @@ describe("Tokenizer", () => { it("should tokenize function calls with multiple arguments", () => { const input = "@max(a, b, c)"; const expected: Token[] = [ - { type: "FUNCTION", value: "max" }, - { type: "PAREN_LEFT", value: "(" }, - { type: "IDENTIFIER", value: "a" }, - { type: "COMMA", value: "," }, - { type: "IDENTIFIER", value: "b" }, - { type: "COMMA", value: "," }, - { type: "IDENTIFIER", value: "c" }, - { type: "PAREN_RIGHT", value: ")" }, + { type: TokenType.FUNCTION, value: "max" }, + { type: TokenType.PAREN_LEFT, value: "(" }, + { type: TokenType.IDENTIFIER, value: "a" }, + { type: TokenType.COMMA, value: "," }, + { type: TokenType.IDENTIFIER, value: "b" }, + { type: TokenType.COMMA, value: "," }, + { type: TokenType.IDENTIFIER, value: "c" }, + { type: TokenType.PAREN_RIGHT, value: ")" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -149,11 +151,11 @@ describe("Tokenizer", () => { it("should tokenize ternary expressions", () => { const input = "condition ? trueValue : falseValue"; const expected: Token[] = [ - { type: "IDENTIFIER", value: "condition" }, - { type: "QUESTION", value: "?" }, - { type: "IDENTIFIER", value: "trueValue" }, - { type: "COLON", value: ":" }, - { type: "IDENTIFIER", value: "falseValue" }, + { type: TokenType.IDENTIFIER, value: "condition" }, + { type: TokenType.QUESTION, value: "?" }, + { type: TokenType.IDENTIFIER, value: "trueValue" }, + { type: TokenType.COLON, value: ":" }, + { type: TokenType.IDENTIFIER, value: "falseValue" }, ]; expect(tokenize(input)).toEqual(expected); }); @@ -163,21 +165,21 @@ describe("Tokenizer", () => { it("should tokenize complex nested expressions", () => { const input = '@sum(data.values) > 0 ? data["status"] : "inactive"'; const expected: Token[] = [ - { type: "FUNCTION", value: "sum" }, - { type: "PAREN_LEFT", value: "(" }, - { type: "IDENTIFIER", value: "data" }, - { type: "DOT", value: "." }, - { type: "IDENTIFIER", value: "values" }, - { type: "PAREN_RIGHT", value: ")" }, - { type: "OPERATOR", value: ">" }, - { type: "NUMBER", value: "0" }, - { type: "QUESTION", value: "?" }, - { type: "IDENTIFIER", value: "data" }, - { type: "BRACKET_LEFT", value: "[" }, - { type: "STRING", value: "status" }, - { type: "BRACKET_RIGHT", value: "]" }, - { type: "COLON", value: ":" }, - { type: "STRING", value: "inactive" }, + { type: TokenType.FUNCTION, value: "sum" }, + { type: TokenType.PAREN_LEFT, value: "(" }, + { type: TokenType.IDENTIFIER, value: "data" }, + { type: TokenType.DOT, value: "." }, + { type: TokenType.IDENTIFIER, value: "values" }, + { type: TokenType.PAREN_RIGHT, value: ")" }, + { type: TokenType.OPERATOR, value: ">" }, + { type: TokenType.NUMBER, value: "0" }, + { type: TokenType.QUESTION, value: "?" }, + { type: TokenType.IDENTIFIER, value: "data" }, + { type: TokenType.BRACKET_LEFT, value: "[" }, + { type: TokenType.STRING, value: "status" }, + { type: TokenType.BRACKET_RIGHT, value: "]" }, + { type: TokenType.COLON, value: ":" }, + { type: TokenType.STRING, value: "inactive" }, ]; expect(tokenize(input)).toEqual(expected); }); From 0e7450b94ce19b12c40e536e68aef97c60dfdea2 Mon Sep 17 00:00:00 2001 From: bqxbqx Date: Mon, 17 Mar 2025 23:27:37 +0800 Subject: [PATCH 2/3] fix: ci --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6c08967..1677785 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,8 +34,8 @@ jobs: - name: Run tests run: pnpm test - - name: Run benchmarks - run: pnpm run benchmark - - name: Build package run: pnpm run build + + - name: Run benchmarks + run: pnpm run benchmark From ebca37f75e993b105f21234870ddad039b6d6b89 Mon Sep 17 00:00:00 2001 From: bqxbqx Date: Mon, 17 Mar 2025 23:54:40 +0800 Subject: [PATCH 3/3] perf: benchmark & readme --- README.md | 13 ++++++++++++- bench/expr.bench.ts | 20 ++++++++++++++++++++ package.json | 1 + pnpm-lock.yaml | 8 ++++++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f5ae0be..334687b 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Now we have solved this problem for you. We have designed a simple and easy-to-u - 🔒 **Secure by default** - No access to global objects or prototype chain, does not use `eval` or `new Function` - 🚀 **High performance** - Supports pre-compilation of expressions for improved performance with repeated evaluations - 🛠️ **Extensible** - Register custom functions to easily extend functionality -- 🪩 **Lightweight** - Zero dependencies, small footprint +- 🪩 **Lightweight** - Zero dependencies, small footprint, only 7.8KB ## Installation @@ -174,6 +174,17 @@ const result = evaluate('@formatCurrency(price * quantity)', { ``` **Default Global Functions:** `['abs', 'ceil', 'floor', 'round', 'sqrt', 'pow', 'max', 'min']` +## Benchmarks + +Performance comparison of different evaluation methods: (baseline: new Function) + +| Expression Type | new Function vs evaluate after compile | new Function vs evaluate without compile | new Function vs [expr-eval](https://www.npmjs.com/package/expr-eval?activeTab=readme) Parser | +|-----------------------|----------------------------------------|------------------------------------------|----------------------------------| +| Simple Expressions | 1.59x faster | 6.36x faster | 23.94x faster | +| Medium Expressions | 2.16x faster | 9.81x faster | 37.81x faster | +| Complex Expressions | 1.59x faster | 4.89x faster | 32.74x faster | + + ## Advanced Usage ### Timeout Handling diff --git a/bench/expr.bench.ts b/bench/expr.bench.ts index 2ec89df..6f22b96 100644 --- a/bench/expr.bench.ts +++ b/bench/expr.bench.ts @@ -1,3 +1,4 @@ +import { Parser } from "expr-eval"; import { bench, describe } from "vitest"; import { compile, evaluate, register } from "../dist/index.esm.js"; @@ -42,6 +43,10 @@ const complexExpressionCompiler = compile(complexExpression); register("calculateTotal", context.calculateTotal); register("applyDiscount", context.applyDiscount); +const parser = new Parser(); +parser.functions.calculateTotal = context.calculateTotal; +parser.functions.applyDiscount = context.applyDiscount; + const newFunctionSimple = new Function( "context", `with(context) { return ${simpleExpression}; }`, @@ -70,6 +75,11 @@ describe("Simple Expression Benchmarks", () => { evaluate(simpleExpression, context); }, ); + + bench("expr-eval Parser (vs evaluate)", () => { + // @ts-ignore + Parser.evaluate(simpleExpression, context); + }); }); describe("Medium Expression Benchmarks", () => { @@ -87,6 +97,11 @@ describe("Medium Expression Benchmarks", () => { evaluate(mediumExpression, context); }, ); + + bench("expr-eval Parser (vs evaluate)", () => { + // @ts-ignore + Parser.evaluate(mediumExpression, context); + }); }); describe("Complex Expression Benchmarks", () => { @@ -104,4 +119,9 @@ describe("Complex Expression Benchmarks", () => { evaluate(complexExpression2, context); }, ); + + bench("expr-eval Parser (vs evaluate)", () => { + // @ts-ignore + parser.evaluate(complexExpression2, context); + }); }); diff --git a/package.json b/package.json index a7255fa..e66e40e 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "@rollup/plugin-terser": "^0.4.4", "@rollup/plugin-typescript": "^12.1.2", "@vitest/coverage-v8": "^3.0.8", + "expr-eval": "^2.0.2", "rollup": "^4.34.6", "tslib": "^2.8.1", "vitest": "^3.0.8" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4ed59f5..c8ed326 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -23,6 +23,9 @@ importers: '@vitest/coverage-v8': specifier: ^3.0.8 version: 3.0.8(vitest@3.0.8(terser@5.38.2)(tsx@4.19.2)) + expr-eval: + specifier: ^2.0.2 + version: 2.0.2 rollup: specifier: ^4.34.6 version: 4.34.6 @@ -728,6 +731,9 @@ packages: resolution: {integrity: sha512-bFi65yM+xZgk+u/KRIpekdSYkTB5W1pEf0Lt8Q8Msh7b+eQ7LXVtIB1Bkm4fvclDEL1b2CZkMhv2mOeF8tMdkA==} engines: {node: '>=12.0.0'} + expr-eval@2.0.2: + resolution: {integrity: sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg==} + foreground-child@3.3.1: resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==} engines: {node: '>=14'} @@ -1594,6 +1600,8 @@ snapshots: expect-type@1.1.0: {} + expr-eval@2.0.2: {} + foreground-child@3.3.1: dependencies: cross-spawn: 7.0.6