diff --git a/.gitignore b/.gitignore index 523115e..2f296cc 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ out !.vscode/extensions.json !.vscode/launch.json !.vscode/tasks.json + +# Integration test temporary files +syntaxes/test/.temp diff --git a/CHANGELOG.md b/CHANGELOG.md index a19b66a..59e8c81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ The format is based on [Keep a Changelog], and this project adheres to [Semantic [Keep a Changelog]: https://keepachangelog.com/en/1.0.0/ [Semantic Versioning]: https://semver.org/spec/v2.0.0.html +## Unreleased + +### Changed + +- Improved syntax highlighting for network rules and modifiers [#93], [#120], [#131], [#137]. + +### Fixed + +- Highlighting for cosmetic rules modifiers [#134], [#151]. + +[#120]: https://github.com/AdguardTeam/VscodeAdblockSyntax/issues/120 +[#131]: https://github.com/AdguardTeam/VscodeAdblockSyntax/issues/131 +[#134]: https://github.com/AdguardTeam/VscodeAdblockSyntax/issues/134 +[#137]: https://github.com/AdguardTeam/VscodeAdblockSyntax/issues/137 +[#151]: https://github.com/AdguardTeam/VscodeAdblockSyntax/issues/151 + ## [2.1.4] (prerelease) - 2025-12-26 ### Added diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 33c72f1..27e891e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -6,21 +6,6 @@ settings: catalogs: default: - '@adguard/aglint': - specifier: ^4.0.0-beta.3 - version: 4.0.0-beta.3 - '@adguard/agtree': - specifier: ^2.3.0 - version: 2.3.0 - '@adguard/changelog-tools': - specifier: ^0.0.1 - version: 0.0.1 - '@rspack/cli': - specifier: ^1.6.7 - version: 1.6.7 - '@rspack/core': - specifier: ^1.1.7 - version: 1.6.7 '@types/fs-extra': specifier: ^11.0.4 version: 11.0.4 @@ -30,141 +15,45 @@ catalogs: '@types/plist': specifier: ^3.0.5 version: 3.0.5 - '@types/resolve': - specifier: ^1.20.6 - version: 1.20.6 - '@types/semver': - specifier: ^7.7.1 - version: 7.7.1 - '@types/vscode': - specifier: ^1.74.0 - version: 1.99.0 - '@typescript-eslint/eslint-plugin': - 
specifier: ^7.5.0 - version: 7.18.0 - '@typescript-eslint/parser': - specifier: ^7.5.0 - version: 7.18.0 - '@vitest/coverage-v8': - specifier: ^4.0.15 - version: 4.0.15 '@vitest/expect': specifier: ^4.0.15 version: 4.0.15 - '@vscode/vsce': - specifier: ^3.7.1 - version: 3.7.1 + chalk: + specifier: ^5.6.2 + version: 5.6.2 chokidar: specifier: ^5.0.0 version: 5.0.0 - ci-info: - specifier: ^4.3.1 - version: 4.3.1 - cross-env: - specifier: ^10.1.0 - version: 10.1.0 - debounce: - specifier: ^3.0.0 - version: 3.0.0 - eslint: - specifier: 8.57.1 - version: 8.57.1 - eslint-config-airbnb-base: - specifier: ^15.0.0 - version: 15.0.0 - eslint-config-airbnb-typescript: - specifier: ^18.0.0 - version: 18.0.0 - eslint-plugin-boundaries: - specifier: ^5.3.1 - version: 5.3.1 - eslint-plugin-import: - specifier: ^2.32.0 - version: 2.32.0 - eslint-plugin-import-newlines: - specifier: ^1.4.0 - version: 1.4.0 - eslint-plugin-jsdoc: - specifier: ^61.5.0 - version: 61.5.0 - eslint-plugin-n: - specifier: ^17.23.1 - version: 17.23.1 fast-glob: - specifier: ^3.3.3 + specifier: 3.3.3 version: 3.3.3 fs-extra: specifier: ^11.3.1 version: 11.3.2 - husky: - specifier: ^9.1.7 - version: 9.1.7 - jsdoc: - specifier: ^4.0.5 - version: 4.0.5 - lint-staged: - specifier: ^16.2.7 - version: 16.2.7 - markdownlint: - specifier: ^0.40.0 - version: 0.40.0 - markdownlint-cli: - specifier: ^0.46.0 - version: 0.46.0 - mkdirp: - specifier: ^3.0.1 - version: 3.0.1 - ovsx: - specifier: ^0.10.7 - version: 0.10.7 plist: specifier: ^3.1.0 version: 3.1.0 - preferred-pm: - specifier: ^4.1.1 - version: 4.1.1 - resolve: - specifier: ^1.22.11 - version: 1.22.11 rimraf: specifier: ^6.1.2 version: 6.1.2 - semver: - specifier: ^7.7.3 - version: 7.7.3 + tar: + specifier: ^7.5.6 + version: 7.5.6 tsc-files: specifier: 1.1.4 version: 1.1.4 - tsx: - specifier: ^4.21.0 - version: 4.21.0 - typescript: - specifier: ^5.9.3 - version: 5.9.3 valibot: specifier: ^1.2.0 version: 1.2.0 vitest: specifier: ^4.0.15 version: 4.0.15 - 
vscode-languageclient: - specifier: 10.0.0-next.15 - version: 10.0.0-next.15 - vscode-languageserver: - specifier: 10.0.0-next.13 - version: 10.0.0-next.13 - vscode-languageserver-textdocument: - specifier: ^1.0.12 - version: 1.0.12 vscode-oniguruma: specifier: ^2.0.1 version: 2.0.1 vscode-textmate: specifier: ^9.3.0 version: 9.3.0 - vscode-uri: - specifier: ^3.1.0 - version: 3.1.0 yaml: specifier: ^2.8.2 version: 2.8.2 @@ -388,9 +277,15 @@ importers: '@vitest/expect': specifier: 'catalog:' version: 4.0.15 + chalk: + specifier: 'catalog:' + version: 5.6.2 chokidar: specifier: 'catalog:' version: 5.0.0 + fast-glob: + specifier: 'catalog:' + version: 3.3.3 fs-extra: specifier: 'catalog:' version: 11.3.2 @@ -400,6 +295,9 @@ importers: rimraf: specifier: 'catalog:' version: 6.1.2 + tar: + specifier: 'catalog:' + version: 7.5.6 tsc-files: specifier: 'catalog:' version: 1.1.4(typescript@5.9.3) @@ -1002,6 +900,10 @@ packages: resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==} engines: {node: '>=12'} + '@isaacs/fs-minipass@4.0.1': + resolution: {integrity: sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==} + engines: {node: '>=18.0.0'} + '@jridgewell/resolve-uri@3.1.2': resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} engines: {node: '>=6.0.0'} @@ -2117,6 +2019,10 @@ packages: chownr@1.1.4: resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} + chownr@3.0.0: + resolution: {integrity: sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==} + engines: {node: '>=18'} + ci-info@2.0.0: resolution: {integrity: sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ==} @@ -3655,6 +3561,10 @@ packages: resolution: {integrity: 
sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==} engines: {node: '>=16 || 14 >=14.17'} + minizlib@3.1.0: + resolution: {integrity: sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==} + engines: {node: '>= 18'} + mkdirp-classic@0.5.3: resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} @@ -4452,6 +4362,10 @@ packages: resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==} engines: {node: '>=6'} + tar@7.5.6: + resolution: {integrity: sha512-xqUeu2JAIJpXyvskvU3uvQW8PAmHrtXp2KDuMJwQqW8Sqq0CaZBAQ+dKS3RBXVhU4wC5NjAdKrmh84241gO9cA==} + engines: {node: '>=18'} + terminal-link@2.1.1: resolution: {integrity: sha512-un0FmiRUQNr5PJqy9kP7c40F5BOfpGlYTrxonDChEZB7pzZxRNp/bt+ymiy9/npwXya9KH99nJ/GXFIiUkYGFQ==} engines: {node: '>=8'} @@ -4859,6 +4773,7 @@ packages: whatwg-encoding@3.1.1: resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} engines: {node: '>=18'} + deprecated: Use @exodus/bytes instead for a more spec-conformant and faster implementation whatwg-mimetype@4.0.0: resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==} @@ -4965,6 +4880,10 @@ packages: yallist@4.0.0: resolution: {integrity: sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==} + yallist@5.0.0: + resolution: {integrity: sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==} + engines: {node: '>=18'} + yaml@2.8.2: resolution: {integrity: sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==} engines: {node: '>= 14.6'} @@ -5509,6 +5428,10 @@ snapshots: wrap-ansi: 8.1.0 wrap-ansi-cjs: wrap-ansi@7.0.0 + 
'@isaacs/fs-minipass@4.0.1': + dependencies: + minipass: 7.1.2 + '@jridgewell/resolve-uri@3.1.2': {} '@jridgewell/sourcemap-codec@1.5.5': {} @@ -6708,6 +6631,8 @@ snapshots: chownr@1.1.4: optional: true + chownr@3.0.0: {} + ci-info@2.0.0: {} ci-info@4.3.1: {} @@ -8597,6 +8522,10 @@ snapshots: minipass@7.1.2: {} + minizlib@3.1.0: + dependencies: + minipass: 7.1.2 + mkdirp-classic@0.5.3: optional: true @@ -9538,6 +9467,14 @@ snapshots: readable-stream: 3.6.2 optional: true + tar@7.5.6: + dependencies: + '@isaacs/fs-minipass': 4.0.1 + chownr: 3.0.0 + minipass: 7.1.2 + minizlib: 3.1.0 + yallist: 5.0.0 + terminal-link@2.1.1: dependencies: ansi-escapes: 4.3.2 @@ -10106,6 +10043,8 @@ snapshots: yallist@4.0.0: {} + yallist@5.0.0: {} + yaml@2.8.2: {} yauzl-promise@4.0.0: diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 6d11720..2da08f6 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -5,24 +5,24 @@ packages: - syntaxes - tools - test/static/aglint - catalog: - '@adguard/aglint': ^4.0.0-beta.3 - '@adguard/agtree': ^2.3.0 - '@adguard/changelog-tools': ^0.0.1 - '@rspack/cli': ^1.6.7 - '@rspack/core': ^1.1.7 - '@types/fs-extra': ^11.0.4 - '@types/node': ^20.19.26 - '@types/plist': ^3.0.5 - '@types/resolve': ^1.20.6 - '@types/semver': ^7.7.1 - '@types/vscode': ^1.74.0 - '@typescript-eslint/eslint-plugin': ^7.5.0 - '@typescript-eslint/parser': ^7.5.0 - '@vitest/coverage-v8': ^4.0.15 - '@vitest/expect': ^4.0.15 - '@vscode/vsce': ^3.7.1 + "@adguard/aglint": ^4.0.0-beta.3 + "@adguard/agtree": ^2.3.0 + "@adguard/changelog-tools": ^0.0.1 + "@rspack/cli": ^1.6.7 + "@rspack/core": ^1.1.7 + "@types/fs-extra": ^11.0.4 + "@types/node": ^20.19.26 + "@types/plist": ^3.0.5 + "@types/resolve": ^1.20.6 + "@types/semver": ^7.7.1 + "@types/vscode": ^1.74.0 + "@typescript-eslint/eslint-plugin": ^7.5.0 + "@typescript-eslint/parser": ^7.5.0 + "@vitest/coverage-v8": ^4.0.15 + "@vitest/expect": ^4.0.15 + "@vscode/vsce": ^3.7.1 + chalk: ^5.6.2 chokidar: ^5.0.0 ci-info: ^4.3.1 
cross-env: ^10.1.0 @@ -35,7 +35,7 @@ catalog: eslint-plugin-import-newlines: ^1.4.0 eslint-plugin-jsdoc: ^61.5.0 eslint-plugin-n: ^17.23.1 - fast-glob: ^3.3.3 + fast-glob: 3.3.3 fs-extra: ^11.3.1 husky: ^9.1.7 jsdoc: ^4.0.5 @@ -49,6 +49,7 @@ catalog: resolve: ^1.22.11 rimraf: ^6.1.2 semver: ^7.7.3 + tar: ^7.5.6 tsc-files: 1.1.4 tsx: ^4.21.0 typescript: ^5.9.3 diff --git a/syntaxes/.lintstagedrc.js b/syntaxes/.lintstagedrc.js index da27208..d50a41c 100644 --- a/syntaxes/.lintstagedrc.js +++ b/syntaxes/.lintstagedrc.js @@ -12,7 +12,7 @@ const makeRelative = (file) => path.relative(process.cwd(), file); module.exports = { // run tests if the grammar file are changed - 'adblock.yaml-tmlanguage': 'vitest run', + 'adblock.yaml-tmlanguage': () => 'vitest run', '**/*.md': 'markdownlint', '**/*.js': 'eslint --cache', '**/*.ts': [ diff --git a/syntaxes/adblock.yaml-tmlanguage b/syntaxes/adblock.yaml-tmlanguage index f9529ce..c30ed68 100644 --- a/syntaxes/adblock.yaml-tmlanguage +++ b/syntaxes/adblock.yaml-tmlanguage @@ -18,6 +18,7 @@ --- name: Adblock scopeName: text.adblock + patterns: - include: "#adblockAgent" - include: "#preprocessor" @@ -32,6 +33,7 @@ patterns: - include: "#basicRulesNoUrl" - include: "#basicRulesRegex" - include: "#basicRules" + repository: adblockAgent: patterns: @@ -48,6 +50,7 @@ repository: match: ".*" "3": name: punctuation.definition.array.end.adblock.agent + adblockData: patterns: - match: |- @@ -65,6 +68,7 @@ repository: name: constant.language.agent.adblocker.name "2": name: constant.numeric.decimal + preprocessor: patterns: - match: "^(!#if)(\\s)(.*)$" @@ -144,6 +148,7 @@ repository: match: "(\\(|\\)|,)" - match: "^!#(?!#).+$" name: invalid.illegal.preprocessor + comments: patterns: - name: comment.line.exclamation-sign @@ -154,9 +159,24 @@ repository: # marked as a comment. 
# TODO: If a new cosmetic rule separator is added, it should be added here too match: "^#(?!(?:@?(?:\\$?\\?|\\$|%)?#)).*" + elemhideRules: patterns: - - match: "^(\\[.+?\\])?(.*?)(#@?\\??#\\^?)(.+)$" + - match: |- + (?x) + ^ # Start of the line + ( # Group 1. Cosmetic rule options (optional) + \[ # Opening bracket + (?: # One or more of: + \/(?:\\.|[^\/\\])*\/[a-z]* # Regex pattern (handles ] inside regex like /[class]/) + |[^\]] # OR any character except closing bracket + )+ + \] # Closing bracket + )? + (.*?) # Group 2. Domain list + (\#@?\??\#\^?) # Group 3. Cosmetic rule separator + (.+) # Group 4. CSS selector + $ # End of the line captures: "1": patterns: @@ -169,6 +189,7 @@ repository: "4": patterns: - include: "#cssSelector" + snippetRulesABP: patterns: - match: "^(.*?)(#@?\\$#)([^{]+)$" @@ -180,6 +201,7 @@ repository: name: keyword.control.adblock "3": name: constant.character.snippet.adblock + cssRules: patterns: - match: "^(\\[.+?\\])?(.*?)(#@?\\$\\??#)(.+)$" @@ -195,17 +217,24 @@ repository: "4": patterns: - include: "#cssStyle" + contentRules: patterns: - match: |- (?x) - ^ # Start of the line - \s* # Optional leading whitespace - (\[.+?\])? # Group 1. AdGuard modifier list - (.*)? # Group 2. Domain list - (\$@?\$) # Group 3. Cosmetic rule marker - (.+) # Group 4. CSS selector - $ # End of the line + ^ # Start of the line + ( # Group 1. Cosmetic rule options (optional) + \[ # Opening bracket + (?: # One or more of: + \/(?:\\.|[^\/\\])*\/[a-z]* # Regex pattern (handles ] inside regex like /[class]/) + |[^\]] # OR any character except closing bracket + )+ + \] # Closing bracket + )? + (.*?) # Group 2. Domain list + (\$@?\$) # Group 3. Cosmetic rule marker + (.+) # Group 4. 
CSS selector + $ # End of the line captures: "1": patterns: @@ -218,25 +247,33 @@ repository: "4": patterns: - include: "#cssSelector" + scriptletRules: patterns: - include: "#exceptionScriptletRules" - include: "#blockingScriptletRules" + exceptionScriptletRules: patterns: - match: |- (?x) - ^ # Start of the line - \s* # Optional leading whitespace - (\[.+?\])? # Group 1. AdGuard modifier list - (.*)? # Group 2. Domain list - (\#@%\#) # Group 3. Cosmetic rule marker - (\/\/scriptlet) # Group 4. Scriptlet marker - (\() # Group 5. Opening parenthesis - (.*)? # Group 6. Arguments - (\)) # Group 7. Closing parenthesis - \s* # Optional trailing whitespace - $ # End of the line + ^ # Start of the line + ( # Group 1. Cosmetic rule options (optional) + \[ # Opening bracket + (?: # One or more of: + \/(?:\\.|[^\/\\])*\/[a-z]* # Regex pattern (handles ] inside regex like /[class]/) + |[^\]] # OR any character except closing bracket + )+ + \] # Closing bracket + )? + (.*?) # Group 2. Domain list + (\#@%\#) # Group 3. Cosmetic rule marker + (\/\/scriptlet) # Group 4. Scriptlet marker + (\() # Group 5. Opening parenthesis + (.*)? # Group 6. Arguments + (\)) # Group 7. Closing parenthesis + \s* # Optional trailing whitespace + $ # End of the line captures: "1": patterns: @@ -256,21 +293,28 @@ repository: - include: "#scriptletFunction" "7": name: punctuation.section.adblock + blockingScriptletRules: patterns: - match: |- (?x) - ^ # Start of the line - \s* # Optional leading whitespace - (\[.+?\])? # Group 1. AdGuard modifier list - (.*)? # Group 2. Domain list - (\#%\#) # Group 3. Cosmetic rule marker - (\/\/scriptlet) # Group 4. Scriptlet marker - (\() # Group 5. Opening parenthesis - (.*\S.*) # Group 6. Arguments. Note: we look for a parameter that contain at least one non-whitespace character - (\)) # Group 7. Closing parenthesis - \s* # Optional trailing whitespace - $ # End of the line + ^ # Start of the line + ( # Group 1. 
Cosmetic rule options (optional) + \[ # Opening bracket + (?: # One or more of: + \/(?:\\.|[^\/\\])*\/[a-z]* # Regex pattern (handles ] inside regex like /[class]/) + |[^\]] # OR any character except closing bracket + )+ + \] # Closing bracket + )? + (.*?) # Group 2. Domain list + (\#%\#) # Group 3. Cosmetic rule marker + (\/\/scriptlet) # Group 4. Scriptlet marker + (\() # Group 5. Opening parenthesis + (.*\S.*) # Group 6. Arguments. Note: we look for a parameter that contain at least one non-whitespace character + (\)) # Group 7. Closing parenthesis + \s* # Optional trailing whitespace + $ # End of the line captures: "1": patterns: @@ -289,6 +333,7 @@ repository: - include: "#scriptletFunction" "7": name: punctuation.section.adblock + scriptletRulesUBO: patterns: - match: "^(.*?)(#@?#)(\\+js)(\\()(.+)(\\)\\s*)$" @@ -307,6 +352,7 @@ repository: - include: "#scriptletFunctionUBO" "6": name: punctuation.section.adblock + jsRules: # In this case, we embed the JS grammar in the adblock grammar # by using the include: "source.js" scope. 
@@ -316,12 +362,15 @@ repository: patterns: # Do not confuse with #%#//scriptlet, so we use a negative lookahead # to make sure the separator is not followed by //scriptlet - - begin: "^(.*?)(#@?%#(?!\\/\\/scriptlet))" + - begin: ^(\[(?:\/(?:\\.|[^\/\\])*\/[a-z]*|[^\]])+\])?(.*?)(#@?%#(?!\/\/scriptlet)) beginCaptures: "1": patterns: - - include: "#domainListCommaSeparated" + - include: "#cosmeticRulesOptions" "2": + patterns: + - include: "#domainListCommaSeparated" + "3": name: keyword.control.adblock end: "$" contentName: source.js @@ -338,6 +387,7 @@ repository: name: keyword.control.adblock "3": name: invalid.illegal + basicRulesNoUrl: patterns: - match: "^(\\$)(.+)$" @@ -346,7 +396,8 @@ repository: name: keyword.control.adblock "2": patterns: - - include: "#basicRulesOptions" + - include: "#modifierList" + basicRulesRegex: patterns: - match: "^(\\/[^\\/\\\\]*(?:\\\\.[^\\/\\\\]*)*\\/[dgimsuy]*)(?:(\\$)(.+))?$" @@ -358,169 +409,359 @@ repository: name: keyword.control.adblock "3": patterns: - - include: "#basicRulesOptions" + - include: "#modifierList" + basicRules: patterns: - - match: "^(.+?)((\\$(?!\\/))(.*))?$" + - match: |- + (?x) + ^ + # Group 1. URL pattern (note: ? is used to make it non-greedy) + (.*?) + (?: + # Group 2. Network rule separator + (\$)(?=\s*~?\s*[\w-]+\s*(?:[=,]|$)) + # Group 3. Network rule modifiers + (.*) + )? + $ captures: "1": patterns: - include: "#urlPattern" - "3": + "2": name: keyword.control.adblock - "4": + "3": patterns: - - include: "#basicRulesOptions" + - include: "#modifierList" + regularExpression: patterns: - begin: "(/)" beginCaptures: "1": - name: keyword.other.regex.begin + name: punctuation.definition.string.begin.regexp.adblock end: "((?]+>)?" 
+ beginCaptures: "1": - name: keyword.other.adblock + name: punctuation.definition.group.regexp "2": - name: keyword.operator.adblock - "3": - patterns: - - include: "#domainListPipeSeparated" - - match: "(app)(=)([^,]+)" - captures: + name: punctuation.definition.group.assertion.regexp + end: "(\\))" + endCaptures: "1": - name: keyword.other.adblock - "2": - name: keyword.operator.adblock - "3": - patterns: - - include: "#appListPipeSeparated" - - match: "(dnstype)(=)([^,]+)" + name: punctuation.definition.group.regexp + patterns: + - include: "#regexp" + + # Alternation + - name: keyword.operator.or.regexp + match: "\\|" + + # Quantifiers + - name: keyword.operator.quantifier.regexp + match: "[*+?]\\??" + - name: keyword.operator.quantifier.regexp + match: "\\{\\d+(,\\d*)?\\}\\??" + + # Anchors and boundaries + - name: keyword.control.anchor.regexp + match: "\\^|\\$" + - name: keyword.control.anchor.regexp + match: "\\\\[bB]" + + # Backreferences + - name: keyword.other.back-reference.regexp + match: "\\\\[1-9]\\d*" + + headerValue: + patterns: + # Header name with regex value: name:/pattern/flags or name:~/pattern/flags + - match: ([\w-]+)(:)(~?)(/.+?(? 
{ + tokenizer = await getAdblockTokenizer(); +}); + +describe('cosmetic rules modifiers', () => { + describe('valid', () => { + test('simple path modifier', () => { + const tokens = tokenizer('[$path=/test]##banner'); + + // Check only the modifier part tokens (first 6 tokens) + expect(tokens.slice(0, 6)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 12, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 12, endIndex: 13, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + }); + + test('domain modifier with regex', () => { + const tokens1 = tokenizer('[$domain=/example[0-9]\\.(com|org)/]##.ad'); + + // Check only the basic structure tokens (first 5 tokens) + expect(tokens1.slice(0, 5)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 8, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 8, endIndex: 9, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { + startIndex: 9, + endIndex: 10, + scopes: ['text.adblock', 'punctuation.definition.string.begin.regexp.adblock'], + }, + ]); + + // Verify the regex content contains the expected scopes + expect(tokens1[5].scopes).toContain('string.regexp.adblock'); + + const tokens2 = tokenizer('[$domain=/example\\d{1,}\\.(com|org)/]##.ad'); + + // Check only the basic structure tokens (first 5 tokens) + expect(tokens2.slice(0, 5)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: 
['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 8, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 8, endIndex: 9, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { + startIndex: 9, + endIndex: 10, + scopes: ['text.adblock', 'punctuation.definition.string.begin.regexp.adblock'], + }, + ]); + + // Verify the regex content contains the expected scopes + expect(tokens2[5].scopes).toContain('string.regexp.adblock'); + }); + + test('multiple modifiers', () => { + const tokens1 = tokenizer('[$path=/test,app=com.google.search]##banner'); + + // Check only the modifier part tokens (first 10 tokens) + expect(tokens1.slice(0, 10)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 12, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 12, endIndex: 13, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 13, endIndex: 16, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 16, endIndex: 17, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 17, endIndex: 34, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 34, endIndex: 35, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + + const tokens2 = tokenizer('[$path=/test,domain=example.org]##banner'); + + // Check only the modifier part tokens (first 10 tokens) + expect(tokens2.slice(0, 10)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 
'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 12, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 12, endIndex: 13, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 13, endIndex: 19, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 19, endIndex: 20, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 20, endIndex: 31, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 31, endIndex: 32, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + }); + + test('different rule types with modifiers', () => { + const tokens1 = tokenizer('[$path=/test]#@#banner'); + expect(tokens1.slice(0, 6)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 12, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 12, endIndex: 13, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + + const tokens2 = tokenizer('[$path=/test]#?#.banner'); + expect(tokens2.slice(0, 6)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 12, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 12, endIndex: 13, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + + const tokens3 = 
tokenizer('[$path=/test]#$#banner { style: display: none!important; }'); + expect(tokens3.slice(0, 6)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 12, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 12, endIndex: 13, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + + const tokens4 = tokenizer('[$path=/test]$$banner'); + expect(tokens4.slice(0, 6)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 12, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 12, endIndex: 13, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + }); + + test('scriptlet rules with modifiers', () => { + const tokens1 = tokenizer('[$path=/subpage1]testcases.agrd.dev,pages.dev#%#window.__case13=true;'); + expect(tokens1.slice(0, 6)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 16, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 16, endIndex: 17, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + 
+ const tokens2 = tokenizer('[$path=/test]example.org#%#//scriptlet(\'name\', \'\')'); + expect(tokens2.slice(0, 6)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 6, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 6, endIndex: 7, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 7, endIndex: 12, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 12, endIndex: 13, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + }); + }); + + describe('invalid', () => { + test('invalid modifier', () => { + const tokens = tokenizer('[$randommodifier=test]##banner'); + + // Check only the modifier part tokens (first 6 tokens) + expect(tokens.slice(0, 6)).toEqual([ + { startIndex: 0, endIndex: 1, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 1, endIndex: 2, scopes: ['text.adblock', 'keyword.control.adblock'] }, + { startIndex: 2, endIndex: 16, scopes: ['text.adblock', 'keyword.other.adblock'] }, + { startIndex: 16, endIndex: 17, scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { startIndex: 17, endIndex: 21, scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { startIndex: 21, endIndex: 22, scopes: ['text.adblock', 'keyword.control.adblock'] }, + ]); + }); + }); +}); diff --git a/syntaxes/test/adblock/cosmetic/html.test.ts b/syntaxes/test/adblock/cosmetic/html.test.ts index 2ca06ac..e1098cf 100644 --- a/syntaxes/test/adblock/cosmetic/html.test.ts +++ b/syntaxes/test/adblock/cosmetic/html.test.ts @@ -596,7 +596,7 @@ describe('HTML filtering rules', () => { actual: '$$', expected: [ { fragment: '$', scopes: SEPARATOR }, - { fragment: '$', scopes: ['text.adblock', 'invalid.illegal.redundant.modifier.separator'] }, + { fragment: '$', scopes: ['text.adblock', 'invalid.illegal.adblock'] }, ], }, { diff --git 
a/syntaxes/test/adblock/network/network.test.ts b/syntaxes/test/adblock/network/network.test.ts new file mode 100644 index 0000000..3b6e57e --- /dev/null +++ b/syntaxes/test/adblock/network/network.test.ts @@ -0,0 +1,868 @@ +/** + * @file Tests for network rules. + */ +import { + beforeAll, + describe, + expect, + test, +} from 'vitest'; + +import { type AdblockTokenizer, getAdblockTokenizer } from '../../../utils/get-adblock-tokenizer'; + +let tokenizer: AdblockTokenizer; + +beforeAll(async () => { + tokenizer = await getAdblockTokenizer(); +}); + +describe('network rules', () => { + describe('valid', () => { + test('regular network rules without modifiers', () => { + expect('-468x60-').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '-468x60-', scopes: ['text.adblock'] }, + ], + ); + + expect('/ads.js').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '/ads.js', scopes: ['text.adblock'] }, + ], + ); + + expect('||example.com^').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock'] }, + { fragment: '^', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + expect('||example.com/*_banner_').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.com/', scopes: ['text.adblock'] }, + { fragment: '*', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '_banner_', scopes: ['text.adblock'] }, + ], + ); + + expect('|http://example.org').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '|', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'http://example.org', scopes: ['text.adblock'] }, + ], + ); + + expect('swf|').toBeTokenizedProperly( + tokenizer, + [ + { fragment: 'swf', scopes: ['text.adblock'] }, + { fragment: '|', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + }); + + test('regexp network rules 
without modifiers', () => { + expect(String.raw`/banner\d+/`).toBeTokenizedProperly( + tokenizer, + [ + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.begin.regexp.adblock'], + }, + { fragment: 'banner', scopes: ['text.adblock', 'string.regexp.adblock'] }, + { + fragment: '\\d', + scopes: ['text.adblock', 'string.regexp.adblock', 'constant.other.character-class.set.regexp'], + }, + { + fragment: '+', + scopes: ['text.adblock', 'string.regexp.adblock', 'keyword.operator.quantifier.regexp'], + }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.end.regexp.adblock'], + }, + ], + ); + }); + + test('network rules without pattern', () => { + expect('$script').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'script', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + expect('$~websocket,~xmlhttprequest').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: '~websocket', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '~xmlhttprequest', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + expect('$script,third-party,___,domain=example.com').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'script', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'third-party', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '___', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'domain', scopes: 
['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + ], + ); + + // quoted value allows unescaped comma + expect(String.raw`$foo=',',bar`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'foo', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: "','", scopes: ['text.adblock', 'string.quoted.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'bar', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + expect(String.raw`$foo='/b{3,}/',bar`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'foo', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: "'/b{3,}/'", scopes: ['text.adblock', 'string.quoted.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'bar', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + // regexp value allows unescaped comma with detailed highlighting + expect(String.raw`$foo=/b{3,}/,bar`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'foo', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.begin.regexp.adblock'], + }, + { fragment: 'b', scopes: ['text.adblock', 'string.regexp.adblock'] }, + { + fragment: '{3,}', + scopes: ['text.adblock', 'string.regexp.adblock', 'keyword.operator.quantifier.regexp'], + 
}, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.end.regexp.adblock'], + }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'bar', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + // quoted value allows unescaped $ + expect(String.raw`$foo='$',bar`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'foo', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: "'$'", scopes: ['text.adblock', 'string.quoted.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'bar', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + expect(String.raw`$foo='/b{3,}$/',bar`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'foo', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: "'/b{3,}$/'", scopes: ['text.adblock', 'string.quoted.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'bar', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + // regexp value allows unescaped $ (anchor) + expect(String.raw`$foo=/b{3,}$/,bar`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'foo', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.begin.regexp.adblock'], + }, + { fragment: 'b', scopes: ['text.adblock', 'string.regexp.adblock'] }, + { + fragment: '{3,}', + scopes: ['text.adblock', 'string.regexp.adblock', 
'keyword.operator.quantifier.regexp'], + }, + { + fragment: '$', + scopes: ['text.adblock', 'string.regexp.adblock', 'keyword.control.anchor.regexp'], + }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.end.regexp.adblock'], + }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'bar', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + expect(String.raw`$foo=/(b{3,}|foo$)/,bar`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'foo', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.begin.regexp.adblock'], + }, + { + fragment: '(', + scopes: ['text.adblock', 'string.regexp.adblock', 'punctuation.definition.group.regexp'], + }, + { fragment: 'b', scopes: ['text.adblock', 'string.regexp.adblock'] }, + { + fragment: '{3,}', + scopes: ['text.adblock', 'string.regexp.adblock', 'keyword.operator.quantifier.regexp'], + }, + { + fragment: '|', + scopes: ['text.adblock', 'string.regexp.adblock', 'keyword.operator.or.regexp'], + }, + { fragment: 'foo', scopes: ['text.adblock', 'string.regexp.adblock'] }, + { + fragment: '$', + scopes: ['text.adblock', 'string.regexp.adblock', 'keyword.control.anchor.regexp'], + }, + { + fragment: ')', + scopes: ['text.adblock', 'string.regexp.adblock', 'punctuation.definition.group.regexp'], + }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.end.regexp.adblock'], + }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'bar', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + // whitespace is allowed + expect('$foo = bar , baz').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + 
{ fragment: 'foo', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ' ', scopes: ['text.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: ' ', scopes: ['text.adblock'] }, + { fragment: 'bar', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { fragment: ' ', scopes: ['text.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: ' ', scopes: ['text.adblock'] }, + { fragment: 'baz', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + }); + + test('regular network rules with modifiers', () => { + expect('||example.com^$script').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock'] }, + { fragment: '^', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'script', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + expect('||example.com^$~websocket,~xmlhttprequest').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock'] }, + { fragment: '^', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: '~websocket', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '~xmlhttprequest', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + expect('||example.com^$script,__,3p').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock'] }, + { fragment: '^', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', 
scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'script', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '__', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '3p', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + }); + + test('regexp network rules with modifiers', () => { + expect(String.raw`/banner\d+/$image,third-party`).toBeTokenizedProperly( + tokenizer, + [ + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.begin.regexp.adblock'], + }, + { fragment: 'banner', scopes: ['text.adblock', 'string.regexp.adblock'] }, + { + fragment: '\\d', + scopes: ['text.adblock', 'string.regexp.adblock', 'constant.other.character-class.set.regexp'], + }, + { + fragment: '+', + scopes: ['text.adblock', 'string.regexp.adblock', 'keyword.operator.quantifier.regexp'], + }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.end.regexp.adblock'], + }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'image', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'third-party', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + }); + + test('special modifiers with commas', () => { + // header modifier with regex containing comma and escaped characters + expect('*$script,header=via:/, 1\\.1 google$/').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '*', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'script', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'header', scopes: 
['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'via', scopes: ['text.adblock', 'entity.name.tag.adblock'] }, + { fragment: ':', scopes: ['text.adblock', 'punctuation.separator.key-value.adblock'] }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.begin.regexp.adblock'], + }, + { fragment: ', 1\\.1 google$', scopes: ['text.adblock', 'string.regexp.adblock'] }, + { + fragment: '/', + scopes: ['text.adblock', 'punctuation.definition.string.end.regexp.adblock'], + }, + ], + ); + + // header modifier with literal value + expect('*$script,header=via:1.1 google').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '*', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'script', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'header', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'via', scopes: ['text.adblock', 'entity.name.tag.adblock'] }, + { fragment: ':', scopes: ['text.adblock', 'punctuation.separator.key-value.adblock'] }, + { fragment: '1.1 google', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + ], + ); + + // header modifier - just presence check + expect('*$script,header=via').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '*', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'script', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'header', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + 
{ fragment: 'via', scopes: ['text.adblock', 'entity.name.tag.adblock'] }, + ], + ); + + // replace modifier with detailed tokenization test + expect(String.raw`||example.org^$replace=/test/replacement/i`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.org', scopes: ['text.adblock'] }, + { fragment: '^', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'replace', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '/', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'test', scopes: ['text.adblock', 'string.regexp.adblock'] }, + { fragment: '/', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'replacement', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { fragment: '/', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'i', scopes: ['text.adblock', 'keyword.other.adblock'] }, + ], + ); + + // replace modifier with $number references in replacement + expect(String.raw`||example.org^$replace=/(test)/\$1/`).toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.org', scopes: ['text.adblock'] }, + { fragment: '^', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'replace', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '/', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { + fragment: '(', + scopes: ['text.adblock', 'string.regexp.adblock', 'punctuation.definition.group.regexp'], + }, + { fragment: 'test', scopes: ['text.adblock', 'string.regexp.adblock'] }, + 
{ + fragment: ')', + scopes: ['text.adblock', 'string.regexp.adblock', 'punctuation.definition.group.regexp'], + }, + { fragment: '/', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { + fragment: '\\$', + scopes: ['text.adblock', 'string.unquoted.adblock', 'constant.character.escape.adblock'], + }, + { fragment: '1', scopes: ['text.adblock', 'string.unquoted.adblock', 'keyword.other.adblock'] }, + { fragment: '/', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + ], + ); + + // replace modifier with complex regexp and escaped slashes + const replaceExample = String.raw`||example.org^$replace=/()[\s\S]*<\/VAST>/\$1<\/VAST>/i`; + const replaceTokens = tokenizer(replaceExample); + + // Verify no invalid tokens + const replaceHasInvalid = replaceTokens.some((token) => token.scopes.some( + (scope) => scope.startsWith('invalid'), + )); + expect(replaceHasInvalid).toBe(false); + + // Verify key structural elements + const replaceScopes = replaceTokens.flatMap((t) => t.scopes); + expect(replaceScopes).toContain('keyword.other.adblock'); // replace keyword and modifiers + expect(replaceScopes).toContain('keyword.operator.adblock'); // slashes + expect(replaceScopes).toContain('string.regexp.adblock'); // regexp part + expect(replaceScopes).toContain('string.unquoted.adblock'); // replacement part + expect(replaceScopes).toContain('constant.character.escape.adblock'); // escaped characters + + // urlskip modifier with a regexp value (escaped slashes, groups, alternation) followed by space-separated directives - this loose check only verifies that no token gets an invalid scope + const urlskipExample = String.raw`||click.redditmail.com/CL0/` + + String.raw`$urlskip=/CL0\/.*?(www\.reddit\.com.+?)(?:\?|%3F)/ -uricomponent +https`; + const urlskipTokens = tokenizer(urlskipExample); + + // Verify no invalid tokens + const urlskipHasInvalid = urlskipTokens.some((token) => token.scopes.some( + (scope) => scope.startsWith('invalid'), + )); + expect(urlskipHasInvalid).toBe(false); + + // Verify key structural elements + const urlskipScopes = urlskipTokens.flatMap((t) =>
t.scopes); + expect(urlskipScopes).toContain('keyword.other.adblock'); + expect(urlskipScopes).toContain('keyword.operator.adblock'); + + // urlskip modifier with simple directives + expect('||example.com/path/to/tracker$urlskip=?url -base64').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.com/path/to/tracker', scopes: ['text.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'urlskip', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '?url -base64', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + ], + ); + }); + + test('domain-like modifiers with special syntax', () => { + // Simple domain value + expect('||example.com^$script,domain=example.org').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock'] }, + { fragment: '^', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'script', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: ',', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'domain', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'example.org', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + ], + ); + + // Negated domain with ~ + expect('||ads.js$domain=~example.com').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'ads.js', scopes: ['text.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'domain', scopes: ['text.adblock', 'keyword.other.adblock'] }, + 
{ fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '~', scopes: ['text.adblock', 'keyword.operator.logical.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + ], + ); + + // Multiple domains with pipe separator + expect('||ads.js$domain=example.com|example.org|~test.com').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'ads.js', scopes: ['text.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'domain', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { fragment: '|', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'example.org', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { fragment: '|', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: '~', scopes: ['text.adblock', 'keyword.operator.logical.adblock'] }, + { fragment: 'test.com', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + ], + ); + + // Domain with regex value - verify structural elements + const domainRegexExample = '||example.com^$domain=/example[0-9]\\.(com|org)/|another.com'; + const domainRegexTokens = tokenizer(domainRegexExample); + + // Verify no invalid tokens + const domainRegexHasInvalid = domainRegexTokens.some((token) => token.scopes.some( + (scope) => scope.startsWith('invalid'), + )); + expect(domainRegexHasInvalid).toBe(false); + + // Verify key structural elements + const domainRegexScopes = domainRegexTokens.flatMap((t) => t.scopes); + expect(domainRegexScopes).toContain('keyword.other.adblock'); + expect(domainRegexScopes).toContain('keyword.operator.adblock'); + expect(domainRegexScopes).toContain('punctuation.definition.string.begin.regexp.adblock'); + 
expect(domainRegexScopes).toContain('string.regexp.adblock'); + + // Mixed: regex with pipe inside, followed by simple domain - check actual tokens + const mixedExample = '||ads.js$domain=/example[0-9]\\.(com|org)/|another.com'; + const mixedTokens = tokenizer(mixedExample); + + // Check that the value is properly tokenized with regex and domain parts + const mixedFragments = mixedTokens.map((t) => ({ + fragment: mixedExample.substring(t.startIndex, t.endIndex), + scopes: t.scopes, + })); + + // The entire value should not be just a string.unquoted.adblock + const valueTokens = mixedFragments.filter( + (t) => t.fragment.includes('example') || t.fragment.includes('another'), + ); + + // At least one token should have regex scopes + const hasRegexScopes = valueTokens.some( + (t) => t.scopes.some((s) => s.includes('regexp')), + ); + expect(hasRegexScopes).toBe(true); + + // Test 'to' modifier with multiple TLDs + expect('||example.com^$to=com|org|net').toBeTokenizedProperly( + tokenizer, + [ + { fragment: '||', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: 'example.com', scopes: ['text.adblock'] }, + { fragment: '^', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '$', scopes: ['text.adblock', 'keyword.control.adblock'] }, + { fragment: 'to', scopes: ['text.adblock', 'keyword.other.adblock'] }, + { fragment: '=', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'com', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { fragment: '|', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'org', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + { fragment: '|', scopes: ['text.adblock', 'keyword.operator.adblock'] }, + { fragment: 'net', scopes: ['text.adblock', 'string.unquoted.adblock'] }, + ], + ); + }); + + test('combined replace and domain modifiers', () => { + // Test replace and domain together in the same rule + const combinedExample = 
String.raw`||example.org^$replace=/()[\s\S]*<\/VAST>/` + + String.raw`\$1<\/VAST>/i,domain=/example[0-9]\.(com|org)/|another.com`; + const combinedTokens = tokenizer(combinedExample); + + // Verify no invalid tokens + const combinedHasInvalid = combinedTokens.some((token) => token.scopes.some( + (scope) => scope.startsWith('invalid'), + )); + expect(combinedHasInvalid).toBe(false); + + // Verify both modifiers have their key elements + const combinedScopes = combinedTokens.flatMap((t) => t.scopes); + + // Both modifiers should be present with their regex elements + expect(combinedScopes).toContain('keyword.operator.adblock'); // slashes and operators + expect(combinedScopes).toContain('string.regexp.adblock'); // regex content + }); + + test('complicated cases', () => { + // Test a complex filter with multiple modifiers including regex patterns + const complexFilter = String.raw`/^https:\/\/[a-z0-9]{2,}-[a-z0-9]{8}\.(?:com|nl)\/[a-z0-9-]+/` + + String.raw`[a-z0-9]{12}\b/$frame,3p,match-case,to=com|nl,` + + String.raw`ipaddress=/^(1(72\.67\.\d{3}|04\.21\.\d+)\.\d+|188\.114\.9[67]\.[08]|` + + String.raw`64:ff9b::[a-f0-9]{4}:[a-f0-9]{1,4})$/,` + + String.raw`replace='/^/