From 90a9181aaeb9ba8a248af8d8b497d9366e7f6c87 Mon Sep 17 00:00:00 2001 From: Rod Boev Date: Fri, 3 Jul 2026 22:36:59 -0400 Subject: [PATCH] fix(yara): reduce packaged malware-signature false positives (#236) Signed-off-by: Rod Boev --- .../nodes/analyzers/static_yara.py | 74 ++++++++--- src/skillspector/yara_rules/malware.yar | 125 ------------------ src/skillspector/yara_rules/malware.yar.b64 | 89 +++++++++++++ tests/nodes/analyzers/test_static_yara.py | 94 ++++++++++++- 4 files changed, 239 insertions(+), 143 deletions(-) delete mode 100644 src/skillspector/yara_rules/malware.yar create mode 100644 src/skillspector/yara_rules/malware.yar.b64 diff --git a/src/skillspector/nodes/analyzers/static_yara.py b/src/skillspector/nodes/analyzers/static_yara.py index 891caa0c..4ba899d6 100644 --- a/src/skillspector/nodes/analyzers/static_yara.py +++ b/src/skillspector/nodes/analyzers/static_yara.py @@ -22,8 +22,11 @@ from __future__ import annotations +import base64 +import binascii import hashlib from pathlib import Path +from tempfile import TemporaryDirectory import yara @@ -40,7 +43,8 @@ _BUILTIN_RULES_DIR = Path(__file__).resolve().parent.parent.parent / "yara_rules" -_RULE_EXTENSIONS = ("*.yar", "*.yara") +_RULE_EXTENSIONS = ("*.yar", "*.yara", "*.yar.b64", "*.yara.b64") +_ENCODED_RULE_SUFFIXES = (".yar.b64", ".yara.b64") _CATEGORY_MAP: dict[str, tuple[str, Severity]] = { "malware": ("YR1", Severity.CRITICAL), @@ -82,15 +86,47 @@ def _content_hash(rule_files: list[Path]) -> str: return h.hexdigest() -def _build_namespace_map(rule_files: list[Path]) -> dict[str, str]: - """Build a {namespace: filepath} dict from rule files, deduplicating namespace names.""" +def _rule_namespace(rule_file: Path) -> str: + """Derive a stable namespace from a rule file name.""" + for suffix in _ENCODED_RULE_SUFFIXES: + if rule_file.name.endswith(suffix): + return rule_file.name[: -len(suffix)] + return rule_file.stem + + +def _materialize_rule_file( + rule_file: Path, temp_dir: Path | None = None, namespace: str | None = None +) -> Path: + """Return a compile-ready rule path, decoding embedded sources when needed.""" + if not rule_file.name.endswith(_ENCODED_RULE_SUFFIXES): + return rule_file + if temp_dir is None: + raise ValueError("temp_dir is required for encoded rule files") + + encoded_source = rule_file.read_text(encoding="utf-8") + decoded_source = base64.b64decode("".join(encoded_source.split())).decode("utf-8") + temp_name = (namespace or _rule_namespace(rule_file)).replace("/", "__") + temp_file = temp_dir / f"{temp_name}.yar" + temp_file.write_text(decoded_source, encoding="utf-8") + return temp_file + + +def _build_namespace_map( + rule_files: list[Path], temp_dir: Path | None = None +) -> tuple[dict[str, str], int]: + """Build a {namespace: filepath} dict and count malformed encoded files.""" filepaths: dict[str, str] = {} + skipped = 0 for rf in rule_files: - ns = rf.stem + ns = _rule_namespace(rf) if ns in filepaths: - ns = f"{rf.parent.name}/{rf.stem}" - filepaths[ns] = str(rf) - return filepaths + ns = f"{rf.parent.name}/{ns}" + try: + filepaths[ns] = str(_materialize_rule_file(rf, temp_dir, ns)) + except (binascii.Error, UnicodeDecodeError) as exc: + skipped += 1 + logger.debug("%s: skipping malformed encoded rule %s: %s", ANALYZER_ID, rf, exc) + return filepaths, skipped def _compile_rules(filepaths: dict[str, str]) -> tuple[yara.Rules | None, int]: @@ -140,18 +176,22 @@ def _load_rules(extra_dir: Path | None = None) -> yara.Rules | None: if _compiled_rules is not None and _rules_hash == current_hash: return _compiled_rules - filepaths = _build_namespace_map(rule_files) - compiled, skipped = _compile_rules(filepaths) + with TemporaryDirectory() as temp_dir_name: + temp_dir = Path(temp_dir_name) + filepaths, materialize_skipped = _build_namespace_map(rule_files, temp_dir) - if compiled is None: - logger.warning("%s: failed to compile any YARA rules", ANALYZER_ID) - return None + compiled, compile_skipped = _compile_rules(filepaths) + skipped = materialize_skipped + compile_skipped + + if compiled is None: + logger.warning("%s: failed to compile any YARA rules", ANALYZER_ID) + return None - _compiled_rules = compiled - _rules_hash = current_hash - loaded = len(filepaths) - skipped - logger.info("%s: compiled %d YARA rule file(s) (%d skipped)", ANALYZER_ID, loaded, skipped) - return compiled + _compiled_rules = compiled + _rules_hash = current_hash + loaded = len(filepaths) - compile_skipped + logger.info("%s: compiled %d YARA rule file(s) (%d skipped)", ANALYZER_ID, loaded, skipped) + return compiled def _extract_match_strings(match: yara.Match) -> tuple[int, str | None]: diff --git a/src/skillspector/yara_rules/malware.yar b/src/skillspector/yara_rules/malware.yar deleted file mode 100644 index 97c2c456..00000000 --- a/src/skillspector/yara_rules/malware.yar +++ /dev/null @@ -1,125 +0,0 @@ -/* - Malware indicator rules for source code scanning. - Based on patterns from Neo23x0/signature-base and community research. - Covers reverse shells, backdoors, keyloggers, ransomware-like behavior, - and C2 framework indicators found in source/script files. -*/ - -rule reverse_shell -{ - meta: - description = "Reverse shell patterns in scripts or source code" - category = "malware" - severity = "CRITICAL" - confidence = "0.85" - reference = "https://github.com/Neo23x0/signature-base" - strings: - $bash_revshell = /bash\s+-i\s+>&\s*\/dev\/tcp\// nocase - $nc_shell = /nc\s.*-e\s*\/bin\/(ba)?sh/ nocase - $ncat_shell = /ncat\s.*-e\s*\/bin\/(ba)?sh/ nocase - $python_socket = /socket\.socket\(.*SOCK_STREAM.*\.connect\(/ - $perl_socket = /use\s+Socket;.*socket\s*\(\s*SOCK/ - $php_fsock = /fsockopen\s*\(.*exec\s*\(/ nocase - $ruby_tcpsocket = /TCPSocket\.\s*new\s*\(.*exec\s*\(/ - $powershell_tcp = /New-Object\s+System\.Net\.Sockets\.TCPClient/ nocase - $socat_shell = /socat\s+.*EXEC.*\/bin\/(ba)?sh/ nocase - $mkfifo_shell = /mkfifo\s+.*\|\s*\/bin\/(ba)?sh/ - condition: - any of them -} - -rule backdoor_persistence -{ - meta: - description = "Backdoor persistence with malicious payloads (shell commands, SSH key injection, hidden root users)" - category = "malware" - severity = "HIGH" - confidence = "0.75" - reference = "https://github.com/Neo23x0/signature-base" - strings: - $hidden_user = /useradd\s+.*-o\s+-u\s+0/ nocase - $cron_persist = /crontab\s.*(curl|wget|nc|bash|python)/ nocase - $ssh_inject = /echo\s+.*>>?\s*.*\.ssh\/authorized_keys/ nocase - $systemd_persist = /\[Service\].*ExecStart.*(nc|bash|python|curl)/ nocase - $bashrc_persist = /echo\s+.*>>?\s*.*\.bashrc/ nocase - $profile_persist = /echo\s+.*>>?\s*.*\.profile/ nocase - $init_persist = /\/etc\/init\.d\/.*(nc|bash|reverse)/ nocase - $ld_preload = /LD_PRELOAD.*\.so/ nocase - condition: - any of them -} - -rule keylogger_indicators -{ - meta: - description = "Keylogger functionality in scripts or source code" - category = "malware" - severity = "HIGH" - confidence = "0.7" - strings: - $pynput = /from\s+pynput\.keyboard\s+import/ nocase - $keyboard_hook = /keyboard\.(on_press|hook|on_release)/ nocase - $xinput_test = /xinput\s+test/ nocase - $logkeys = /logkeys\s+--start/ nocase - $keybd_event = /(GetAsyncKeyState|SetWindowsHookEx.*WH_KEYBOARD)/ nocase - condition: - any of them -} - -rule ransomware_behavior -{ - meta: - description = "Ransomware-like patterns (mass encryption, ransom notes)" - category = "malware" - severity = "CRITICAL" - confidence = "0.8" - strings: - $walk_encrypt = /os\.walk\s*\(.*\.(encrypt|cipher)/ - $ransom_note = /(your\s+files\s+(have\s+been|are)\s+encrypted|pay\s+.*bitcoin|send\s+.*btc)/ nocase - $ext_rename = /os\.rename\s*\(.*\+\s*['"]\.(locked|encrypted|crypt|enc)['"]\s*\)/ - $mass_overwrite = /os\.walk\s*\(.*open\s*\(.*['\"]wb['\"]\)/ - condition: - any of them -} - -rule c2_framework_indicators -{ - meta: - description = "Command-and-control framework indicators (Cobalt Strike, Metasploit, Sliver, etc.)" - category = "malware" - severity = "CRITICAL" - confidence = "0.85" - reference = "https://github.com/Neo23x0/signature-base" - strings: - $cobalt_strike = "cobaltstrike" nocase - $meterpreter = "meterpreter" nocase - $metasploit = /metasploit.*(payload|exploit|stager)/ nocase - $empire = /powershell.*empire/ nocase - $sliver_c2 = /sliver.*(implant|beacon|session)/ nocase - $covenant = /Covenant.*(Grunt|Listener)/ nocase - $havoc_c2 = /havoc.*(demon|teamserver)/ nocase - $beacon_config = /(BeaconType|C2Server|PublicKey.*watermark)/ nocase - condition: - any of them -} - -rule info_stealer -{ - meta: - description = "Information stealer patterns (credential harvesting, browser data theft)" - category = "malware" - severity = "HIGH" - confidence = "0.75" - reference = "https://github.com/Neo23x0/signature-base" - strings: - $chrome_login = /Chrome.*Login\s*Data/ nocase - $firefox_logins = /logins\.json.*firefox/ nocase - $browser_cookies = /Cookies.*(chrome|firefox|edge|opera)/ nocase - $wallet_steal = /wallet\.dat/ nocase - $mimikatz = "mimikatz" nocase - $lazagne = "lazagne" nocase - $cred_dump_sam = /reg\s+save\s+.*\\sam/ nocase - $ntds_dump = /ntds\.dit/ nocase - condition: - any of them -} diff --git a/src/skillspector/yara_rules/malware.yar.b64 b/src/skillspector/yara_rules/malware.yar.b64 new file mode 100644 index 00000000..d0b072f4 --- /dev/null +++ b/src/skillspector/yara_rules/malware.yar.b64 @@ -0,0 +1,89 @@ +LyoNCiAgICBNYWx3YXJlIGluZGljYXRvciBydWxlcyBmb3Igc291cmNlIGNvZGUgc2Nhbm5pbmcu +DQogICAgQmFzZWQgb24gcGF0dGVybnMgZnJvbSBOZW8yM3gwL3NpZ25hdHVyZS1iYXNlIGFuZCBj +b21tdW5pdHkgcmVzZWFyY2guDQogICAgQ292ZXJzIHJldmVyc2Ugc2hlbGxzLCBiYWNrZG9vcnMs +IGtleWxvZ2dlcnMsIHJhbnNvbXdhcmUtbGlrZSBiZWhhdmlvciwNCiAgICBhbmQgQzIgZnJhbWV3 +b3JrIGluZGljYXRvcnMgZm91bmQgaW4gc291cmNlL3NjcmlwdCBmaWxlcy4NCiovDQoNCnJ1bGUg +cmV2ZXJzZV9zaGVsbA0Kew0KICAgIG1ldGE6DQogICAgICAgIGRlc2NyaXB0aW9uID0gIlJldmVy +c2Ugc2hlbGwgcGF0dGVybnMgaW4gc2NyaXB0cyBvciBzb3VyY2UgY29kZSINCiAgICAgICAgY2F0 +ZWdvcnkgPSAibWFsd2FyZSINCiAgICAgICAgc2V2ZXJpdHkgPSAiQ1JJVElDQUwiDQogICAgICAg +IGNvbmZpZGVuY2UgPSAiMC44NSINCiAgICAgICAgcmVmZXJlbmNlID0gImh0dHBzOi8vZ2l0aHVi +LmNvbS9OZW8yM3gwL3NpZ25hdHVyZS1iYXNlIg0KICAgIHN0cmluZ3M6DQogICAgICAgICRiYXNo +X3JldnNoZWxsICAgID0gL2Jhc2hccystaVxzKz4mXHMqXC9kZXZcL3RjcFwvLyBub2Nhc2UNCiAg +ICAgICAgJG5jX3NoZWxsICAgICAgICAgPSAvbmNccy4qLWVccypcL2JpblwvKGJhKT9zaC8gbm9j +YXNlDQogICAgICAgICRuY2F0X3NoZWxsICAgICAgID0gL25jYXRccy4qLWVccypcL2JpblwvKGJh +KT9zaC8gbm9jYXNlDQogICAgICAgICRweXRob25fc29ja2V0ICAgID0gL3NvY2tldFwuc29ja2V0 +XCguKlNPQ0tfU1RSRUFNLipcLmNvbm5lY3RcKC8NCiAgICAgICAgJHBlcmxfc29ja2V0ICAgICAg +PSAvdXNlXHMrU29ja2V0Oy4qc29ja2V0XHMqXChccypTT0NLLw0KICAgICAgICAkcGhwX2Zzb2Nr +ICAgICAgICA9IC9mc29ja29wZW5ccypcKC4qZXhlY1xzKlwoLyBub2Nhc2UNCiAgICAgICAgJHJ1 +YnlfdGNwc29ja2V0ICAgPSAvVENQU29ja2V0XC5ccypuZXdccypcKC4qZXhlY1xzKlwoLw0KICAg +ICAgICAkcG93ZXJzaGVsbF90Y3AgICA9IC9OZXctT2JqZWN0XHMrU3lzdGVtXC5OZXRcLlNvY2tl +dHNcLlRDUENsaWVudC8gbm9jYXNlDQogICAgICAgICRzb2NhdF9zaGVsbCAgICAgID0gL3NvY2F0 +XHMrLipFWEVDLipcL2JpblwvKGJhKT9zaC8gbm9jYXNlDQogICAgICAgICRta2ZpZm9fc2hlbGwg +ICAgID0gL21rZmlmb1xzKy4qXHxccypcL2JpblwvKGJhKT9zaC8NCiAgICBjb25kaXRpb246DQog +ICAgICAgIGFueSBvZiB0aGVtDQp9DQoNCnJ1bGUgYmFja2Rvb3JfcGVyc2lzdGVuY2UNCnsNCiAg +ICBtZXRhOg0KICAgICAgICBkZXNjcmlwdGlvbiA9ICJCYWNrZG9vciBwZXJzaXN0ZW5jZSB3aXRo +IG1hbGljaW91cyBwYXlsb2FkcyAoc2hlbGwgY29tbWFuZHMsIFNTSCBrZXkgaW5qZWN0aW9uLCBo +aWRkZW4gcm9vdCB1c2VycykiDQogICAgICAgIGNhdGVnb3J5ID0gIm1hbHdhcmUiDQogICAgICAg +IHNldmVyaXR5ID0gIkhJR0giDQogICAgICAgIGNvbmZpZGVuY2UgPSAiMC43NSINCiAgICAgICAg +cmVmZXJlbmNlID0gImh0dHBzOi8vZ2l0aHViLmNvbS9OZW8yM3gwL3NpZ25hdHVyZS1iYXNlIg0K +ICAgIHN0cmluZ3M6DQogICAgICAgICRoaWRkZW5fdXNlciAgICAgICA9IC91c2VyYWRkXHMrLiot +b1xzKy11XHMrMC8gbm9jYXNlDQogICAgICAgICRjcm9uX3BlcnNpc3QgICAgICA9IC9jcm9udGFi +XHMuKihjdXJsfHdnZXR8bmN8YmFzaHxweXRob24pLyBub2Nhc2UNCiAgICAgICAgJHNzaF9pbmpl +Y3QgICAgICAgID0gL2VjaG9ccysuKj4+P1xzKi4qXC5zc2hcL2F1dGhvcml6ZWRfa2V5cy8gbm9j +YXNlDQogICAgICAgICRzeXN0ZW1kX3BlcnNpc3QgICA9IC9cW1NlcnZpY2VcXS4qRXhlY1N0YXJ0 +LioobmN8YmFzaHxweXRob258Y3VybCkvIG5vY2FzZQ0KICAgICAgICAkYmFzaHJjX3BlcnNpc3Qg +ICAgPSAvZWNob1xzKy4qPj4/XHMqLipcLmJhc2hyYy8gbm9jYXNlDQogICAgICAgICRwcm9maWxl +X3BlcnNpc3QgICA9IC9lY2hvXHMrLio+Pj9ccyouKlwucHJvZmlsZS8gbm9jYXNlDQogICAgICAg +ICRpbml0X3BlcnNpc3QgICAgICA9IC9cL2V0Y1wvaW5pdFwuZFwvLioobmN8YmFzaHxyZXZlcnNl +KS8gbm9jYXNlDQogICAgICAgICRsZF9wcmVsb2FkICAgICAgICA9IC9MRF9QUkVMT0FELipcLnNv +LyBub2Nhc2UNCiAgICBjb25kaXRpb246DQogICAgICAgIGFueSBvZiB0aGVtDQp9DQoNCnJ1bGUg +a2V5bG9nZ2VyX2luZGljYXRvcnMNCnsNCiAgICBtZXRhOg0KICAgICAgICBkZXNjcmlwdGlvbiA9 +ICJLZXlsb2dnZXIgZnVuY3Rpb25hbGl0eSBpbiBzY3JpcHRzIG9yIHNvdXJjZSBjb2RlIg0KICAg +ICAgICBjYXRlZ29yeSA9ICJtYWx3YXJlIg0KICAgICAgICBzZXZlcml0eSA9ICJISUdIIg0KICAg +ICAgICBjb25maWRlbmNlID0gIjAuNyINCiAgICBzdHJpbmdzOg0KICAgICAgICAkcHlucHV0ICAg +ICAgICAgPSAvZnJvbVxzK3B5bnB1dFwua2V5Ym9hcmRccytpbXBvcnQvIG5vY2FzZQ0KICAgICAg +ICAka2V5Ym9hcmRfaG9vayAgPSAva2V5Ym9hcmRcLihvbl9wcmVzc3xob29rfG9uX3JlbGVhc2Up +LyBub2Nhc2UNCiAgICAgICAgJHhpbnB1dF90ZXN0ICAgID0gL3hpbnB1dFxzK3Rlc3QvIG5vY2Fz +ZQ0KICAgICAgICAkbG9na2V5cyAgICAgICAgPSAvbG9na2V5c1xzKy0tc3RhcnQvIG5vY2FzZQ0K +ICAgICAgICAka2V5YmRfZXZlbnQgICAgPSAvKEdldEFzeW5jS2V5U3RhdGV8U2V0V2luZG93c0hv +b2tFeC4qV0hfS0VZQk9BUkQpLyBub2Nhc2UNCiAgICBjb25kaXRpb246DQogICAgICAgIGFueSBv +ZiB0aGVtDQp9DQoNCnJ1bGUgcmFuc29td2FyZV9iZWhhdmlvcg0Kew0KICAgIG1ldGE6DQogICAg +ICAgIGRlc2NyaXB0aW9uID0gIlJhbnNvbXdhcmUtbGlrZSBwYXR0ZXJucyAobWFzcyBlbmNyeXB0 +aW9uLCByYW5zb20gbm90ZXMpIg0KICAgICAgICBjYXRlZ29yeSA9ICJtYWx3YXJlIg0KICAgICAg +ICBzZXZlcml0eSA9ICJDUklUSUNBTCINCiAgICAgICAgY29uZmlkZW5jZSA9ICIwLjgiDQogICAg +c3RyaW5nczoNCiAgICAgICAgJHdhbGtfZW5jcnlwdCAgID0gL29zXC53YWxrXHMqXCguKlwuKGVu +Y3J5cHR8Y2lwaGVyKS8NCiAgICAgICAgJHJhbnNvbV9ub3RlICAgID0gLyh5b3VyXHMrZmlsZXNc +cysoaGF2ZVxzK2JlZW58YXJlKVxzK2VuY3J5cHRlZHxwYXlccysuKmJpdGNvaW58c2VuZFxzKy4q +YnRjKS8gbm9jYXNlDQogICAgICAgICRleHRfcmVuYW1lICAgICA9IC9vc1wucmVuYW1lXHMqXCgu +KlwrXHMqWyciXVwuKGxvY2tlZHxlbmNyeXB0ZWR8Y3J5cHR8ZW5jKVsnIl1ccypcKS8NCiAgICAg +ICAgJG1hc3Nfb3ZlcndyaXRlID0gL29zXC53YWxrXHMqXCguKm9wZW5ccypcKC4qWydcIl13Ylsn +XCJdXCkvDQogICAgY29uZGl0aW9uOg0KICAgICAgICBhbnkgb2YgdGhlbQ0KfQ0KDQpydWxlIGMy +X2ZyYW1ld29ya19pbmRpY2F0b3JzDQp7DQogICAgbWV0YToNCiAgICAgICAgZGVzY3JpcHRpb24g +PSAiQ29tbWFuZC1hbmQtY29udHJvbCBmcmFtZXdvcmsgaW5kaWNhdG9ycyAoQ29iYWx0IFN0cmlr +ZSwgTWV0YXNwbG9pdCwgU2xpdmVyLCBldGMuKSINCiAgICAgICAgY2F0ZWdvcnkgPSAibWFsd2Fy +ZSINCiAgICAgICAgc2V2ZXJpdHkgPSAiQ1JJVElDQUwiDQogICAgICAgIGNvbmZpZGVuY2UgPSAi +MC44NSINCiAgICAgICAgcmVmZXJlbmNlID0gImh0dHBzOi8vZ2l0aHViLmNvbS9OZW8yM3gwL3Np +Z25hdHVyZS1iYXNlIg0KICAgIHN0cmluZ3M6DQogICAgICAgICRjb2JhbHRfc3RyaWtlICA9ICJj +b2JhbHRzdHJpa2UiIG5vY2FzZQ0KICAgICAgICAkbWV0ZXJwcmV0ZXIgICAgPSAibWV0ZXJwcmV0 +ZXIiIG5vY2FzZQ0KICAgICAgICAkbWV0YXNwbG9pdCAgICAgPSAvbWV0YXNwbG9pdC4qKHBheWxv +YWR8ZXhwbG9pdHxzdGFnZXIpLyBub2Nhc2UNCiAgICAgICAgJGVtcGlyZSAgICAgICAgID0gL3Bv +d2Vyc2hlbGwuKmVtcGlyZS8gbm9jYXNlDQogICAgICAgICRzbGl2ZXJfYzIgICAgICA9IC9zbGl2 +ZXIuKihpbXBsYW50fGJlYWNvbnxzZXNzaW9uKS8gbm9jYXNlDQogICAgICAgICRjb3ZlbmFudCAg +ICAgICA9IC9Db3ZlbmFudC4qKEdydW50fExpc3RlbmVyKS8gbm9jYXNlDQogICAgICAgICRoYXZv +Y19jMiAgICAgICA9IC9oYXZvYy4qKGRlbW9ufHRlYW1zZXJ2ZXIpLyBub2Nhc2UNCiAgICAgICAg +JGJlYWNvbl9jb25maWcgID0gLyhCZWFjb25UeXBlfEMyU2VydmVyfFB1YmxpY0tleS4qd2F0ZXJt +YXJrKS8gbm9jYXNlDQogICAgY29uZGl0aW9uOg0KICAgICAgICBhbnkgb2YgdGhlbQ0KfQ0KDQpy +dWxlIGluZm9fc3RlYWxlcg0Kew0KICAgIG1ldGE6DQogICAgICAgIGRlc2NyaXB0aW9uID0gIklu +Zm9ybWF0aW9uIHN0ZWFsZXIgcGF0dGVybnMgKGNyZWRlbnRpYWwgaGFydmVzdGluZywgYnJvd3Nl +ciBkYXRhIHRoZWZ0KSINCiAgICAgICAgY2F0ZWdvcnkgPSAibWFsd2FyZSINCiAgICAgICAgc2V2 +ZXJpdHkgPSAiSElHSCINCiAgICAgICAgY29uZmlkZW5jZSA9ICIwLjc1Ig0KICAgICAgICByZWZl +cmVuY2UgPSAiaHR0cHM6Ly9naXRodWIuY29tL05lbzIzeDAvc2lnbmF0dXJlLWJhc2UiDQogICAg +c3RyaW5nczoNCiAgICAgICAgJGNocm9tZV9sb2dpbiAgICAgPSAvQ2hyb21lLipMb2dpblxzKkRh +dGEvIG5vY2FzZQ0KICAgICAgICAkZmlyZWZveF9sb2dpbnMgICA9IC9sb2dpbnNcLmpzb24uKmZp +cmVmb3gvIG5vY2FzZQ0KICAgICAgICAkYnJvd3Nlcl9jb29raWVzICA9IC9Db29raWVzLiooY2hy +b21lfGZpcmVmb3h8ZWRnZXxvcGVyYSkvIG5vY2FzZQ0KICAgICAgICAkd2FsbGV0X3N0ZWFsICAg +ICA9IC93YWxsZXRcLmRhdC8gbm9jYXNlDQogICAgICAgICRtaW1pa2F0eiAgICAgICAgID0gIm1p +bWlrYXR6IiBub2Nhc2UNCiAgICAgICAgJGxhemFnbmUgICAgICAgICAgPSAibGF6YWduZSIgbm9j +YXNlDQogICAgICAgICRjcmVkX2R1bXBfc2FtICAgID0gL3JlZ1xzK3NhdmVccysuKlxcc2FtLyBu +b2Nhc2UNCiAgICAgICAgJG50ZHNfZHVtcCAgICAgICAgPSAvbnRkc1wuZGl0LyBub2Nhc2UNCiAg +ICBjb25kaXRpb246DQogICAgICAgIGFueSBvZiB0aGVtDQp9DQo= diff --git a/tests/nodes/analyzers/test_static_yara.py b/tests/nodes/analyzers/test_static_yara.py index c684533e..89d20389 100644 --- a/tests/nodes/analyzers/test_static_yara.py +++ b/tests/nodes/analyzers/test_static_yara.py @@ -21,6 +21,7 @@ from __future__ import annotations +import base64 from pathlib import Path import pytest @@ -80,6 +81,10 @@ def _run_builtin(content: str, filename: str = "skill.py") -> list: return static_yara.node(state)["findings"] +def _reverse_shell_fixture() -> str: + return base64.b64decode("YmFzaCAtaSA+JiAvZGV2L3RjcC8xMjcuMC4wLjEvNDQ0NCAwPiYx").decode() + + def _has_rule(findings: list, rule_name: str) -> bool: """Return True when a finding message references a specific YARA rule.""" return any(rule_name in f.message for f in findings) @@ -284,6 +289,32 @@ def test_no_rules_dir_uses_builtin(self): assert rules is not None +class TestBuiltInMalwarePackaging: + def test_builtin_malware_finding_preserved(self): + findings = _run_builtin( + _reverse_shell_fixture(), + "shell.sh", + ) + assert _has_rule(findings, "reverse_shell") + assert any(f.rule_id == "YR1" for f in findings) + + def test_extra_rules_still_match_with_builtin_malware_representation(self, tmp_path): + _write_rule( + tmp_path, + "extra_marker", + category="hack_tool", + severity="MEDIUM", + strings={"a": "EXTRA_MARKER"}, + ) + findings = _run( + f"EXTRA_MARKER\n{_reverse_shell_fixture()}", + "bundle.sh", + str(tmp_path), + ) + assert _has_rule(findings, "extra_marker") + assert _has_rule(findings, "reverse_shell") + + # ── Built-in agent skill rules ──────────────────────────────────────── @@ -401,11 +432,16 @@ class TestHelpers: def test_collect_rule_files_finds_yar(self, tmp_path): (tmp_path / "a.yar").write_text("rule a { condition: false }") (tmp_path / "b.yara").write_text("rule b { condition: false }") + encoded = base64.b64encode(b"rule d { condition: false }").decode() + (tmp_path / "d.yar.b64").write_text(encoded) + (tmp_path / "e.yara.b64").write_text(encoded) (tmp_path / "c.txt").write_text("not a rule") files = static_yara._collect_rule_files(tmp_path) names = {f.name for f in files} assert "a.yar" in names assert "b.yara" in names + assert "d.yar.b64" in names + assert "e.yara.b64" in names assert "c.txt" not in names def test_collect_rule_files_nonexistent_dir(self, tmp_path): @@ -416,9 +452,65 @@ def test_build_namespace_map(self, tmp_path): (tmp_path / "alpha.yar").write_text("") (tmp_path / "beta.yar").write_text("") files = sorted(tmp_path.glob("*.yar")) - ns_map = static_yara._build_namespace_map(files) + ns_map, skipped = static_yara._build_namespace_map(files) assert "alpha" in ns_map assert "beta" in ns_map + assert skipped == 0 + + def test_build_namespace_map_decodes_encoded_rules(self, tmp_path): + encoded_source = base64.b64encode(b"rule encoded { condition: false }").decode() + encoded_file = tmp_path / "encoded.yar.b64" + encoded_file.write_text(encoded_source) + ns_map, skipped = static_yara._build_namespace_map([encoded_file], tmp_path) + decoded_path = Path(ns_map["encoded"]) + assert decoded_path.read_text() == "rule encoded { condition: false }" + assert skipped == 0 + + def test_build_namespace_map_keeps_encoded_namespace_collisions_apart(self, tmp_path): + first_dir = tmp_path / "builtin" + second_dir = tmp_path / "extra" + materialized_dir = tmp_path / "materialized" + first_dir.mkdir() + second_dir.mkdir() + materialized_dir.mkdir() + first_file = first_dir / "malware.yar.b64" + second_file = second_dir / "malware.yar.b64" + first_file.write_text(base64.b64encode(b"rule first { condition: false }").decode()) + second_file.write_text(base64.b64encode(b"rule second { condition: false }").decode()) + + ns_map, skipped = static_yara._build_namespace_map( + [first_file, second_file], materialized_dir + ) + + first_path = Path(ns_map["malware"]) + second_path = Path(ns_map["extra/malware"]) + assert first_path != second_path + assert first_path.read_text() == "rule first { condition: false }" + assert second_path.read_text() == "rule second { condition: false }" + assert skipped == 0 + + def test_build_namespace_map_skips_malformed_encoded_rules(self, tmp_path): + valid_file = tmp_path / "valid.yar.b64" + invalid_file = tmp_path / "invalid.yar.b64" + valid_file.write_text(base64.b64encode(b"rule valid { condition: false }").decode()) + invalid_file.write_text("not base64") + + ns_map, skipped = static_yara._build_namespace_map([valid_file, invalid_file], tmp_path) + + assert "valid" in ns_map + assert "invalid" not in ns_map + assert skipped == 1 + + def test_malformed_extra_encoded_rule_does_not_block_builtin_rules(self, tmp_path): + (tmp_path / "bad.yar.b64").write_text("not base64") + + findings = _run( + _reverse_shell_fixture(), + "shell.sh", + str(tmp_path), + ) + + assert _has_rule(findings, "reverse_shell") def test_content_hash_deterministic(self, tmp_path): (tmp_path / "r.yar").write_text("rule r { condition: false }")