class BasePatch(ConfigPatcherPlugin):
    """
    Generate base config for static_files and default plugins.

    Reads the ``ArchId``, ``InitFinder`` and ``KernelVersionFinder`` prior
    results and emits the ``core``, ``env``, ``pseudofiles``,
    ``static_files`` and ``plugins`` sections of the config.
    """
    depends_on = ['ArchId', 'InitFinder', 'KernelVersionFinder']
    # Placeholder stored in env.igloo_init when InitFinder reported nothing.
    UNKNOWN_INIT: str = "UNKNOWN_FIX_ME"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.enabled = True
        self.patch_name = "base"

    def generate(self, patches: dict) -> dict:
        """Build and return the base patch dictionary.

        Side effects: sets ``kernel_versions``, ``igloo_init`` and (through
        ``set_arch_info``) ``arch_name``, ``arch_dir`` and ``dylib_dir``.
        """
        prior = self.prior_results
        arch_info = prior.get('ArchId')
        inits = prior.get('InitFinder', [])
        self.kernel_versions = prior.get('KernelVersionFinder', {"selected_kernel": ""})

        self.set_arch_info(arch_info)

        if not len(inits):
            self.igloo_init = self.UNKNOWN_INIT
            logger.warning("Failed to find any init programs - config will need manual refinement")
        else:
            self.igloo_init = inits[0]

        # Device numbers for /igloo/serial, chosen per architecture.
        # NOTE(review): presumably ttyS1 (4, 65), hvc1 (229, 1) and
        # ttyAMA1 (204, 65) per the Linux device list - confirm.
        name = self.arch_name
        if 'mips' in name or name in ("intel64", "loongarch64"):
            igloo_serial_major, igloo_serial_minor = 4, 65
        elif "powerpc" in name:
            igloo_serial_major, igloo_serial_minor = 229, 1
        else:
            # armel, aarch64 and every architecture not listed above
            igloo_serial_major, igloo_serial_minor = 204, 65

        def stub_serial() -> dict:
            # A fresh dict per device so the two entries never share objects.
            return {
                "read": {
                    "model": "zero",
                },
                "write": {
                    "model": "discard",
                },
                "ioctl": {
                    "*": {
                        "model": "return_const",
                        "val": 0,
                    }
                },
            }

        def busybox_link() -> dict:
            return {
                "type": "symlink",
                "target": "/igloo/utils/busybox",
            }

        static_files = {
            "/igloo/init": {
                "type": "inline_file",
                "contents": default_init_script,
                "mode": 0o111,
            },
        }
        for applet in ("sh", "sleep"):
            static_files[f"/igloo/utils/{applet}"] = busybox_link()
        static_files["/igloo/ltrace/*"] = {
            "type": "host_file",
            "mode": 0o444,
            "host_path": os.path.join(STATIC_DIR, "ltrace", "*"),
        }
        static_files["/igloo/dylibs/*"] = {
            "type": "host_file",
            "mode": 0o755,
            "host_path": os.path.join(STATIC_DIR, "dylibs", self.dylib_dir or self.arch_dir, "*"),
        }
        static_files["/igloo/source.d/*"] = {
            "type": "host_file",
            "mode": 0o755,
            "host_path": os.path.join(RESOURCES, "source.d", "*"),
        }
        static_files["/igloo/serial"] = {
            "type": "dev",
            "devtype": "char",
            "major": igloo_serial_major,
            "minor": igloo_serial_minor,
            "mode": 0o666,
        }

        # One explicit entry per guest helper script found on the host.
        guest_scripts_dir = os.path.join(STATIC_DIR, "guest-utils", "scripts")
        for script in os.listdir(guest_scripts_dir):
            static_files[f"/igloo/utils/{script}"] = {
                "type": "host_file",
                "host_path": f"{guest_scripts_dir}/{script}",
                "mode": 0o755,
            }
        # Architecture-specific utilities are added last, as a glob.
        static_files["/igloo/utils/*"] = {
            "type": "host_file",
            "host_path": f"{STATIC_DIR}/{self.arch_dir}/*",
            "mode": 0o755,
        }

        return {
            "core": {
                "arch": self.arch_name,
                "kernel": self.kernel_versions["selected_kernel"],
            },
            "env": {
                "igloo_init": self.igloo_init,
            },
            "pseudofiles": {
                "/dev/ttyS1": stub_serial(),
                "/dev/ttyAMA1": stub_serial(),
            },
            "static_files": static_files,
            "plugins": default_plugins,
        }

    def set_arch_info(self, arch_identified: str) -> None:
        """Derive ``arch_name``, ``arch_dir`` and ``dylib_dir``.

        Raises:
            NotImplementedError: if ``arch_end`` returns no architecture
                for ``arch_identified``.
        """
        arch, endian = arch_end(arch_identified)
        if arch is None:
            raise NotImplementedError(f"Architecture {arch_identified} not supported ({arch}, {endian})")

        if arch in ("aarch64", "intel64", "loongarch64", "riscv64", "powerpc"):
            # These names are used as-is, without an endianness suffix.
            self.arch_name = arch
        elif arch == "powerpc64":
            self.arch_name = "powerpc64le" if endian == "el" else "powerpc64"
        else:
            self.arch_name = arch + endian

        self.arch_dir = get_arch_subdir({"core": {"arch": self.arch_name}})

        # The dylibs directory uses different names for some architectures.
        renamed = {
            "aarch64": "arm64",
            "intel64": "x86_64",
            "loongarch64": "loongarch",
        }
        override = renamed.get(arch_identified)
        if override is not None:
            self.dylib_dir = override
        elif "powerpc" in self.arch_name:
            self.dylib_dir = self.arch_name.replace("powerpc", "ppc")
        else:
            self.dylib_dir = self.arch_dir
DynamicExploration(ConfigPatcherPlugin): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.patch_name = "auto_explore" + self.enabled = False + + def generate(self, patches: dict) -> dict: + return { + "core": { + "root_shell": False, + }, + "plugins": { + "nmap": { + "enabled": True, + }, + "vpn": { + "enabled": True, + "log": True, + }, + "netbinds": + { + "enabled": True, + "shutdown_on_www": False, + }, + } + } diff --git a/pyplugins/config_patchers/file_helper.py b/pyplugins/config_patchers/file_helper.py new file mode 100644 index 000000000..a35af7378 --- /dev/null +++ b/pyplugins/config_patchers/file_helper.py @@ -0,0 +1,67 @@ +import os +import subprocess +import re +from pathlib import Path + +class FileHelper: + @staticmethod + def find_executables(tmp_dir: str, target_dirs: set[str] | None = None): + if not target_dirs: + target_dirs = {"/"} + for root, _, files in os.walk(tmp_dir): + if "/igloo" in root: + continue + for file in files: + file_path = Path(root) / file + if ( + file_path.is_file() + and os.access(file_path, os.X_OK) + and any(str(file_path).endswith(d) for d in target_dirs) + ): + yield file_path + + @staticmethod + def find_strings_in_file(file_path: str, pattern: str) -> list[str]: + result = subprocess.run(["strings", file_path], capture_output=True, text=True) + return [line for line in result.stdout.splitlines() if re.search(pattern, line)] + + @staticmethod + def find_shell_scripts(tmp_dir: str): + for root, _, files in os.walk(tmp_dir): + if "/igloo" in root: + continue + for file in files: + file_path = Path(root) / file + if ( + file_path.is_file() + and os.access(file_path, os.X_OK) + and str(file_path).endswith(".sh") + ): + yield file_path + + @staticmethod + def exists(tmp_dir: str, target: str) -> bool: + assert target.startswith("/") + assert os.path.exists(tmp_dir) + + target = target[1:] + parts = target.split("/") + + current_path = tmp_dir + + for part in parts: + next_path = 
os.path.join(current_path, part) + + if os.path.islink(next_path): + resolved = os.readlink(next_path) + if resolved.startswith("/"): + current_path = os.path.realpath(os.path.join(tmp_dir, resolved[1:])) + else: + current_path = os.path.realpath(os.path.join(current_path, resolved)) + else: + current_path = next_path + + if not os.path.exists(current_path): + return False + + return os.path.exists(current_path) diff --git a/pyplugins/config_patchers/force_www.py b/pyplugins/config_patchers/force_www.py new file mode 100644 index 000000000..8c87665ff --- /dev/null +++ b/pyplugins/config_patchers/force_www.py @@ -0,0 +1,64 @@ +import os +from penguin.static_plugin import ConfigPatcherPlugin + +class ForceWWW(ConfigPatcherPlugin): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.enabled = False + self.patch_name = 'force_www' + + def generate(self, patches: dict) -> dict | None: + file2cmd = { + "./etc/init.d/uhttpd": "/etc/init.d/uhttpd start", + "./usr/bin/httpd": "/usr/bin/httpd", + "./usr/sbin/httpd": "/usr/sbin/httpd", + "./bin/goahead": "/bin/goahead", + "./bin/alphapd": "/bin/alphapd", + "./bin/boa": "/bin/boa", + "./usr/sbin/lighttpd": "/usr/sbin/lighttpd -f /etc/lighttpd/lighttpd.conf", + } + + www_cmds = [] + www_paths = [] + + have_lighttpd_conf = os.path.isfile(os.path.join(self.extracted_fs, "./etc/lighttpd/lighttpd.conf")) + + for file, cmd in file2cmd.items(): + if os.path.isfile(os.path.join(self.extracted_fs, file)): + if file == "./usr/sbin/lighttpd" and not have_lighttpd_conf: + continue + www_cmds.append(cmd) + www_paths.append(file) + + if not len(www_cmds): + return + + cmd_str = """#!/igloo/utils/sh + /igloo/utils/busybox sleep 120 + + while true; do + """ + + for cmd in www_cmds: + cmd_str += f""" + if ! 
class GenerateMissingDirs(ConfigPatcherPlugin):
    """Add ``dir`` entries for commonly expected directories that are absent
    from the filesystem archive."""

    # Guest directories that should exist after patching.
    TARGET_DIRECTORIES: list[str] = [
        "/proc",
        "/etc_ro",
        "/tmp",
        "/var",
        "/run",
        "/sys",
        "/root",
        "/tmp/var",
        "/tmp/media",
        "/tmp/etc",
        "/tmp/var/run",
        "/tmp/home",
        "/tmp/home/root",
        "/tmp/mnt",
        "/tmp/opt",
        "/tmp/www",
        "/var/run",
        "/var/lock",
        "/usr/bin",
        "/usr/sbin",
    ]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.patch_name = "static.missing_dirs"
        self.enabled = True

    @staticmethod
    def _resolve_path(d: str, symlinks: dict, depth: int = 0) -> str:
        """Rewrite ``d`` by substituting any symlinked prefix with its target.

        Args:
            d: Path to resolve.
            symlinks: Mapping of symlink path to link target.
            depth: Recursion depth; resolution stops once it exceeds 10.

        Returns:
            The resolved path, or ``d`` unchanged when a loop is suspected.
        """
        parts = d.split("/")
        # Try the longest prefix first, down to the first real component.
        for i in range(len(parts), 1, -1):
            sub_path = "/".join(parts[:i])
            if sub_path in symlinks:
                if depth > 10 or d == symlinks[sub_path]:
                    logger.warning(f"Symlink loop detected for {d}")
                    return d
                return GenerateMissingDirs._resolve_path(
                    d.replace(sub_path, symlinks[sub_path], 1), symlinks, depth=depth+1
                )
        if not d.startswith("/"):
            d = "/" + d

        if d in symlinks:
            if depth > 10 or d == symlinks[d]:
                logger.warning(f"Symlink loop detected for {d}")
                return d
            else:
                return GenerateMissingDirs._resolve_path(symlinks[d], symlinks, depth=depth+1)

        return d

    def generate(self, patches: dict) -> dict:
        """Return ``static_files`` ``dir`` entries for missing target directories.

        A target is skipped when it still contains ``..`` after normalisation,
        when one of its parents is a symlink, or when it is already present in
        the archive or in another patch's ``static_files``.
        """
        from .tar_helper import TarHelper
        symlinks = TarHelper.get_symlink_members(self.fs_archive)
        # Member names carry a leading character that is dropped so they
        # compare against absolute guest paths.
        archive_files = {member.name[1:] for member in TarHelper.get_all_members(self.fs_archive)}
        result = defaultdict(dict)

        for d in self.TARGET_DIRECTORIES:
            resolved_path = self._resolve_path(d, symlinks)
            if ".." in resolved_path.split("/"):
                resolved_path = os.path.normpath(resolved_path)

            if ".." in resolved_path.split("/"):
                logger.debug("Skipping directory with .. in path: " + resolved_path)
                continue

            while resolved_path.endswith("/"):
                resolved_path = resolved_path[:-1]

            if resolved_path == ".":
                continue

            if resolved_path.endswith("/."):
                resolved_path = resolved_path[:-2]

            # Skip the whole directory when any parent is still a symlink.
            # The previous inner-loop `continue` only advanced the parent
            # scan, so the directory was emitted despite the log message.
            components = resolved_path.split("/")
            symlinked_parent = next(
                (
                    parent
                    for parent in ("/".join(components[:i]) for i in range(1, len(components)))
                    if parent in symlinks
                ),
                None,
            )
            if symlinked_parent is not None:
                logger.debug(
                    f"Skipping {resolved_path} because parent {symlinked_parent} is a symlink"
                )
                continue

            while "/./" in resolved_path:
                resolved_path = resolved_path.replace("/./", "/")
            while "//" in resolved_path:
                resolved_path = resolved_path.replace("//", "/")
            while resolved_path.endswith("/"):
                resolved_path = resolved_path[:-1]

            if resolved_path in archive_files:
                continue
            if any(resolved_path in p[0].get('static_files', {}) for p in patches.values()):
                continue

            path_parts = resolved_path.split("/")
            for i in range(1, len(path_parts) + 1):
                subdir = "/".join(path_parts[:i])
                if not subdir:
                    # The leading component of an absolute path is empty;
                    # never emit an entry with an empty path.
                    continue
                if subdir not in archive_files:
                    result['static_files'][subdir] = {
                        "type": "dir",
                        "mode": 0o755,
                    }
        return result
class GenerateMissingFiles(ConfigPatcherPlugin):
    """Add a few expected files when the extracted filesystem lacks them,
    and make sure ``/etc/hosts`` has a localhost entry."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.patch_name = "static.missing_files"
        self.enabled = True

    def generate(self, patches: dict) -> dict:
        """Return ``static_files`` entries for absent files and a patched hosts file."""
        result = defaultdict(dict)
        fs_root = self.extracted_fs

        # Guest path -> entry to add when that path is not a regular file.
        fallbacks = {
            "/bin/sh": {
                "type": "symlink",
                "target": "/igloo/utils/busybox"
            },
            "/etc/TZ": {
                "type": "inline_file",
                "contents": "EST5EDT",
                "mode": 0o755,
            },
            "/var/run/nvramd.pid": {
                "type": "inline_file",
                "contents": "",
                "mode": 0o644,
            },
        }
        for guest_path, entry in fallbacks.items():
            if os.path.isfile(os.path.join(fs_root, guest_path[1:])):
                continue
            result['static_files'][guest_path] = entry

        hosts_path = os.path.join(fs_root, "etc/hosts")
        hosts = ""
        if os.path.isfile(hosts_path):
            with open(hosts_path, "r") as f:
                hosts = f.read()

        has_localhost = re.search(r"^127\.0\.0\.1\s+localhost\s*$", hosts, re.MULTILINE)
        if not has_localhost:
            # Append on its own line, keeping whatever the file already had.
            if len(hosts) and not hosts.endswith("\n"):
                hosts += "\n"
            hosts += "127.0.0.1 localhost\n"

            result["static_files"]["/etc/hosts"] = {
                "type": "inline_file",
                "contents": hosts,
                "mode": 0o755,
            }
        return result
class GenerateShellMounts(ConfigPatcherPlugin):
    """Create directories for ``/mnt/...`` paths referenced by shell scripts."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.patch_name = "static.shell_script_mounts"
        self.enabled = True

    def generate(self, patches: dict) -> dict:
        """Return ``dir`` entries for referenced ``/mnt`` paths that do not exist yet."""
        from .file_helper import FileHelper
        from .tar_helper import TarHelper
        existing = {member.name[1:] for member in TarHelper.get_all_members(self.fs_archive)}
        result = defaultdict(dict)

        def claimed_by_other_patch(path: str) -> bool:
            # True when another patch already has a static_files entry for path.
            return any([path in p[0].get('static_files', {}).keys() for p in patches.values()])

        for script in FileHelper.find_shell_scripts(self.extracted_fs):
            references = set(FileHelper.find_strings_in_file(script, "^/mnt/[a-zA-Z0-9._/]+$"))
            for dest in references:
                # Without a trailing slash the last component is dropped and
                # only its parent directory is considered.
                if not dest.endswith("/"):
                    dest = os.path.dirname(dest)

                if dest in existing:
                    continue
                if claimed_by_other_patch(dest):
                    continue
                if FileHelper.exists(self.extracted_fs, dest):
                    continue

                result['static_files'][dest] = {
                    "type": "dir",
                    "mode": 0o755,
                }
        return result
KernelModules(ConfigPatcherPlugin): + depends_on = ['KernelVersionFinder'] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.patch_name = "static.kernel_modules" + self.enabled = True + + @staticmethod + def is_kernel_version(name: str) -> bool: + return re.match(r"^\d+\.\d+\.\d+(-[\w\.]+)?$", name) is not None + + @staticmethod + def pad_kernel_version(ver: str) -> str: + base = ver.split("-", 1)[0] + tokens = base.split(".") + while len(tokens) < 3: + tokens.append("0") + return ".".join(tokens) + + def generate(self, patches: dict) -> dict: + self.kernel_version = self.prior_results.get('KernelVersionFinder', {}) + if not self.kernel_version or "selected_kernel" not in self.kernel_version: + return {} + + result = defaultdict(dict) + kernel_version = None + potential_kernels = set() + + modules_path = os.path.join(self.extracted_fs, "lib/modules") + if os.path.exists(modules_path): + for d in os.listdir(modules_path): + d_path = os.path.join(modules_path, d) + if os.path.isdir(d_path): + potential_kernels.add(d) + + potential_kernels = {d for d in potential_kernels if self.is_kernel_version(d)} + + if len(potential_kernels) == 1: + kernel_version = potential_kernels.pop() + elif len(potential_kernels) > 1: + for potential_name in potential_kernels: + if "." in potential_name and "-" in potential_name: + kernel_version = potential_name + break + if not kernel_version: + for potential_name in potential_kernels: + if "." 
class LibInjectJITAliases(ConfigPatcherPlugin):
    """Alias exported nvram-related symbols that have no default alias to a
    generic lib_inject model chosen from the symbol name."""

    depends_on = ['LibrarySymbols']

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.enabled = True
        self.patch_name = 'lib_inject.jit_models'

    def generate(self, patches: dict) -> dict | None:
        """Return a ``lib_inject.aliases`` patch, or ``None`` when nothing matched.

        A symbol is considered when its name contains ``nvram`` and it is not
        already a key of ``default_lib_aliases``. Names containing ``_get``
        map to the getter model, names containing ``_set`` to the setter
        model, and everything else to ``libinject_ret_0``.
        """
        library_info = self.prior_results.get('LibrarySymbols', {})
        aliases = {}

        for exported_syms in library_info.get("symbols", {}).values():
            for sym in exported_syms:
                if "nvram" not in sym or sym in default_lib_aliases:
                    continue
                if "_get" in sym:
                    target = "libinject_nvram_get"
                elif "_set" in sym:
                    # Setters previously fell through to the getter model.
                    target = "libinject_nvram_set"
                else:
                    target = "libinject_ret_0"
                aliases[sym] = target
                logger.info(f"\tJIT mapping {sym} -> {target}")

        if aliases:
            return {'lib_inject': {'aliases': aliases}}
        return None
class LibInjectSymlinks(ConfigPatcherPlugin):
    """Place a ``lib_inject.so`` symlink next to every libc found in the
    extracted filesystem and preload it."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.enabled = True
        self.patch_name = 'lib_inject.core'

    def generate(self, patches: dict) -> dict:
        """Return symlink entries plus an ``LD_PRELOAD`` env entry when any were added."""
        result = defaultdict(dict)

        # Collect every file whose name starts with "libc.so".
        libc_paths = [
            Path(os.path.join(root, filename))
            for root, dirs, files in os.walk(self.extracted_fs)
            for filename in files
            if filename.startswith("libc.so")
        ]

        for candidate in libc_paths:
            # isfile() follows links, so dangling symlinks are rejected here too.
            if not os.path.isfile(candidate):
                continue

            with open(candidate, 'rb') as file:
                try:
                    elf = ELFFile(file)
                except ELFError:
                    # Not an ELF object; ignore it.
                    continue

                abi = arch_filter(elf).abi

            guest_dir = candidate.relative_to(self.extracted_fs).parent
            link_path = Path("/") / guest_dir / "lib_inject.so"

            result["static_files"][str(link_path)] = {
                "type": "symlink",
                "target": f"/igloo/lib_inject_{abi}.so",
            }

        if result.get("static_files"):
            result["env"] = {"LD_PRELOAD": "lib_inject.so"}

        return result
class LinksysHack(ConfigPatcherPlugin):
    """Model ``/dev/gpio/in`` when three marker files are all present."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.patch_name = "pseudofiles.linksys"
        self.enabled = True

    def generate(self, patches: dict) -> dict:
        """Return a ``/dev/gpio/in`` pseudofile patch, or an empty mapping."""
        result = defaultdict(dict)
        markers = ["/bin/gpio", "/usr/lib/libcm.so", "/usr/lib/libshared.so"]

        for marker in markers:
            if not os.path.isfile(os.path.join(self.extracted_fs, marker[1:])):
                # A marker is missing: leave the config untouched.
                return result

        result["pseudofiles"]["/dev/gpio/in"] = {
            "read": {
                "model": "return_const",
                "val": 0xFFFFFFFF,
            }
        }
        return result
"shutdown_on_www": False, + }, + + } + } diff --git a/pyplugins/config_patchers/netdevs_default.py b/pyplugins/config_patchers/netdevs_default.py new file mode 100644 index 000000000..780b37a04 --- /dev/null +++ b/pyplugins/config_patchers/netdevs_default.py @@ -0,0 +1,11 @@ +from penguin.static_plugin import ConfigPatcherPlugin +from penguin.defaults import default_netdevs + +class NetdevsDefault(ConfigPatcherPlugin): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.enabled = True + self.patch_name = "netdevs.default" + + def generate(self, patches: dict) -> dict: + return {'netdevs': default_netdevs} diff --git a/pyplugins/config_patchers/netdevs_tailored.py b/pyplugins/config_patchers/netdevs_tailored.py new file mode 100644 index 000000000..e3c0fb0e7 --- /dev/null +++ b/pyplugins/config_patchers/netdevs_tailored.py @@ -0,0 +1,19 @@ +from penguin.static_plugin import ConfigPatcherPlugin + +class NetdevsTailored(ConfigPatcherPlugin): + depends_on = ['InterfaceFinder'] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.enabled = True + self.patch_name = "netdevs.dynamic" + + def generate(self, patches: dict) -> dict | None: + netdevs = self.prior_results.get('InterfaceFinder') + values = set() + if not netdevs: + return + for src, devs in netdevs.items(): + values.update(devs) + if len(values): + return {'netdevs': sorted(list(values))} diff --git a/pyplugins/config_patchers/nvram_config_recovery.py b/pyplugins/config_patchers/nvram_config_recovery.py new file mode 100644 index 000000000..0a8fef7d2 --- /dev/null +++ b/pyplugins/config_patchers/nvram_config_recovery.py @@ -0,0 +1,13 @@ +from penguin.static_plugin import ConfigPatcherPlugin + +class NvramConfigRecovery(ConfigPatcherPlugin): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.patch_name = "nvram.02_config_paths" + self.enabled = True + + def generate(self, patches: dict) -> dict | None: + from 
class NvramFirmAEFileSpecific(ConfigPatcherPlugin):
    """Add nvram defaults for keys referenced by specific files.

    NOTE(review): unlike the sibling plugins, ``__init__`` does not set
    ``self.enabled``; confirm the base-class default is the intended one.
    """

    # Path inside the extracted filesystem -> (nvram key, default value) pairs
    # to add when the file's contents mention that key.
    FIRMAE_TARGETS: dict[str, list[tuple[str, str]]] = {
        "./sbin/rc": [("ipv6_6to4_lan_ip", "2002:7f00:0001::")],
        "./lib/libacos_shared.so": [("time_zone_x", "0")],
        "./sbin/acos_service": [("rip_enable", "0")],
        "./usr/sbin/httpd": [
            ("rip_multicast", "0"),
            ("bs_trustedip_enable", "0"),
            ("filter_rule_tbl", ""),
        ],
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.patch_name = "nvram.05_firmae_file_specific"

    def generate(self, patches: dict) -> dict | None:
        """Return ``{'nvram': {key: default}}`` for keys found in target files.

        Returns ``None`` when no target file mentions any of its keys.
        """
        result = {}

        for rel_path, defaults in self.FIRMAE_TARGETS.items():
            # Join the "./"-relative path as-is. Slicing off the first
            # character would leave an absolute path, and os.path.join would
            # then discard extracted_fs and inspect the host filesystem.
            host_path = os.path.join(self.extracted_fs, rel_path)
            if not os.path.isfile(host_path):
                continue

            try:
                with open(host_path, "rb") as f:
                    content = f.read()
            except OSError as e:
                logger.error(f"Failed to read {rel_path} for nvram key check: {e}")
                continue

            for nvram_key, default_value in defaults:
                if nvram_key.encode() in content:
                    # Record key -> default value (previously file -> key).
                    result[nvram_key] = default_value

        if result:
            return {'nvram': result}
        return None
@staticmethod
def parse_nvram_file(path: str, f) -> dict:
    """
    Parse a default-NVRAM file that is either NUL-separated or line-based.

    The content is parsed both ways (``key=value`` records split on NUL
    bytes, and ``key=value`` lines split on newlines).  The interpretation
    that yields more entries wins, provided it found more than five; in any
    other case an empty dict is returned.

    :param path: name of the file, used only in log messages.
    :param f: binary file object positioned at the start of the data.
    :return: mapping of raw ``bytes`` keys to raw ``bytes`` values.
    """
    file_content = f.read()
    results_null = {}
    results_lines = {}

    # Interpretation 1: NUL-separated records.  The element after the final
    # separator is dropped, as before.
    for pair in file_content.split(b"\x00")[:-1]:
        if not pair:
            # Runs of NUL padding produce empty records; they are not
            # malformed entries, so skip them without logging.
            continue
        key, sep, val = pair.partition(b"=")
        if not sep:
            logger.warning(f"could not process default nvram file {path} for {pair}")
            continue
        if key.startswith(b"#"):
            continue
        results_null[key] = val

    # Interpretation 2: one record per line.
    for line in file_content.split(b"\n"):
        # CRLF files would otherwise keep a trailing "\r" in every value.
        line = line.rstrip(b"\r")
        if line.startswith(b"#") or b"=" not in line:
            continue
        key, val = line.split(b"=", 1)
        results_lines[key] = val

    if len(results_null) > 5 and len(results_null) > len(results_lines):
        return results_null
    if len(results_lines) > 5 and len(results_lines) > len(results_null):
        return results_lines
    return {}
"./image/mnt/nvram_ap.default", + "./etc_ro/Wireless/RT2860AP/RT2860_default_vlan", + "./etc_ro/Wireless/RT2860AP/RT2860_default_novlan", + "./image/mnt/nvram_whp.default", + "./image/mnt/nvram_rt.default", + "./image/mnt/nvram_rpt.default", + "./image/mnt/nvram.default", + ] + nvram_basenames = set([os.path.basename(x) for x in nvram_paths]) + + path_nvrams = {} + if full_path: + for path in nvram_paths: + abs_path = os.path.join(fs_path, path.lstrip("/")) + if os.path.exists(abs_path): + with open(abs_path, "rb") as f: + result = NvramHelper.parse_nvram_file(path, f) + for k, v in result.items(): + path_nvrams[k.decode()] = v.decode() + else: + for root, _, files in os.walk(fs_path): + for file in files: + abs_path = os.path.join(root, file) + rel_path = "./" + os.path.relpath(abs_path, fs_path) + + if rel_path in nvram_paths: + continue + + if any(file == fname for fname in nvram_basenames): + with open(abs_path, "rb") as f: + result = NvramHelper.parse_nvram_file(rel_path, f) + for k, v in result.items(): + path_nvrams[k.decode()] = v.decode() + + return path_nvrams diff --git a/pyplugins/config_patchers/nvram_library_recovery.py b/pyplugins/config_patchers/nvram_library_recovery.py new file mode 100644 index 000000000..2c0cba2de --- /dev/null +++ b/pyplugins/config_patchers/nvram_library_recovery.py @@ -0,0 +1,26 @@ +from penguin.static_plugin import ConfigPatcherPlugin + +class NvramLibraryRecovery(ConfigPatcherPlugin): + depends_on = ['LibrarySymbols'] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.patch_name = "nvram.01_library" + self.enabled = True + + def generate(self, patches: dict) -> dict | None: + library_info = self.prior_results.get('LibrarySymbols', {}) + sources = library_info.get("nvram", {}) + if not len(sources): + return + + sorted_sources = sorted(sources.items(), key=lambda x: len(x[1]), reverse=True) + + nvram_defaults = {} + for source, nvram in sorted_sources: + for key, value in nvram.items(): + if 
class PseudofilesExpert(ConfigPatcherPlugin):
    """
    Config patcher that contributes ``expert_knowledge_pseudofiles`` (imported
    from ``penguin.defaults``) as the ``pseudofiles`` section of the config.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.patch_name = "pseudofiles.expert_knowledge"
        self.enabled = True

    def generate(self, patches: dict) -> dict:
        """Return the patch; the ``patches`` argument is not consulted."""
        patch = {'pseudofiles': expert_knowledge_pseudofiles}
        return patch
class ShimBinaries:
    """
    Produce ``shim`` static-file entries for executables found in an archive.

    ``files`` is an iterable of tar members (objects exposing ``name``,
    ``mode``, ``isfile()`` and ``issym()``).
    """

    def __init__(self, files):
        self.files = files

    def make_shims(self, shim_targets: dict[str, str]) -> dict:
        """
        Map every executable whose basename is in ``shim_targets`` to a shim.

        :param shim_targets: basename in the guest -> name of the replacement
            under ``/igloo/utils/``.
        :return: ``{"static_files": {guest_path: {...}}}``; empty when nothing
            matched.
        :raises ValueError: if the archive already contains ``/igloo/utils/``.
        """
        exec_mask = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        shims = defaultdict(dict)

        for member in self.files:
            # Drop the first character of the member name (the "." of "./x").
            guest_path = member.name[1:]

            if guest_path.startswith("/igloo/utils/"):
                raise ValueError(
                    "Unexpected /igloo/utils present in input filesystem archive"
                )

            linkable = member.isfile() or member.issym()
            if not linkable or not member.mode & exec_mask:
                continue

            leaf = os.path.basename(guest_path)
            if leaf not in shim_targets:
                continue

            logger.debug(f"making shim for {leaf}, full path: {guest_path}")
            shims["static_files"][guest_path] = {
                "type": "shim",
                "target": f"/igloo/utils/{shim_targets[leaf]}",
            }

        return shims
class ShimCrypto(ConfigPatcherPlugin):
    """
    Shim ``openssl`` and ``ssh-keygen`` and, when either is present, add the
    host's ``static_keys`` resources under ``/igloo/keys/``.  Disabled by
    default.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.enabled = False
        self.patch_name = "static.shims.crypto"

    def generate(self, patches: dict) -> dict | None:
        """Return the shim patch, or ``None`` when no target binary exists."""
        import penguin
        from .shim_binaries import ShimBinaries
        from .tar_helper import TarHelper

        resources_dir = os.path.join(os.path.dirname(penguin.__file__), "resources")
        crypto_targets = {
            "openssl": "openssl",
            "ssh-keygen": "ssh-keygen"
        }

        members = TarHelper.get_all_members(self.fs_archive)
        shims = ShimBinaries(members).make_shims(crypto_targets)

        if not shims.get("static_files"):
            return None

        # Keys are only added alongside at least one crypto shim.
        shims["static_files"]["/igloo/keys/*"] = {
            "type": "host_file",
            "mode": 0o444,
            "host_path": os.path.join(resources_dir, "static_keys", "*")
        }
        return shims
class ShimStopBins(ConfigPatcherPlugin):
    """
    Replace ``reboot`` and ``halt`` executables in the guest with shims that
    point at ``/igloo/utils/exit0.sh``.  Enabled by default.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.enabled = True
        self.patch_name = "static.shims.stop_bins"

    def generate(self, patches: dict) -> dict:
        """Return the shim patch built from the members of ``self.fs_archive``."""
        from .shim_binaries import ShimBinaries
        from .tar_helper import TarHelper

        stop_targets = {
            "reboot": "exit0.sh",
            "halt": "exit0.sh",
        }
        archive_members = TarHelper.get_all_members(self.fs_archive)
        return ShimBinaries(archive_members).make_shims(stop_targets)
class TarHelper:
    """
    Read-only helpers that list the members of a root-filesystem tarball.

    Member names are reported as guest paths: ``./bin/sh`` becomes
    ``/bin/sh``.  Names stored without the ``./`` prefix (``bin/sh``,
    ``/bin/sh``) are normalised to the same form instead of losing their
    first character.
    """

    @staticmethod
    def _guest_path(name: str) -> str:
        """Convert a tar member name into an absolute guest path."""
        if name == ".":
            # The archive root itself; historically reported as "".
            return ""
        if name.startswith("./"):
            return name[1:]
        return "/" + name.lstrip("/")

    @staticmethod
    def get_symlink_members(tarfile_path: str) -> dict[str, str]:
        """Return ``{guest_path: link_target}`` for every symlink member."""
        with tarfile.open(tarfile_path, "r") as tar:
            return {
                TarHelper._guest_path(member.name): member.linkname
                for member in tar.getmembers()
                if member.issym()
            }

    @staticmethod
    def get_all_members(tarfile_path: str):
        """Return the archive's ``TarInfo`` objects, names untouched."""
        with tarfile.open(tarfile_path, "r") as tar:
            return tar.getmembers()

    @staticmethod
    def get_other_members(tarfile_path: str):
        """Return guest paths of members that are neither files nor directories."""
        with tarfile.open(tarfile_path, "r") as tar:
            return {
                TarHelper._guest_path(member.name)
                for member in tar.getmembers()
                if not member.isfile() and not member.isdir()
            }

    @staticmethod
    def get_directory_members(tarfile_path: str) -> set[str]:
        """
        Return guest paths of directory members plus all of their ancestors.

        The root is represented by the empty string, as it was before.
        """
        with tarfile.open(tarfile_path, "r") as tar:
            results = {
                TarHelper._guest_path(member.name)
                for member in tar.getmembers()
                if member.isdir()
            }
        # Archives may omit intermediate directories; add every prefix.
        for directory in list(results):
            parts = directory.split("/")
            for depth in range(1, len(parts) + 1):
                results.add("/".join(parts[:depth]))
        return results

    @staticmethod
    def get_file_members(tarfile_path: str) -> set[str]:
        """Return guest paths of regular-file members."""
        with tarfile.open(tarfile_path, "r") as tar:
            return {
                TarHelper._guest_path(member.name)
                for member in tar.getmembers()
                if member.isfile()
            }
def run(self) -> str:
    """
    Tally the architectures of ELF files in the extracted filesystem and
    return the most common one.

    Only regular, non-symlink files accepted by ``_binary_filter`` are
    inspected.  Any 64-bit result takes precedence over 32-bit results.

    :raises ValueError: when no architecture was identified, or when files
        of unknown architecture outnumber the chosen one.
    """
    tallies = {32: Counter(), 64: Counter(), "unknown": 0}

    for root, _, files in os.walk(self.extracted_fs):
        for file_name in files:
            path = os.path.join(root, file_name)

            if not os.path.isfile(path) or os.path.islink(path):
                continue
            if not self._binary_filter(self.extracted_fs, path):
                continue

            logger.debug(f"Checking architecture in {path}")
            with open(path, "rb") as f:
                magic = f.read(4)
                if magic != b"\x7fELF":
                    continue
                f.seek(0)
                try:
                    ef = ELFFile(f)
                except ELFError as e:
                    logger.warning(f"Failed to parse ELF file {path}: {e}. Ignoring")
                    continue
                info = arch_filter(ef)

            if info.bits is None or info.arch is None:
                tallies["unknown"] += 1
            else:
                tallies[info.bits][info.arch] += 1

    # When Intel binaries sit next to any other architecture, discard the
    # Intel counts before picking a winner.
    intel_archs = ("intel", "intel64")
    seen_archs = list(tallies[32]) + list(tallies[64])
    has_intel = any(arch in intel_archs for arch in seen_archs)
    has_other = any(arch not in intel_archs for arch in seen_archs)
    if has_intel and has_other:
        tallies[32].pop("intel", None)
        tallies[64].pop("intel64", None)

    for width in (64, 32):
        top = tallies[width].most_common(1)
        if top:
            best, best_count = top[0]
            break
    else:
        raise ValueError("Failed to determine architecture of filesystem")

    if tallies["unknown"] > best_count:
        for arch, count in tallies[32].items():
            logger.info(f"32-bit arch {arch} has {count} files")
        for arch, count in tallies[64].items():
            logger.info(f"64-bit arch {arch} has {count} files")
        logger.info(f"Unknown architecture count: {tallies['unknown']}")
        raise ValueError("Failed to determine architecture of filesystem")

    logger.debug(f"Identified architecture: {best}")
    return best
class ClusterCollector(StaticAnalysisPlugin):
    """
    Collect summary statistics for the filesystem to help identify clusters.
    """

    def run(self) -> dict[str, list[str]]:
        """
        Walk the extracted filesystem and summarise what it contains.

        :return: dict with ``files`` (basenames of all regular files),
            ``executables`` (basenames of files for which
            ``os.access(..., os.X_OK)`` holds) and ``executable_hashes``
            (SHA-256 hex digests of those executables).
        """
        all_files = set()
        executables = set()
        executable_hashes = set()

        for root, _, files in os.walk(self.extracted_fs):
            for name in files:
                file_path = os.path.join(root, name)
                if not os.path.isfile(file_path):
                    continue
                all_files.add(name)

                if not os.access(file_path, os.X_OK):
                    continue
                executables.add(name)

                hash_value = self.compute_file_hash(file_path)
                if hash_value:
                    executable_hashes.add(hash_value)

        return {
            'files': list(all_files),
            'executables': list(executables),
            'executable_hashes': list(executable_hashes)
        }

    @staticmethod
    def compute_file_hash(file_path: str) -> str | None:
        """
        Return the SHA-256 hex digest of ``file_path``, or ``None`` on error.

        Hashing is done in-process: parsing ``sha256sum`` output breaks for
        file names containing a backslash or newline (the tool then prefixes
        the line with a backslash) and costs one process per file.
        """
        import hashlib

        digest = hashlib.sha256()
        try:
            with open(file_path, "rb") as fh:
                # Read in 1 MiB chunks so large binaries are not held in memory.
                for chunk in iter(lambda: fh.read(1 << 20), b""):
                    digest.update(chunk)
        except OSError as e:
            logger.debug(f"Failed to hash file {file_path}: {e}")
            return None
        return digest.hexdigest()
StaticAnalysisPlugin + +class EnvFinder(StaticAnalysisPlugin): + """ + Identify potential environment variables and their values in the filesystem. + """ + depends_on = ['InitFinder'] + BORING_VARS: list[str] = ["TERM"] + + def run(self) -> dict[str, list | None]: + # Need to dynamically load FileSystemHelper to avoid circular import if needed, + # but since we're in the same directory, we can import it. + from .file_system_helper import FileSystemHelper + + task_options = [0xBF000000, 0x7F000000, 0x3F000000] + + potential_env = { + "igloo_task_size": task_options, + "igloo_init": self.prior_results.get('InitFinder', []) + } + + pattern = re.compile(r"\/proc\/cmdline.*?([A-Za-z0-9_]+)=", re.MULTILINE) + potential_keys = FileSystemHelper.find_regex(pattern, self.extracted_fs, ignore=self.BORING_VARS).keys() + + for k in potential_keys: + known_vals = None + pattern = re.compile(k + r"=([A-Za-z0-9_]+)", re.MULTILINE) + potential_vals = FileSystemHelper.find_regex(pattern, self.extracted_fs, + ignore=self.BORING_VARS).keys() + + if len(potential_vals): + known_vals = list(potential_vals) + + potential_env[k] = known_vals + + return potential_env diff --git a/pyplugins/static_analysis/file_system_helper.py b/pyplugins/static_analysis/file_system_helper.py new file mode 100644 index 000000000..3696f4fa9 --- /dev/null +++ b/pyplugins/static_analysis/file_system_helper.py @@ -0,0 +1,94 @@ +import os +import re +from subprocess import check_output, PIPE, SubprocessError +from penguin import getColoredLogger + +logger = getColoredLogger("penguin.static_analyses") + +class FileSystemHelper: + @staticmethod + def find_regex( + target_regex: re.Pattern, + extract_root: str, + ignore: list | tuple | None = None + ) -> dict: + results = {} + if not ignore: + ignore = tuple() + elif isinstance(ignore, list): + ignore = tuple(ignore) + + pattern_str = target_regex.pattern + extract_path_str = str(extract_root) + + try: + file_list_output = check_output( + f"rg --files-with-matches 
-a '{pattern_str}' '{extract_path_str}'", + stderr=PIPE, + shell=True, + ) + + if file_list_output: + for filepath in file_list_output.decode().splitlines(): + if not os.path.isfile(filepath) or os.path.islink(filepath): + continue + try: + with open(filepath, "r", encoding="utf-8", errors="replace") as f: + content = f.read() + except Exception as e: + logger.warning(f"failed to read file {filepath}: {e}") + continue + matches = target_regex.findall(content) + for match in matches: + if match in ignore: + continue + if match not in results: + results[match] = {"count": 0, "files": set()} + results[match]["count"] += 1 + results[match]["files"].add(filepath) + except (SubprocessError, FileNotFoundError) as e: + if hasattr(e, 'returncode') and e.returncode == 1: + return {} + else: + logger.warning(f"Failed to run ripgrep: {e} - falling back to pure Python regex") + return FileSystemHelper._find_regex_python(target_regex, extract_root, ignore) + + return results + + @staticmethod + def _find_regex_python( + target_regex: re.Pattern, + extract_root: str, + ignore: list | None = None + ) -> dict: + results = {} + if not ignore: + ignore = [] + + for root, dirs, files in os.walk(extract_root): + for filename in files: + filepath = os.path.join(root, filename) + + if filepath.startswith(os.path.join(extract_root, "igloo")): + continue + + if not os.path.isfile(filepath) or os.path.islink(filepath): + continue + + try: + with open(filepath, "r", encoding="utf-8", errors="replace") as f: + content = f.read() + except Exception as e: + logger.warning(f"failed to read file {filepath}: {e}") + continue + + matches = target_regex.findall(content) + for match in matches: + if match in ignore: + continue + if match not in results: + results[match] = {"count": 0, "files": set()} + results[match]["count"] += 1 + results[match]["files"].add(filepath) + + return results diff --git a/pyplugins/static_analysis/init_finder.py b/pyplugins/static_analysis/init_finder.py new file mode 
class InitFinder(StaticAnalysisPlugin):
    """
    Find potential init scripts and binaries in an extracted filesystem.
    """

    _EXEC_BITS = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

    def run(self) -> list[str]:
        """
        Return guest paths of candidate init programs, best guesses first.

        Candidates are sorted by path length; the first entries named
        ``preinit``, ``init`` and ``rcS`` are then moved to the front (in
        that order).  Paths longer than 32 characters and non-executable
        entries are dropped.
        """
        inits = []

        for root, dirs, files in os.walk(self.extracted_fs):
            for filename in files:
                filepath = os.path.join(root, filename)
                if self._is_init_script(filepath, self.extracted_fs):
                    inits.append("/" + os.path.relpath(filepath, self.extracted_fs))

        inits.sort(key=len)

        # Walk the preferred names in reverse so "preinit" ends up first.
        target_inits = ["preinit", "init", "rcS"]
        for potential in target_inits[::-1]:
            try:
                idx = [x.split("/")[-1] for x in inits].index(potential)
            except ValueError:
                continue
            inits.insert(0, inits.pop(idx))

        inits = [i for i in inits if len(i) <= 32]

        return [
            i for i in inits
            if self._is_executable(
                os.path.join(self.extracted_fs, i.lstrip("/")), self.extracted_fs
            )
        ]

    @staticmethod
    def _resolve_in_fs(filepath: str, fsroot: str) -> str:
        """
        Return the path to inspect for ``filepath``.

        For a symlink this is its target, with absolute targets re-rooted
        under ``fsroot``; anything else is returned unchanged.
        """
        if not os.path.islink(filepath):
            return filepath
        link_target = os.readlink(filepath)
        if os.path.isabs(link_target):
            return os.path.join(fsroot, "./" + link_target)
        return os.path.join(os.path.dirname(filepath), link_target)

    @staticmethod
    def _is_executable(filepath: str, fsroot: str) -> bool:
        """
        True if ``filepath`` is, or links to, a regular file with an exec bit.

        The check runs on the in-filesystem target: stat-ing an absolute
        symlink directly would follow it relative to the host root.
        """
        try:
            st = os.stat(InitFinder._resolve_in_fs(filepath, fsroot))
        except OSError:
            return False
        return stat.S_ISREG(st.st_mode) and bool(st.st_mode & InitFinder._EXEC_BITS)

    @staticmethod
    def _is_init_script(filepath: str, fsroot: str) -> bool:
        """Decide from name, link target and mode whether ``filepath`` looks like an init."""
        if filepath.startswith(os.path.join(fsroot, "igloo")):
            return False

        if not os.path.isfile(filepath) and not os.path.islink(filepath):
            return False

        name = os.path.basename(filepath)
        named_like_init = any(x in name for x in ("init", "start")) and not any(
            x in name for x in ("inittab", "telinit", "initd")
        )

        if named_like_init:
            # "start" only counts when delimited on both sides.
            if "start" in name and not re.search(r"[\W_\-\.]start[\W_\-\.]", name):
                return False

            if os.path.islink(filepath):
                link_target = os.readlink(filepath)
                if not os.path.exists(InitFinder._resolve_in_fs(filepath, fsroot)):
                    logger.warning(
                        f"Potential init '{filepath}' is a symlink to '{link_target}' which does not exist in the filesystem"
                    )
                    return False

            if name.endswith(".init"):
                return False

            return InitFinder._is_executable(filepath, fsroot)

        if "rcS" in name:
            return InitFinder._is_executable(filepath, fsroot)

        return False
class KernelVersionFinder(StaticAnalysisPlugin):
    """
    Find and select the best kernel version from extracted filesystem.
    """

    @staticmethod
    def is_kernel_version(name: str) -> bool:
        """True for names shaped like ``X.Y.Z`` with an optional ``-suffix``."""
        return bool(re.match(r"^\d+\.\d+\.\d+(-[\w\.]+)?$", name))

    @staticmethod
    def select_best_kernel(kernel_versions: set[str]) -> str:
        """
        Pick the available kernel closest to the newest guest version.

        With no guest versions, ``DEFAULT_KERNEL`` is returned.  Otherwise the
        numerically highest guest version is compared with the versions from
        ``get_available_kernel_versions()``: candidates with the same major
        number are preferred, and the one with the smallest summed
        per-component difference wins.
        """
        if not kernel_versions:
            return DEFAULT_KERNEL

        def numeric_key(ver):
            # Compare only the dotted numeric part before any "-suffix".
            base = ver.split("-", 1)[0]
            return tuple(int(t) for t in base.split(".") if t.isdigit())

        newest = max(kernel_versions, key=numeric_key)
        guest_version = numeric_key(newest)
        guest_major = guest_version[0] if guest_version else None

        available_versions = get_available_kernel_versions()
        same_major = [v for v in available_versions if v[0] == guest_major]

        def version_distance(v):
            # Zero-pad the shorter tuple so every component is compared.
            width = max(len(v), len(guest_version))
            padded_v = v + (0,) * (width - len(v))
            padded_guest = guest_version + (0,) * (width - len(guest_version))
            return sum(abs(a - b) for a, b in zip(padded_v, padded_guest))

        candidates = same_major if same_major else available_versions
        best = min(candidates, key=version_distance)
        return ".".join(str(x) for x in best)

    def run(self) -> dict[str, list[str] | str]:
        """
        Collect version-like directory names under ``lib/modules`` and choose
        a kernel for them.
        """
        modules_path = os.path.join(self.extracted_fs, "lib/modules")

        potential_kernels = set()
        if os.path.exists(modules_path):
            potential_kernels = {
                d for d in os.listdir(modules_path)
                if os.path.isdir(os.path.join(modules_path, d))
                and self.is_kernel_version(d)
            }

        return {
            "potential_kernels": sorted(potential_kernels),
            "selected_kernel": self.select_best_kernel(potential_kernels),
        }
# File: pyplugins/static_analysis/library_symbols.py
import os
import struct
import tempfile
import subprocess
from pathlib import Path
from subprocess import check_output, STDOUT, CalledProcessError
from elftools.common.exceptions import ELFError, ELFParseError
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
from penguin.static_plugin import StaticAnalysisPlugin
from penguin.arch import arch_end
from penguin import getColoredLogger

logger = getColoredLogger("penguin.static_analyses")


class LibrarySymbols(StaticAnalysisPlugin):
    """
    Examine libraries in the filesystem for NVRAM keys and exported symbols.
    """
    depends_on = ['ArchId']
    NVRAM_KEYS: list[str] = ["Nvrams", "router_defaults"]
    # Global header that starts every Unix `ar` archive.
    AR_MAGIC: bytes = b"!<arch>\n"

    def run(self) -> dict[str, dict]:
        """
        Walk the extracted filesystem and analyze every shared library.

        A file is considered a library when its path ends in ``.so`` or
        contains ``.so.``.

        :return: ``{'nvram': {lib_path: {key: value}}, 'symbols': {lib_path:
            {symbol: address}}}``. Both inner dicts are empty when the
            architecture could not be identified.
        """
        self.archend = arch_end(self.prior_results.get('ArchId'))

        if any(x is None for x in self.archend):
            self.enabled = False
            logger.warning(
                f"Unknown architecture/endianness: {self.archend}. "
                "Cannot run NVRAM recovery Static Analysis"
            )
            # Keep the documented return shape so consumers can call .get().
            return {'nvram': {}, 'symbols': {}}

        fs_root = str(self.extracted_fs)
        nvram_values: dict[str, dict] = {}
        sym_paths: dict[str, dict] = {}

        for root, _, files in os.walk(self.extracted_fs):
            for file in files:
                file_path = Path(root) / file
                path_str = str(file_path)
                # Cheap name test first, then the stat() call.
                if not (path_str.endswith(".so") or ".so." in path_str):
                    continue
                if not file_path.is_file():
                    continue

                try:
                    found_nvram, found_syms = self._analyze_library(path_str, self.archend)
                except Exception as e:
                    # Best effort: one broken library must not abort the scan.
                    logger.error(
                        f"Unhandled exception in _analyze_library for {file_path}: {e}"
                    )
                    continue

                # Strip the extraction root only as a prefix, never mid-path.
                tmpless_path = path_str.removeprefix(fs_root)
                sym_paths[tmpless_path] = found_syms

                if not found_nvram:
                    continue
                per_library = nvram_values.setdefault(tmpless_path, {})
                for key, value in found_nvram.items():
                    # Archive members are reported as "archive:object:key".
                    nvram_key = key.rsplit(":", 1)[-1]
                    if nvram_key and value is not None:
                        per_library[nvram_key] = value
                    else:
                        # A later empty/None entry supersedes an earlier one.
                        per_library.pop(nvram_key, None)

        return {'nvram': nvram_values,
                'symbols': sym_paths}

    @staticmethod
    def _find_symbol_address(
        elffile: ELFFile,
        symbol_name: str
    ) -> tuple[int | None, int | str | None]:
        """
        Look up ``symbol_name`` in every symbol table of ``elffile``.

        :return: ``(st_value, st_shndx)`` of the first match, or
            ``(None, None)`` if the symbol is absent or the section headers
            cannot be parsed.
        """
        try:
            symbol_tables = [
                s
                for s in elffile.iter_sections()
                if isinstance(s, SymbolTableSection)
            ]
        except ELFParseError:
            return None, None

        for section in symbol_tables:
            if matches := section.get_symbol_by_name(symbol_name):
                symbol = matches[0]
                return (
                    symbol["st_value"],
                    symbol["st_shndx"],
                )
        return None, None

    @staticmethod
    def _get_string_from_address(
        elffile: ELFFile,
        address: int,
        is_64: bool = False,
        is_eb: bool = False
    ) -> str | None:
        """
        Read the NUL-terminated UTF-8 string located at ``address``.

        ``is_64`` and ``is_eb`` are accepted for interface compatibility but
        are not needed to read a byte string.

        :return: The decoded string, or None if no section covers the address
            or no decodable, terminated string is found there.
        """
        for section in elffile.iter_sections():
            start_addr = section["sh_addr"]
            if not start_addr <= address < start_addr + section.data_size:
                continue
            data = section.data()[address - start_addr:]
            str_end = data.find(b"\x00")
            if str_end == -1:
                continue
            try:
                return data[:str_end].decode("utf-8")
            except UnicodeDecodeError:
                # Not text here; another section may cover the same address.
                continue
        return None

    @staticmethod
    def _is_elf(filename: str) -> bool:
        """Return True if ``filename`` is readable and starts with the ELF magic."""
        try:
            with open(filename, "rb") as f:
                return f.read(4) == b"\x7fELF"
        except IOError:
            return False

    @staticmethod
    def get_nvram_info(
        elf_path: str,
        archend: tuple
    ) -> dict[str, str | None]:
        """
        Recover default NVRAM key/value pairs from a library.

        For every symbol in ``NVRAM_KEYS`` that is defined in the ELF, the
        data at the symbol is read as consecutive entries of three pointers;
        the first two are followed as (key string, value string).

        :param elf_path: Path of the ELF file to inspect.
        :param archend: ``(arch, endianness)`` as produced by ``arch_end``
            (a plain string such as ``"mips64eb"`` is tolerated as well).
        :return: Mapping of key to value (value may be None when the value
            pointer does not resolve to a string).
        """
        nvram_data: dict[str, str | None] = {}

        # Substring tests per component: a plain `"64" in archend` on a tuple
        # is an equality test and is never true for e.g. ("aarch64", "el").
        parts = (archend,) if isinstance(archend, str) else tuple(archend)
        is_eb = any("eb" in part for part in parts if part)
        is_64 = any("64" in part for part in parts if part)

        # One table entry = three pointers of the target's width/endianness.
        entry = struct.Struct(f"{'>' if is_eb else '<'}3{'Q' if is_64 else 'I'}")

        with open(elf_path, "rb") as f:
            try:
                elffile = ELFFile(f)
            except ELFError:
                if LibrarySymbols._is_elf(elf_path):
                    logger.warning(
                        f"Failed to parse {elf_path} as an ELF file when analyzing libraries"
                    )
                return nvram_data

            for nvram_key in LibrarySymbols.NVRAM_KEYS:
                address, section_index = LibrarySymbols._find_symbol_address(elffile, nvram_key)
                if address is None or section_index == "SHN_UNDEF":
                    continue

                try:
                    section = elffile.get_section(section_index)
                except TypeError:
                    logger.warning(
                        f"Failed to get section {section_index} for symbol {nvram_key} in {elf_path} when analyzing libraries"
                    )
                    continue

                data = section.data()
                offset = address - section["sh_addr"]
                if offset < 0:
                    # Symbol value lies before the section start; nothing to read.
                    continue

                fail_count = 0
                # `<=` so the entry that ends exactly at the section end is read.
                while offset + entry.size <= len(data):
                    key_ptr, val_ptr, _ = entry.unpack_from(data, offset)
                    if key_ptr != 0:
                        key = LibrarySymbols._get_string_from_address(elffile, key_ptr, is_64, is_eb)
                        val = LibrarySymbols._get_string_from_address(elffile, val_ptr, is_64, is_eb)

                        if (
                            key
                            and not any(ch in key for ch in ' /\t\n\r<>"')
                            and not key[0].isnumeric()
                        ):
                            fail_count = 0
                            # First occurrence of a key wins.
                            nvram_data.setdefault(key, val)
                        else:
                            fail_count += 1

                    # Several implausible keys in a row: we ran past the table.
                    if fail_count > 5:
                        break

                    offset += entry.size
        return nvram_data

    @staticmethod
    def _analyze_library(
        elf_path: str,
        archend: tuple
    ) -> tuple[dict, dict]:
        """
        Extract exported symbols and NVRAM defaults from one library file.

        ``ar`` archives are unpacked into a temporary directory and each
        member is analyzed recursively; member results are prefixed with
        ``"<archive basename>:<member>:"``. Other files are listed with
        ``nm -D --defined-only``.

        :return: ``(nvram_data, symbols)``.
        """
        symbols: dict = {}
        nvram_data: dict = {}

        magic = LibrarySymbols.AR_MAGIC
        with open(elf_path, 'rb') as f:
            is_archive = f.read(len(magic)) == magic

        if is_archive:
            try:
                with tempfile.TemporaryDirectory() as temp_dir:
                    # Absolute path: ar runs with cwd=temp_dir.
                    subprocess.run(
                        ["ar", "x", os.path.abspath(elf_path)],
                        cwd=temp_dir, check=True, capture_output=True,
                    )
                    for obj_file in sorted(os.listdir(temp_dir)):
                        obj_path = os.path.join(temp_dir, obj_file)
                        found_nvram, found_syms = LibrarySymbols._analyze_library(obj_path, archend)
                        archive_key = f"{os.path.basename(elf_path)}:{obj_file}"
                        symbols.update({f"{archive_key}:{k}": v for k, v in found_syms.items()})
                        nvram_data.update({f"{archive_key}:{k}": v for k, v in found_nvram.items()})
                return nvram_data, symbols
            except CalledProcessError as e:
                detail = (e.stderr or e.output or b"").decode('utf-8', errors='ignore')
                logger.error(f"Error processing archive {elf_path}: {detail}")
                # Fall through and let nm have a go at the file.

        try:
            nm_out = check_output(["nm", "-D", "--defined-only", elf_path],
                                  stderr=STDOUT)
        except CalledProcessError as e:
            if LibrarySymbols._is_elf(elf_path):
                detail = (e.output or b"").decode('utf-8', errors='ignore')
                logger.error(f"Error running nm on {elf_path}: {detail}")
            return nvram_data, symbols

        for line in nm_out.decode("utf8", errors="ignore").split("\n"):
            if not line:
                continue
            parts = line.split()
            if len(parts) == 3:
                addr_text, _, name = parts
                # Drop symbol-version suffixes such as "@@GLIBC_2.4".
                name = name.split("@")[0]
                try:
                    addr = int(addr_text, 16)
                except ValueError:
                    logger.warning(f"Unexpected nm output format: {line}")
                    continue
                if addr != 0:
                    symbols[name] = addr
            elif line.strip().endswith("no symbols"):
                continue
            else:
                logger.warning(f"Unexpected nm output format: {line}")

        if any(sym in symbols for sym in LibrarySymbols.NVRAM_KEYS):
            nvram_data = LibrarySymbols.get_nvram_info(elf_path, archend)

        return nvram_data, symbols


# File: pyplugins/static_analysis/pseudofile_finder.py
import os
import re
from penguin.static_plugin import StaticAnalysisPlugin


class PseudofileFinder(StaticAnalysisPlugin):
    """
    Find device and proc pseudofiles in the extracted filesystem.
    """
    # /dev entries that are excluded from the reported device list.
    IGLOO_ADDED_DEVICES: list[str] = [
        "autofs", "btrfs-control", "cfs0", "cfs1", "cfs2", "cfs3",
        "cfs4", "console", "cpu_dma_latency", "full", "fuse", "input", "kmsg",
        "loop-control", "loop0", "loop1", "loop2", "loop3", "loop4",
        "loop5", "loop6", "loop7", "mem", "memory_bandwidth", "mice", "net",
        "network_latency", "network_throughput", "null", "port", "ppp",
        "psaux", "ptmx", "pts", "ptyp0", "ptyp1", "ptyp2", "ptyp3", "ptyp4",
        "ptyp5", "ptyp6", "ptyp7", "ptyp8", "ptyp9", "ptypa", "ptypb",
        "ptypc", "ptypd", "ptype", "ptypf", "ram", "ram0", "ram1", "ram10",
        "ram11", "ram12", "ram13", "ram14", "ram15", "ram2", "ram3",
        "ram4", "ram5", "ram6", "ram7", "ram8", "ram9", "random", "root",
        "tty", "tty0", "tty1", "tty10", "tty11", "tty12", "tty13",
        "tty14", "tty15", "tty16", "tty17", "tty18", "tty19", "tty2",
        "tty20", "tty21", "tty22", "tty23", "tty24", "tty25", "tty26",
        "tty27", "tty28", "tty29", "tty3", "tty30", "tty31", "tty32",
        "tty33", "tty34", "tty35", "tty36", "tty37", "tty38", "tty39",
        "tty4", "tty40", "tty41", "tty42", "tty43", "tty44", "tty45",
        "tty46", "tty47", "tty48", "tty49", "tty5", "tty50", "tty51",
        "tty52", "tty53", "tty54", "tty55", "tty56", "tty57", "tty58",
        "tty59", "tty6", "tty60", "tty61", "tty62", "tty63", "tty7",
        "tty8", "tty9",
        "ttyS0", "ttyS1", "ttyS2", "ttyS3",
        "ttyp0",
        "ttyp1", "ttyp2", "ttyp3", "ttyp4", "ttyp5", "ttyp6", "ttyp7",
        "ttyp8", "ttyp9", "ttypa", "ttypb", "ttypc", "ttypd", "ttype",
        "ttypf", "tun", "urandom", "vcs", "vcs1", "vcsa", "vcsa1", "vda",
        "vga_arbiter", "vsock", "zero",
        "root", "pts",  # Added in init
        "ttyAMA0", "ttyAMA1",  # ARM
        "stdin", "stdout", "stderr",  # Symlinks to /proc/self/fd/X
    ]

    # /proc entries that are excluded from the reported proc list.
    IGLOO_PROCFS: list[str] = [
        "buddyinfo",
        "cgroups",
        "cmdline",
        "config.gz",
        "consoles",
        "cpuinfo",
        "crypto",
        "devices",
        "diskstats",
        "execdomains",
        "fb",
        "filesystems",
        "interrupts",
        "iomem",
        "ioports",
        "kallsyms",
        "key-users",
        "keys",
        "kmsg",
        "kpagecount",
        "kpageflags",
        "loadavg",
        "locks",
        "meminfo",
        "misc",
        "modules",
        "mounts",
        "mtd",  # We might shadow this later intentionally, but not by default
        "net",
        "pagetypeinfo",
        "partitions",
        "penguin_net",  # This is custom and unique but we shouldn't ever shadow it
        "sched_debug",
        "slabinfo",
        "softirqs",
        "stat",
        "swaps",
        "sysrq-trigger",
        "thread-self",
        "timer_list",
        "uptime",
        "version",
        "vmallocinfo",
        "vmstat",
        "zoneinfo",

        # Directories
        "bus",
        "bus/pci",
        "bus/pci/00",
        "bus/pci/00/00.0",
        "bus/pci/00/0a.0",
        "bus/pci/00/0a.1",
        "bus/pci/00/0a.2",
        "bus/pci/00/0a.3",
        "bus/pci/00/0b.0",
        "bus/pci/00/12.0",
        "bus/pci/00/13.0",
        "bus/pci/00/14.0",
        "bus/pci/devices",
        "bus/input",
        "bus/input/devices",
        "bus/input/handlers",

        "cpu",
        "cpu/alignment",

        "driver",
        "driver/rtc",

        "fs",
        "fs/afs",
        "fs/afs/cells",
        "fs/afs/rootcell",
        "fs/ext4",
        "fs/f2fs",
        "fs/jbd2",
        "fs/nfsd",
        "fs/lockd",
        "fs/lockd/nlm_end_grace",
        "fs/nfsfs",
        "fs/nfsfs/servers",
        "fs/nfsfs/volumes",

        # Sys is special, loaded dynamically

        # sysvipc, driver (empty), scsi, tty, sys (big), irq (numbers), bus, fs
        "sysvipc/shm",
        "sysvipc/sem",
        "sysvipc/msg",

        "scsi/device_info",
        "scsi/scsi",

        "tty/drivers",
        "tty/ldisc",
        "tty/driver",
        "tty/driver/serial",
        "tty/ldisc",
    ]

    # /proc subtrees that are skipped entirely (the entry and everything below it).
    PROC_IGNORE: list[str] = ["irq", "self", "PID", "device-tree", "net", "vmcore"]

    def __init__(self, *args, **kwargs):
        """
        Initialize the plugin and extend the procfs exclusion list.

        Entries from ``resources/proc_sys.txt`` inside the ``penguin`` package
        (one per line, if the file exists) are added to a per-instance copy of
        ``IGLOO_PROCFS`` so the shared class-level list is never mutated.
        """
        super().__init__(*args, **kwargs)
        import penguin
        resources = os.path.join(os.path.dirname(penguin.__file__), "resources")
        proc_sys_txt = os.path.join(resources, "proc_sys.txt")

        extra_entries: list[str] = []
        if os.path.exists(proc_sys_txt):
            with open(proc_sys_txt, "r") as f:
                extra_entries = [line.strip() for line in f if line.strip()]

        # Instance attribute shadows the class list; repeated instantiation
        # therefore cannot accumulate duplicates.
        self.IGLOO_PROCFS = [*type(self).IGLOO_PROCFS, *extra_entries]

    def _filter_files(
        self,
        extract_dir: str,
        pattern: re.Pattern,
        ignore_list: list[str],
        remove_list: list[str]
    ) -> list[str]:
        """
        Find paths matching ``pattern`` and drop the ones we do not report.

        :param extract_dir: Root of the extracted filesystem to search.
        :param pattern: Regex handed to ``FileSystemHelper.find_regex``.
        :param ignore_list: Prefixes to skip: an entry is dropped when it
            equals a prefix or lives below it.
        :param remove_list: Exact entries to drop.
        :return: Remaining entries, minus any entry that is a parent directory
            of another remaining entry.
        """
        from .file_system_helper import FileSystemHelper
        found_files = list(FileSystemHelper.find_regex(pattern, extract_dir).keys())

        removed = set(remove_list)

        def is_ignored(name: str) -> bool:
            return any(name == ig or name.startswith(ig + "/") for ig in ignore_list)

        filtered_files = [
            x for x in found_files
            if not is_ignored(x) and x not in removed
        ]

        # Every proper path prefix of a kept entry is a directory, not a leaf.
        directories_to_remove = set()
        for k in filtered_files:
            parts = k.split("/")
            for i in range(1, len(parts)):
                directories_to_remove.add("/".join(parts[:i]))

        return [k for k in filtered_files if k not in directories_to_remove]

    def run(self) -> dict[str, list[str]]:
        """
        Search the filesystem for references to /dev and /proc entries.

        :return: ``{"dev": [...], "proc": [...]}`` with absolute guest paths.
        """
        dev_pattern = re.compile(r"/dev/([a-zA-Z0-9_/]+)", re.MULTILINE)
        proc_pattern = re.compile(r"/proc/([a-zA-Z0-9_/]+)", re.MULTILINE)

        dev_files = self._filter_files(
            self.extracted_fs, dev_pattern, [], self.IGLOO_ADDED_DEVICES
        )

        proc_files = self._filter_files(
            self.extracted_fs, proc_pattern, self.PROC_IGNORE, self.IGLOO_PROCFS
        )

        return {
            "dev": [f"/dev/{x}" for x in dev_files],
            "proc": [f"/proc/{x}" for x in proc_files],
        }

    @staticmethod
    def _get_devfiles_in_fs(extracted_dir: str) -> list[str]:
        """
        List every non-directory entry under ``<extracted_dir>/dev``.

        :return: Paths rewritten to be rooted at ``/dev``; empty if the
            directory does not exist.
        """
        dev_dir = os.path.join(extracted_dir, "dev")
        if not os.path.exists(dev_dir):
            return []

        return [
            os.path.join("/dev", os.path.relpath(os.path.join(root, f), dev_dir))
            for root, _, files in os.walk(dev_dir)
            for f in files
        ]
deleted file mode 100644 index 2cee964d9..000000000 --- a/src/penguin/config_patchers.py +++ /dev/null @@ -1,1670 +0,0 @@ -""" -penguin.config_patchers -======================= - -Configuration patch generation utilities for the Penguin emulation environment. - -This module provides classes and helpers for generating configuration patches, -handling static and dynamic pseudofiles, network devices, library injection, -NVRAM defaults, and other config modifications. -""" - -import os -import re -import stat -import subprocess -import tarfile -from elftools.common.exceptions import ELFError -from elftools.elf.elffile import ELFFile - -from abc import ABC, abstractmethod -from collections import defaultdict -from pathlib import Path - -from penguin import getColoredLogger -from .arch import arch_filter, arch_end -from .defaults import ( - default_init_script, - default_lib_aliases, - default_netdevs, - default_plugins, - expert_knowledge_pseudofiles, - default_libinject_string_introspection, - static_dir as STATIC_DIR -) -from .utils import get_arch_subdir - -logger = getColoredLogger("penguin.config_patchers") - -RESOURCES: str = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources") - - -class PatchGenerator(ABC): - def __init__(self) -> None: - self.enabled: bool = True - self.patch_name: str | None = None - - @abstractmethod - def generate(self, patches: dict) -> dict | None: - """ - Generate a patch dictionary. - - :param patches: Existing patches dictionary. - :type patches: dict - :return: Patch dictionary or None. - :rtype: dict or None - """ - raise NotImplementedError("Subclasses should implement this method") - - -class TarHelper: - ''' - Collection of static method to help find files in a tar archive - ''' - @staticmethod - def get_symlink_members(tarfile_path: str) -> dict[str, str]: - with tarfile.open(tarfile_path, "r") as tar: - # Trim leading . 
from path, everything is ./ - return { - member.name[1:]: member.linkname - for member in tar.getmembers() - if member.issym() - } - - @staticmethod - def get_all_members(tarfile_path: str): - with tarfile.open(tarfile_path, "r") as tar: - # Trim leading . from path, everything is ./ - # return {member.name[1:] for member in tar.getmembers()} - return tar.getmembers() - - @staticmethod - def get_other_members(tarfile_path: str): - # Get things that aren't files nor directories - devices, symlinnks, etc - with tarfile.open(tarfile_path, "r") as tar: - # Trim leading . from path, everything is ./ - return { - member.name[1:] - for member in tar.getmembers() - if not member.isfile() and not member.isdir - } - - @staticmethod - def get_directory_members(tarfile_path: str) -> set[str]: - with tarfile.open(tarfile_path, "r") as tar: - # Trim leading . from path, everything is ./ - results = {member.name[1:] for member in tar.getmembers() if member.isdir()} - # For each result, recursively add all parent directories - # e.g., /etc/hosts -> /etc, / - for r in list(results): - parts = r.split("/") - for i in range(len(parts)): - results.add("/".join(parts[: i + 1])) - return results - - @staticmethod - def get_file_members(tarfile_path: str) -> set[str]: - with tarfile.open(tarfile_path, "r") as tar: - # Trim leading . 
from path, everything is ./ - return {member.name[1:] for member in tar.getmembers() if member.isfile()} - - -class FileHelper: - @staticmethod - def find_executables(tmp_dir: str, target_dirs: set[str] | None = None): - if not target_dirs: - target_dirs = {"/"} - for root, _, files in os.walk(tmp_dir): - # Exclude the '/igloo' path - if "/igloo" in root: - continue - - for file in files: - file_path = Path(root) / file - # Check if the file is executable and in one of the target directories - if ( - file_path.is_file() - and os.access(file_path, os.X_OK) - and any(str(file_path).endswith(d) for d in target_dirs) - ): - yield file_path - - @staticmethod - def find_strings_in_file(file_path: str, pattern: str) -> list[str]: - result = subprocess.run(["strings", file_path], capture_output=True, text=True) - return [line for line in result.stdout.splitlines() if re.search(pattern, line)] - - @staticmethod - def find_shell_scripts(tmp_dir: str): - for root, _, files in os.walk(tmp_dir): - # Exclude the '/igloo' path - if "/igloo" in root: - continue - - for file in files: - file_path = Path(root) / file - # Check if the file is executable and in one of the target directories - if ( - file_path.is_file() - and os.access(file_path, os.X_OK) - and str(file_path).endswith(".sh") - ): - yield file_path - - @staticmethod - def exists(tmp_dir: str, target: str) -> bool: - """ - Check if the target exists within the extracted filesystem in tmp_dir, - handling symlinks correctly. 
- - :param tmp_dir: The root of the extracted filesystem (e.g., /tmp/extracted) - :type tmp_dir: str - :param target: The target path to check (e.g., /foo/zoo) - :type target: str - :return: True if the target exists within tmp_dir, False otherwise - :rtype: bool - """ - assert target.startswith("/") - assert os.path.exists(tmp_dir) - - # Strip the leading slash from the target to work with relative paths - target = target[1:] # Remove leading '/' - parts = target.split("/") - - # Initialize path traversal from tmp_dir - current_path = tmp_dir - - for part in parts: - next_path = os.path.join(current_path, part) - - if os.path.islink(next_path): - # Resolve symlink - resolved = os.readlink(next_path) - - # If symlink is absolute, restart from tmp_dir - if resolved.startswith("/"): - current_path = os.path.realpath(os.path.join(tmp_dir, resolved[1:])) - else: - # Resolve relative symlink against the current path - current_path = os.path.realpath(os.path.join(current_path, resolved)) - else: - # Move one level deeper in the path - current_path = next_path - - # If the resolved path doesn't exist at any point, return False - if not os.path.exists(current_path): - return False - - # Final check: Ensure the fully resolved path exists - return os.path.exists(current_path) - - -class NvramHelper: - @staticmethod - def _get_default_nvram_values() -> dict[str, str]: - """ - Default nvram values from Firmadyne and FirmAE. - - :return: Dictionary of default NVRAM values. 
- :rtype: dict[str, str] - """ - nvram = { - "console_loglevel": "7", - "restore_defaults": "1", - "sku_name": "", - "wla_wlanstate": "", - "lan_if": "br0", - "lan_ipaddr": "192.168.0.50", - "lan_bipaddr": "192.168.0.255", - "lan_netmask": "255.255.255.0", - "time_zone": "PST8PDT", - "wan_hwaddr_def": "01:23:45:67:89:ab", - "wan_ifname": "eth0", - "lan_ifnames": "eth1 eth2 eth3 eth4", - "ethConver": "1", - "lan_proto": "dhcp", - "wan_ipaddr": "0.0.0.0", - "wan_netmask": "255.255.255.0", - "wanif": "eth0", - "time_zone_x": "0", - "rip_multicast": "0", - "bs_trustedip_enable": "0", - "et0macaddr": "01:23:45:67:89:ab", - "filter_rule_tbl": "", - "pppoe2_schedule_config": "127:0:0:23:59", - "schedule_config": "127:0:0:23:59", - "access_control_mode": "0", - "fwpt_df_count": "0", - "static_if_status": "1", - "www_relocation": "", - } - - # Helper function add default entries from firmae - def _add_firmae_for_entries(config_dict, pattern, value, start, end): - for index in range(start, end + 1): - config_dict[pattern % index] = value - - # TODO: do we want a config toggle for these entires seprately from the other defaults? - _add_firmae_for_entries( - nvram, - "usb_info_dev%d", - "A200396E0402FF83@1@14.4G@U@1@USB_Storage;U:;0;0@", - 0, - 101, - ) - _add_firmae_for_entries(nvram, "wla_ap_isolate_%d", "", 1, 5) - _add_firmae_for_entries(nvram, "wlg_ap_isolate_%d", "", 1, 5) - _add_firmae_for_entries(nvram, "wlg_allow_access_%d", "", 1, 5) - _add_firmae_for_entries(nvram, "%d:macaddr", "01:23:45:67:89:ab", 0, 3) - _add_firmae_for_entries(nvram, "lan%d_ifnames", "", 1, 10) - - return nvram - - @staticmethod - def parse_nvram_file(path: str, f) -> dict: - """ - Parse a NVRAM file and return key-value pairs. - - :param path: Path to NVRAM file. - :type path: str - :param f: File object. - :return: Dictionary of key-value pairs. 
- :rtype: dict - """ - file_content = f.read() - key_val_pairs = file_content.split(b"\x00") - results_null = {} - results_lines = {} - - # print(f"Parsing potential nvram file {path}") - # print(f"Found {len(key_val_pairs)} null terminators pairs vs {len(file_content.splitlines())} lines") - - for pair in key_val_pairs[:-1]: # Exclude the last split as it might be empty - try: - key, val = pair.split(b"=", 1) - # It's safe to set val as a stirng, even when it's an int - if key.startswith(b"#"): - continue - results_null[key] = val - except ValueError: - logger.warning(f"could not process default nvram file {path} for {pair}") - continue - - # Second pass, if there are a lot of lines, let's try that way - for line in file_content.split(b"\n"): - if line.startswith(b"#"): - continue - if b"=" not in line: - continue - key, val = line.split(b"=", 1) - results_lines[key] = val - - # Do we have more results in one than the other? Either should have at least 5 for us to have any confidence - if len(results_null) > 5 and len(results_null) > len(results_lines): - return results_null - elif len(results_lines) > 5 and len(results_lines) > len(results_null): - return results_lines - else: - return {} - - @staticmethod - def nvram_config_analysis(fs_path: str, full_path: bool = True) -> dict[str, str]: - # Nvram source 2: standard nvram paths with plaintext data - # If we have a hit, we combine with any existing values - # These are notionally sorted - if an earlier path provides a value, we won't clobber - # but we will consume keys from all paths that we can find and parse - # If full_path, we check the whole path, otherwise just the basename - nvram_paths = [ - "./var/etc/nvram.default", - "./etc/nvram.default", - "./etc/nvram.conf", - "./etc/nvram.deft", - "./etc/nvram.update", - "./etc/wlan/nvram_params", - "./etc/system_nvram_defaults", - "./image/mnt/nvram_ap.default", - "./etc_ro/Wireless/RT2860AP/RT2860_default_vlan", - 
"./etc_ro/Wireless/RT2860AP/RT2860_default_novlan", - "./image/mnt/nvram_whp.default", - "./image/mnt/nvram_rt.default", - "./image/mnt/nvram_rpt.default", - "./image/mnt/nvram.default", - ] - nvram_basenames = set([os.path.basename(x) for x in nvram_paths]) - - path_nvrams = {} - # XXX: Should we store the source filename somewhere? Maybe - # move this to a static analysis that spits out more verbose data - # and then only some turns into a config patch? - if full_path: - # Check the exact paths - for path in nvram_paths: - abs_path = os.path.join(fs_path, path.lstrip("/")) - if os.path.exists(abs_path): - # Found a default nvram file, parse it - with open(abs_path, "rb") as f: - result = NvramHelper.parse_nvram_file(path, f) - # result is key -> value. We want to store path as well - for k, v in result.items(): - path_nvrams[k.decode()] = v.decode() - else: - # Check every file to see if it has a matching basename - for root, _, files in os.walk(fs_path): - for file in files: - abs_path = os.path.join(root, file) - rel_path = "./" + os.path.relpath(abs_path, fs_path) - - if rel_path in nvram_paths: - # Exact match - we already checked this - continue - - if any(file == fname for fname in nvram_basenames): - # Found a matching basename, parse the file - with open(abs_path, "rb") as f: - result = NvramHelper.parse_nvram_file(rel_path, f) - for k, v in result.items(): - path_nvrams[k.decode()] = v.decode() - - return path_nvrams - - -class BasePatch(PatchGenerator): - ''' - Generate base config for static_files and default plugins - ''' - UNKNOWN_INIT: str = "UNKNOWN_FIX_ME" - - def __init__(self, arch_info: str, inits: list, kernel_versions: dict) -> None: - self.patch_name = "base" - self.enabled = True - - self.set_arch_info(arch_info) - self.kernel_versions = kernel_versions - - if len(inits): - self.igloo_init = inits[0] - else: - self.igloo_init = self.UNKNOWN_INIT - logger.warning("Failed to find any init programs - config will need manual refinement") - - 
def set_arch_info(self, arch_identified: str) -> None: - ''' - Set architecture info for config patch. - - :param arch_identified: Identified architecture string. - :type arch_identified: str - ''' - # TODO: should we allow a config to be generated for an unsupported architecture? - # For example, what if we're wrong and a user wants to customize this. - arch, endian = arch_end(arch_identified) - if arch is None: - raise NotImplementedError(f"Architecture {arch_identified} not supported ({arch}, {endian})") - - # Map architecture names to config schema valid names - if arch == "aarch64": - self.arch_name = "aarch64" - elif arch == "intel64": - self.arch_name = "intel64" - elif arch == "loongarch64": - self.arch_name = "loongarch64" - elif arch == "riscv64": - self.arch_name = "riscv64" - elif arch == "powerpc": - self.arch_name = "powerpc" - elif arch == "powerpc64": - if endian == "el": - self.arch_name = "powerpc64le" # powerpc64el -> powerpc64le for config schema - else: - self.arch_name = "powerpc64" # powerpc64eb -> powerpc64 - else: - # For architectures like mips with endianness, construct the name - self.arch_name = arch + endian - - mock_config = {"core": {"arch": self.arch_name}} - self.arch_dir = get_arch_subdir(mock_config) - - if arch_identified == "aarch64": - self.dylib_dir = "arm64" - elif arch_identified == "intel64": - self.dylib_dir = "x86_64" - elif arch_identified == "loongarch64": - self.dylib_dir = "loongarch" - elif "powerpc" in self.arch_name: - self.dylib_dir = self.arch_name.replace("powerpc", "ppc") # dylibs are built with short names - else: - self.dylib_dir = self.arch_dir - - def generate(self, patches: dict) -> dict: - # Add serial device in pseudofiles - # This is because arm uses ttyAMA (major 204) and mips uses ttyS (major 4). - # XXX: For mips we use major 4, minor 65. For arm we use major 204, minor 65. 
- # For powerpc: major 229, minor 1 (hvc1) - if 'mips' in self.arch_name or self.arch_name == "intel64": - igloo_serial_major = 4 - igloo_serial_minor = 65 - elif self.arch_name in ['armel', 'aarch64']: - igloo_serial_major = 204 - igloo_serial_minor = 65 - elif "powerpc" in self.arch_name: - igloo_serial_major = 229 - igloo_serial_minor = 1 - elif self.arch_name == "loongarch64": - igloo_serial_major = 4 - igloo_serial_minor = 65 - else: - igloo_serial_major = 204 - igloo_serial_minor = 65 - - result = { - "core": { - "arch": self.arch_name, - "kernel": self.kernel_versions["selected_kernel"], - }, - "env": { - "igloo_init": self.igloo_init, - }, - "pseudofiles": { - # Ensure guest can't interfere with our 2nd serial console - make it a null device - "/dev/ttyS1": { - "read": { - "model": "zero", - }, - "write": { - "model": "discard", - }, - "ioctl": { - "*": { - "model": "return_const", - "val": 0, - } - } - }, - "/dev/ttyAMA1": { - "read": { - "model": "zero", - }, - "write": { - "model": "discard", - }, - "ioctl": { - "*": { - "model": "return_const", - "val": 0, - } - } - } - }, - "static_files": { - "/igloo/init": { - "type": "inline_file", - "contents": default_init_script, - "mode": 0o111, - }, - "/igloo/utils/sh": { - "type": "symlink", - "target": "/igloo/utils/busybox", - }, - "/igloo/utils/sleep": { - "type": "symlink", - "target": "/igloo/utils/busybox", - }, - # Add ltrace prototype files. They go in /igloo/ltrace because /igloo is treated as ltrace's /usr/share, and the files are normally in /usr/share/ltrace. 
- "/igloo/ltrace/*": { - "type": "host_file", - "mode": 0o444, - "host_path": os.path.join(*[STATIC_DIR, "ltrace", "*"]), - }, - - # Dynamic libraries - "/igloo/dylibs/*": { - "type": "host_file", - "mode": 0o755, - "host_path": os.path.join(STATIC_DIR, "dylibs", self.dylib_dir or self.arch_dir, "*"), - }, - - # Startup scripts - "/igloo/source.d/*": { - "type": "host_file", - "mode": 0o755, - "host_path": os.path.join(*[RESOURCES, "source.d", "*"]), - }, - "/igloo/serial": { - "type": "dev", - "devtype": "char", - "major": igloo_serial_major, - "minor": igloo_serial_minor, - "mode": 0o666, - } - }, - "plugins": default_plugins, - } - - # Always add our utilities into static files - guest_scripts_dir = os.path.join(STATIC_DIR, "guest-utils", "scripts") - for f in os.listdir(guest_scripts_dir): - result["static_files"][f"/igloo/utils/{f}"] = { - "type": "host_file", - "host_path": f"{guest_scripts_dir}/{f}", - "mode": 0o755, - } - result["static_files"]["/igloo/utils/*"] = { - "type": "host_file", - "host_path": f"{STATIC_DIR}/{self.arch_dir}/*", - "mode": 0o755, - } - - return result - - -class RootShell(PatchGenerator): - ''' - Add root shell - ''' - def __init__(self) -> None: - self.patch_name = "root_shell" - self.enabled = False - - def generate(self, patches: dict) -> dict: - return { - "core": { - "root_shell": False, - }, - } - - -class DynamicExploration(PatchGenerator): - ''' - We are dynamically evaluating and refining a configuration. We need - to collect data programatically. Disable root shell, enable - coverage-tracking and nmap for coverage generation. Enable VPN - so nmap has something to talk to. - - Ideally this will also be paired with ShimBusybox to get shell-level - instrumentation. 
- ''' - def __init__(self) -> None: - self.patch_name = "auto_explore" - self.enabled = False - - def generate(self, patches: dict) -> dict: - return { - "core": { - "root_shell": False, - }, - "plugins": { - "nmap": { - "enabled": True, - }, - "vpn": { - "enabled": True, - "log": True, - }, - "netbinds": - { - "enabled": True, - "shutdown_on_www": False, - }, - } - } - - -class SingleShotFICD(PatchGenerator): - ''' - We are doing a single-shot, automated evaluation. Disable root shell, - but keep VPN on and measure FICD - ''' - def __init__(self) -> None: - self.patch_name = "single_shot_ficd" - self.enabled = False - - def generate(self, patches: dict) -> dict: - return { - "core": { - "root_shell": False, - }, - "plugins": { - "nmap": { - "enabled": False, - }, - "vpn": { - "enabled": True, - }, - "netbinds": - { - "enabled": True, - "shutdown_on_www": False, # FICD or www success results in shutdown - }, - "ficd": { - "enabled": True, - "stop_on_if": True, - }, - "fetch_web": { - "enabled": True, - "shutdown_after_www": True, # FICD or www success results in shutdown - }, - } - } - - -class SingleShot(PatchGenerator): - ''' - We are doing a single-shot, automated evaluation. Disable root shell, - leave coverage/nmap, but keep VPN on and use fetch_web to collect responses - ''' - def __init__(self) -> None: - self.patch_name = "single_shot" - self.enabled = False - - def generate(self, patches: dict) -> dict: - return { - "core": { - "root_shell": False, - }, - "plugins": { - "nmap": { - "enabled": False, - }, - "vpn": { - "enabled": True, - }, - "netbinds": - { - "enabled": True, - "shutdown_on_www": False, # We want fetch_web to do the shutdown - }, - "fetch_web": { - "enabled": True, - "shutdown_after_www": True, - }, - - } - } - - -class ManualInteract(PatchGenerator): - ''' - Interactive for manual exploration. Enable root shell, enable - vpn. Do not terminate on www bind. 
- ''' - def __init__(self) -> None: - self.patch_name = "manual" - self.enabled = True - - def generate(self, patches: dict) -> dict: - return { - "core": { - "root_shell": True - }, - "plugins": { - "nmap": { - "enabled": False, - }, - "vpn": { - "enabled": True, - }, - "netbinds": - { - "enabled": True, - "shutdown_on_www": False, - }, - - } - } - - -class NetdevsDefault(PatchGenerator): - ''' - Add list of default network device names. - ''' - def __init__(self) -> None: - self.enabled = True - self.patch_name = "netdevs.default" - - def generate(self, patches: dict) -> dict: - return {'netdevs': default_netdevs} - - -class NetdevsTailored(PatchGenerator): - ''' - Add list of network device names observed in static analysis. - ''' - def __init__(self, netdevs: dict) -> None: - self.enabled = True - self.patch_name = "netdevs.dynamic" - self.netdevs = netdevs - - def generate(self, patches: dict) -> dict | None: - values = set() - if not self.netdevs: - return - for src, devs in self.netdevs.items(): - values.update(devs) - if len(values): - return {'netdevs': sorted(list(values))} - - -class PseudofilesExpert(PatchGenerator): - ''' - Fixed set of pseudofile models from FirmAE. - ''' - def __init__(self) -> None: - self.enabled = True - self.patch_name = "pseudofiles.expert_knowledge" - - def generate(self, patches: dict) -> dict: - return {'pseudofiles': expert_knowledge_pseudofiles} - - -class LibInjectSymlinks(PatchGenerator): - ''' - Detect the ABI of all libc.so files and place a symlink in the same - directory to lib_inject of the same ABI. 
- ''' - def __init__(self, filesystem_root_path: str) -> None: - self.enabled = True - self.patch_name = 'lib_inject.core' - self.filesystem_root_path = filesystem_root_path - - def generate(self, patches: dict) -> dict: - libc_paths = [] - result = defaultdict(dict) - - # Walk through the filesystem root to find all "libc.so" files - for root, dirs, files in os.walk(self.filesystem_root_path): - for filename in files: - if filename.startswith("libc.so"): - libc_paths.append(Path(os.path.join(root, filename))) - - # Iterate over the found libc.so files to generate symlinks based on ABI - for p in libc_paths: - if not os.path.isfile(p) or (os.path.islink(p) and not os.path.exists(p)): - # Skip broken symlinks - continue - - with open(p, 'rb') as file: - try: - e = ELFFile(file) - except ELFError: - # Not an ELF. It could be, for example, a GNU ld script. - continue - - # Assume `arch_filter` is a function that extracts the ABI from an ELF file. - abi = arch_filter(e).abi - - # Ensure dest starts with a / - dest = Path("/") / \ - p.relative_to(self.filesystem_root_path).parent / \ - "lib_inject.so" - - result["static_files"][str(dest)] = { - "type": "symlink", - "target": f"/igloo/lib_inject_{abi}.so", - } - - if len(result.get("static_files", [])): - # LD_PRELOAD if we set any symlinks - result["env"] = {"LD_PRELOAD": "lib_inject.so"} - - return result - - -class LibInjectStringIntrospection(PatchGenerator): - ''' - Add LibInject aliases for string introspection (e.g., for comparison detection). 
- For each method we see in the filesystem that's in our list of shim targets, add the shim - ''' - def __init__(self, library_info: dict) -> None: - self.enabled = True - self.patch_name = 'lib_inject.string_introspection' - self.library_info = library_info - - def generate(self, patches: dict) -> dict: - aliases = {} - for _, exported_syms in self.library_info.get("symbols", {}).items(): - for sym in exported_syms: - if sym in default_libinject_string_introspection: - aliases[sym] = default_libinject_string_introspection[sym] - - return {'lib_inject': {'aliases': aliases}} - - -class LibInjectTailoredAliases(PatchGenerator): - ''' - Set default aliases in libinject based on library analysis. If one of the defaults - is present in a library, we'll add it to the libinject alias list - ''' - - def __init__(self, library_info: dict) -> None: - self.enabled = True - self.patch_name = 'lib_inject.dynamic_models' - self.library_info = library_info - self.unmodeled = set() - - def generate(self, patches: dict) -> dict | None: - aliases = {} - - # Only copy values from our defaults if we see that same symbol exported - for _, exported_syms in self.library_info.get("symbols", {}).items(): - for sym in exported_syms: - if sym in default_lib_aliases: - aliases[sym] = default_lib_aliases[sym] - elif "nvram" in sym and sym not in self.unmodeled: - self.unmodeled.add(sym) - - if len(self.unmodeled): - logger.info(f"Detected {len(self.unmodeled)} unmodeled symbols around nvram. You may wish to create libinject models for these:") - for sym in self.unmodeled: - logger.info(f"\t{sym}") - - if len(aliases): - return {'lib_inject': {'aliases': aliases}} - - -class LibInjectFixedAliases(PatchGenerator): - ''' - Set all aliases in libinject from our defaults. 
- ''' - def __init__(self) -> None: - self.enabled = False - self.patch_name = 'lib_inject.fixed_models' - - def generate(self, patches: dict) -> dict: - return {'lib_inject': {'aliases': default_lib_aliases}} - - -""" -class LibInjectJITAliases(PatchGenerator): - ''' - For nvram methods that we don't have shims for, try throwing some defaults - based on symbol names. This is probably going to break things but could be interesting - ''' - def __init__(self, library_info): - self.enabled = True - self.patch_name = 'lib_inject.jit_models' - self.library_info = library_info - self.unmodeled = set() - - def generate(self, patches): - aliases = {} - - # Only copy values from our defaults if we see that same symbol exported - for _, exported_syms in self.library_info.get("symbols", {}).items(): - for sym in exported_syms: - if "nvram" in sym and sym not in default_lib_aliases: - if "_get" in sym: - target = "libinject_nvram_get" - elif "_set" in sym: - target = "libinject_nvram_get" - else: - target = "libinject_ret_0" - aliases[sym] = target - logger.info(f"\tJIT mapping {sym} -> {target}") - - if len(aliases): - return {'lib_inject': {'aliases': aliases}} -""" - - -class ForceWWW(PatchGenerator): - ''' - This is a hacky FirmAE approach to identify webservers and just start - them. Unsurprisingly, it increases the rate of web servers starting. - We'll export this into our static files section so we could later decide - to try it. We'll enable this by default here. 
- ''' - def __init__(self, fs_path: str) -> None: - self.enabled = False - self.patch_name = 'force_www' - self.fs_path = fs_path - - def generate(self, patches: dict) -> dict | None: - # Map between filename and command - file2cmd = { - "./etc/init.d/uhttpd": "/etc/init.d/uhttpd start", - "./usr/bin/httpd": "/usr/bin/httpd", - "./usr/sbin/httpd": "/usr/sbin/httpd", - "./bin/goahead": "/bin/goahead", - "./bin/alphapd": "/bin/alphapd", - "./bin/boa": "/bin/boa", - "./usr/sbin/lighttpd": "/usr/sbin/lighttpd -f /etc/lighttpd/lighttpd.conf", - } - - www_cmds = [] - www_paths = [] - - # Do we have lighttpd.conf? - have_lighttpd_conf = os.path.isfile(os.path.join(self.fs_path, "./etc/lighttpd/lighttpd.conf")) - - for file, cmd in file2cmd.items(): - if os.path.isfile(os.path.join(self.fs_path, file)): - if file == "./usr/sbin/lighttpd" and not have_lighttpd_conf: - # Lighttpd only valid if there's a config file - continue - www_cmds.append(cmd) - www_paths.append(file) - - if not len(www_cmds): - return - - # Start of the shell script - # We want to start each identified webserver in a loop - cmd_str = """#!/igloo/utils/sh - /igloo/utils/busybox sleep 120 - - while true; do - """ - - # Loop through the commands to add them to the script - for cmd in www_cmds: - cmd_str += f""" - if ! 
(/igloo/utils/busybox ps | /igloo/utils/busybox grep -v grep | /igloo/utils/busybox grep -sqi "{cmd}"); then - {cmd} & - fi - """ - # Close the loop - cmd_str += """ - /igloo/utils/busybox sleep 30 - done - """ - - return { - "core": { - 'force_www': True - }, - "static_files": { - "/igloo/utils/www_cmds": { - "type": "inline_file", - "contents": cmd_str, - "mode": 0o755, - } - } - } - - -class GenerateMissingDirs(PatchGenerator): - ''' - Examine the fs archive to identify missing directories - We ignore the extracted filesystem because we want to - ensure symlinks are handled correctly - ''' - TARGET_DIRECTORIES: list[str] = [ - "/proc", - "/etc_ro", - "/tmp", - "/var", - "/run", - "/sys", - "/root", - "/tmp/var", - "/tmp/media", - "/tmp/etc", - "/tmp/var/run", - "/tmp/home", - "/tmp/home/root", - "/tmp/mnt", - "/tmp/opt", - "/tmp/www", - "/var/run", - "/var/lock", - "/usr/bin", - "/usr/sbin", - ] - - def __init__(self, archive_path: str, archive_files: list) -> None: - self.patch_name = "static.missing_dirs" - self.enabled = True - self.archive_path = archive_path - self.archive_files = {member.name[1:] for member in archive_files} - - @staticmethod - def _resolve_path(d: str, symlinks: dict, depth: int = 0) -> str: - parts = d.split("/") - for i in range(len(parts), 1, -1): - sub_path = "/".join(parts[:i]) - if sub_path in symlinks: - if depth > 10 or d == symlinks[sub_path]: - logger.warning(f"Symlink loop detected for {d}") - return d - return GenerateMissingDirs._resolve_path( - d.replace(sub_path, symlinks[sub_path], 1), symlinks, depth=depth+1 - ) - if not d.startswith("/"): - d = "/" + d - - if d in symlinks: - # We resolved a symlink to another symlink, need to recurse - # XXX: What if our resolved path contains a symlink earlier in the path TODO - if depth > 10 or d == symlinks[d]: - logger.warning(f"Symlink loop detected for {d}") - return d - else: - # Recurse - return GenerateMissingDirs._resolve_path(symlinks[d], symlinks, depth=depth+1) - - return d 
- - def generate(self, patches: dict) -> dict: - # XXX: Do we want to operate on archives to ensure symlinks behave as expected? - symlinks = TarHelper.get_symlink_members(self.archive_path) - result = defaultdict(dict) - - for d in self.TARGET_DIRECTORIES: - # It's not already in there, add it as a world-readable directory - # Handle symlinks. If we have a directory like /tmp/var and /tmp is a symlink to /asdf, we want to make /asdf/var - - resolved_path = self._resolve_path(d, symlinks) - # Try handling ../s by resolving the path - if ".." in resolved_path.split("/"): - resolved_path = os.path.normpath(resolved_path) - - if ".." in resolved_path.split("/"): - logger.debug("Skipping directory with .. in path: " + resolved_path) - continue - - while resolved_path.endswith("/"): - resolved_path = resolved_path[:-1] - - # Check if this directory looks like / - it might be ./ or something else - if resolved_path == ".": - continue - - # Guestfs gets mad if there's a /. in the path - if resolved_path.endswith("/."): - resolved_path = resolved_path[:-2] - - # Look at each parent directory, is it a symlink? 
- for i in range(1, len(resolved_path.split("/"))): - parent = "/".join(resolved_path.split("/")[:i]) - if parent in symlinks: - logger.debug( - f"Skipping {resolved_path} because parent {parent} is a symlink" - ) - continue - - # Clean up the path - while "/./" in resolved_path: - resolved_path = resolved_path.replace("/./", "/") - while "//" in resolved_path: - resolved_path = resolved_path.replace("//", "/") - while resolved_path.endswith("/"): - resolved_path = resolved_path[:-1] - - # If this path is in the archive OR any existing patches, skip - # Note we're ignoring the enabled flag of patches - if resolved_path in self.archive_files: - continue - if any([resolved_path in p[0].get('static_files', {}).keys() for p in patches.values()]): - continue - - # Add path and parents (as necessary) - path_parts = resolved_path.split("/") - # If any parts are just .// - for i in range(1, len(path_parts) + 1): - subdir = "/".join(path_parts[:i]) - if subdir not in self.archive_files: - result['static_files'][subdir] = { - "type": "dir", - "mode": 0o755, - } - return result - - -class GenerateReferencedDirs(PatchGenerator): - ''' - FirmAE "Boot mitigation": find path strings in binaries, make their directories - if they don't already exist. - ''' - - def __init__(self, extract_dir): - self.patch_name = "static.binary_paths" - self.enabled = True - self.extract_dir = extract_dir - - def generate(self, patches: dict) -> dict: - result = defaultdict(dict) - for f in FileHelper.find_executables( - self.extract_dir, {"/bin", "/sbin", "/usr/bin", "/usr/sbin"} - ): - # For things that look like binaries, find unique strings that look like paths - for dest in list( - set(FileHelper.find_strings_in_file(f, "^(/var|/etc|/tmp)(.+)([^\\/]+)$")) - ): - if any([x in dest for x in ["%s", "%c", "%d", "/tmp/services"]]): - # Ignore these paths, printf format strings aren't real directories to create - # Not sure what /tmp/services is or where we got that from? 
- continue - result["static_files"][dest] = { - "type": "dir", - "mode": 0o755, - } - return result - - -class GenerateShellMounts(PatchGenerator): - """ - Ensure we have /mnt/* directories referenced by shell scripts. - """ - - def __init__(self, extract_dir, existing): - self.patch_name = "static.shell_script_mounts" - self.extract_dir = extract_dir - self.enabled = True - self.existing = {member.name[1:] for member in existing} - - def generate(self, patches: dict) -> dict: - result = defaultdict(dict) - - for f in FileHelper.find_shell_scripts(self.extract_dir): - for dest in list( - set(FileHelper.find_strings_in_file(f, "^/mnt/[a-zA-Z0-9._/]+$")) - ): - if not dest.endswith("/"): - dest = os.path.dirname(dest) - # We're making the directory in which the file we saw referenced - # will be - - # Does this file exist in the filesystem or in any existing patches? - if dest in self.existing: - continue - if any([dest in p[0].get('static_files', {}).keys() for p in patches.values()]): - continue - - # Try resolving the dest (to handle symlinks more correctly than the existing check) - if FileHelper.exists(self.extract_dir, dest): - # Directory already exists - don't clobber! - continue - - result['static_files'][dest] = { - "type": "dir", - "mode": 0o755, - } - return result - - -class GenerateMissingFiles(PatchGenerator): - ''' - Ensure we have /bin/sh, /etc/TZ, /var/run/nvramd.pid, and localhost in /etc/hosts. 
- ''' - def __init__(self, extract_dir: str) -> None: - self.patch_name = "static.missing_files" - self.enabled = True - self.extract_dir = extract_dir - - def generate(self, patches: dict) -> dict: - # Firmadyne/FirmAE mitigation, ensure these 3 files always exist - # Note including /bin/sh here means we'll add it if it's missing and as a symlink to /igloo/utils/busybox - # this is similar to how we can shim an (existing) /bin/sh to point to /igloo/utils/busybox but here we - # only add it if it's missing - result = defaultdict(dict) - - model = { - # Ensure /bin/sh exists if not already present - "/bin/sh": { - "type": "symlink", - "target": "/igloo/utils/busybox" - }, - - # Set timezone to EST - "/etc/TZ": { - "type": "inline_file", - "contents": "EST5EDT", - "mode": 0o755, - }, - - # Needed for Ralink and D-Link - # See https://github.com/firmadyne/libnvram/blob/e33692277d475d61a03e0772efeef5c829872f34/nvram.c#L189 - "/var/run/nvramd.pid": { - "type": "inline_file", - "contents": "", - "mode": 0o644, - }, - } - - for fname, data in model.items(): - if not os.path.isfile(os.path.join(self.extract_dir, fname[1:])): - result['static_files'][fname] = data - - # Ensure we have an entry for localhost in /etc/hosts. So long as we have an /etc/ directory - hosts = "" - if os.path.isfile(os.path.join(self.extract_dir, "etc/hosts")): - with open(os.path.join(self.extract_dir, "etc/hosts"), "r") as f: - hosts = f.read() - - # if '127.0.0.1 localhost' not in hosts: - # Regex with whitespace and newlines - if not re.search(r"^127\.0\.0\.1\s+localhost\s*$", hosts, re.MULTILINE): - if len(hosts) and not hosts.endswith("\n"): - hosts += "\n" - hosts += "127.0.0.1 localhost\n" - - result["static_files"]["/etc/hosts"] = { - "type": "inline_file", - "contents": hosts, - "mode": 0o755, - } - return result - - -class DeleteFiles(PatchGenerator): - ''' - Delete some files we don't want. 
- ''' - def __init__(self, extract_dir: str) -> None: - self.patch_name = "static.delete_files" - self.enabled = True - self.extract_dir = extract_dir - - def generate(self, patches: dict) -> dict: - result = defaultdict(dict) - # Delete some files that we don't want. securetty is general, limits shell access. - # 'sys_resetbutton' is some FW-specific hack from FirmAE - - # TODO: does securetty matter if our root shell is disabled? - for f in ["/etc/securetty", "/etc/scripts/sys_resetbutton"]: - if os.path.isfile(os.path.join(self.extract_dir, f[1:])): - result["static_files"][f] = { - "type": "delete", - } - return result - - -class LinksysHack(PatchGenerator): - ''' - Linksys specific hack from FirmAE with pseudofile model. - ''' - def __init__(self, extract_dir: str) -> None: - self.patch_name = "pseudofiles.linksys" - self.enabled = True - self.extract_dir = extract_dir - - def generate(self, patches: dict) -> dict: - result = defaultdict(dict) - # TODO: The following changes from FirmAE should likely be disabled by default - # as we can't consider this information as part of our search if it's in the initial config - # Linksys specific hack from firmae - if all( - os.path.isfile(os.path.join(self.extract_dir, x[1:])) - for x in ["/bin/gpio", "/usr/lib/libcm.so", "/usr/lib/libshared.so"] - ): - result["pseudofiles"]["/dev/gpio/in"] = { - "read": { - "model": "return_const", - "val": 0xFFFFFFFF, - } - } - - return result - - -class KernelModules(PatchGenerator): - """ - Create a symlink from the guest kernel module path to our kernel's module path. 
- (ie.., /lib/modules/1.2.0-custom -> /lib/modules/4.10.0) - """ - def __init__(self, extract_dir: str, kernel_version: dict) -> None: - self.patch_name = "static.kernel_modules" - self.enabled = True - self.extract_dir = extract_dir - self.kernel_version = kernel_version - - @staticmethod - def is_kernel_version(name: str) -> bool: - # Regex to match typical kernel version patterns - return re.match(r"^\d+\.\d+\.\d+(-[\w\.]+)?$", name) is not None - - # Always use a.b.c format for the symlink target - @staticmethod - def pad_kernel_version(ver: str) -> str: - base = ver.split("-", 1)[0] - tokens = base.split(".") - while len(tokens) < 3: - tokens.append("0") - return ".".join(tokens) - - def generate(self, patches: dict) -> dict: - result = defaultdict(dict) - - # Identify original kernel version and create a symlink to /lib/modules - kernel_version = None - potential_kernels = set() - - # Only look at the top-level directories in self.extract_dir / lib / modules - modules_path = os.path.join(self.extract_dir, "lib/modules") - if os.path.exists(modules_path): - for d in os.listdir(modules_path): - d_path = os.path.join(modules_path, d) - if os.path.isdir(d_path): - potential_kernels.add(d) - - # Filter potential kernels to match the expected version pattern - potential_kernels = {d for d in potential_kernels if self.is_kernel_version(d)} - - # Determine the kernel version to use - if len(potential_kernels) == 1: - kernel_version = potential_kernels.pop() - elif len(potential_kernels) > 1: - # Prioritize the version names that match more complex patterns with dashes - for potential_name in potential_kernels: - if "." in potential_name and "-" in potential_name: - kernel_version = potential_name - break - if not kernel_version: - # Fallback to a simpler version matching pattern - for potential_name in potential_kernels: - if "." 
in potential_name: - kernel_version = potential_name - break - - # Fallback to picking the first one (could improve this further) - if not kernel_version: - logger.warning( - "Multiple kernel versions look valid (TODO improve selection logic, grabbing first)" - ) - logger.warning(potential_kernels) - kernel_version = potential_kernels.pop() - - if kernel_version: - # We have a kernel version, add it to our config - padded_selected = self.pad_kernel_version(self.kernel_version["selected_kernel"]) - padded_target = self.pad_kernel_version(kernel_version) - result["static_files"][f"/lib/modules/{padded_selected}"] = { - "type": "symlink", - "target": f"/lib/modules/{padded_target}", - } - - return result - - -class ShimBinaries: - ''' - Identify binaries in the guest FS that we want to shim - and add symlinks to go from guest bin -> igloo bin - into our config. - ''' - - def __init__(self, files): - self.files = files - - def make_shims(self, shim_targets: dict[str, str]) -> dict: - result = defaultdict(dict) - for fname in self.files: - path = fname.path.lstrip('.') # Trim leading . - basename = os.path.basename(path) - - if path.startswith("/igloo/utils/"): - raise ValueError( - "Unexpected /igloo/utils present in input filesystem archive" - ) - - # It's a guest file/symlink. If it's one of our targets and executable, we want to shim! - if not (fname.isfile() or fname.issym()) or not fname.mode & ( - stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH - ): - # Skip if it's not a file or non-executable - continue - - # Is the current file one we want to shim? 
- if basename in shim_targets: - logger.debug(f"making shim for {basename}, full path: {path}, fname.path: {fname.path}") - result["static_files"][path] = { - "type": "shim", - "target": f"/igloo/utils/{shim_targets[basename]}", - } - return result - - -class ShimStopBins(ShimBinaries, PatchGenerator): - def __init__(self, files: list) -> None: - super().__init__(files) - self.patch_name = "static.shims.stop_bins" - self.enabled = True - - def generate(self, patches: dict) -> dict: - return self.make_shims({ - "reboot": "exit0.sh", - "halt": "exit0.sh", - }) - - -class ShimNoModules(ShimBinaries, PatchGenerator): - def __init__(self, files: list) -> None: - super().__init__(files) - self.patch_name = "static.shims.no_modules" - self.enabled = True - - def generate(self, patches: dict) -> dict: - return self.make_shims({ - "insmod": "exit0.sh" - }) - - -class ShimBusybox(ShimBinaries, PatchGenerator): - def __init__(self, files: list) -> None: - super().__init__(files) - self.patch_name = "static.shims.busybox" - self.enabled = False - - def generate(self, patches: dict) -> dict: - return self.make_shims({ - "ash": "busybox", - "sh": "busybox", - "bash": "bash", - }) - - -class ShimCrypto(ShimBinaries, PatchGenerator): - def __init__(self, files: list) -> None: - super().__init__(files) - self.patch_name = "static.shims.crypto" - self.enabled = False - - def generate(self, patches: dict) -> dict | None: - result = self.make_shims({ - "openssl": "openssl", - "ssh-keygen": "ssh-keygen" - }) - - if not len(result.get("static_files", [])): - # Nothing to shim, don't add the key copy - return - - result["static_files"]["/igloo/keys/*"] = { - "type": "host_file", - "mode": 0o444, - "host_path": os.path.join(*[RESOURCES, "static_keys", "*"]) - } - - return result - - -class ShimFwEnv(ShimBinaries, PatchGenerator): - ''' - Replace fw_printenv/getenv/setenv with hypercall based alternatives - Work in progress. 
Needs testing - ''' - def __init__(self, files: list) -> None: - raise NotImplementedError("Untested shim type") - super().__init__(files) - self.patch_name = "static.shims.fw_env" - - def generate(self, patches: dict) -> dict: - return self.make_shims({ - "fw_printenv": "fw_printenv", - "fw_getenv": "fw_printenv", - "fw_setenv": "fw_printenv", - }) - - -class NvramLibraryRecovery(PatchGenerator): - ''' - During static analysis the LibrarySymbols class collected - key->value mappings from libraries exporting some common nvram - defaults symbols ("Nvrams", "router_defaults") - add these to our - nvram config if we have any. - - TODO: if we find multiple nvram source files here, we should generate multiple patches. - Then we should consider these during search. For now we just take non-conflicting values - from largest to smallest source files. More realistic might be to try each file individually. - ''' - - def __init__(self, library_info): - self.library_info = library_info - self.patch_name = "nvram.01_library" - self.enabled = True - - def generate(self, patches: dict) -> dict | None: - sources = self.library_info.get("nvram", {}) - if not len(sources): - return - - # Sources is source filename -> key -> value - # First we want to sort sources from most to least keys - sorted_sources = sorted(sources.items(), key=lambda x: len(x[1]), reverse=True) - - nvram_defaults = {} - for source, nvram in sorted_sources: - for key, value in nvram.items(): - if key not in nvram_defaults: - nvram_defaults[key] = value - - if len(nvram_defaults): - return {'nvram': nvram_defaults} - - -class NvramConfigRecovery(PatchGenerator): - """ - Search for files that contain nvram keys and values to populate NVRAM defaults - """ - def __init__(self, extract_dir: str) -> None: - self.extract_dir = extract_dir - self.patch_name = "nvram.02_config_paths" - self.enabled = True - - def generate(self, patches: dict) -> dict | None: - result = NvramHelper.nvram_config_analysis(self.extract_dir, 
True) - if len(result): - return {'nvram': result} - - -class NvramConfigRecoveryWild(PatchGenerator): - """ - Search for files that contain nvram keys and values to populate NVRAM defaults. - This version relaxes the search to allow for basename matches instead of full path - matches. - """ - def __init__(self, extract_dir: str) -> None: - self.extract_dir = extract_dir - self.patch_name = "nvram.03_config_paths_basename" - self.enabled = True - - def generate(self, patches: dict) -> dict | None: - result = NvramHelper.nvram_config_analysis(self.extract_dir, False) - if len(result): - return {'nvram': result} - - -class NvramDefaults(PatchGenerator): - """ - Add default nvram values from Firmadyne and FirmAE - """ - def __init__(self) -> None: - self.patch_name = "nvram.04_defaults" - self.enabled = True - - def generate(self, patches: dict) -> dict | None: - result = NvramHelper._get_default_nvram_values() - if len(result): - return {'nvram': result} - - -class NvramFirmAEFileSpecific(PatchGenerator): - """ - Apply FW-specific nvram patches based on presence of hardcoded strings in files from FirmAE. - """ - FIRMAE_TARGETS: dict[str, list[tuple[str, str]]] = { - "./sbin/rc": [("ipv6_6to4_lan_ip", "2002:7f00:0001::")], - "./lib/libacos_shared.so": [("time_zone_x", "0")], - "./sbin/acos_service": [("rip_enable", "0")], - "./usr/sbin/httpd": [ - ("rip_multicast", "0"), - ("bs_trustedip_enable", "0"), - ("filter_rule_tbl", ""), - ], - } - - def __init__(self, fs_path: str) -> None: - self.fs_path = fs_path - self.patch_name = "nvram.05_firmae_file_specific" - - def generate(self, patches: dict) -> dict | None: - result = {} - - # For each key in static_targets, check if the query is in the file - # TODO: Should we be operating on an archive to better handle symlinks? 
- for key, queries in self.FIRMAE_TARGETS.items(): - if not os.path.isfile(os.path.join(self.fs_path, key[1:])): - continue - - try: - with open(os.path.join(self.fs_path, key[1:]), "rb") as f: - for query, _ in queries: - # Check if query is in file - if query.encode() in f.read(): - result[key] = query - except Exception as e: - # Not sure what kind of errors we could encounter here, missing files? perms? - logger.error(f"Failed to read {key} for nvram key check: {e}") - - if len(result): - return {'nvram': result} - - -class PseudofilesTailored(PatchGenerator): - ''' - For all missing pseudofiles we saw referenced during static analysis, - try adding them with a default model - ''' - def __init__(self, pseudofiles: dict) -> None: - self.patch_name = "pseudofiles.dynamic" - self.pseudofiles = pseudofiles - self.enabled = True - - def generate(self, patches: dict) -> dict | None: - results = {} - mtd_count = 0 - - for section, file_names in self.pseudofiles.items(): - for file_name in file_names: - if section == 'dev' and file_name.startswith("/dev/mtd"): - # TODO: do we want to make placeholders for MTD or not? - continue - - if file_name.endswith("/"): - # We don't want to treat a directory as a pseudofile, instead we'll - # add a placehodler into the directory. 
This ensures the directory is created - # XXX: hyperfs doesn't allow userspace to create files in these directories yet - # https://github.com/rehosting/hyperfs/issues/20 - file_name += ".placeholder" - results[file_name] = { - 'read': { - "model": "zero", - }, - 'write': { - "model": "discard", - } - } - - if section == "dev": - # /dev files get a default IOCTL model - results[file_name]['ioctl'] = { - '*': {"model": "return_const", "val": 0} - } - - if file_name.startswith("/dev/mtd"): - # MTD devices get a name (shows up in /proc/mtd) - # Note 'uboot' probably isn't right, but we need something - results[file_name]['name'] = f"uboot.{mtd_count}" - mtd_count += 1 - - if len(results): - return {'pseudofiles': results} diff --git a/src/penguin/gen_config.py b/src/penguin/gen_config.py index c7a4b9b3f..95202b57d 100644 --- a/src/penguin/gen_config.py +++ b/src/penguin/gen_config.py @@ -24,8 +24,8 @@ from pathlib import Path from penguin import getColoredLogger -from . import config_patchers as CP -from . 
import static_analyses as STATIC +from .static_plugin import StaticAnalysisPlugin, ConfigPatcherPlugin +from .static_plugin_manager import StaticPluginManager from .defaults import ( default_version as DEFAULT_VERSION, @@ -63,10 +63,18 @@ def __init__(self, fs_archive: str, output_dir: str | Path) -> None: ["find", str(extracted_fs), "-type", "d", "-exec", "chmod", "u+rx", "{}", "+"]) try: + # Initialize StaticPluginManager + # We look for plugins in pyplugins/static_analysis and pyplugins/config_patchers + plugin_dirs = [ + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "pyplugins", "static_analysis"), + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "pyplugins", "config_patchers"), + ] + self.plugin_manager = StaticPluginManager(plugin_dirs) + # First run static analyses and produce info about the filesystem # This informs how we generate configs (e.g., what's the arch, what's the init prog) # and also subsequent analyses after a run (i.e., guiding refinement) - static_results = self.run_static_analyses(output_dir, extracted_fs) + static_results = self.run_static_analyses(output_dir, archive_fs, extracted_fs) # TODO: Is there a better way to manage order of patches? patches = self.create_patches(archive_fs, static_results, extracted_fs) @@ -85,6 +93,7 @@ def __init__(self, fs_archive: str, output_dir: str | Path) -> None: def run_static_analyses( self, output_dir: str | Path, + fs_archive: str | Path, extracted_dir: str | Path, static_dir_name: str = "static" ) -> dict: @@ -93,6 +102,8 @@ def run_static_analyses( :param output_dir: Output directory for results. :type output_dir: str or Path + :param fs_archive: Path to filesystem archive. + :type fs_archive: str or Path :param extracted_dir: Directory containing extracted filesystem. :type extracted_dir: str or Path :param static_dir_name: Name of static results subdirectory. 
@@ -103,42 +114,34 @@ def run_static_analyses( results_dir = Path(output_dir, static_dir_name) results_dir.mkdir(exist_ok=True, parents=True) - # Collect a list of all files in advance so we don't regenerate - # archive_files = TarHelper.get_all_members(fs_archive) - - # Ordered list of static analyses to run (from static_analyses.py) - # Each has an init method that can return results - # If any raises an exception, it will be fatal to config generation and shown - # to a user - static_analyses = [ - STATIC.ArchId, - STATIC.InitFinder, - STATIC.EnvFinder, - STATIC.PseudofileFinder, - STATIC.InterfaceFinder, - STATIC.ClusterCollector, - STATIC.LibrarySymbols, - STATIC.KernelVersionFinder, - ] - USE_JSON_XZ = [ - STATIC.LibrarySymbols + "LibrarySymbols" ] results = {} - for analysis in static_analyses: - # Call each analysis and store results - this_result = analysis().run(extracted_dir, results) - results[analysis.__name__] = this_result - - # If we have results, store on disk. Always store in results dict, even if empty - if this_result: - if analysis in USE_JSON_XZ: - with lzma.open(results_dir / f"{analysis.__name__}.json.xz", "wt", encoding="utf-8") as f: - json.dump(this_result, f) - else: - with open(results_dir / f"{analysis.__name__}.yaml", "w") as f: - yaml.dump(this_result, f) + ordered_plugins = self.plugin_manager.get_ordered_plugins() + + for plugin_cls in ordered_plugins: + if not issubclass(plugin_cls, StaticAnalysisPlugin): + continue + + logger.info(f"Running static analysis: {plugin_cls.__name__}") + try: + plugin_instance = plugin_cls(str(fs_archive), str(extracted_dir), results) + this_result = plugin_instance.run() + results[plugin_cls.__name__] = this_result + + # If we have results, store on disk. 
Always store in results dict, even if empty + if this_result: + if plugin_cls.__name__ in USE_JSON_XZ: + with lzma.open(results_dir / f"{plugin_cls.__name__}.json.xz", "wt", encoding="utf-8") as f: + json.dump(this_result, f) + else: + with open(results_dir / f"{plugin_cls.__name__}.yaml", "w") as f: + yaml.dump(this_result, f) + except Exception as e: + logger.error(f"Error running static analysis {plugin_cls.__name__}: {e}") + raise e return results @@ -234,54 +237,26 @@ def create_patches( :rtype: dict """ - # Collect a list of all files in advance so we don't regenerate - archive_files = CP.TarHelper.get_all_members(fs_archive) - - # Instantiate and apply patch generators - # Later patches will override earlier ones - patch_generators = [ - CP.BasePatch(static_results['ArchId'], static_results['InitFinder'], static_results['KernelVersionFinder']), - CP.RootShell(), - CP.DynamicExploration(), - CP.SingleShotFICD(), - CP.ManualInteract(), - CP.NetdevsDefault(), - CP.NetdevsTailored(static_results['InterfaceFinder']), - CP.PseudofilesExpert(), - CP.PseudofilesTailored(static_results['PseudofileFinder']), - CP.LibInjectSymlinks(extract_dir), - CP.LibInjectStringIntrospection(static_results['LibrarySymbols']), - CP.LibInjectTailoredAliases(static_results['LibrarySymbols']), - CP.LibInjectFixedAliases(), - CP.ForceWWW(extract_dir), - CP.GenerateMissingDirs(fs_archive, archive_files), - CP.GenerateReferencedDirs(extract_dir), - CP.GenerateShellMounts(extract_dir, archive_files), - CP.GenerateMissingFiles(extract_dir), - CP.DeleteFiles(extract_dir), - CP.LinksysHack(extract_dir), - CP.KernelModules(extract_dir, static_results['KernelVersionFinder']), - CP.ShimStopBins(archive_files), - CP.ShimNoModules(archive_files), - CP.ShimBusybox(archive_files), - CP.ShimCrypto(archive_files), - # ShimFwEnv(archive_files), - CP.NvramFirmAEFileSpecific(extract_dir), - CP.NvramDefaults(), - CP.NvramConfigRecoveryWild(extract_dir), - CP.NvramConfigRecovery(extract_dir), - 
CP.NvramLibraryRecovery(static_results['LibrarySymbols']), - ] - # collect patches in patches[patchfile_name] -> {section -> {key -> value}} patches = defaultdict(lambda: defaultdict(lambda: defaultdict(dict))) - for generator in patch_generators: - if result := generator.generate(patches): - if len(result): - patches[generator.patch_name] = (result, generator.enabled) - if not generator.enabled: - logger.info(f"{generator.patch_name} patch generated but disabled") + ordered_plugins = self.plugin_manager.get_ordered_plugins() + + for plugin_cls in ordered_plugins: + if not issubclass(plugin_cls, ConfigPatcherPlugin): + continue + + logger.info(f"Running config patcher: {plugin_cls.__name__}") + try: + generator = plugin_cls(str(fs_archive), str(extract_dir), static_results) + if result := generator.generate(patches): + if len(result): + patches[generator.patch_name] = (result, generator.enabled) + if not generator.enabled: + logger.info(f"{generator.patch_name} patch generated but disabled") + except Exception as e: + logger.error(f"Error running config patcher {plugin_cls.__name__}: {e}") + raise e return patches @@ -335,7 +310,14 @@ def initialize_and_build_config( os.umask(0o000) # Generate our config and patches - ConfigBuilder(fs, output_dir) + builder = ConfigBuilder(fs, output_dir) + + # Save the hash of the static plugins to detect changes later + state_hash = builder.plugin_manager.get_state_hash() + # Save .plugin_cache in the same directory as the output config + cache_dir = os.path.dirname(out) if out else str(output_dir) + with open(os.path.join(cache_dir, ".plugin_cache"), "w") as f: + f.write(state_hash) outfile = os.path.join(output_dir, "config.yaml") diff --git a/src/penguin/penguin_run.py b/src/penguin/penguin_run.py index 59d4da677..f8f8554d2 100755 --- a/src/penguin/penguin_run.py +++ b/src/penguin/penguin_run.py @@ -108,6 +108,34 @@ def run_config( # Image isn't in our config, but the path we use is a property # of configs files section - 
we'll hash it to get a path # Read input config and validate + + # Check if static plugins have changed and we need to regenerate the config + cache_file = os.path.join(proj_dir, ".plugin_cache") + if os.path.isfile(cache_file): + try: + with open(cache_file, "r") as f: + saved_hash = f.read().strip() + + from .static_plugin_manager import StaticPluginManager + plugin_dirs = [ + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "pyplugins", "static_analysis"), + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "pyplugins", "config_patchers"), + ] + current_hash = StaticPluginManager(plugin_dirs).get_state_hash() + + if saved_hash != current_hash: + logger.warning("Static plugins have changed. Auto-regenerating the config...") + from .gen_config import initialize_and_build_config + fs_tar = conf["core"].get("fs", "") + if fs_tar: + full_fs_tar = os.path.join(proj_dir, fs_tar) + # Use a temporary directory for artifacts to avoid FileExistsError + # if proj_dir has conflicting contents, and let initialize_and_build_config + # write out safely to conf_yaml. + initialize_and_build_config(full_fs_tar, out=conf_yaml, artifacts_dir=None) + except Exception as e: + logger.warning(f"Could not check plugin cache: {e}") + if resolved_kernel: logger.info(f"Using pre-resolved kernel: {resolved_kernel}") conf = load_config(proj_dir, conf_yaml, resolved_kernel=resolved_kernel, verbose=True) diff --git a/src/penguin/static_analyses.py b/src/penguin/static_analyses.py deleted file mode 100644 index 7b8676a0c..000000000 --- a/src/penguin/static_analyses.py +++ /dev/null @@ -1,1190 +0,0 @@ -""" -penguin.static_analyses -======================= - -Static analysis utilities for the Penguin emulation environment. - -This module provides classes and helpers for analyzing extracted filesystems. 
-""" - -import os -import re -import stat -import struct -from subprocess import check_output, CalledProcessError, STDOUT, PIPE, SubprocessError - -from abc import ABC -from elftools.common.exceptions import ELFError, ELFParseError -from elftools.elf.elffile import ELFFile -from elftools.elf.sections import SymbolTableSection -from collections import Counter -from pathlib import Path -from penguin import getColoredLogger -from penguin.utils import get_available_kernel_versions -from penguin.defaults import DEFAULT_KERNEL -import tempfile -import subprocess - -from .arch import arch_filter, arch_end -logger = getColoredLogger("penguin.static_analyses") - - -class FileSystemHelper: - @staticmethod - def find_regex( - target_regex: re.Pattern, - extract_root: str, - ignore: list | tuple | None = None - ) -> dict: - """ - Search the filesystem for matches to a regex pattern using ripgrep. - - :param target_regex: Compiled regex pattern to match. - :param extract_root: Root directory to search. - :param ignore: Optional list/tuple of matches to ignore. 
- :return: Dict of {match: {"count": int, "files": [str]}} - """ - results = {} - if not ignore: - ignore = tuple() - elif isinstance(ignore, list): - ignore = tuple(ignore) - - pattern_str = target_regex.pattern - extract_path_str = str(extract_root) - - try: - # Get list of files containing matches - file_list_output = check_output( - f"rg --files-with-matches -a '{pattern_str}' '{extract_path_str}'", - stderr=PIPE, - shell=True, - ) - - # Process each file with Python's regex to extract actual matches - if file_list_output: - for filepath in file_list_output.decode().splitlines(): - if not os.path.isfile(filepath) or os.path.islink(filepath): - continue - - # open the file and read the content - try: - with open(filepath, "r", encoding="utf-8", errors="replace") as f: - content = f.read() - except Exception as e: - logger.warning(f"failed to read file {filepath}: {e}") - continue - # apply regex pattern to find matches - matches = target_regex.findall(content) - for match in matches: - if match in ignore: - continue - if match not in results: - results[match] = {"count": 0, "files": set()} - results[match]["count"] += 1 - results[match]["files"].add(filepath) - except (SubprocessError, FileNotFoundError) as e: - if e.returncode == 1: - return {} - else: - logger.warning(f"Failed to run ripgrep: {e} - falling back to pure Python regex") - return FileSystemHelper._find_regex_python(target_regex, extract_root, ignore) - - return results - - @staticmethod - def _find_regex_python( - target_regex: re.Pattern, - extract_root: str, - ignore: list | None = None - ) -> dict: - """ - Fallback implementation using Python's built-in regex. - - :param target_regex: Compiled regex pattern to match. - :param extract_root: Root directory to search. - :param ignore: Optional list of matches to ignore. 
class StaticAnalysis(ABC):
    """
    Common base class for every static analysis pass.

    Subclasses override :meth:`run`; the base implementations do nothing.
    """

    def __init__(self) -> None:
        """
        Set up the analysis. The base class keeps no state.
        """

    def run(self, extract_dir: str, prior_results: dict) -> None:
        """
        Execute the analysis. The base implementation is a no-op.

        :param extract_dir: Directory containing extracted filesystem.
        :param prior_results: Results from previous analyses.
        """
class ArchId(StaticAnalysis):
    """
    Identify the most common architecture in the extracted filesystem.
    """
    def run(self, extracted_fs: str, prior_results: dict) -> str:
        '''
        Count ELF architectures to identify the most common one.

        Only regular, non-symlink files accepted by :meth:`_binary_filter` are
        inspected. The most common 64-bit architecture is selected whenever any
        64-bit binary was counted; otherwise the most common 32-bit one is used.

        :param extracted_fs: Path to extracted filesystem.
        :param prior_results: Results from previous analyses (not read here).
        :return: Most common architecture string.
        :raises ValueError: If no architecture was counted, or if binaries of
            unknown architecture outnumber the selected one.
        '''

        # Per-bitness tallies of architecture names, plus a plain counter for
        # ELF files whose architecture or bitness could not be classified
        arch_counts = {32: Counter(), 64: Counter(), "unknown": 0}
        for root, _, files in os.walk(extracted_fs):
            for file_name in files:
                path = os.path.join(root, file_name)

                if (
                    os.path.isfile(path)
                    and not os.path.islink(path)
                    and self._binary_filter(extracted_fs, path)
                ):
                    logger.debug(f"Checking architecture in {path}")
                    with open(path, "rb") as f:
                        # Cheap magic-number check before handing the file to pyelftools
                        if f.read(4) != b"\x7fELF":
                            continue
                        f.seek(0)
                        try:
                            ef = ELFFile(f)
                        except ELFError as e:
                            logger.warning(f"Failed to parse ELF file {path}: {e}. Ignoring")
                            continue
                        info = arch_filter(ef)
                    # NOTE(review): assumes info.bits is None, 32 or 64; any other
                    # value would raise KeyError below - confirm against arch_filter
                    if info.bits is None or info.arch is None:
                        arch_counts["unknown"] += 1
                    else:
                        arch_counts[info.bits][info.arch] += 1

        # If there is at least one intel and non-intel arch,
        # filter out all the intel ones.
        # Some firmwares include x86_64 binaries left-over from the build process that aren't run in the guest.
        intel_archs = ("intel", "intel64")
        archs_list = list(arch_counts[32].keys()) + list(arch_counts[64].keys())
        if any(arch in intel_archs for arch in archs_list) and any(
            arch not in intel_archs for arch in archs_list
        ):
            # Counter deletion does not raise when the key is absent
            del arch_counts[32]["intel"]
            del arch_counts[64]["intel64"]

        # Now select the most common architecture.
        # First try the most common 64-bit architecture.
        # Then try the most common 32-bit one.
        best_64 = arch_counts[64].most_common(1)
        best_32 = arch_counts[32].most_common(1)
        if len(best_64) != 0:
            best = best_64[0][0]
            best_count = best_64[0][1]
        elif len(best_32) != 0:
            best = best_32[0][0]
            best_count = best_32[0][1]
        else:
            raise ValueError("Failed to determine architecture of filesystem")

        # If unknown is the most common, we'll raise an error
        if arch_counts["unknown"] > best_count:
            # Dump debug info - which arches have what counts?
            for arch, count in arch_counts[32].items():
                logger.info(f"32-bit arch {arch} has {count} files")

            for arch, count in arch_counts[64].items():
                logger.info(f"64-bit arch {arch} has {count} files")

            # Finally, report unknown count
            logger.info(f"Unknown architecture count: {arch_counts['unknown']}")
            raise ValueError("Failed to determine architecture of filesystem")

        logger.debug(f"Identified architecture: {best}")
        return best

    @staticmethod
    def _binary_filter(fsbase: str, name: str) -> bool:
        """
        Filter for binary files of interest.

        A path qualifies when it starts with one of the standard binary
        directories under ``fsbase`` (a plain string-prefix test), or when its
        name looks like a shared library, kernel module, or busybox.

        :param fsbase: Base directory.
        :param name: File path.
        :return: True if file is a relevant binary.
        """
        base_directories = ["sbin", "bin", "usr/sbin", "usr/bin"]
        for base in base_directories:
            if name.startswith(os.path.join(fsbase, base)):
                return True
        # Shared libraries, kernel modules, or busybox
        return name.endswith((".so", ".ko")) or \
            ".so." in name or \
            name.endswith("busybox")
class InitFinder(StaticAnalysis):
    '''
    Find potential init scripts and binaries in an extracted filesystem.
    '''
    # Any of the user/group/other execute permission bits
    _EXEC_BITS: int = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

    def run(self, filesystem_root_path: str, prior_results: dict) -> list[str]:
        '''
        Search the filesystem for binaries that might be init scripts.

        Candidates are ordered shortest path first (ties broken alphabetically
        so the result does not depend on directory walk order), then any of
        ``preinit``, ``init`` and ``rcS`` are moved to the front in that order.

        :param filesystem_root_path: Root path of extracted filesystem.
        :param prior_results: Results from previous analyses (not read here).
        :return: Ranked list of guest-absolute init paths.
        '''
        inits = []

        # Walk through the filesystem root and find potential init scripts.
        for root, _, files in os.walk(filesystem_root_path):
            for filename in files:
                filepath = os.path.join(root, filename)
                if self._is_init_script(filepath, filesystem_root_path):
                    inits.append("/" + os.path.relpath(filepath, filesystem_root_path))

        # Sort inits by length, shortest to longest; the path itself is the tie-break.
        inits.sort(key=lambda x: (len(x), x))

        # NOTE: inits named on the kernel command line are not considered.
        # Filesystem extraction could analyze the kernel binary to find the
        # built-in init argument, but there is currently no way to pass that
        # information through to this analysis.

        # Now rank our init options, using the same ranking as Firmadyne/Firmae where
        # a few specific inits are prioritized, then fallback to others
        target_inits = ["preinit", "init", "rcS"]
        # If any of these are in our init list, move them to the front
        # but maintain this order (i.e., preinit goes before /init so loop backwards)
        for potential in target_inits[::-1]:
            try:
                idx = [x.split("/")[-1] for x in inits].index(potential)
            except ValueError:
                # No match
                continue
            # Move to front
            inits.insert(0, inits.pop(idx))

        # Remove entries longer than 32 characters.
        inits = [i for i in inits if len(i) <= 32]

        # Final pass to ensure all inits are executable.
        # Drop the leading "/" so the path is joined relative to the extract
        # dir: os.path.join discards everything before an absolute component,
        # so joining the "/" itself would stat the host root instead.
        return [
            i for i in inits
            if self._is_executable(os.path.join(filesystem_root_path, i[1:]))
        ]

    @staticmethod
    def _is_executable(path: str) -> bool:
        """
        Report whether any execute permission bit is set on ``path``.

        :param path: Path to stat (symlinks are followed).
        :return: True if executable by user, group or other; False if the
            path cannot be stat'ed.
        """
        try:
            return bool(os.stat(path).st_mode & InitFinder._EXEC_BITS)
        except OSError:
            return False

    @staticmethod
    def _is_init_script(filepath: str, fsroot: str) -> bool:
        '''
        Determine if a file is a potential init script.

        :param filepath: Path to file.
        :param fsroot: Filesystem root.
        :return: True if file is a potential init script.
        '''
        # Skip our own files under <fsroot>/igloo. filepath arrives joined to
        # fsroot, so compare the path relative to the root.
        relative = os.path.relpath(filepath, fsroot)
        if relative == "igloo" or relative.startswith("igloo" + os.sep):
            return False

        if not os.path.isfile(filepath) and not os.path.islink(filepath):
            return False

        name = os.path.basename(filepath)
        if any(x in name for x in ["init", "start"]) and not any(
            x in name for x in ["inittab", "telinit", "initd"]
        ):
            # If 'start' is in the name, ensure it's not part of "restart" or "startup".
            if "start" in name and not re.search(r"[\W_\-\.]start[\W_\-\.]", name):
                return False

            # Handle symlinks: make sure the link target exists.
            if os.path.islink(filepath):
                link_target = os.readlink(filepath)
                if os.path.isabs(link_target):
                    # Absolute targets are resolved inside the extracted root
                    result = os.path.join(fsroot, "./"+link_target)
                else:
                    result = os.path.join(os.path.dirname(filepath), link_target)
                if not os.path.exists(result):
                    logger.warning(
                        f"Potential init '{filepath}' is a symlink to '{link_target}' which does not exist in the filesystem"
                    )
                    return False

            # If 'init' is in the name, ensure it's not named `.init`.
            if "init" in name and name.endswith(".init"):
                return False

            # Check if the file is executable.
            if os.path.isfile(filepath) and InitFinder._is_executable(filepath):
                return True

        elif "rcS" in name:
            if os.path.isfile(filepath) and InitFinder._is_executable(filepath):
                return True

        return False
class KernelVersionFinder(StaticAnalysis):
    """
    Pick the kernel to emulate based on versions seen in the extracted filesystem.
    """

    @staticmethod
    def is_kernel_version(name: str) -> bool:
        """
        Tell whether ``name`` looks like ``X.Y.Z`` with an optional ``-suffix``.

        :param name: Version string.
        :return: True if matches kernel version pattern.
        """
        return bool(re.match(r"^\d+\.\d+\.\d+(-[\w\.]+)?$", name))

    @staticmethod
    def select_best_kernel(kernel_versions: set[str]) -> str:
        """
        Map the newest guest kernel version onto the closest available kernel.

        Available kernels sharing the guest's major version are preferred;
        closeness is the sum of absolute differences of the version components.

        :param kernel_versions: Iterable of kernel version strings.
        :return: Best matching kernel version string.
        """
        if not kernel_versions:
            return DEFAULT_KERNEL

        def numeric_parts(version):
            # "4.9.1-foo" -> (4, 9, 1); non-numeric components are dropped
            release = version.split("-", 1)[0]
            return tuple(int(tok) for tok in release.split(".") if tok.isdigit())

        newest = max(kernel_versions, key=numeric_parts)
        guest_version = numeric_parts(newest)
        guest_major = guest_version[0] if guest_version else None

        available_versions = get_available_kernel_versions()

        def distance(candidate):
            # Zero-pad the shorter tuple so components line up
            width = max(len(candidate), len(guest_version))
            padded_candidate = candidate + (0,) * (width - len(candidate))
            padded_guest = guest_version + (0,) * (width - len(guest_version))
            return sum(abs(c - g) for c, g in zip(padded_candidate, padded_guest))

        same_major = [v for v in available_versions if v[0] == guest_major]
        pool = same_major if same_major else available_versions
        closest = min(pool, key=distance)

        return ".".join(str(part) for part in closest)

    def run(self, extract_dir: str, prior_results: dict) -> dict[str, list[str] | str]:
        """
        Collect kernel versions from ``lib/modules`` and choose one.

        :param extract_dir: Directory containing extracted filesystem.
        :param prior_results: Results from previous analyses.
        :return: Dict with potential and selected kernel versions.
        """
        modules_path = os.path.join(extract_dir, "lib/modules")

        # Only top-level directories of lib/modules whose name is a kernel version
        potential_kernels = set()
        if os.path.exists(modules_path):
            potential_kernels = {
                entry for entry in os.listdir(modules_path)
                if os.path.isdir(os.path.join(modules_path, entry))
                and self.is_kernel_version(entry)
            }

        return {
            "potential_kernels": sorted(potential_kernels),
            "selected_kernel": self.select_best_kernel(potential_kernels),
        }
class EnvFinder(StaticAnalysis):
    """
    Discover candidate kernel-command-line variables and values in the filesystem.
    """
    BORING_VARS: list[str] = ["TERM"]

    def run(self, extract_dir: str, prior_results: dict) -> dict[str, list | None]:
        """
        Build a map of environment variable names to their possible values.

        :param extract_dir: Directory containing extracted filesystem.
        :param prior_results: Results from previous analyses; 'InitFinder' is read.
        :return: Dict of environment variable names to possible values
            (None when no value was found for a name).
        """
        # Two knobs of our own are always present: igloo_task_size (three
        # magic values) and igloo_init (the candidate init programs).
        potential_env = {
            "igloo_task_size": [0xBF000000, 0x7F000000, 0x3F000000],
            "igloo_init": prior_results['InitFinder']
        }

        # Variable names referenced on the same line as /proc/cmdline
        key_pattern = re.compile(r"\/proc\/cmdline.*?([A-Za-z0-9_]+)=", re.MULTILINE)
        found_keys = FileSystemHelper.find_regex(
            key_pattern, extract_dir, ignore=self.BORING_VARS
        ).keys()

        # For each name, look for `name=value` assignments anywhere in the filesystem
        for name in found_keys:
            value_pattern = re.compile(name + r"=([A-Za-z0-9_]+)", re.MULTILINE)
            found_values = FileSystemHelper.find_regex(
                value_pattern, extract_dir, ignore=self.BORING_VARS
            ).keys()
            potential_env[name] = list(found_values) if len(found_values) else None

        return potential_env
class PseudofileFinder(StaticAnalysis):
    """
    Find device and proc pseudofiles referenced in the extracted filesystem.
    """
    # Device names that already exist in the guest, so no pseudofile is needed
    IGLOO_ADDED_DEVICES: list[str] = [
        "autofs", "btrfs-control", "cfs0", "cfs1", "cfs2", "cfs3",
        "cfs4", "console", "cpu_dma_latency", "full", "fuse", "input", "kmsg",
        "loop-control", "loop0", "loop1", "loop2", "loop3", "loop4",
        "loop5", "loop6", "loop7", "mem", "memory_bandwidth", "mice", "net",
        "network_latency", "network_throughput", "null", "port", "ppp",
        "psaux", "ptmx", "pts", "ptyp0", "ptyp1", "ptyp2", "ptyp3", "ptyp4",
        "ptyp5", "ptyp6", "ptyp7", "ptyp8", "ptyp9", "ptypa", "ptypb",
        "ptypc", "ptypd", "ptype", "ptypf", "ram", "ram0", "ram1", "ram10",
        "ram11", "ram12", "ram13", "ram14", "ram15", "ram2", "ram3",
        "ram4", "ram5", "ram6", "ram7", "ram8", "ram9", "random", "root",
        "tty", "tty0", "tty1", "tty10", "tty11", "tty12", "tty13",
        "tty14", "tty15", "tty16", "tty17", "tty18", "tty19", "tty2",
        "tty20", "tty21", "tty22", "tty23", "tty24", "tty25", "tty26",
        "tty27", "tty28", "tty29", "tty3", "tty30", "tty31", "tty32",
        "tty33", "tty34", "tty35", "tty36", "tty37", "tty38", "tty39",
        "tty4", "tty40", "tty41", "tty42", "tty43", "tty44", "tty45",
        "tty46", "tty47", "tty48", "tty49", "tty5", "tty50", "tty51",
        "tty52", "tty53", "tty54", "tty55", "tty56", "tty57", "tty58",
        "tty59", "tty6", "tty60", "tty61", "tty62", "tty63", "tty7",
        "tty8", "tty9",
        "ttyS0", "ttyS1", "ttyS2", "ttyS3",
        "ttyp0",
        "ttyp1", "ttyp2", "ttyp3", "ttyp4", "ttyp5", "ttyp6", "ttyp7",
        "ttyp8", "ttyp9", "ttypa", "ttypb", "ttypc", "ttypd", "ttype",
        "ttypf", "tun", "urandom", "vcs", "vcs1", "vcsa", "vcsa1", "vda",
        "vga_arbiter", "vsock", "zero",
        "root", "pts",  # Added in init
        "ttyAMA0", "ttyAMA1",  # ARM
        "stdin", "stdout", "stderr",  # Symlinks to /proc/self/fd/X
    ]

    # /proc entries that already exist in the guest. Entries must not carry
    # surrounding whitespace or they can never equal a discovered path.
    IGLOO_PROCFS: list[str] = [
        "buddyinfo",
        "cgroups",
        "cmdline",
        "config.gz",
        "consoles",
        "cpuinfo",
        "crypto",
        "devices",
        "diskstats",
        "execdomains",
        "fb",
        "filesystems",
        "interrupts",
        "iomem",
        "ioports",
        "kallsyms",
        "key-users",
        "keys",
        "kmsg",
        "kpagecount",
        "kpageflags",
        "loadavg",
        "locks",
        "meminfo",
        "misc",
        "modules",
        "mounts",
        "mtd",  # We might shadow this later intentionally, but not by default
        "net",
        "pagetypeinfo",
        "partitions",
        "penguin_net",  # This is custom and unique but we shouldn't ever shadow it
        "sched_debug",
        "slabinfo",
        "softirqs",
        "stat",
        "swaps",
        "sysrq-trigger",
        "thread-self",
        "timer_list",
        "uptime",
        "version",
        "vmallocinfo",
        "vmstat",
        "zoneinfo",

        # Directories
        "bus",
        "bus/pci",
        "bus/pci/00",
        "bus/pci/00/00.0",
        "bus/pci/00/0a.0",
        "bus/pci/00/0a.1",
        "bus/pci/00/0a.2",
        "bus/pci/00/0a.3",
        "bus/pci/00/0b.0",
        "bus/pci/00/12.0",
        "bus/pci/00/13.0",
        "bus/pci/00/14.0",
        "bus/pci/devices",
        "bus/input",
        "bus/input/devices",
        "bus/input/handlers",

        "cpu",
        "cpu/alignment",

        "driver",
        "driver/rtc",

        "fs",
        "fs/afs",
        "fs/afs/cells",
        "fs/afs/rootcell",
        "fs/ext4",
        "fs/f2fs",
        "fs/jbd2",
        "fs/nfsd",
        "fs/lockd",
        "fs/lockd/nlm_end_grace",
        "fs/nfsfs",
        "fs/nfsfs/servers",
        "fs/nfsfs/volumes",

        # Sys is special, loaded dynamically

        # sysvipc, driver (empty), scsi, tty, sys (big), irq (numbers), bus, fs
        "sysvipc/shm",
        "sysvipc/sem",
        "sysvipc/msg",

        "scsi/device_info",
        "scsi/scsi",

        "tty/drivers",
        "tty/ldisc",
        "tty/driver",
        "tty/driver/serial",
        "tty/ldisc",
    ]

    # Directories that we want to just ignore entirely - don't create any entries
    # within these directories. IRQs and device-tree are related to the emulated CPU
    # self and PID are related to the process itself and dynamically created
    PROC_IGNORE: list[str] = ["irq", "self", "PID", "device-tree", "net", "vmcore"]

    def __init__(self) -> None:
        """
        Initialize PseudofileFinder and load additional procfs entries.

        The entries read from ``resources/proc_sys.txt`` are stored on the
        instance; the class-level ``IGLOO_PROCFS`` list is left untouched so
        that creating several instances does not keep growing a shared list.
        """
        super().__init__()
        # Load ../resources/proc_sys.txt, one procfs path per line
        resources = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources")
        with open(os.path.join(resources, "proc_sys.txt"), "r") as f:
            extra_entries = [line.strip() for line in f if line.strip()]
        self.IGLOO_PROCFS = [*type(self).IGLOO_PROCFS, *extra_entries]

    def _filter_files(
        self,
        extract_dir: str,
        pattern: re.Pattern,
        ignore_list: list[str],
        remove_list: list[str]
    ) -> list[str]:
        """
        Filter files in a directory based on regex, ignore, and remove lists.

        :param extract_dir: Directory to search.
        :param pattern: Regex pattern to match.
        :param ignore_list: List of prefixes to ignore (the entry itself and
            everything below it).
        :param remove_list: List of absolute matches to remove.
        :return: Filtered list of file paths.
        """
        # Find all files matching the pattern
        found_files = list(FileSystemHelper.find_regex(pattern, extract_dir).keys())

        # Apply ignore filters, then drop exact matches from remove_list (like
        # IGLOO_ADDED_DEVICES or IGLOO_PROCFS). A set makes each test O(1).
        ignored_prefixes = tuple(prefix + "/" for prefix in ignore_list)
        ignored_exact = set(ignore_list)
        removed = set(remove_list)
        filtered_files = [
            name for name in found_files
            if name not in ignored_exact
            and not name.startswith(ignored_prefixes)
            and name not in removed
        ]

        # Remove directories that have subpaths: every proper parent of a
        # remaining path is a directory, not a file to model
        directories_to_remove = set()
        for name in filtered_files:
            parts = name.split("/")
            for depth in range(1, len(parts)):
                directories_to_remove.add("/".join(parts[:depth]))

        return [name for name in filtered_files if name not in directories_to_remove]

    def run(self, extract_dir: str, prior_results: dict) -> dict[str, list[str]]:
        """
        Run pseudofile analysis.

        :param extract_dir: Directory containing extracted filesystem.
        :param prior_results: Results from previous analyses (not read here).
        :return: Dict with lists of device and proc files.
        """
        # Regex patterns for dev and proc files
        dev_pattern = re.compile(r"/dev/([a-zA-Z0-9_/]+)", re.MULTILINE)
        proc_pattern = re.compile(r"/proc/([a-zA-Z0-9_/]+)", re.MULTILINE)

        # Filter device files
        dev_files = self._filter_files(
            extract_dir, dev_pattern, [], self.IGLOO_ADDED_DEVICES
        )

        # Filter proc files, applying PROC_IGNORE and IGLOO_PROCFS
        proc_files = self._filter_files(
            extract_dir, proc_pattern, self.PROC_IGNORE, self.IGLOO_PROCFS
        )

        # Return dev and proc files in the appropriate format
        return {
            "dev": [f"/dev/{x}" for x in dev_files],
            "proc": [f"/proc/{x}" for x in proc_files],
        }

    @staticmethod
    def _get_devfiles_in_fs(extracted_dir: str) -> list[str]:
        """
        Get all device files in extracted_dir/dev.

        :param extracted_dir: Directory containing extracted filesystem.
        :return: List of device file paths, rooted at /dev.
        """
        dev_dir = os.path.join(extracted_dir, "dev")
        results = []

        if os.path.exists(dev_dir):
            for root, _, files in os.walk(dev_dir):
                for f in files:
                    relative_path = os.path.join("/dev", os.path.relpath(os.path.join(root, f), dev_dir))
                    results.append(relative_path)

        return results
class InterfaceFinder(StaticAnalysis):
    """
    Discover network interface names referenced by the filesystem.
    """
    def run(self, extract_dir: str, prior_results: dict) -> dict[str, list[str]] | None:
        """
        Collect interface names from sysfs paths and from networking commands.

        :param extract_dir: Directory containing extracted filesystem.
        :param prior_results: Results from previous analyses.
        :return: Dict with optional "sysfs" and "commands" lists, or None
            when nothing was found.
        """
        bad_prefixes = ("veth", "br")

        # Interfaces named under /sys/class/net, minus the default ones
        sysfs_pattern = re.compile(r"/sys/class/net/([a-zA-Z0-9_]+)", re.MULTILINE)
        sys_net_ifaces = [
            name for name in FileSystemHelper.find_regex(sysfs_pattern, extract_dir).keys()
            if not name.startswith(bad_prefixes) and name != "lo"
        ]

        # A token shaped like an interface name, captured right after a
        # standard networking command (ifconfig, ip, ethtool, ...)
        interface_regex = r"([a-zA-Z0-9][a-zA-Z0-9_-]{2,15})"
        command_prefixes = (
            r"ifconfig\s+",
            r"ip\s+(?:addr|link|route|add|set|show)\s+",
            r"if(?:up|down)\s+",
            r"ethtool\s+",
            r"route\s+(?:add|del)\s+",
            r"iwconfig\s+",
            r"netstat\s+-r\s+",
            r"ss\s+-i\s+",
        )
        patterns = [re.compile(prefix + interface_regex) for prefix in command_prefixes]

        candidates = set()
        for command_pattern in patterns:
            candidates.update(FileSystemHelper.find_regex(command_pattern, extract_dir).keys())

        # Sub-command words and defaults that are not interface names
        bad_vals = ["lo", "set", "add", "del", "route", "show", "addr", "link", "up", "down",
                    "flush", "help", "default"]

        def looks_like_interface(name):
            if any(word in name for word in bad_vals):
                return False
            if name.startswith(bad_prefixes):
                return False
            return not name.isnumeric()

        interfaces = [name for name in candidates if looks_like_interface(name)]

        result = {}
        if sys_net_ifaces:
            result["sysfs"] = list(sys_net_ifaces)
        if interfaces:
            result["commands"] = list(interfaces)

        return result if result else None
class ClusterCollector(StaticAnalysis):
    """
    Collect summary statistics for the filesystem to help identify clusters.
    """

    def run(self, extract_dir: str, prior_results: dict) -> dict[str, list[str]]:
        """
        Collect the names of all files plus the names and hashes of executables.

        :param extract_dir: Directory containing extracted filesystem.
        :param prior_results: Results from previous analyses (not used here).
        :return: Dict with ``files`` (basenames of all regular files),
                 ``executables`` (basenames of files with the execute bit) and
                 ``executable_hashes`` (SHA256 digests of those executables).
        """
        all_files = set()
        executables = set()
        executable_hashes = set()

        for root, _, files in os.walk(extract_dir):
            # os.walk already yields bare names, so no basename() is needed.
            for name in files:
                file_path = os.path.join(root, name)

                if not os.path.isfile(file_path):
                    continue
                all_files.add(name)

                if not os.access(file_path, os.X_OK):
                    continue
                executables.add(name)

                if hash_value := self.compute_file_hash(file_path):
                    executable_hashes.add(hash_value)

        return {
            'files': list(all_files),
            'executables': list(executables),
            'executable_hashes': list(executable_hashes)
        }

    @staticmethod
    def compute_file_hash(file_path: str) -> str | None:
        """
        Compute SHA256 hash of a file using the system ``sha256sum`` binary.

        :param file_path: Path to file.
        :return: Hex digest string or None on failure.
        """
        try:
            output = check_output(["sha256sum", file_path], stderr=STDOUT)
        except (CalledProcessError, FileNotFoundError, IOError) as e:
            logger.debug(f"Failed to hash file {file_path}: {e}")
            return None

        # sha256sum output format: '<hash>  <filename>'. Work on bytes so that a
        # filename that is not valid UTF-8 cannot make decoding fail.
        fields = output.split()
        if not fields:
            logger.debug(f"Failed to hash file {file_path}: empty sha256sum output")
            return None

        # GNU sha256sum starts the line with a backslash when the filename
        # contains a backslash or newline; strip it so only the digest remains.
        return fields[0].lstrip(b"\\").decode("ascii", errors="replace")
- """ - NVRAM_KEYS: list[str] = ["Nvrams", "router_defaults"] - - def run(self, extract_dir: str, prior_results: dict) -> dict[str, dict]: - """ - Analyze libraries for NVRAM keys and symbols. - - :param extract_dir: Directory containing extracted filesystem. - :param prior_results: Results from previous analyses. - :return: Dict with nvram values and symbol paths. - """ - self.extract_dir = extract_dir - self.archend = arch_end(prior_results['ArchId']) - - if any([x is None for x in self.archend]): - self.enabled = False - print(f"Warning: Unknown architecture/endianness: {self.archend}. Cannot run NVRAM recovery Static Analysis") - return - - symbols = {} - nvram = {} - sym_paths = {} # path -> symbol names - - # Now let's examine each extracted library - for root, _, files in os.walk(self.extract_dir): - for file in files: - file_path = Path(root) / file - if file_path.is_file() and \ - (str(file_path).endswith(".so") or ".so." in str(file_path)): - try: - found_nvram, found_syms = self._analyze_library(file_path, - self.archend) - except Exception as e: - logger.error( - f"Unhandled exception in _analyze_library for {file_path}: {e}" - ) - continue - tmpless_path = str(file_path).replace(str(self.extract_dir), "") - sym_paths[tmpless_path] = found_syms - for symname, offset in found_syms.items(): - symbols[(tmpless_path, symname)] = offset - for key, value in found_nvram.items(): - nvram_key = key.rsplit(":", 1)[-1] # Handle case of value coming from ar - nvram[(tmpless_path, nvram_key)] = value - - # Raw data will be library path -> key -> value - nvram_values = {} - for (path, key), value in nvram.items(): - if path not in nvram_values: - nvram_values[path] = {} - if key is not None and len(key) and value is not None: - nvram_values[path][key] = value - - # nvram is key of filepath -> nvram key -> nvram value - # We should 1) generate patches for each possible non-conflicting source - return {'nvram': nvram_values, - 'symbols': sym_paths} - - @staticmethod - 
def _find_symbol_address( - elffile: ELFFile, - symbol_name: str - ) -> tuple[int | None, int | str | None]: - """ - Find the address and section index of a symbol in an ELF file. - - :param elffile: ELFFile object. - :param symbol_name: Name of the symbol. - :return: Tuple of (address, section_index) or (None, None). - """ - try: - symbol_tables = [ - s - for s in elffile.iter_sections() - if isinstance(s, SymbolTableSection) - ] - except ELFParseError: - return None, None - - for section in symbol_tables: - if symbol := section.get_symbol_by_name(symbol_name): - symbol = symbol[0] - return ( - symbol["st_value"], - symbol["st_shndx"], - ) # Return symbol address and section index - return None, None - - @staticmethod - def _get_string_from_address( - elffile: ELFFile, - address: int, - is_64: bool = False, - is_eb: bool = False - ) -> str | None: - """ - Get a string from a given address in an ELF file. - - :param elffile: ELFFile object. - :param address: Address to read string from. - :param is_64: True if 64-bit ELF. - :param is_eb: True if big-endian. - :return: Decoded string or None. - """ - for section in elffile.iter_sections(): - start_addr = section["sh_addr"] - end_addr = start_addr + section.data_size - if start_addr <= address < end_addr: - offset_within_section = address - start_addr - data = section.data()[offset_within_section:] - str_end = data.find(b"\x00") - if str_end != -1: - try: - return data[:str_end].decode("utf-8") - except UnicodeDecodeError: - # print(f"Failed to decode string: {data[:str_end]}") - pass - return None - - @staticmethod - def _is_elf(filename: str) -> bool: - """ - Check if a file is an ELF binary. - - :param filename: Path to file. - :return: True if ELF, False otherwise. 
- """ - try: - with open(filename, "rb") as f: - magic = f.read(4) - return magic == b"\x7fELF" - except IOError: - return False - - @staticmethod - def get_nvram_info( - elf_path: str, - archend: str - ) -> dict[str, str | None]: - """ - Extract NVRAM key-value pairs from an ELF file. - - :param elf_path: Path to ELF file. - :param archend: Architecture/endianness info. - :return: Dict of NVRAM key-value pairs. - """ - nvram_data = {} - is_eb = "eb" in archend - is_64 = "64" in archend - with open(elf_path, "rb") as f: - try: - elffile = ELFFile(f) - except ELFError: - # elftools failed to parse our file. If it's actually an ELF, warn - if LibrarySymbols._is_elf(elf_path): - logger.warning( - f"Failed to parse {elf_path} as an ELF file when analyzing libraries" - ) - return nvram_data - - # Check for nvram keys - for nvram_key in LibrarySymbols.NVRAM_KEYS: - address, section_index = LibrarySymbols._find_symbol_address(elffile, nvram_key) - if address is None: - continue - - if section_index == "SHN_UNDEF": - # This is a common case for shared libraries, it means - # the symbol is defined in another library? - continue - - try: - section = elffile.get_section(section_index) - except TypeError: - logger.warning( - f"Failed to get section {section_index} for symbol {nvram_key} in {elf_path} when analyzing libraries" - ) - continue - data = section.data() - start_addr = section["sh_addr"] - offset = address - start_addr - - pointer_size = 8 if is_64 else 4 - unpack_format = f"{'>' if is_eb else '<'}{'Q' if is_64 else 'I'}" - - # We expect key_ptr, value_ptr, NULL, ... 
@staticmethod
def _analyze_library(
    elf_path: str,
    archend: str
) -> tuple[dict, dict]:
    """
    Analyze a single library for exported tables and function names.

    ``ar`` archives are unpacked into a temporary directory and each member is
    analyzed recursively; results from members are keyed as
    ``<archive basename>:<member>:<name>``. Any other file is passed to
    ``nm -D --defined-only`` and, when one of ``LibrarySymbols.NVRAM_KEYS`` is
    among the defined symbols, to ``LibrarySymbols.get_nvram_info``.

    :param elf_path: Path to library file.
    :param archend: Architecture/endianness info, forwarded to ``get_nvram_info``.
    :return: Tuple of (nvram_data, symbols).
    """
    symbols = {}     # Symbol name -> address as reported by nm
    nvram_data = {}  # key -> value (may be empty string)

    # An ar archive starts with the 8-byte global header "!<arch>\n".
    with open(elf_path, 'rb') as f:
        is_archive = f.read(8) == b"!<arch>\n"

    if is_archive:
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                # ar extracts into its working directory, so it runs inside
                # temp_dir and must be given an absolute archive path.
                check_output(["ar", "x", os.path.abspath(elf_path)],
                             cwd=temp_dir, stderr=STDOUT)
                for obj_file in os.listdir(temp_dir):
                    obj_path = os.path.join(temp_dir, obj_file)
                    found_nvram, found_syms = LibrarySymbols._analyze_library(obj_path, archend)
                    archive_key = f"{os.path.basename(elf_path)}:{obj_file}"
                    symbols.update({f"{archive_key}:{k}": v for k, v in found_syms.items()})
                    nvram_data.update({f"{archive_key}:{k}": v for k, v in found_nvram.items()})
            return nvram_data, symbols
        except CalledProcessError as e:
            # Fall through and let nm have a go at the file.
            logger.error(
                f"Error processing archive {elf_path}: {(e.output or b'').decode('utf-8', errors='ignore')}"
            )

    # Handle ELF files
    try:
        nm_out = check_output(["nm", "-D", "--defined-only", elf_path], stderr=STDOUT)
    except CalledProcessError as e:
        # nm failing on a non-ELF file is expected; only report real ELFs.
        if LibrarySymbols._is_elf(elf_path):
            logger.error(
                f"Error running nm on {elf_path}: {(e.output or b'').decode('utf-8', errors='ignore')}"
            )
        return nvram_data, symbols

    for line in nm_out.decode("utf8", errors="ignore").split("\n"):
        if not line:
            continue
        parts = line.split()
        if len(parts) == 3:
            addr_text, _, name = parts
            try:
                addr = int(addr_text, 16)
            except ValueError:
                logger.warning(f"Unexpected nm output format: {line}")
                continue
            # Drop symbol version suffixes such as "name@@VERSION".
            name = name.split("@")[0]
            if addr != 0:
                symbols[name] = addr
        elif line.strip().endswith("no symbols"):
            continue
        else:
            logger.warning(f"Unexpected nm output format: {line}")

    if any(sym in symbols for sym in LibrarySymbols.NVRAM_KEYS):
        nvram_data = LibrarySymbols.get_nvram_info(elf_path, archend)

    return nvram_data, symbols
class StaticPluginManager:
    """
    Manages loading and resolving dependencies for static plugins.

    Plugin classes are discovered by importing every ``*.py`` file (other than
    ``__init__.py``) below the configured directories and are stored by class name.
    """

    def __init__(self, plugin_dirs: List[str]):
        """
        Record the plugin directories and immediately load all plugins in them.

        :param plugin_dirs: Directories searched recursively for plugin modules.
        """
        self.plugin_dirs = plugin_dirs
        # Plugin class name -> plugin class.
        self.plugins: Dict[str, Type[StaticPlugin]] = {}
        # Plugin class name -> SHA256 hex digest of the file defining it.
        self.plugin_hashes: Dict[str, str] = {}
        self.load_plugins()

    def _hash_file(self, filepath: str) -> str:
        """
        Return the SHA256 hex digest of a file's contents.

        A read failure is logged and the digest of the empty input is returned.

        :param filepath: Path of the file to hash.
        :return: Hex digest string.
        """
        import hashlib
        hasher = hashlib.sha256()
        try:
            with open(filepath, 'rb') as afile:
                hasher.update(afile.read())
        except OSError as e:
            logger.error(f"Error hashing {filepath}: {e}")
        return hasher.hexdigest()

    def load_plugins(self):
        """
        Dynamically loads all plugins in the specified directories.

        Directories and files are visited in sorted order so that the load order
        (and therefore the winner of a class-name collision) is reproducible.
        """
        for plugin_dir in self.plugin_dirs:
            if not os.path.exists(plugin_dir):
                logger.warning(f"Plugin directory {plugin_dir} does not exist.")
                continue

            for root, dirs, files in os.walk(plugin_dir):
                dirs.sort()  # in-place sort steers os.walk's traversal order
                for file in sorted(files):
                    if file.endswith('.py') and file != '__init__.py':
                        self._load_plugin_file(os.path.join(root, file))

    def _load_plugin_file(self, filepath: str):
        """
        Import one plugin file and register the StaticPlugin subclasses it defines.

        The module name is derived from the path below a ``pyplugins`` directory
        when there is one (e.g. ``pyplugins.static_analysis.arch_id``), otherwise
        the bare file name is used. Import errors are logged, not raised.

        :param filepath: Path to the plugin ``.py`` file.
        """
        path_parts = os.path.abspath(filepath).split(os.sep)
        module_base = os.path.splitext(os.path.basename(filepath))[0]

        # Try to find 'pyplugins' in the path to determine the package hierarchy
        if 'pyplugins' in path_parts:
            idx = path_parts.index('pyplugins')
            package_name = '.'.join(path_parts[idx:-1])
            module_name = f"{package_name}.{module_base}"
        else:
            package_name = ''
            module_name = module_base

        spec = importlib.util.spec_from_file_location(module_name, filepath)
        if not (spec and spec.loader):
            return

        previous_module = sys.modules.get(module_name)
        executed = False
        try:
            module = importlib.util.module_from_spec(spec)
            module.__package__ = package_name
            # Registered before execution so the plugin can be imported by name
            # while its own body is running.
            sys.modules[module_name] = module
            spec.loader.exec_module(module)
            executed = True

            file_hash = None  # computed once, and only if the file defines a plugin
            for name, cls in inspect.getmembers(module, inspect.isclass):
                # Check if it's a subclass of StaticPlugin but not the base class itself
                if not issubclass(cls, StaticPlugin):
                    continue
                if cls in (StaticPlugin, StaticAnalysisPlugin, ConfigPatcherPlugin):
                    continue
                # Ensure we don't load classes from imported modules
                if cls.__module__ != module_name:
                    continue
                if name in self.plugins and self.plugins[name] is not cls:
                    logger.warning(
                        f"Static plugin {name} from {filepath} replaces a previously loaded plugin of the same name"
                    )
                if file_hash is None:
                    file_hash = self._hash_file(filepath)
                self.plugins[name] = cls
                self.plugin_hashes[name] = file_hash
                logger.debug(f"Loaded static plugin: {name}")
        except Exception as e:
            if not executed:
                # Do not leave a half-initialised module importable.
                if previous_module is None:
                    sys.modules.pop(module_name, None)
                else:
                    sys.modules[module_name] = previous_module
            logger.error(f"Failed to load static plugin from {filepath}: {e}")

    def get_ordered_plugins(self) -> "List[Type[StaticPlugin]]":
        """
        Returns a list of loaded plugin classes ordered by their dependencies.

        Plugin names and each plugin's ``depends_on`` entries are sorted before
        the graph is built, so the result does not depend on dict insertion
        order or on string hash randomisation. Dependencies that are not
        loaded plugins are ignored.

        :return: Plugin classes with every dependency before its dependents, or
                 an empty list if the dependencies contain a cycle.
        """
        from graphlib import CycleError

        graph = {
            name: sorted(set(getattr(self.plugins[name], 'depends_on', [])))
            for name in sorted(self.plugins)
        }

        try:
            ordered_names = list(TopologicalSorter(graph).static_order())
        except CycleError as e:
            logger.error(f"Failed to resolve plugin dependencies: {e}")
            return []

        # Some dependencies might not be plugins themselves (or not loaded),
        # so we only return the ones we actually loaded.
        return [self.plugins[name] for name in ordered_names if name in self.plugins]

    def get_state_hash(self) -> str:
        """
        Returns a single hash representing the state of all loaded plugins.

        :return: SHA256 hex digest over the per-plugin file hashes, taken in
                 plugin-name order.
        """
        import hashlib
        hasher = hashlib.sha256()
        # Sort keys to ensure consistent hashing
        for name in sorted(self.plugin_hashes):
            hasher.update(self.plugin_hashes[name].encode('utf-8'))
        return hasher.hexdigest()