diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9f9ef28d4..5c0e35327 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,6 +1,14 @@ name: Test Container on: + # issue831 CI debug: this branch (issue831-ci-debug) is based on + # config-reshape-patches and conflicts with main, so a pull_request event + # can't build a merge ref and never runs CI. Trigger on direct pushes to the + # branch instead, which run against the branch head. Remove before merge. + push: + branches: + - issue831-ci-debug + workflow_dispatch: pull_request: branches: - main diff --git a/Dockerfile b/Dockerfile index 6ade466f1..7f56d8ad8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,8 @@ ARG LTRACE_PROTOTYPES_HASH="9db3bdee7cf3e11c87d8cc7673d4d25b" ARG MUSL_VERSION="1.2.5" ARG VHOST_DEVICE_VERSION="vhost-device-vsock-v0.2.0" ARG FW2TAR_TAG="v2.0.6" -ARG QEMU_VERSION="0.0.6" +# issue831 CI debug: pull the instrumented dev build (release vdev_issue831). +ARG QEMU_VERSION="dev_issue831" ARG RIPGREP_VERSION="14.1.1" ARG APT_MIRROR="ubuntu" diff --git a/docs/pyplugin_architecture.md b/docs/pyplugin_architecture.md index 2f0f8bcd3..15a452587 100644 --- a/docs/pyplugin_architecture.md +++ b/docs/pyplugin_architecture.md @@ -103,9 +103,14 @@ both at the top level and under `plugins:` is an error. Config and patch files support Jinja2 templating so values that depend on the architecture (or other settings) don't have to be hardcoded in multiple places. -Available variables: `{{ arch }}`, `{{ core. }}` (e.g. `{{ core.mem }}`), +Available variables: `{{ arch }}`, the arch-derived static-layout subdirs +`{{ arch_dir }}` and `{{ dylib_dir }}`, `{{ core. }}` (e.g. `{{ core.mem }}`), `{{ kernel_version }}`, and anything you define in a top-level `vars:` section. +Penguin's own auto-generated base patch uses this: instead of baking the arch +subdir into host paths, it emits `{{ arch_dir }}` / `{{ dylib_dir }}` so changing +`core.arch` reconfigures those paths automatically. + ```yaml core: arch: mipsel diff --git a/pyplugins/apis/mem.py b/pyplugins/apis/mem.py index 2d804c43d..6e25af4b1 100644 --- a/pyplugins/apis/mem.py +++ b/pyplugins/apis/mem.py @@ -113,7 +113,8 @@ def _get_rsize(self) -> int: return 4096 def write_bytes(self, addr: Union[int, Ptr], data: bytes, - pid: Optional[int] = None) -> Generator[Any, Any, int]: + pid: Optional[int] = None, + prefer_portal: bool = False) -> Generator[Any, Any, int]: """ Write bytes to guest memory. @@ -127,6 +128,12 @@ def write_bytes(self, addr: Union[int, Ptr], data: bytes, Data to write. pid : int, optional Process ID for context. + prefer_portal : bool, optional + Skip the PANDA virtual-memory write fast path and write via the + portal (guest-executed) path instead. Use for writes the PANDA + host-side path handles unreliably (e.g. writes into a guest kernel + buffer on ppc64, which trigger a timing-sensitive fault); see + issue #831. Returns ------- @@ -142,10 +149,11 @@ def write_bytes(self, addr: Union[int, Ptr], data: bytes, rsize = self._get_rsize() cpu = None + use_panda = self.try_panda and not prefer_portal # Handle single chunk (Fast Path) if total_len <= rsize: - if self.try_panda: + if use_panda: if cpu is None: cpu = self._get_cpu() try: @@ -169,7 +177,7 @@ def write_bytes(self, addr: Union[int, Ptr], data: bytes, chunk_len = len(chunk_view) success = False - if self.try_panda: + if use_panda: if cpu is None: cpu = self._get_cpu() try: diff --git a/pyplugins/testing/native_mmap.py b/pyplugins/testing/native_mmap.py index 94c1172b7..a44d75877 100644 --- a/pyplugins/testing/native_mmap.py +++ b/pyplugins/testing/native_mmap.py @@ -61,9 +61,18 @@ def read(self, ptregs, offset: LoffT, length: SizeT, buf_ptr: CharPtr): ptregs.retval = 0 return 0 chunk = min(size, self.SIZE - off) - yield from plugins.mem.write( + # issue #831: deliver the read-back via the guest-executed portal path + # rather than the PANDA virtual-memory write fast path. On ppc64 the + # PANDA host-side write (cpu_memory_rw_debug) into the guest kernel + # read buffer triggers a timing-sensitive fault that segfaults the + # cat|strings|grep read-back; the portal path avoids that host write. + yield from plugins.mem.write_bytes( buf_ptr, bytes(self.data[off:off + chunk]), + # issue831 CI debug: force the ORIGINAL PANDA host-write path so the + # ppc64 fault reproduces and the instrumented dev_issue831 QEMU can + # observe it. Revert to prefer_portal=True (the fix) before merging. + prefer_portal=False, ) ptregs.retval = 0 return 0 diff --git a/src/penguin/arch.py b/src/penguin/arch.py index af2235850..6bcd976da 100644 --- a/src/penguin/arch.py +++ b/src/penguin/arch.py @@ -6,6 +6,31 @@ logger = getColoredLogger("penguin.arch") +def get_dylib_subdir(arch_name: str) -> str: + """ + Map a config arch name (e.g. "aarch64", "powerpc64le") to the subdirectory + under ``/dylibs`` holding that arch's prebuilt dynamic libraries. + + Dylibs are built/published under short, sometimes-distinct names that differ + from both the config arch name and the generic arch subdir, so this mapping + is the single source of truth for both patch generation and config templating + (the ``{{ dylib_dir }}`` template variable). + """ + if arch_name == "aarch64": + return "arm64" + if arch_name == "intel64": + return "x86_64" + if arch_name == "loongarch64": + return "loongarch" + if arch_name == "powerpc64le": + return "ppc64el" + if "powerpc" in arch_name: + return arch_name.replace("powerpc", "ppc") # dylibs use short names + # Fall back to the generic arch subdir (e.g. intel64 -> x86_64, mips* as-is). + from .utils import get_arch_subdir + return get_arch_subdir({"core": {"arch": arch_name}}) + + @dataclasses.dataclass class ArchInfo: arch: Optional[str] = None diff --git a/src/penguin/config_patchers.py b/src/penguin/config_patchers.py index d5fbd146b..9075691ec 100644 --- a/src/penguin/config_patchers.py +++ b/src/penguin/config_patchers.py @@ -22,7 +22,7 @@ from pathlib import Path from penguin import getColoredLogger -from .arch import arch_filter, arch_end +from .arch import arch_filter, arch_end, get_dylib_subdir from .defaults import ( default_init_script, default_lib_aliases, @@ -422,19 +422,7 @@ def set_arch_info(self, arch_identified: str) -> None: mock_config = {"core": {"arch": self.arch_name}} self.arch_dir = get_arch_subdir(mock_config) - - if arch_identified == "aarch64": - self.dylib_dir = "arm64" - elif arch_identified == "intel64": - self.dylib_dir = "x86_64" - elif arch_identified == "loongarch64": - self.dylib_dir = "loongarch" - elif self.arch_name == "powerpc64le": - self.dylib_dir = "ppc64el" - elif "powerpc" in self.arch_name: - self.dylib_dir = self.arch_name.replace("powerpc", "ppc") # dylibs are built with short names - else: - self.dylib_dir = self.arch_dir + self.dylib_dir = get_dylib_subdir(self.arch_name) def generate(self, patches: dict) -> dict: # Add serial device in pseudofiles @@ -517,11 +505,14 @@ def generate(self, patches: dict) -> dict: "host_path": os.path.join(*[STATIC_DIR, "ltrace", "*"]), }, - # Dynamic libraries + # Dynamic libraries. The arch-specific subdir is emitted as a + # Jinja template ({{ dylib_dir }}) and resolved at config-load + # time from core.arch, so changing the arch reconfigures the path + # instead of it being baked in here. "/igloo/dylibs/*": { "type": "host_file", "mode": 0o755, - "host_path": os.path.join(STATIC_DIR, "dylibs", self.dylib_dir or self.arch_dir, "*"), + "host_path": os.path.join(STATIC_DIR, "dylibs", "{{ dylib_dir }}", "*"), }, # Startup scripts @@ -562,8 +553,9 @@ def generate(self, patches: dict) -> dict: "mode": 0o755, } result["static_files"]["/igloo/utils/*"] = { + # {{ arch_dir }} is resolved at config-load time from core.arch. "type": "host_file", - "host_path": f"{STATIC_DIR}/{self.arch_dir}/*", + "host_path": f"{STATIC_DIR}/{{{{ arch_dir }}}}/*", "mode": 0o755, } diff --git a/src/penguin/penguin_config/templating.py b/src/penguin/penguin_config/templating.py index c41195d8b..5a9c7e786 100644 --- a/src/penguin/penguin_config/templating.py +++ b/src/penguin/penguin_config/templating.py @@ -69,11 +69,36 @@ def substitute(obj, ctx, env=None, where="config"): return obj +def _arch_derived_vars(arch): + """ + Compute arch-derived template variables for a given config arch name. + + Returns a dict with ``arch_dir`` (generic static subdir) and ``dylib_dir`` + (prebuilt-dylib subdir). Imports are lazy and failures are swallowed so the + templating module stays usable in self-contained contexts (e.g. schema + generation) where penguin.utils/arch aren't importable; in that case these + variables are simply absent and a template referencing them errors clearly. + """ + out = {} + try: + from penguin.utils import get_arch_subdir + out["arch_dir"] = get_arch_subdir({"core": {"arch": arch}}) + except Exception: + pass + try: + from penguin.arch import get_dylib_subdir + out["dylib_dir"] = get_dylib_subdir(arch) + except Exception: + pass + return out + + def build_context(raw_config, extra=None, env=None, where="vars"): """ Build the Jinja context for a config/patch dict. - Exposes ``arch``, ``core`` (so ``{{ core.mem }}`` works), the late-bound + Exposes ``arch``, the arch-derived ``arch_dir`` / ``dylib_dir`` static-layout + subdirs, ``core`` (so ``{{ core.mem }}`` works), the late-bound ``kernel_version`` sentinel, anything in ``extra`` (e.g. the main config's context when rendering a patch), and the file's own ``vars:`` (which may themselves reference earlier vars / arch). @@ -85,8 +110,10 @@ def build_context(raw_config, extra=None, env=None, where="vars"): core = raw_config.get("core") if isinstance(raw_config, dict) else None if isinstance(core, dict): ctx["core"] = dict(core) - if core.get("arch"): - ctx["arch"] = core["arch"] + arch = core.get("arch") + if arch: + ctx["arch"] = arch + ctx.update(_arch_derived_vars(arch)) user_vars = raw_config.get("vars") if isinstance(raw_config, dict) else None if isinstance(user_vars, dict): for k, v in user_vars.items(): diff --git a/tests/unit_tests/test_config.py b/tests/unit_tests/test_config.py index 21b963ffd..ce69a9f32 100644 --- a/tests/unit_tests/test_config.py +++ b/tests/unit_tests/test_config.py @@ -409,6 +409,57 @@ def test_legacy_at_placeholders_untouched(): assert out["k"] == "/kernels/@ARCH@/vmlinux.@ARCH@" +# --------------------------------------------------------------------------- # +# arch-derived template variables (used by auto-generated patches) +# --------------------------------------------------------------------------- # +@pytest.mark.parametrize("arch,arch_dir,dylib_dir", [ + ("armel", "armel", "armel"), + ("aarch64", "aarch64", "arm64"), + ("intel64", "x86_64", "x86_64"), + ("powerpc64le", "powerpc64", "ppc64el"), + ("powerpc", "powerpc", "ppc"), + ("mipsel", "mipsel", "mipsel"), + ("loongarch64", "loongarch64", "loongarch"), +]) +def test_arch_derived_context_vars(arch, arch_dir, dylib_dir): + ctx = templating.build_context({"core": {"arch": arch}}) + assert ctx["arch"] == arch + assert ctx["arch_dir"] == arch_dir + assert ctx["dylib_dir"] == dylib_dir + + +def test_get_dylib_subdir_matches_context(): + from penguin.arch import get_dylib_subdir + assert get_dylib_subdir("aarch64") == "arm64" + assert get_dylib_subdir("powerpc64le") == "ppc64el" + assert get_dylib_subdir("mipsel") == "mipsel" + + +def test_arch_dir_template_resolves_in_patch(): + # Mirrors the generated base patch: defines core.arch and uses the derived + # subdir variables in host_paths. + patch = { + "core": {"arch": "aarch64"}, + "static_files": { + "/igloo/dylibs/*": {"type": "host_file", "host_path": "/s/dylibs/{{ dylib_dir }}/*"}, + "/igloo/utils/*": {"type": "host_file", "host_path": "/s/{{ arch_dir }}/*"}, + }, + } + out = templating.substitute(patch, templating.build_context(patch)) + assert out["static_files"]["/igloo/dylibs/*"]["host_path"] == "/s/dylibs/arm64/*" + assert out["static_files"]["/igloo/utils/*"]["host_path"] == "/s/aarch64/*" + + +def test_generated_base_patch_emits_arch_templates(): + # The BasePatch generator should emit Jinja placeholders (not baked subdirs) + # for the arch-specific host_paths, so load-time templating resolves them. + import inspect + from penguin import config_patchers + src = inspect.getsource(config_patchers.BasePatch.generate) + assert "{{ dylib_dir }}" in src + assert "{{ arch_dir }}" in src + + # --------------------------------------------------------------------------- # # PR1+PR3+PR4 end-to-end through load_config (no kernel/container needed) # --------------------------------------------------------------------------- #