Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
name: Test Container

on:
# issue831 CI debug: this branch (issue831-ci-debug) is based on
# config-reshape-patches and conflicts with main, so a pull_request event
# can't build a merge ref and never runs CI. Trigger on direct pushes to the
# branch instead, which run against the branch head. Remove before merge.
push:
branches:
- issue831-ci-debug
workflow_dispatch:
pull_request:
branches:
- main
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ ARG LTRACE_PROTOTYPES_HASH="9db3bdee7cf3e11c87d8cc7673d4d25b"
ARG MUSL_VERSION="1.2.5"
ARG VHOST_DEVICE_VERSION="vhost-device-vsock-v0.2.0"
ARG FW2TAR_TAG="v2.0.6"
ARG QEMU_VERSION="0.0.6"
# issue831 CI debug: pull the instrumented dev build (release vdev_issue831).
ARG QEMU_VERSION="dev_issue831"
ARG RIPGREP_VERSION="14.1.1"
ARG APT_MIRROR="ubuntu"

Expand Down
7 changes: 6 additions & 1 deletion docs/pyplugin_architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,14 @@ both at the top level and under `plugins:` is an error.

Config and patch files support Jinja2 templating so values that depend on the
architecture (or other settings) don't have to be hardcoded in multiple places.
Available variables: `{{ arch }}`, `{{ core.<field> }}` (e.g. `{{ core.mem }}`),
Available variables: `{{ arch }}`, the arch-derived static-layout subdirs
`{{ arch_dir }}` and `{{ dylib_dir }}`, `{{ core.<field> }}` (e.g. `{{ core.mem }}`),
`{{ kernel_version }}`, and anything you define in a top-level `vars:` section.

Penguin's own auto-generated base patch uses this: instead of baking the arch
subdir into host paths, it emits `{{ arch_dir }}` / `{{ dylib_dir }}` so changing
`core.arch` reconfigures those paths automatically.

```yaml
core:
arch: mipsel
Expand Down
14 changes: 11 additions & 3 deletions pyplugins/apis/mem.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ def _get_rsize(self) -> int:
return 4096

def write_bytes(self, addr: Union[int, Ptr], data: bytes,
pid: Optional[int] = None) -> Generator[Any, Any, int]:
pid: Optional[int] = None,
prefer_portal: bool = False) -> Generator[Any, Any, int]:
"""
Write bytes to guest memory.

Expand All @@ -127,6 +128,12 @@ def write_bytes(self, addr: Union[int, Ptr], data: bytes,
Data to write.
pid : int, optional
Process ID for context.
prefer_portal : bool, optional
Skip the PANDA virtual-memory write fast path and write via the
portal (guest-executed) path instead. Use for writes the PANDA
host-side path handles unreliably (e.g. writes into a guest kernel
buffer on ppc64, which trigger a timing-sensitive fault); see
issue #831.

Returns
-------
Expand All @@ -142,10 +149,11 @@ def write_bytes(self, addr: Union[int, Ptr], data: bytes,

rsize = self._get_rsize()
cpu = None
use_panda = self.try_panda and not prefer_portal

# Handle single chunk (Fast Path)
if total_len <= rsize:
if self.try_panda:
if use_panda:
if cpu is None:
cpu = self._get_cpu()
try:
Expand All @@ -169,7 +177,7 @@ def write_bytes(self, addr: Union[int, Ptr], data: bytes,
chunk_len = len(chunk_view)

success = False
if self.try_panda:
if use_panda:
if cpu is None:
cpu = self._get_cpu()
try:
Expand Down
11 changes: 10 additions & 1 deletion pyplugins/testing/native_mmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,18 @@ def read(self, ptregs, offset: LoffT, length: SizeT, buf_ptr: CharPtr):
ptregs.retval = 0
return 0
chunk = min(size, self.SIZE - off)
yield from plugins.mem.write(
# issue #831: deliver the read-back via the guest-executed portal path
# rather than the PANDA virtual-memory write fast path. On ppc64 the
# PANDA host-side write (cpu_memory_rw_debug) into the guest kernel
# read buffer triggers a timing-sensitive fault that segfaults the
# cat|strings|grep read-back; the portal path avoids that host write.
yield from plugins.mem.write_bytes(
buf_ptr,
bytes(self.data[off:off + chunk]),
# issue831 CI debug: force the ORIGINAL PANDA host-write path so the
# ppc64 fault reproduces and the instrumented dev_issue831 QEMU can
# observe it. Revert to prefer_portal=True (the fix) before merging.
prefer_portal=False,
)
ptregs.retval = 0
return 0
Expand Down
25 changes: 25 additions & 0 deletions src/penguin/arch.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,31 @@
logger = getColoredLogger("penguin.arch")


def get_dylib_subdir(arch_name: str) -> str:
"""
Map a config arch name (e.g. "aarch64", "powerpc64le") to the subdirectory
under ``<static>/dylibs`` holding that arch's prebuilt dynamic libraries.

Dylibs are built/published under short, sometimes-distinct names that differ
from both the config arch name and the generic arch subdir, so this mapping
is the single source of truth for both patch generation and config templating
(the ``{{ dylib_dir }}`` template variable).
"""
if arch_name == "aarch64":
return "arm64"
if arch_name == "intel64":
return "x86_64"
if arch_name == "loongarch64":
return "loongarch"
if arch_name == "powerpc64le":
return "ppc64el"
if "powerpc" in arch_name:
return arch_name.replace("powerpc", "ppc") # dylibs use short names
# Fall back to the generic arch subdir (e.g. intel64 -> x86_64, mips* as-is).
from .utils import get_arch_subdir
return get_arch_subdir({"core": {"arch": arch_name}})


@dataclasses.dataclass
class ArchInfo:
arch: Optional[str] = None
Expand Down
26 changes: 9 additions & 17 deletions src/penguin/config_patchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pathlib import Path

from penguin import getColoredLogger
from .arch import arch_filter, arch_end
from .arch import arch_filter, arch_end, get_dylib_subdir
from .defaults import (
default_init_script,
default_lib_aliases,
Expand Down Expand Up @@ -422,19 +422,7 @@ def set_arch_info(self, arch_identified: str) -> None:

mock_config = {"core": {"arch": self.arch_name}}
self.arch_dir = get_arch_subdir(mock_config)

if arch_identified == "aarch64":
self.dylib_dir = "arm64"
elif arch_identified == "intel64":
self.dylib_dir = "x86_64"
elif arch_identified == "loongarch64":
self.dylib_dir = "loongarch"
elif self.arch_name == "powerpc64le":
self.dylib_dir = "ppc64el"
elif "powerpc" in self.arch_name:
self.dylib_dir = self.arch_name.replace("powerpc", "ppc") # dylibs are built with short names
else:
self.dylib_dir = self.arch_dir
self.dylib_dir = get_dylib_subdir(self.arch_name)

def generate(self, patches: dict) -> dict:
# Add serial device in pseudofiles
Expand Down Expand Up @@ -517,11 +505,14 @@ def generate(self, patches: dict) -> dict:
"host_path": os.path.join(*[STATIC_DIR, "ltrace", "*"]),
},

# Dynamic libraries
# Dynamic libraries. The arch-specific subdir is emitted as a
# Jinja template ({{ dylib_dir }}) and resolved at config-load
# time from core.arch, so changing the arch reconfigures the path
# instead of it being baked in here.
"/igloo/dylibs/*": {
"type": "host_file",
"mode": 0o755,
"host_path": os.path.join(STATIC_DIR, "dylibs", self.dylib_dir or self.arch_dir, "*"),
"host_path": os.path.join(STATIC_DIR, "dylibs", "{{ dylib_dir }}", "*"),
},

# Startup scripts
Expand Down Expand Up @@ -562,8 +553,9 @@ def generate(self, patches: dict) -> dict:
"mode": 0o755,
}
result["static_files"]["/igloo/utils/*"] = {
# {{ arch_dir }} is resolved at config-load time from core.arch.
"type": "host_file",
"host_path": f"{STATIC_DIR}/{self.arch_dir}/*",
"host_path": f"{STATIC_DIR}/{{{{ arch_dir }}}}/*",
"mode": 0o755,
}

Expand Down
33 changes: 30 additions & 3 deletions src/penguin/penguin_config/templating.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,36 @@ def substitute(obj, ctx, env=None, where="config"):
return obj


def _arch_derived_vars(arch):
"""
Compute arch-derived template variables for a given config arch name.

Returns a dict with ``arch_dir`` (generic static subdir) and ``dylib_dir``
(prebuilt-dylib subdir). Imports are lazy and failures are swallowed so the
templating module stays usable in self-contained contexts (e.g. schema
generation) where penguin.utils/arch aren't importable; in that case these
variables are simply absent and a template referencing them errors clearly.
"""
out = {}
try:
from penguin.utils import get_arch_subdir
out["arch_dir"] = get_arch_subdir({"core": {"arch": arch}})
except Exception:
pass
try:
from penguin.arch import get_dylib_subdir
out["dylib_dir"] = get_dylib_subdir(arch)
except Exception:
pass
return out


def build_context(raw_config, extra=None, env=None, where="vars"):
"""
Build the Jinja context for a config/patch dict.

Exposes ``arch``, ``core`` (so ``{{ core.mem }}`` works), the late-bound
Exposes ``arch``, the arch-derived ``arch_dir`` / ``dylib_dir`` static-layout
subdirs, ``core`` (so ``{{ core.mem }}`` works), the late-bound
``kernel_version`` sentinel, anything in ``extra`` (e.g. the main config's
context when rendering a patch), and the file's own ``vars:`` (which may
themselves reference earlier vars / arch).
Expand All @@ -85,8 +110,10 @@ def build_context(raw_config, extra=None, env=None, where="vars"):
core = raw_config.get("core") if isinstance(raw_config, dict) else None
if isinstance(core, dict):
ctx["core"] = dict(core)
if core.get("arch"):
ctx["arch"] = core["arch"]
arch = core.get("arch")
if arch:
ctx["arch"] = arch
ctx.update(_arch_derived_vars(arch))
user_vars = raw_config.get("vars") if isinstance(raw_config, dict) else None
if isinstance(user_vars, dict):
for k, v in user_vars.items():
Expand Down
51 changes: 51 additions & 0 deletions tests/unit_tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,57 @@ def test_legacy_at_placeholders_untouched():
assert out["k"] == "/kernels/@ARCH@/vmlinux.@ARCH@"


# --------------------------------------------------------------------------- #
# arch-derived template variables (used by auto-generated patches)
# --------------------------------------------------------------------------- #
@pytest.mark.parametrize("arch,arch_dir,dylib_dir", [
("armel", "armel", "armel"),
("aarch64", "aarch64", "arm64"),
("intel64", "x86_64", "x86_64"),
("powerpc64le", "powerpc64", "ppc64el"),
("powerpc", "powerpc", "ppc"),
("mipsel", "mipsel", "mipsel"),
("loongarch64", "loongarch64", "loongarch"),
])
def test_arch_derived_context_vars(arch, arch_dir, dylib_dir):
ctx = templating.build_context({"core": {"arch": arch}})
assert ctx["arch"] == arch
assert ctx["arch_dir"] == arch_dir
assert ctx["dylib_dir"] == dylib_dir


def test_get_dylib_subdir_matches_context():
from penguin.arch import get_dylib_subdir
assert get_dylib_subdir("aarch64") == "arm64"
assert get_dylib_subdir("powerpc64le") == "ppc64el"
assert get_dylib_subdir("mipsel") == "mipsel"


def test_arch_dir_template_resolves_in_patch():
# Mirrors the generated base patch: defines core.arch and uses the derived
# subdir variables in host_paths.
patch = {
"core": {"arch": "aarch64"},
"static_files": {
"/igloo/dylibs/*": {"type": "host_file", "host_path": "/s/dylibs/{{ dylib_dir }}/*"},
"/igloo/utils/*": {"type": "host_file", "host_path": "/s/{{ arch_dir }}/*"},
},
}
out = templating.substitute(patch, templating.build_context(patch))
assert out["static_files"]["/igloo/dylibs/*"]["host_path"] == "/s/dylibs/arm64/*"
assert out["static_files"]["/igloo/utils/*"]["host_path"] == "/s/aarch64/*"


def test_generated_base_patch_emits_arch_templates():
# The BasePatch generator should emit Jinja placeholders (not baked subdirs)
# for the arch-specific host_paths, so load-time templating resolves them.
import inspect
from penguin import config_patchers
src = inspect.getsource(config_patchers.BasePatch.generate)
assert "{{ dylib_dir }}" in src
assert "{{ arch_dir }}" in src


# --------------------------------------------------------------------------- #
# PR1+PR3+PR4 end-to-end through load_config (no kernel/container needed)
# --------------------------------------------------------------------------- #
Expand Down
Loading