From 04b6ade095f41bedc7d4e8cd2ac8ccb78d7c2a3b Mon Sep 17 00:00:00 2001 From: Steffen Beyer Date: Fri, 3 Apr 2026 06:06:49 +0200 Subject: [PATCH] Validate native FreeBSD boot assets --- docs/PROGRESS.md | 66 +++++++++ .../phase14-native-boot-assets-freebsd.md | 126 ++++++++++++++++++ ...4-native-boot-pid1-operating-system.scm.in | 71 ++++++++++ .../system/run-phase11-shepherd-pid1-qemu.sh | 5 +- tests/system/run-phase14-native-boot-qemu.sh | 103 ++++++++++++++ tests/system/run-phase14-native-boot-xcpng.sh | 109 +++++++++++++++ 6 files changed, 479 insertions(+), 1 deletion(-) create mode 100644 docs/reports/phase14-native-boot-assets-freebsd.md create mode 100644 tests/system/phase14-native-boot-pid1-operating-system.scm.in create mode 100755 tests/system/run-phase14-native-boot-qemu.sh create mode 100755 tests/system/run-phase14-native-boot-xcpng.sh diff --git a/docs/PROGRESS.md b/docs/PROGRESS.md index 661b00a..e8a0e4d 100644 --- a/docs/PROGRESS.md +++ b/docs/PROGRESS.md @@ -3158,3 +3158,69 @@ Next recommended step: 1. begin Phase 14 by replacing the remaining host-copy boot assets first 2. keep shrinking the host-staged base boundary around the now-working native world/kernel path 3. revisit cleaner runtime vs. 
development splits after the boot asset transition + +## 2026-04-03 — Phase 14.1: removed host-copied boot assets from the validated native boot path + +Completed work: + +- wrote the Phase 14.1 report: + - `docs/reports/phase14-native-boot-assets-freebsd.md` +- added a dedicated Phase 14.1 native-boot PID1 template: + - `tests/system/phase14-native-boot-pid1-operating-system.scm.in` +- added dedicated Phase 14.1 validation wrappers: + - `tests/system/run-phase14-native-boot-qemu.sh` + - `tests/system/run-phase14-native-boot-xcpng.sh` +- validated a cleaner native boot path by sourcing boot assets from the existing native world output instead of the host-staged `freebsd-bootloader` package: + - `#:kernel freebsd-native-kernel` + - `#:bootloader freebsd-native-world` + - `#:base-packages (list freebsd-native-world)` +- hardened the reusable local PID1 QEMU harness so it no longer boots the raw store image directly read/write from `/frx/store`: + - `tests/system/run-phase11-shepherd-pid1-qemu.sh` now copies the generated raw image to: + - `boot-disk.img` + in the workdir before launching QEMU + - this prevents repeated local boots from mutating the supposed store artifact and causing dirty-filesystem follow-up failures + +Validation: + +- local QEMU/UEFI/TCG native-boot wrapper passes: + - `tests/system/run-phase14-native-boot-qemu.sh` + - workdir: `/tmp/phase14-1-qemu2-1775188371` + - result: `PASS phase14-native-boot-qemu` + - confirmed: + - `native_base_store_count=2` + - `host_base_store_count=0` + - `shepherd_pid=1` + - `sshd_status=running` + - `native_boot_assets=freebsd-native-world` + - `native_base_boot=ok` +- real XCP-ng native-boot wrapper passes: + - `tests/system/run-phase14-native-boot-xcpng.sh` + - workdir: `/tmp/phase14-1-xcpng-1775188701` + - result: `PASS phase14-native-boot-xcpng` + - confirmed: + - `vm_id=90490f2e-e8fc-4b7a-388e-5c26f0157289` + - `vdi_id=0f1f90d3-48ca-4fa2-91d8-fc6339b95743` + - `guest_ip=192.168.213.62` + - 
`native_base_store_count=2` + - `host_base_store_count=0` + - `shepherd_pid=1` + - `sshd_status=running` + - `compat_prefix_shims=absent` + - `guile_module_smoke=ok` + - `native_boot_assets=freebsd-native-world` + - `native_base_boot=ok` + +Current assessment: + +- Phase 14.1 is complete +- the validated native boot path no longer depends on host-copied `/boot/...` material +- the current Phase 14.1 native boundary is now fully host-base-free for the validated path: + - native kernel + - native world supplying boot assets + - native world supplying runtime + +Next recommended step: + +1. introduce a clearer native runtime slice so runtime is no longer modeled by reusing the broader native world output for both boot and runtime +2. validate that explicit native runtime slice on QEMU and XCP-ng +3. then revisit headers/toolchain/development package boundaries diff --git a/docs/reports/phase14-native-boot-assets-freebsd.md b/docs/reports/phase14-native-boot-assets-freebsd.md new file mode 100644 index 0000000..b0d02fe --- /dev/null +++ b/docs/reports/phase14-native-boot-assets-freebsd.md @@ -0,0 +1,126 @@ +# Phase 14.1: validated native FreeBSD boot assets without host-copied `/boot` + +Date: 2026-04-03 + +## Goal + +The goal of Phase 14.1 was to stop relying on the host-staged `freebsd-bootloader` package for the validated boot path. + +Phase 13 had already proved that Fruix could boot with: + +- native kernel +- native world +- host-staged bootloader only + +Phase 14.1 moved the remaining boot assets onto the native side as well. + +## Approach + +Before introducing a cleaner package split, Fruix first reused the already-built native world output as the source of boot assets. 
+ +The Phase 14.1 operating-system template now uses: + +- `#:kernel freebsd-native-kernel` +- `#:bootloader freebsd-native-world` +- `#:base-packages (list freebsd-native-world)` + +That means the validated image now gets both: + +- runtime files +- loader/boot assets + +from the native `/usr/src`-built world output already staged in `/frx/store`. + +This is slightly redundant at the model layer, but it is a clean way to prove the architectural point first: + +- the boot path no longer needs host-copied `/boot/...` material + +## Additional QEMU harness fix + +During repeated local validation, the existing QEMU PID1 harness was found to boot the raw store image directly from `/frx/store`. + +That meant each local boot mutated the supposedly immutable image artifact and could leave the filesystem dirty for later runs. + +To avoid that, `tests/system/run-phase11-shepherd-pid1-qemu.sh` now copies the built raw image to a temporary writable workdir file before launching QEMU: + +- source image remains in `/frx/store` +- QEMU now writes to `boot-disk.img` in the workdir + +This keeps the store image stable across repeated local boots. 
+ +## New files + +Added: + +- `tests/system/phase14-native-boot-pid1-operating-system.scm.in` +- `tests/system/run-phase14-native-boot-qemu.sh` +- `tests/system/run-phase14-native-boot-xcpng.sh` + +These wrappers reuse the proven PID1 boot harnesses and assert the new boundary: + +- `host_base_store_count=0` +- native kernel present +- native world present +- boot assets come from native world + +## Validation + +### Local QEMU / UEFI / TCG + +Passing run: + +- `PASS phase14-native-boot-qemu` +- workdir: `/tmp/phase14-1-qemu2-1775188371` + +Confirmed: + +```text +native_base_store_count=2 +host_base_store_count=0 +shepherd_pid=1 +sshd_status=running +native_boot_assets=freebsd-native-world +native_base_boot=ok +``` + +### Real XCP-ng VM + +Passing run: + +- `PASS phase14-native-boot-xcpng` +- workdir: `/tmp/phase14-1-xcpng-1775188701` + +Confirmed: + +```text +vm_id=90490f2e-e8fc-4b7a-388e-5c26f0157289 +vdi_id=0f1f90d3-48ca-4fa2-91d8-fc6339b95743 +guest_ip=192.168.213.62 +native_base_store_count=2 +host_base_store_count=0 +shepherd_pid=1 +sshd_status=running +compat_prefix_shims=absent +guile_module_smoke=ok +native_boot_assets=freebsd-native-world +native_base_boot=ok +``` + +## Result + +Phase 14.1 is complete. + +For the validated boot path, Fruix no longer depends on a host-copied `freebsd-bootloader` store item. + +The currently validated native boot boundary is now: + +- native kernel +- native world providing boot assets +- native world providing runtime +- Fruix runtime stores for Guile/Shepherd + +That is already enough to say the image no longer relies on host-copied kernel/boot material. + +## Next step + +Phase 14.2 should remove the remaining model redundancy by introducing a clearer native runtime slice, so the booted guest reaches ready state using an explicit native runtime output rather than reusing the broader native world output for both boot and runtime roles. 
diff --git a/tests/system/phase14-native-boot-pid1-operating-system.scm.in b/tests/system/phase14-native-boot-pid1-operating-system.scm.in new file mode 100644 index 0000000..f4d5f2a --- /dev/null +++ b/tests/system/phase14-native-boot-pid1-operating-system.scm.in @@ -0,0 +1,71 @@ +(use-modules (fruix system freebsd) + (fruix packages freebsd)) + +(define phase14-operating-system + (operating-system + #:host-name "fruix-freebsd" + #:kernel freebsd-native-kernel + #:bootloader freebsd-native-world + #:base-packages (list freebsd-native-world) + #:groups (list (user-group #:name "wheel" #:gid 0 #:system? #t) + (user-group #:name "sshd" #:gid 22 #:system? #t) + (user-group #:name "_dhcp" #:gid 65 #:system? #t) + (user-group #:name "operator" #:gid 1000 #:system? #f)) + #:users (list (user-account #:name "root" + #:uid 0 + #:group "wheel" + #:comment "Charlie &" + #:home "/root" + #:shell "/bin/sh" + #:system? #t) + (user-account #:name "sshd" + #:uid 22 + #:group "sshd" + #:comment "Secure Shell Daemon" + #:home "/var/empty" + #:shell "/usr/sbin/nologin" + #:system? #t) + (user-account #:name "_dhcp" + #:uid 65 + #:group "_dhcp" + #:comment "dhcp programs" + #:home "/var/empty" + #:shell "/usr/sbin/nologin" + #:system? #t) + (user-account #:name "operator" + #:uid 1000 + #:group "operator" + #:supplementary-groups '("wheel") + #:comment "Fruix Operator" + #:home "/home/operator" + #:shell "/bin/sh" + #:system? #f)) + #:file-systems (list (file-system #:device "/dev/gpt/fruix-root" + #:mount-point "/" + #:type "ufs" + #:options "rw" + #:needed-for-boot? #t) + (file-system #:device "devfs" + #:mount-point "/dev" + #:type "devfs" + #:options "rw" + #:needed-for-boot? #t) + (file-system #:device "tmpfs" + #:mount-point "/tmp" + #:type "tmpfs" + #:options "rw,size=64m")) + #:services '(shepherd ready-marker sshd) + #:loader-entries '(("autoboot_delay" . "1") + ("boot_multicons" . "YES") + ("boot_serial" . "YES") + ("console" . 
"comconsole,vidconsole")) + #:rc-conf-entries '(("clear_tmp_enable" . "NO") + ("hostid_enable" . "NO") + ("sendmail_enable" . "NONE") + ("sshd_enable" . "YES") + ("ifconfig_xn0" . "SYNCDHCP") + ("ifconfig_em0" . "SYNCDHCP") + ("ifconfig_vtnet0" . "SYNCDHCP")) + #:init-mode 'shepherd-pid1 + #:ready-marker "/var/lib/fruix/ready" + #:root-authorized-keys '("__ROOT_AUTHORIZED_KEY__"))) diff --git a/tests/system/run-phase11-shepherd-pid1-qemu.sh b/tests/system/run-phase11-shepherd-pid1-qemu.sh index 8a029e2..17df2f8 100755 --- a/tests/system/run-phase11-shepherd-pid1-qemu.sh +++ b/tests/system/run-phase11-shepherd-pid1-qemu.sh @@ -68,6 +68,8 @@ closure_path=$(sed -n 's/^closure_path=//p' "$phase8_metadata") closure_base=$(basename "$closure_path") raw_sha256=$(sed -n 's/^raw_sha256=//p' "$phase8_metadata") image_store_path=$(sed -n 's/^image_store_path=//p' "$phase8_metadata") +boot_disk_image=$workdir/boot-disk.img +cp "$disk_image" "$boot_disk_image" sudo qemu-system-x86_64 \ -machine q35,accel=tcg \ @@ -81,7 +83,7 @@ sudo qemu-system-x86_64 \ -daemonize \ -drive if=pflash,format=raw,readonly=on,file=/usr/local/share/edk2-qemu/QEMU_UEFI_CODE-x86_64.fd \ -drive if=pflash,format=raw,file="$uefi_vars" \ - -drive if=virtio,format=raw,file="$disk_image" \ + -drive if=virtio,format=raw,file="$boot_disk_image" \ -netdev user,id=net0,hostfwd=tcp::${ssh_port}-:22 \ -device virtio-net-pci,netdev=net0 @@ -148,6 +150,7 @@ phase8_log=$phase8_log phase8_metadata=$phase8_metadata image_store_path=$image_store_path disk_image=$disk_image +boot_disk_image=$boot_disk_image closure_path=$closure_path closure_base=$closure_base raw_sha256=$raw_sha256 diff --git a/tests/system/run-phase14-native-boot-qemu.sh b/tests/system/run-phase14-native-boot-qemu.sh new file mode 100755 index 0000000..40ed907 --- /dev/null +++ b/tests/system/run-phase14-native-boot-qemu.sh @@ -0,0 +1,103 @@ +#!/bin/sh +set -eu + +repo_root=${PROJECT_ROOT:-$(pwd)} 
+os_template=${OS_TEMPLATE:-$repo_root/tests/system/phase14-native-boot-pid1-operating-system.scm.in} +system_name=${SYSTEM_NAME:-phase14-operating-system} +disk_capacity=${DISK_CAPACITY:-8g} +root_size=${ROOT_SIZE:-6g} +metadata_target=${METADATA_OUT:-} +cleanup=0 + +if [ -n "${WORKDIR:-}" ]; then + workdir=$WORKDIR + mkdir -p "$workdir" +else + workdir=$(mktemp -d /tmp/fruix-phase14-native-boot-qemu.XXXXXX) + cleanup=1 +fi +if [ "${KEEP_WORKDIR:-0}" -eq 1 ]; then + cleanup=0 +fi + +inner_metadata=$workdir/phase14-native-boot-qemu-inner-metadata.txt +metadata_file=$workdir/phase14-native-boot-qemu-metadata.txt + +cleanup_workdir() { + if [ "$cleanup" -eq 1 ]; then + rm -rf "$workdir" 2>/dev/null || sudo rm -rf "$workdir" + fi +} +trap cleanup_workdir EXIT INT TERM + +KEEP_WORKDIR=1 WORKDIR="$workdir/inner" METADATA_OUT="$inner_metadata" \ + OS_TEMPLATE="$os_template" SYSTEM_NAME="$system_name" DISK_CAPACITY="$disk_capacity" ROOT_SIZE="$root_size" \ + "$repo_root/tests/system/run-phase11-shepherd-pid1-qemu.sh" + +phase8_metadata=$(sed -n 's/^phase8_metadata=//p' "$inner_metadata") +closure_path=$(sed -n 's/^closure_path=//p' "$inner_metadata") +closure_base=$(sed -n 's/^closure_base=//p' "$inner_metadata") +serial_log=$(sed -n 's/^serial_log=//p' "$inner_metadata") +ssh_port=$(sed -n 's/^ssh_port=//p' "$inner_metadata") +shepherd_pid=$(sed -n 's/^shepherd_pid=//p' "$inner_metadata") +sshd_status=$(sed -n 's/^sshd_status=//p' "$inner_metadata") +activate_log=$(sed -n 's/^activate_log=//p' "$inner_metadata") + +native_base_store_count=$(sed -n 's/^native_base_store_count=//p' "$phase8_metadata") +native_base_stores=$(sed -n 's/^native_base_stores=//p' "$phase8_metadata") +host_base_store_count=$(sed -n 's/^host_base_store_count=//p' "$phase8_metadata") +host_base_stores=$(sed -n 's/^host_base_stores=//p' "$phase8_metadata") + +[ "$native_base_store_count" = 2 ] || { echo "expected 2 native base stores, got: $native_base_store_count" >&2; exit 1; } +[ 
"$host_base_store_count" = 0 ] || { echo "expected 0 host base stores, got: $host_base_store_count" >&2; exit 1; } +[ -z "$host_base_stores" ] || { echo "host base stores are not empty: $host_base_stores" >&2; exit 1; } +printf '%s\n' "$native_base_stores" | tr ',' '\n' | grep 'freebsd-native-kernel-15.0-STABLE$' >/dev/null || { + echo "native base stores do not include the native kernel" >&2 + exit 1 +} +printf '%s\n' "$native_base_stores" | tr ',' '\n' | grep 'freebsd-native-world-15.0-STABLE$' >/dev/null || { + echo "native base stores do not include the native world" >&2 + exit 1 +} +[ "$shepherd_pid" = 1 ] || { echo "shepherd was not PID 1" >&2; exit 1; } +[ "$sshd_status" = running ] || { echo "sshd is not running" >&2; exit 1; } +case "$activate_log" in + *fruix-activate:done*) : ;; + *) echo "activation log does not show success" >&2; exit 1 ;; +esac + +cat >"$metadata_file" <&2; exit 1; } +[ "$host_base_store_count" = 0 ] || { echo "expected 0 host base stores, got: $host_base_store_count" >&2; exit 1; } +[ -z "$host_base_stores" ] || { echo "host base stores are not empty: $host_base_stores" >&2; exit 1; } +printf '%s\n' "$native_base_stores" | tr ',' '\n' | grep 'freebsd-native-kernel-15.0-STABLE$' >/dev/null || { + echo "native base stores do not include the native kernel" >&2 + exit 1 +} +printf '%s\n' "$native_base_stores" | tr ',' '\n' | grep 'freebsd-native-world-15.0-STABLE$' >/dev/null || { + echo "native base stores do not include the native world" >&2 + exit 1 +} +[ "$shepherd_pid" = 1 ] || { echo "shepherd was not PID 1" >&2; exit 1; } +[ "$sshd_status" = running ] || { echo "sshd is not running" >&2; exit 1; } +[ "$compat_prefix_shims" = absent ] || { echo "compatibility prefix shims reappeared" >&2; exit 1; } +[ "$guile_module_smoke" = ok ] || { echo "guest Guile module smoke failed" >&2; exit 1; } +case "$activate_log" in + *fruix-activate:done*) : ;; + *) echo "activation log does not show success" >&2; exit 1 ;; +esac + +cat 
>"$metadata_file" <