From 02f7a7f57ba9395dc92a5e01763efd7783cee647 Mon Sep 17 00:00:00 2001 From: Steffen Beyer Date: Wed, 1 Apr 2026 08:52:10 +0200 Subject: [PATCH] Validate local Guile fix on FreeBSD --- docs/PROGRESS.md | 56 +++++- .../phase1-guile-local-build-validation.md | 171 ++++++++++++++++++ tests/guile/run-phase1-verification.sh | 23 ++- tests/guile/run-subprocess-diagnostics.sh | 43 ++++- 4 files changed, 283 insertions(+), 10 deletions(-) create mode 100644 docs/reports/phase1-guile-local-build-validation.md diff --git a/docs/PROGRESS.md b/docs/PROGRESS.md index 862d32d..11b1e6f 100644 --- a/docs/PROGRESS.md +++ b/docs/PROGRESS.md @@ -66,13 +66,63 @@ Current assessment: - the next practical step is to validate a workaround or patch in Guile so subprocess helpers stop crashing - after that, continue with Phase 1.2 (minimal native build environment / GNU Hello) +## 2026-04-01 — Phase 1.1 follow-up: local Guile build validated the fix + +Completed work: + +- installed the additional build tooling needed for a local Guile checkout build: + - `autoconf` + - `automake` + - `libtool` + - `gettext-tools` + - `texinfo` + - `help2man` + - `gperf` + - `pkgconf` +- confirmed a FreeBSD-specific bootstrap quirk: + - Guile `autogen.sh` needs GNU `m4` + - FreeBSD base `/usr/bin/m4` is not sufficient + - `M4=gm4 ./autogen.sh` works +- built a disposable validation copy from `~/repos/guile` +- confirmed `~/repos/guile` already contains upstream commit: + - `eb828801f621d3e130b6fe88cfc4acaa69b98a03` + - `Don't use posix_spawn_file_actions_addclosefrom_np with glib posix_spawn` +- updated the local test harnesses so they can test non-system Guile builds: + - `tests/guile/run-phase1-verification.sh` + - `tests/guile/run-subprocess-diagnostics.sh` + - both now accept `GUILE_BIN` + - both now prepend the sibling `../lib` directory to `LD_LIBRARY_PATH` when a matching local `libguile-3.0.so.1` exists + - subprocess diagnostics now supports `EXPECT_GUILE_SUBPROCESS_CRASH=0` for fixed builds +- 
validated that the packaged Guile still reproduces the crash +- validated that the locally built Guile succeeds for: + - `system*` + - `spawn` + - `open-pipe*` +- re-ran the broader Phase 1.1 Scheme verification suite successfully against the local Guile build +- wrote the results to `docs/reports/phase1-guile-local-build-validation.md` + +Important findings: + +- the local Guile executable initially still crashed until it was forced to load its matching local `libguile-3.0.so.1` +- once `LD_LIBRARY_PATH` pointed at the local install lib directory, subprocess helpers worked correctly +- this strongly supports the earlier diagnosis and shows that the upstream Guile fix resolves the problem in practice +- the local `~/repos/bdwgc` checkout was not needed for this step; packaged `boehm-gc-threaded` was sufficient so far + +Current assessment: + +- Phase 1.1 now has both a root-cause analysis and a working validated fix path on amd64 +- no source changes were needed in `~/repos/guile` because the local checkout already contains the relevant upstream fix +- no source changes were needed in `~/repos/bdwgc` yet, but the earlier FreeBSD warning keeps it on the watch list +- the project can now move on to Phase 1.2 with a known-good local Guile fallback + Recent commits: - `e380e88` — `Add FreeBSD Guile verification harness` - `cd721b1` — `Update progress after Guile verification` +- `27916cb` — `Diagnose Guile subprocess crash on FreeBSD` Next recommended step: -1. patch or locally validate a fix for the `addclosefrom_np` / `REPLACE_POSIX_SPAWN` mismatch -2. re-run both Guile test harnesses after the fix -3. once subprocess behavior is stable, continue with Phase 1.2 (minimal native build environment / GNU Hello) +1. begin Phase 1.2 by creating a minimal native FreeBSD build environment exercise (for example, GNU Hello or an even smaller autotools package) +2. use the local fixed Guile build when subprocess helpers are required +3. 
keep `bdwgc` in reserve if later FreeBSD-specific GC/thread issues appear diff --git a/docs/reports/phase1-guile-local-build-validation.md b/docs/reports/phase1-guile-local-build-validation.md new file mode 100644 index 0000000..9c03a69 --- /dev/null +++ b/docs/reports/phase1-guile-local-build-validation.md @@ -0,0 +1,171 @@ +# Phase 1.1 follow-up: local Guile build validates the FreeBSD subprocess fix + +Date: 2026-04-01 + +## Summary + +A local build of `~/repos/guile` was used to validate the fix for the previously identified FreeBSD subprocess crash. + +Result: + +- the packaged FreeBSD Guile (`/usr/local/bin/guile3`) still reproduces the crash +- a locally built Guile from `~/repos/guile` works correctly for: + - `system*` + - `spawn` + - `open-pipe*` +- no local source patch to `~/repos/guile` was required because the local checkout already contains the upstream fix commit + +The relevant upstream commit already present in `~/repos/guile` is: + +- `eb828801f621d3e130b6fe88cfc4acaa69b98a03` +- subject: `Don't use posix_spawn_file_actions_addclosefrom_np with glib posix_spawn` + +The tested local checkout revision was: + +- `bbf2baa10f6cc8dfdd9e4ea14b503d748287a03d` + +## Additional build tooling installed on FreeBSD + +To build Guile from the local checkout, these packages were installed: + +```sh +sudo pkg install -y autoconf automake libtool gettext-tools texinfo help2man gperf pkgconf +``` + +An extra FreeBSD-specific detail also mattered: + +- Guile's `autogen.sh` expects GNU `m4` +- FreeBSD base `/usr/bin/m4` is not GNU `m4` +- the installed GNU version is available as `gm4` +- `autogen.sh` therefore had to be run with `M4=gm4` + +## Build procedure used + +A disposable build copy was created from the local repo to avoid polluting `~/repos/guile`: + +```sh +git clone --shared ~/repos/guile /tmp/guile-freebsd-validate-src +``` + +Autotools bootstrap: + +```sh +cd /tmp/guile-freebsd-validate-src +M4=gm4 ./autogen.sh +``` + +Configure: + +```sh +cd 
/tmp/guile-freebsd-validate-build +env \ + M4=gm4 \ + MAKE=gmake \ + PKG_CONFIG=pkg-config \ + PKG_CONFIG_PATH=/usr/local/libdata/pkgconfig:/usr/local/lib/pkgconfig \ + CPPFLAGS='-I/usr/local/include' \ + LDFLAGS='-L/usr/local/lib -Wl,-rpath,/usr/local/lib' \ + /tmp/guile-freebsd-validate-src/configure \ + --prefix=/tmp/guile-freebsd-validate-install \ + --with-bdw-gc=bdw-gc-threaded \ + --with-libgmp-prefix=/usr/local \ + --with-libunistring-prefix=/usr/local \ + --with-libiconv-prefix=/usr/local \ + --with-libintl-prefix=/usr/local +``` + +Build and install: + +```sh +cd /tmp/guile-freebsd-validate-build +gmake -j4 +gmake install +``` + +## Important validation detail: the executable must load the matching local libguile + +Immediately after installation, running `/tmp/guile-freebsd-validate-install/bin/guile` without adjusting the dynamic linker path still loaded the system `libguile-3.0.so.1` from `/usr/local/lib`, so it still crashed. + +This was confirmed with: + +```sh +ldd /tmp/guile-freebsd-validate-install/bin/guile +``` + +To test the local build correctly, the process must use: + +```sh +LD_LIBRARY_PATH=/tmp/guile-freebsd-validate-install/lib +``` + +The project test harnesses were updated accordingly so that the sibling `../lib` directory of the selected Guile binary is automatically prepended to `LD_LIBRARY_PATH` whenever it contains `libguile-3.0.so.1` (for example, when `GUILE_BIN` points to a non-system installation). + +## Validation results + +### 1. Packaged Guile still reproduces the problem + +```sh +./tests/guile/run-subprocess-diagnostics.sh +``` + +Observed result: + +- `system*` exits `139` +- `spawn` exits `139` +- `open-pipe*` exits `139` + +### 2. Local Guile succeeds when using the matching local libguile + +```sh +EXPECT_GUILE_SUBPROCESS_CRASH=0 \ +GUILE_BIN=/tmp/guile-freebsd-validate-install/bin/guile \ +./tests/guile/run-subprocess-diagnostics.sh +``` + +Observed result: + +```text +system-star exit=0 +spawn exit=0 +open-pipe-star exit=0 +Guile subprocess helpers succeeded as expected +``` + +### 3. 
The broader Phase 1.1 verification suite also passes with the local build + +```sh +GUILE_BIN=/tmp/guile-freebsd-validate-install/bin/guile \ +./tests/guile/run-phase1-verification.sh +``` + +Observed result: + +- module loading: pass +- deterministic output: pass +- file I/O: pass +- process fork/wait: pass +- loopback sockets: pass +- FFI: pass +- `(guix build make-bootstrap)` import and `copy-linux-headers`: pass + +## boehm-gc note + +The user noted a package warning that Boehm GC is unmaintained on FreeBSD. For this specific validation step: + +- the local `~/repos/bdwgc` checkout was not needed +- the packaged `boehm-gc-threaded` installation was sufficient to build and run the fixed local Guile +- no local bdwgc source patch was necessary yet + +That said, the warning is worth keeping in mind as a likely future brittleness point if later Guile or Guix behavior exposes GC- or thread-related FreeBSD issues. + +## Conclusion + +The local build validates the earlier root-cause analysis in practice: + +- the packaged Guile is broken on FreeBSD for subprocess helpers +- a newer local Guile checkout that already contains upstream commit `eb828801f...` no longer crashes +- the next Guix-on-FreeBSD work can proceed using either: + 1. a locally built fixed Guile, or + 2. a backported packaging patch carrying the same upstream fix + +This clears the way to proceed into Phase 1.2 while keeping a documented workaround available on the current host. diff --git a/tests/guile/run-phase1-verification.sh b/tests/guile/run-phase1-verification.sh index 2ff1e7f..786c3fa 100755 --- a/tests/guile/run-phase1-verification.sh +++ b/tests/guile/run-phase1-verification.sh @@ -10,15 +10,32 @@ if [ ! 
-d "$guix_source_dir/guix" ]; then exit 1 fi -if command -v guile3 >/dev/null 2>&1; then +if [ -n "${GUILE_BIN:-}" ]; then + guile_bin=$GUILE_BIN +elif command -v guile3 >/dev/null 2>&1; then guile_bin=$(command -v guile3) elif command -v guile-3.0 >/dev/null 2>&1; then guile_bin=$(command -v guile-3.0) else - echo "Unable to find guile3 or guile-3.0 in PATH" >&2 + echo "Unable to find GUILE_BIN, guile3, or guile-3.0 in PATH" >&2 exit 1 fi +if [ ! -x "$guile_bin" ]; then + echo "Guile binary is not executable: $guile_bin" >&2 + exit 1 +fi + +guile_prefix=$(CDPATH= cd -- "$(dirname "$guile_bin")/.." && pwd) +guile_lib_dir=$guile_prefix/lib +if [ -e "$guile_lib_dir/libguile-3.0.so.1" ]; then + if [ -n "${LD_LIBRARY_PATH:-}" ]; then + export LD_LIBRARY_PATH="$guile_lib_dir:$LD_LIBRARY_PATH" + else + export LD_LIBRARY_PATH="$guile_lib_dir" + fi +fi + export GUIX_SOURCE_DIR="$guix_source_dir" export GUILE_AUTO_COMPILE=${GUILE_AUTO_COMPILE:-0} if [ -n "${GUILE_LOAD_PATH:-}" ]; then @@ -27,4 +44,6 @@ else export GUILE_LOAD_PATH="$repo_root/tests/guile/modules:$guix_source_dir" fi +echo "Using Guile: $guile_bin" >&2 +echo "Using LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-}" >&2 exec "$guile_bin" -s "$repo_root/tests/guile/verify-phase1.scm" "$@" diff --git a/tests/guile/run-subprocess-diagnostics.sh b/tests/guile/run-subprocess-diagnostics.sh index e61e166..60b2c3b 100755 --- a/tests/guile/run-subprocess-diagnostics.sh +++ b/tests/guile/run-subprocess-diagnostics.sh @@ -6,23 +6,44 @@ repo_root=$(CDPATH= cd -- "$script_dir/../.." 
&& pwd) workdir=$(mktemp -d /tmp/fruix-guile-subprocess.XXXXXX) trap 'rm -rf "$workdir"' EXIT INT TERM -if command -v guile3 >/dev/null 2>&1; then +if [ -n "${GUILE_BIN:-}" ]; then + guile_bin=$GUILE_BIN +elif command -v guile3 >/dev/null 2>&1; then guile_bin=$(command -v guile3) elif command -v guile-3.0 >/dev/null 2>&1; then guile_bin=$(command -v guile-3.0) else - echo "Unable to find guile3 or guile-3.0 in PATH" >&2 + echo "Unable to find GUILE_BIN, guile3, or guile-3.0 in PATH" >&2 exit 1 fi +if [ ! -x "$guile_bin" ]; then + echo "Guile binary is not executable: $guile_bin" >&2 + exit 1 +fi + +guile_prefix=$(CDPATH= cd -- "$(dirname "$guile_bin")/.." && pwd) +guile_lib_dir=$guile_prefix/lib +if [ -e "$guile_lib_dir/libguile-3.0.so.1" ]; then + if [ -n "${LD_LIBRARY_PATH:-}" ]; then + export LD_LIBRARY_PATH="$guile_lib_dir:$LD_LIBRARY_PATH" + else + export LD_LIBRARY_PATH="$guile_lib_dir" + fi +fi + ulimit -c 0 || true cc -Wall -Wextra -O2 "$repo_root/tests/guile/posix-spawn-freebsd-diagnostics.c" \ -o "$workdir/posix-spawn-freebsd-diagnostics" +printf 'Using Guile: %s\n' "$guile_bin" +printf 'Using LD_LIBRARY_PATH: %s\n' "${LD_LIBRARY_PATH:-}" printf '== Native posix_spawn diagnostics ==\n' "$workdir/posix-spawn-freebsd-diagnostics" +expect_crash=${EXPECT_GUILE_SUBPROCESS_CRASH:-1} + run_guile_case() { name=$1 code=$2 @@ -31,12 +52,24 @@ run_guile_case() { rc=$? 
set -e printf '%s exit=%s\n' "$name" "$rc" - [ "$rc" -eq 139 ] + if [ "$expect_crash" -eq 1 ]; then + [ "$rc" -eq 139 ] + else + [ "$rc" -eq 0 ] + fi } -printf '== Guile subprocess crash repro ==\n' +if [ "$expect_crash" -eq 1 ]; then + printf '== Guile subprocess crash repro ==\n' +else + printf '== Guile subprocess validation (expect success) ==\n' +fi run_guile_case system-star '(system* "/usr/bin/true")' run_guile_case spawn '(spawn "/usr/bin/true" (list "/usr/bin/true"))' run_guile_case open-pipe-star '(use-modules (ice-9 popen)) (open-pipe* OPEN_READ "/usr/bin/true")' -printf 'known FreeBSD Guile subprocess crash profile reproduced\n' +if [ "$expect_crash" -eq 1 ]; then + printf 'known FreeBSD Guile subprocess crash profile reproduced\n' +else + printf 'Guile subprocess helpers succeeded as expected\n' +fi