From b0312e396df8fc043e2c3d0b022957462574c5c8 Mon Sep 17 00:00:00 2001
From: Steffen Beyer
Date: Mon, 13 Apr 2026 19:20:45 +0200
Subject: [PATCH] Add self-hosted node lifecycle validation harness

---
# Reviewer notes. Text between the three-dash line and the first "diff --git"
# is not part of the commit; the diff payload below is unchanged.
#
# NOTE(review): line breaks and indentation in this copy were restored from a
# whitespace-collapsed paste. Line counts match the hunk headers, but exact
# indentation was not recoverable, so blob id 81a18b8 is unverified here.
#
# What tests/run-self-hosted-node-lifecycle.sh does
#   Runs /usr/local/bin/fruix on HOST over ssh and checks the `key=value`
#   lines that `fruix system status|reconfigure|rollback` write to stdout.
#
# Argument handling
#   - The first non-option word becomes HOST; --user, --port and
#     --identity-file each consume the following word.
#   - `--`, or any further non-option word once HOST is set, stops parsing;
#     everything left in "$@" is passed to `fruix system reconfigure`.
#   - Unknown `-*` options and a missing HOST print the usage text to stderr
#     and exit 1.
#
# Helpers
#   shell_quote ARG            Prints ARG wrapped in single quotes, with each
#                              embedded single quote rewritten as '"'"'.
#   ssh_invoke CMD             Runs CMD on "$user@$host" with BatchMode=yes,
#                              StrictHostKeyChecking=accept-new and -p "$port";
#                              adds -i only when --identity-file was given.
#   remote_program_command P.. Prints `set -eu; ` followed by P and its
#                              arguments, each passed through shell_quote.
#   metadata_value FILE KEY    Prints the text after `KEY=` from the last line
#                              of FILE that starts with `KEY=`; prints nothing
#                              when there is no such line.
#   assert_non_empty NAME VAL  Exits 1 with "validation failed: missing NAME"
#                              on stderr when VAL is empty.
#   assert_remote_file_exists  Runs `test -f PATH` on the node; does nothing
#     PATH                     when PATH is empty.
#   show_status_summary L F    Prints label L, then default_system_name,
#                              current_generation, current_closure,
#                              rollback_generation and rollback_closure as
#                              read from status file F.
#
# Flow
#   [1/5] `test -x /usr/local/bin/fruix` on the node.
#   [2/5] Capture `status`; current_generation, current_closure,
#         default_declaration_file, default_system_name,
#         current_generation_metadata and current_declaration_file must be
#         non-empty, and the last two must name existing remote files.
#   [3/5] Run `reconfigure "$@"`; its output must contain a non-empty
#         reconfigure_closure and reboot_required=true.
#   [4/5] Capture `status` again; current_closure must equal
#         reconfigure_closure.
#   [5/5] When that status reports a non-empty rollback_closure: run
#         `rollback`, capture `status`, and require current_closure to equal
#         the recorded rollback_closure. Otherwise the step is skipped, or
#         fails when --require-rollback was given.
#
# Temporary files
#   Command output is captured under a `mktemp -d` directory in /tmp. An EXIT
#   trap removes it; the trap is cleared on the last line, so the directory
#   survives only when every step passed.
#
# NOTE(review): a failing remote `test -x` / `test -f` ends the run through
#   `set -e` with ssh's exit status and no "validation failed" message, unlike
#   the local checks.
# NOTE(review): the captured outputs are deleted on exactly the runs that
#   fail; confirm that this is intended.
# NOTE(review): the usage text says reconfigure arguments follow `--`, but a
#   second positional word is forwarded to reconfigure as well.
#
 docs/plan_2.md                          |   8 +-
 tests/run-self-hosted-node-lifecycle.sh | 249 ++++++++++++++++++++++++
 2 files changed, 255 insertions(+), 2 deletions(-)
 create mode 100755 tests/run-self-hosted-node-lifecycle.sh

diff --git a/docs/plan_2.md b/docs/plan_2.md
index 7906c89..efb0b9b 100644
--- a/docs/plan_2.md
+++ b/docs/plan_2.md
@@ -185,11 +185,15 @@ Completed:
 - enriched `fruix system status` with declaration and generation identity
   fields
 - added a focused render-level lifecycle test covering those outputs
+- added a manual remote integration harness:
+  - `tests/run-self-hosted-node-lifecycle.sh`
+  for exercising `status` / `reconfigure` / `rollback` against a real booted
+  self-hosted node over SSH
 
 Remaining:
 
-- validate `status` / `reconfigure` / `rollback` on a real booted self-hosted
-  node
+- execute the real-node validation harness against a booted self-hosted node
+  and record the outcome
 - decide whether generation-local `install.scm` should keep its current
   deployment-oriented schema or move closer to the initial install-generation
   shape
diff --git a/tests/run-self-hosted-node-lifecycle.sh b/tests/run-self-hosted-node-lifecycle.sh
new file mode 100755
index 0000000..81a18b8
--- /dev/null
+++ b/tests/run-self-hosted-node-lifecycle.sh
@@ -0,0 +1,249 @@
+#!/bin/sh
+set -eu
+
+usage() {
+    cat <<'EOF'
+Usage:
+  run-self-hosted-node-lifecycle.sh HOST [options] [-- RECONFIGURE_ARGS...]
+
+Options:
+  --user USER            SSH user (default: root)
+  --port PORT            SSH port (default: 22)
+  --identity-file PATH   SSH identity file
+  --require-rollback     Fail if rollback cannot be exercised
+  --help                 Show this help
+
+This is a manual remote integration harness for a booted self-hosted Fruix
+node. It validates:
+
+- `fruix system status`
+- `fruix system reconfigure`
+- `fruix system rollback` (when a rollback target exists)
+
+If no extra arguments are provided after `--`, the harness runs
+`fruix system reconfigure` with the node's default declaration.
+
+Examples:
+  tests/run-self-hosted-node-lifecycle.sh 192.0.2.10
+  tests/run-self-hosted-node-lifecycle.sh 192.0.2.10 --require-rollback
+  tests/run-self-hosted-node-lifecycle.sh host.example -- --system self-hosted-development-operating-system
+EOF
+}
+
+host=""
+user="root"
+port="22"
+identity_file=""
+require_rollback=0
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --user)
+            [ $# -ge 2 ] || { echo "missing value after --user" >&2; exit 1; }
+            user=$2
+            shift 2
+            ;;
+        --port)
+            [ $# -ge 2 ] || { echo "missing value after --port" >&2; exit 1; }
+            port=$2
+            shift 2
+            ;;
+        --identity-file)
+            [ $# -ge 2 ] || { echo "missing value after --identity-file" >&2; exit 1; }
+            identity_file=$2
+            shift 2
+            ;;
+        --require-rollback)
+            require_rollback=1
+            shift
+            ;;
+        --help|-h)
+            usage
+            exit 0
+            ;;
+        --)
+            shift
+            break
+            ;;
+        -*)
+            echo "unknown option: $1" >&2
+            usage >&2
+            exit 1
+            ;;
+        *)
+            if [ -z "$host" ]; then
+                host=$1
+                shift
+            else
+                break
+            fi
+            ;;
+    esac
+done
+
+[ -n "$host" ] || {
+    usage >&2
+    exit 1
+}
+
+shell_quote() {
+    printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\"'\"'/g")"
+}
+
+ssh_invoke() {
+    remote_command=$1
+    if [ -n "$identity_file" ]; then
+        ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -p "$port" -i "$identity_file" "$user@$host" "$remote_command"
+    else
+        ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -p "$port" "$user@$host" "$remote_command"
+    fi
+}
+
+remote_program_command() {
+    program=$1
+    shift
+    printf 'set -eu; %s' "$(shell_quote "$program")"
+    while [ $# -gt 0 ]; do
+        printf ' %s' "$(shell_quote "$1")"
+        shift
+    done
+    printf '\n'
+}
+
+metadata_value() {
+    file=$1
+    key=$2
+    sed -n "s/^${key}=//p" "$file" | tail -n 1
+}
+
+assert_non_empty() {
+    name=$1
+    value=$2
+    if [ -z "$value" ]; then
+        echo "validation failed: missing $name" >&2
+        exit 1
+    fi
+}
+
+assert_remote_file_exists() {
+    path=$1
+    if [ -n "$path" ]; then
+        ssh_invoke "set -eu; test -f $(shell_quote "$path")"
+    fi
+}
+
+show_status_summary() {
+    label=$1
+    file=$2
+    echo "$label"
+    echo "  default_system_name=$(metadata_value "$file" default_system_name)"
+    echo "  current_generation=$(metadata_value "$file" current_generation)"
+    echo "  current_closure=$(metadata_value "$file" current_closure)"
+    echo "  rollback_generation=$(metadata_value "$file" rollback_generation)"
+    echo "  rollback_closure=$(metadata_value "$file" rollback_closure)"
+}
+
+tmpdir=$(mktemp -d /tmp/fruix-self-hosted-node-lifecycle.XXXXXX)
+cleanup() {
+    rm -rf "$tmpdir"
+}
+trap cleanup EXIT
+
+status_before=$tmpdir/status-before.out
+reconfigure_output=$tmpdir/reconfigure.out
+status_after=$tmpdir/status-after.out
+rollback_output=$tmpdir/rollback.out
+status_final=$tmpdir/status-final.out
+
+echo "[1/5] preflight: checking remote Fruix CLI on $host"
+ssh_invoke "set -eu; test -x /usr/local/bin/fruix"
+
+echo "[2/5] capturing initial status"
+ssh_invoke "$(remote_program_command /usr/local/bin/fruix system status)" > "$status_before"
+
+before_current_generation=$(metadata_value "$status_before" current_generation)
+before_current_closure=$(metadata_value "$status_before" current_closure)
+before_default_declaration=$(metadata_value "$status_before" default_declaration_file)
+before_default_system_name=$(metadata_value "$status_before" default_system_name)
+before_current_generation_metadata=$(metadata_value "$status_before" current_generation_metadata)
+before_current_declaration_file=$(metadata_value "$status_before" current_declaration_file)
+
+assert_non_empty current_generation "$before_current_generation"
+assert_non_empty current_closure "$before_current_closure"
+assert_non_empty default_declaration_file "$before_default_declaration"
+assert_non_empty default_system_name "$before_default_system_name"
+assert_non_empty current_generation_metadata "$before_current_generation_metadata"
+assert_non_empty current_declaration_file "$before_current_declaration_file"
+assert_remote_file_exists "$before_current_generation_metadata"
+assert_remote_file_exists "$before_current_declaration_file"
+show_status_summary "Initial status:" "$status_before"
+
+echo "[3/5] running node-local reconfigure"
+ssh_invoke "$(remote_program_command /usr/local/bin/fruix system reconfigure "$@")" > "$reconfigure_output"
+reconfigure_closure=$(metadata_value "$reconfigure_output" reconfigure_closure)
+reboot_required=$(metadata_value "$reconfigure_output" reboot_required)
+assert_non_empty reconfigure_closure "$reconfigure_closure"
+[ "$reboot_required" = "true" ] || {
+    echo "validation failed: expected reboot_required=true, got '$reboot_required'" >&2
+    exit 1
+}
+
+echo "[4/5] capturing status after reconfigure"
+ssh_invoke "$(remote_program_command /usr/local/bin/fruix system status)" > "$status_after"
+after_current_generation=$(metadata_value "$status_after" current_generation)
+after_current_closure=$(metadata_value "$status_after" current_closure)
+after_current_generation_metadata=$(metadata_value "$status_after" current_generation_metadata)
+after_current_declaration_file=$(metadata_value "$status_after" current_declaration_file)
+after_rollback_closure=$(metadata_value "$status_after" rollback_closure)
+after_rollback_generation_metadata=$(metadata_value "$status_after" rollback_generation_metadata)
+assert_non_empty current_generation_after "$after_current_generation"
+assert_non_empty current_closure_after "$after_current_closure"
+assert_non_empty current_generation_metadata_after "$after_current_generation_metadata"
+assert_non_empty current_declaration_file_after "$after_current_declaration_file"
+assert_remote_file_exists "$after_current_generation_metadata"
+assert_remote_file_exists "$after_current_declaration_file"
+[ "$after_current_closure" = "$reconfigure_closure" ] || {
+    echo "validation failed: current_closure after reconfigure does not match reconfigure_closure" >&2
+    echo "  expected: $reconfigure_closure" >&2
+    echo "  actual: $after_current_closure" >&2
+    exit 1
+}
+show_status_summary "Status after reconfigure:" "$status_after"
+
+if [ -n "$after_rollback_closure" ]; then
+    echo "[5/5] running rollback"
+    if [ -n "$after_rollback_generation_metadata" ]; then
+        assert_remote_file_exists "$after_rollback_generation_metadata"
+    fi
+    ssh_invoke "$(remote_program_command /usr/local/bin/fruix system rollback)" > "$rollback_output"
+    ssh_invoke "$(remote_program_command /usr/local/bin/fruix system status)" > "$status_final"
+    final_current_closure=$(metadata_value "$status_final" current_closure)
+    final_current_generation=$(metadata_value "$status_final" current_generation)
+    final_current_generation_metadata=$(metadata_value "$status_final" current_generation_metadata)
+    final_current_declaration_file=$(metadata_value "$status_final" current_declaration_file)
+    assert_non_empty final_current_generation "$final_current_generation"
+    assert_non_empty final_current_closure "$final_current_closure"
+    assert_non_empty final_current_generation_metadata "$final_current_generation_metadata"
+    assert_non_empty final_current_declaration_file "$final_current_declaration_file"
+    assert_remote_file_exists "$final_current_generation_metadata"
+    assert_remote_file_exists "$final_current_declaration_file"
+    [ "$final_current_closure" = "$after_rollback_closure" ] || {
+        echo "validation failed: rollback did not switch to the recorded rollback closure" >&2
+        echo "  expected: $after_rollback_closure" >&2
+        echo "  actual: $final_current_closure" >&2
+        exit 1
+    }
+    show_status_summary "Final status after rollback:" "$status_final"
+else
+    if [ "$require_rollback" -eq 1 ]; then
+        echo "validation failed: no rollback closure was available after reconfigure" >&2
+        exit 1
+    fi
+    echo "[5/5] rollback skipped: no rollback closure was available after reconfigure"
+    echo "  To force a full rollback exercise, run with reconfigure arguments that produce a different closure."
+fi
+
+echo
+echo "self-hosted node lifecycle validation completed"
+echo "artifacts: $tmpdir"
+trap - EXIT