Files
legion_kk/tests/unit/node-deployment-runtime.test.ts
self 466d4491f4 fix: preserve concurrent cluster membership
Reconcile join membership as a monotonic union when preserving existing members so concurrent joiners do not fail on stale desired snapshots. Recompute persisted membership after reconciliation to include peers that became ready while the service was running.
2026-06-18 18:55:51 +02:00

1682 lines
49 KiB
TypeScript

import assert from "node:assert/strict"
import { writeFile } from "node:fs/promises"
import test from "node:test"
import {
APP_STATE_VERSION,
type ClusterBootstrapState,
type DeploymentActivityEvent,
type ManagedNodeBootstrapState,
type NodeExecutionInstallInput,
type NodeReinstallRequest,
type NodeExecutionState,
type ProductCatalog,
type ProviderConfig,
type StoredState,
type VpsRecord
} from "../../src/shared/app"
import type { NodeDeploymentInstallSession } from "../../src/main/deployment/service"
import {
NodeDeploymentRuntime,
type NodeDeploymentPhaseRuntime
} from "../../src/main/deployment/runtime"
import { deriveNodePublicKey } from "../../src/main/bootstrap-crypto"
import { getProviderDeploymentProfile } from "../../src/main/cloud/providers/registry"
/**
 * Builds a fixed cluster bootstrap fixture: a root secret plus system-admin and
 * human-admin credential records filled with placeholder values.
 */
function createTestClusterBootstrap(): ClusterBootstrapState {
  const systemAdmin = {
    username: "legion-system",
    nostrPublicKey: "e".repeat(64),
    nostrPrivateKey: "1".repeat(64)
  }
  const humanAdmin = {
    username: "root",
    passwordHash: "password-hash",
    nostrPublicKey: "f".repeat(64),
    encryptedNostrPrivateKey: "ciphertext",
    nostrPrivateKeyNonce: "nonce",
    nostrPrivateKeySalt: "salt"
  }
  return { rootSecret: "root-secret", systemAdmin, humanAdmin }
}
/**
 * In-memory stand-in for the state store handed to the runtime in these tests.
 * It keeps a single `StoredState` object and an `unlocked` flag (default `true`).
 */
class InMemoryStateStore {
  constructor(
    private state: StoredState,
    private readonly unlocked = true
  ) {}

  /** Reports the `unlocked` flag supplied at construction time. */
  hasUnlockedState(): boolean {
    return this.unlocked
  }

  /** Returns the currently held state object itself (no copy is made). */
  getStateOrThrow(): StoredState {
    return this.state
  }

  /**
   * Applies `updater` to a clone of the tracked server with the given id, stores
   * the updater's result in place of the old record, and returns a clone of it.
   *
   * @throws Error("Server not found.") when no tracked server has that id.
   */
  async updateServer(
    serverId: string,
    updater: (server: VpsRecord) => VpsRecord
  ): Promise<VpsRecord> {
    const position = this.state.trackedServers.findIndex(({ id }) => id === serverId)
    if (position === -1) {
      throw new Error("Server not found.")
    }
    // The updater only ever sees a copy, so it cannot mutate the stored record directly.
    const draft = structuredClone(this.state.trackedServers[position])
    const updated = updater(draft)
    this.state.trackedServers[position] = updated
    return structuredClone(updated)
  }

  /** Replaces the held state with a deep clone of `state`. */
  async persistNewState(state: StoredState): Promise<void> {
    this.state = structuredClone(state)
  }
}
/**
 * Runtime subclass whose phase runtime is replaced by stubs. Each stubbed phase
 * appends its label to `phases`, so tests can assert which phases ran and in
 * what order.
 */
class TestNodeDeploymentRuntime extends NodeDeploymentRuntime {
  readonly phases: string[] = []

  protected override createPhaseRuntime(): NodeDeploymentPhaseRuntime {
    const record = (phase: string): void => {
      this.phases.push(phase)
    }
    // Factory for phases that only log their label and return the session they were given.
    const recordAndForward =
      (phase: string) =>
      async <TSession>(_context: unknown, session: TSession): Promise<TSession> => {
        record(phase)
        return session
      }

    return {
      shouldRunNbdeReconcile: () => true,
      executeProvisioningProvider: async () => undefined,
      executeKexec: async (context) => {
        // Also marks a metrics milestone when the context carries a metrics handle.
        context.metrics?.mark("kexecStarted")
        record("kexec")
        return {} as NodeDeploymentInstallSession
      },
      executeInstallOs: recordAndForward("installing-os"),
      executeRebootInstalledSystem: recordAndForward("rebooting-installed-system"),
      executeActivateCertificate: recordAndForward("activating-certificate"),
      executePostInstallTribes: recordAndForward("post-install-tribes"),
      executeReconcileCluster: recordAndForward("reconciling-cluster"),
      executeClearPublicationBarrier: recordAndForward("clearing-publication-barrier"),
      executeWaitReady: async () => {
        record("waiting-ready")
        return {
          nodePublicKey: "a".repeat(64),
          nbde: {
            luksUuid: "test-luks-uuid"
          }
        }
      },
      executeNbdeReconcile: async () => {
        record("nbde-reconciling")
      }
    }
  }
}
/**
 * Runtime subclass that exposes the `executeReconcileCluster` phase of the
 * inherited `createPhaseRuntime()` so a test can invoke that phase directly.
 */
class PhaseRuntimeTestNodeDeploymentRuntime extends NodeDeploymentRuntime {
  /** Builds a phase runtime and forwards both arguments to its `executeReconcileCluster`. */
  async executeReconcileClusterForTest(
    context: Parameters<NodeDeploymentPhaseRuntime["executeReconcileCluster"]>[0],
    session: Parameters<NodeDeploymentPhaseRuntime["executeReconcileCluster"]>[1]
  ): ReturnType<NodeDeploymentPhaseRuntime["executeReconcileCluster"]> {
    const phaseRuntime = this.createPhaseRuntime()
    return phaseRuntime.executeReconcileCluster(context, session)
  }
}
/**
 * Builds a phase runtime in which every phase is a no-op: session-taking phases
 * return the session they receive, NBDE reconcile is reported as not required,
 * and wait-ready resolves to a fixed public key with an empty `nbde` object.
 */
function createNoopPhaseRuntime(): NodeDeploymentPhaseRuntime {
  // Shared stub for every phase that simply hands its install session back.
  const forwardSession = async <TSession>(
    _context: unknown,
    installSession: TSession
  ): Promise<TSession> => installSession

  return {
    shouldRunNbdeReconcile: () => false,
    executeProvisioningProvider: async () => undefined,
    executeKexec: async () => ({}) as NodeDeploymentInstallSession,
    executeInstallOs: forwardSession,
    executeRebootInstalledSystem: forwardSession,
    executeActivateCertificate: forwardSession,
    executePostInstallTribes: forwardSession,
    executeReconcileCluster: forwardSession,
    executeClearPublicationBarrier: forwardSession,
    executeWaitReady: async () => ({ nodePublicKey: "a".repeat(64), nbde: {} }),
    executeNbdeReconcile: async () => undefined
  }
}
/**
 * View of a runtime used by the decommission tests to swap in a fake
 * `createDeploymentService` whose service exposes `syncClusterMembership`.
 */
type RuntimeDecommissionInternals = {
  createDeploymentService: () => {
    syncClusterMembership(request: {
      desiredClusterMembership: StoredState["clusterMembership"]
      clusterTargets?: Array<ManagedNodeBootstrapState>
      preserveExistingMembers?: boolean
    }): Promise<void>
  }
}
/**
 * View of a runtime used by the cluster tests to call `resolveClusterTargets`
 * and `buildDesiredClusterMembership` directly.
 */
type RuntimeClusterInternals = {
  resolveClusterTargets(
    currentServerId: string,
    currentManagedBootstrap: ManagedNodeBootstrapState
  ): { server: VpsRecord; managedBootstrap: ManagedNodeBootstrapState }[]
  buildDesiredClusterMembership(
    overrides?: Map<string, ManagedNodeBootstrapState>
  ): StoredState["clusterMembership"]
}
/**
 * View of a runtime whose `createDeploymentService` yields a service exposing
 * `reconcileClusterMembership`, so a test can substitute a fake for it.
 */
type RuntimeReconcileInternals = {
  createDeploymentService: () => {
    reconcileClusterMembership(installSession: NodeDeploymentInstallSession): Promise<void>
  }
}
/**
 * View of a runtime used by the NBDE tests to swap in a fake
 * `createDeploymentService` whose service exposes `reconcileNbdeNode`.
 */
type RuntimeNbdeInternals = {
  createDeploymentService: () => {
    reconcileNbdeNode(request: {
      server: VpsRecord
      nbde: {
        mode?: string
        peerTangNodeIds?: Array<string>
        peerTangUrls: Array<string>
        removeLocalBootKey?: boolean
      }
    }): Promise<void>
  }
}
// A persisted "running" reinstall execution must be flagged as interrupted and
// retryable, with the deployment marked failed, once initialize() runs against
// an unlocked store.
test("initialize marks running node executions as interrupted", async () => {
  const runningExecution = createExecutionState({
    operation: "reinstall",
    status: "running",
    phase: "installing-os",
    input: {
      kind: "reinstall",
      install: createInstallInput()
    }
  })
  const stateStore = new InMemoryStateStore(
    createState([createServer({ execution: runningExecution })])
  )
  const runtime = new NodeDeploymentRuntime({ stateStore: stateStore as never })

  await runtime.initialize()

  const tracked = stateStore.getStateOrThrow().trackedServers[0]
  assert.equal(tracked.execution?.status, "interrupted")
  assert.equal(tracked.execution?.retryable, true)
  assert.equal(tracked.deployment.status, "failed")
})
// Runs a reinstall through the stubbed runtime and checks that the recorder
// receives three runtime events, and that the publisher receives the same ones.
test("node deployment runtime emits durable/live deployment events", async () => {
  const stateStore = new InMemoryStateStore(createState([createServer()]))
  const liveEvents: DeploymentActivityEvent[] = []
  const durableEvents: DeploymentActivityEvent[] = []
  const runtime = new TestNodeDeploymentRuntime({
    stateStore: stateStore as never,
    publishDeploymentEvent: (event) => liveEvents.push(event),
    recordDeploymentEvent: async (event) => {
      durableEvents.push(event)
    }
  })

  await runtime.reinstallNode("node-a", {
    kexecImage: "/tmp/guix-kexec-installer.tar.gz",
    confirmDataLoss: true
  })

  // Compare only the identifying fields of each recorded event.
  const summaries = durableEvents.map(({ source, nodeId, operation, phase, state }) => [
    source,
    nodeId,
    operation,
    phase,
    state
  ])
  assert.deepEqual(summaries, [
    ["runtime", "node-a", "reinstall", undefined, "started"],
    ["runtime", "node-a", "reinstall", "kexec", "started"],
    ["runtime", "node-a", "reinstall", "succeeded", "succeeded"]
  ])
  assert.deepEqual(liveEvents, durableEvents)
})
// With the store constructed as locked (second argument `false`), initialize()
// must leave a persisted "running" execution untouched.
test("initialize is a no-op while the state store is locked", async () => {
  const runningExecution = createExecutionState({
    operation: "reinstall",
    status: "running",
    phase: "installing-os",
    input: {
      kind: "reinstall",
      install: createInstallInput()
    }
  })
  const lockedStore = new InMemoryStateStore(
    createState([createServer({ execution: runningExecution })]),
    false
  )
  const runtime = new NodeDeploymentRuntime({ stateStore: lockedStore as never })

  await runtime.initialize()

  const tracked = lockedStore.getStateOrThrow().trackedServers[0]
  assert.equal(tracked.execution?.status, "running")
})
// After provisionVps for an already tracked server, the stored record must carry
// a managed bootstrap with the expected public IPv4 and a "provisioned" phase.
test("provisionVps persists bootstrap state", async () => {
  const stateStore = new InMemoryStateStore(createState([createServer()]))
  const runtime = new NodeDeploymentRuntime({ stateStore: stateStore as never })

  await runtime.provisionVps({ id: "node-a" } as never, {
    provider: "hetzner",
    providerId: "hetzner"
  })

  const tracked = stateStore.getStateOrThrow().trackedServers[0]
  assert.equal(tracked.managedBootstrap?.publicIpv4, "203.0.113.10")
  assert.equal(tracked.execution?.phase, "provisioned")
})
// The install input recorded by provisionVps takes its boot mode from the tracked
// server ("efi") unless the request names one explicitly ("bios").
test("provisionVps uses product boot mode unless request overrides it", async () => {
  const stateStore = new InMemoryStateStore(createState([createServer({ bootMode: "efi" })]))
  const runtime = new NodeDeploymentRuntime({ stateStore: stateStore as never })

  // First run: no boot mode in the request.
  await runtime.provisionVps({ id: "node-a" } as never, {
    provider: "hetzner",
    providerId: "hetzner"
  })
  const inheritedInput = stateStore.getStateOrThrow().trackedServers[0].execution?.input
  assert.equal(inheritedInput?.kind, "add")
  assert.equal(inheritedInput.kind === "add" ? inheritedInput.install.bootMode : undefined, "efi")

  // Second run: the request overrides the boot mode.
  await runtime.provisionVps({ id: "node-a" } as never, {
    provider: "hetzner",
    providerId: "hetzner",
    bootMode: "bios"
  })
  const overriddenInput = stateStore.getStateOrThrow().trackedServers[0].execution?.input
  assert.equal(overriddenInput?.kind, "add")
  assert.equal(
    overriddenInput.kind === "add" ? overriddenInput.install.bootMode : undefined,
    "bios"
  )
})
// For a manually managed server, the install request built by the runtime must
// carry the password stored in the scheme's manual config as `userPassword`.
test("manual install requests carry the user password for sudo escalation", async () => {
  const fixture = createState([
    createServer({
      providerId: "manual",
      providerKind: "manual",
      sshUsername: "debian",
      managedBootstrap: createManagedBootstrap("1", "node-a", "203.0.113.10")
    })
  ])
  fixture.scheme.servers = [
    {
      id: "node-a",
      providerKind: "manual",
      manualConfig: {
        publicIp: "203.0.113.10",
        username: "debian",
        password: "user-secret"
      }
    } as never
  ]
  const stateStore = new InMemoryStateStore(fixture)
  // Cast to reach buildInstallRequest, which is not part of the public typing used here.
  const internals = new NodeDeploymentRuntime({
    stateStore: stateStore as never
  }) as unknown as {
    buildInstallRequest(plan: unknown): Promise<{ userPassword?: string }>
  }

  const installRequest = await internals.buildInstallRequest({
    kind: "add",
    serverId: "node-a",
    request: {},
    install: createInstallInput()
  })

  assert.equal(installRequest.userPassword, "user-secret")
})
// Starting from an empty state, provisionVps must invoke the injected
// provisionManagedServer exactly once, leaving one tracked server in the store.
test("provisionVps provisions when no tracked server exists", async () => {
  const stateStore = new InMemoryStateStore(createState([]))
  let provisionCount = 0
  const runtime = new NodeDeploymentRuntime({
    stateStore: stateStore as never,
    provisionManagedServer: async () => {
      provisionCount += 1
      const created = createServer()
      await stateStore.persistNewState({
        ...stateStore.getStateOrThrow(),
        trackedServers: [created]
      })
      return created
    }
  })

  await runtime.provisionVps({ id: "node-a" } as never, {
    provider: "hetzner",
    providerId: "hetzner"
  })

  assert.equal(provisionCount, 1)
  assert.equal(stateStore.getStateOrThrow().trackedServers.length, 1)
})
// Drives a reinstall and then a retry of a failed reinstall through the stubbed
// runtime; both runs must execute the same full phase sequence and end "succeeded".
test("reinstall and retry drive the shared node deployment actor", async () => {
  const stateStore = new InMemoryStateStore(createState([createServer()]))
  const runtime = new TestNodeDeploymentRuntime({ stateStore: stateStore as never })
  const fullPhaseSequence = [
    "kexec",
    "installing-os",
    "rebooting-installed-system",
    "activating-certificate",
    "post-install-tribes",
    "waiting-ready",
    "reconciling-cluster",
    "clearing-publication-barrier",
    "nbde-reconciling"
  ]
  const request: NodeReinstallRequest = {
    kexecImage: "/tmp/guix-kexec-installer.tar.gz",
    confirmDataLoss: true
  }

  await runtime.reinstallNode("node-a", request)
  assert.deepEqual(runtime.phases, fullPhaseSequence)
  assert.equal(stateStore.getStateOrThrow().trackedServers[0].execution?.status, "succeeded")

  // Overwrite the execution with a failed reinstall so retryNode has a run to retry.
  await stateStore.updateServer("node-a", (server) => ({
    ...server,
    execution: createExecutionState({
      operation: "reinstall",
      status: "failed",
      phase: "failed",
      input: {
        kind: "reinstall",
        install: createInstallInput()
      },
      lastError: "transient failure"
    })
  }))
  // Reset the phase log so only the retry's phases are compared.
  runtime.phases.length = 0

  await runtime.retryNode("node-a")
  assert.deepEqual(runtime.phases, fullPhaseSequence)
  assert.equal(stateStore.getStateOrThrow().trackedServers[0].execution?.status, "succeeded")
})
// When the tracked provider image id still appears in the product catalog, the
// reinstall must call the image-reset hook with that same image id.
test("reinstall reuses the tracked provider image when it is still in the current catalog", async () => {
  const trackedServer = createServer({
    providerImageId: "ubuntu-24.04",
    image: "ubuntu-24.04"
  })
  const stateStore = new InMemoryStateStore(createState([trackedServer], [createProvider(true)]))
  const imageResets: string[] = []
  const runtime = new TestNodeDeploymentRuntime({
    stateStore: stateStore as never,
    getProductCatalog: async () =>
      createProductCatalog([
        createCatalogProduct("CAX11", "ubuntu-24.04"),
        createCatalogProduct("CAX21", "ubuntu-26.04")
      ]),
    reinstallTrackedServerFromImage: async (serverId, imageId) => {
      imageResets.push(`${serverId}:${imageId}`)
      return stateStore.getStateOrThrow().trackedServers[0]
    }
  })

  await runtime.reinstallNode("node-a", { confirmDataLoss: true })

  assert.deepEqual(imageResets, ["node-a:ubuntu-24.04"])
  assert.equal(stateStore.getStateOrThrow().trackedServers[0].execution?.operation, "reinstall")
})
// When the tracked image id is absent from the catalog, the reinstall must call
// the image-reset hook with the catalog image listed for the server's plan (CAX11).
test("reinstall selects the latest matching catalog image when the tracked provider image is stale", async () => {
  const staleServer = createServer({
    planName: "CAX11",
    providerImageId: "ubuntu-22.04",
    image: "ubuntu-22.04"
  })
  const stateStore = new InMemoryStateStore(createState([staleServer], [createProvider(true)]))
  const imageResets: string[] = []
  const runtime = new TestNodeDeploymentRuntime({
    stateStore: stateStore as never,
    getProductCatalog: async () =>
      createProductCatalog([
        createCatalogProduct("CAX11", "ubuntu-26.04"),
        createCatalogProduct("CAX21", "ubuntu-26.04-large")
      ]),
    reinstallTrackedServerFromImage: async (serverId, imageId) => {
      imageResets.push(`${serverId}:${imageId}`)
      return stateStore.getStateOrThrow().trackedServers[0]
    }
  })

  await runtime.reinstallNode("node-a", { confirmDataLoss: true })

  assert.deepEqual(imageResets, ["node-a:ubuntu-26.04"])
})
// With a provider fixture created via createProvider(false), the reinstall must
// never call the image-reset hook but still record a "reinstall" execution.
test("reinstall skips provider image reset when the provider does not support it", async () => {
  const stateStore = new InMemoryStateStore(
    createState([createServer()], [createProvider(false)])
  )
  const imageResets: string[] = []
  const runtime = new TestNodeDeploymentRuntime({
    stateStore: stateStore as never,
    getProductCatalog: async () =>
      createProductCatalog([createCatalogProduct("CAX11", "ubuntu-26.04")]),
    reinstallTrackedServerFromImage: async (serverId, imageId) => {
      imageResets.push(`${serverId}:${imageId}`)
      return stateStore.getStateOrThrow().trackedServers[0]
    }
  })

  await runtime.reinstallNode("node-a", { confirmDataLoss: true })

  assert.deepEqual(imageResets, [])
  assert.equal(stateStore.getStateOrThrow().trackedServers[0].execution?.operation, "reinstall")
})
// Reinstalling the single tracked node (which has a managed bootstrap) without
// a request must reject with the data-loss confirmation error.
test("reinstall requires confirmation when it would reinitialize the only managed cluster node", async () => {
  const onlyNode = createServer({
    managedBootstrap: createManagedBootstrap("1", "node-a", "203.0.113.10")
  })
  const stateStore = new InMemoryStateStore(createState([onlyNode]))
  const runtime = new TestNodeDeploymentRuntime({ stateStore: stateStore as never })

  await assert.rejects(runtime.reinstallNode("node-a"), /Confirm data loss to continue/)
})
// Even when the single managed node is in "error" status with a failed
// deployment, reinstalling it without confirmation must still be rejected.
test("reinstall requires confirmation for an unhealthy managed node when no healthy peer exists", async () => {
  const unhealthyNode = createServer({
    status: "error",
    deployment: {
      status: "failed",
      snippets: []
    },
    managedBootstrap: createManagedBootstrap("1", "node-a", "203.0.113.10")
  })
  const stateStore = new InMemoryStateStore(createState([unhealthyNode]))
  const runtime = new TestNodeDeploymentRuntime({ stateStore: stateStore as never })

  await assert.rejects(runtime.reinstallNode("node-a"), /Confirm data loss to continue/)
})
// With a second managed node tracked alongside node-a, reinstalling node-a
// without a confirmation request must proceed and record a "reinstall" execution.
test("reinstall does not require data-loss confirmation when another healthy node exists", async () => {
  const nodeA = createServer({
    id: "node-a",
    publicIp: "203.0.113.10",
    managedBootstrap: createManagedBootstrap("1", "node-a", "203.0.113.10")
  })
  const nodeB = createServer({
    id: "node-b",
    publicIp: "203.0.113.11",
    managedBootstrap: createManagedBootstrap("2", "node-b", "203.0.113.11")
  })
  const stateStore = new InMemoryStateStore(createState([nodeA, nodeB]))
  const runtime = new TestNodeDeploymentRuntime({ stateStore: stateStore as never })

  await runtime.reinstallNode("node-a")

  assert.equal(stateStore.getStateOrThrow().trackedServers[0].execution?.operation, "reinstall")
})
// Injects a fake provisioning-metrics collector and checks that a reinstall
// starts one "reinstall" run, marks "kexecStarted" (emitted by the stubbed kexec
// phase through the context), and finishes the run as "succeeded".
test("node deployment actor exposes provisioning metrics on machine context", async () => {
  await writeFile("/tmp/guix-kexec-installer.tar.gz", "test", "utf8")
  const stateStore = new InMemoryStateStore(createState([createServer()]))
  const milestones: string[] = []
  const results: string[] = []
  const startedOperations: string[] = []
  const runtime = new TestNodeDeploymentRuntime({
    stateStore: stateStore as never,
    provisioningMetrics: {
      startRun: (input) => {
        startedOperations.push(input.operation)
        return {
          mark: (milestone) => {
            milestones.push(milestone)
          },
          mergeMetadata: () => undefined,
          finish: async (input) => {
            results.push(input.result)
          }
        }
      }
    }
  })

  await runtime.reinstallNode("node-a", {
    kexecImage: "/tmp/guix-kexec-installer.tar.gz",
    confirmDataLoss: true
  })

  assert.deepEqual(startedOperations, ["reinstall"])
  assert.ok(milestones.includes("kexecStarted"))
  assert.deepEqual(results, ["succeeded"])
})
// An "add" execution that failed in the reconciling-cluster phase must resume
// from that phase on retry rather than restarting at kexec.
test("retry resumes add from a later persisted phase without going back to kexec", async () => {
  await writeFile("/tmp/guix-kexec-installer.tar.gz", "test", "utf8")
  const failedAdd = createExecutionState({
    operation: "add",
    status: "failed",
    phase: "reconciling-cluster",
    input: {
      kind: "add",
      install: createInstallInput()
    },
    lastError: "upstream unavailable"
  })
  const stateStore = new InMemoryStateStore(
    createState([
      createServer({
        sshPublicKey: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITest legion@test",
        managedBootstrap: {
          nodePrivateKey: "1".repeat(64),
          nodeName: "node-a",
          publicIpv4: "203.0.113.10",
          publicPort: 443,
          publicScheme: "https"
        },
        execution: failedAdd
      })
    ])
  )
  const runtime = new TestNodeDeploymentRuntime({ stateStore: stateStore as never })

  await runtime.retryNode("node-a")

  assert.deepEqual(runtime.phases, [
    "reconciling-cluster",
    "clearing-publication-barrier",
    "nbde-reconciling"
  ])
  assert.equal(stateStore.getStateOrThrow().trackedServers[0].execution?.status, "succeeded")
})
// Reconciling NBDE for two nodes at once must leave both tracked servers with a
// succeeded "nbde-reconcile" execution.
test("bulk NBDE reconcile updates execution state on every target node", async () => {
  const stateStore = new InMemoryStateStore(
    createState([
      createServer({ id: "node-a" }),
      createServer({ id: "node-b", publicIp: "203.0.113.11" })
    ])
  )
  const runtime = new TestNodeDeploymentRuntime({ stateStore: stateStore as never })

  await runtime.reconcileNbde(["node-a", "node-b"])

  // Check operation, then status, for the first and then the second tracked server.
  for (const position of [0, 1]) {
    assert.equal(
      stateStore.getStateOrThrow().trackedServers[position].execution?.operation,
      "nbde-reconcile"
    )
    assert.equal(
      stateStore.getStateOrThrow().trackedServers[position].execution?.status,
      "succeeded"
    )
  }
})
// Reconciles NBDE for three managed nodes through a stubbed deployment service
// and verifies that each node is asked for, and then persists, "clustered" mode
// with the other two nodes as Tang peers and no local boot key.
test("bulk NBDE reconcile derives clustered policy from a stable target snapshot", async () => {
  const store = new InMemoryStateStore(
    createState([
      createServer({
        id: "node-a",
        publicIp: "203.0.113.10",
        managedBootstrap: createManagedBootstrap("1", "node-a", "203.0.113.10")
      }),
      createServer({
        id: "node-b",
        publicIp: "203.0.113.11",
        managedBootstrap: createManagedBootstrap("2", "node-b", "203.0.113.11")
      }),
      createServer({
        id: "node-c",
        publicIp: "203.0.113.12",
        managedBootstrap: createManagedBootstrap("3", "node-c", "203.0.113.12")
      })
    ])
  )
  const runtime = new NodeDeploymentRuntime({
    stateStore: store as never
  })
  const requests: Array<{ serverId: string; mode?: string; peerTangNodeIds?: string[] }> = []
  // Replace the deployment service so reconcile requests are captured instead of executed.
  const runtimeInternals = runtime as unknown as RuntimeNbdeInternals
  runtimeInternals.createDeploymentService = () => ({
    reconcileNbdeNode: async (request) => {
      requests.push({
        serverId: request.server.id,
        mode: request.nbde.mode,
        peerTangNodeIds: request.nbde.peerTangNodeIds
      })
    }
  })

  await runtime.reconcileNbde(["node-a", "node-b", "node-c"])

  // Peer order is not part of the contract, so compare sorted ids. Sort a copy:
  // Array.prototype.sort works in place and would otherwise reorder the arrays
  // owned by the runtime's requests and by the persisted state under test.
  const sortedCopy = (ids?: readonly string[]): string[] | undefined =>
    ids ? [...ids].sort() : undefined

  assert.deepEqual(
    requests.map((request) => [
      request.serverId,
      request.mode,
      sortedCopy(request.peerTangNodeIds)
    ]),
    [
      ["node-a", "clustered", ["node-b", "node-c"]],
      ["node-b", "clustered", ["node-a", "node-c"]],
      ["node-c", "clustered", ["node-a", "node-b"]]
    ]
  )
  assert.deepEqual(
    store
      .getStateOrThrow()
      .trackedServers.map((server) => [
        server.id,
        server.nbde?.mode,
        sortedCopy(server.nbde?.peerTangNodeIds),
        server.nbde?.localBootKeyPresent
      ]),
    [
      ["node-a", "clustered", ["node-b", "node-c"], false],
      ["node-b", "clustered", ["node-a", "node-c"], false],
      ["node-c", "clustered", ["node-a", "node-b"], false]
    ]
  )
})
// node-b starts in clustered mode with a local boot key still present; an NBDE
// reconcile of node-b must request removal of that key and persist
// `localBootKeyPresent: false`.
test("clustered NBDE reconcile removes temporary local boot keys", async () => {
  const bootstrapA = createManagedBootstrap("1", "node-a", "203.0.113.10")
  const bootstrapB = createManagedBootstrap("2", "node-b", "203.0.113.11")
  const stateStore = new InMemoryStateStore(
    createState([
      createServer({
        id: "node-a",
        publicIp: "203.0.113.10",
        managedBootstrap: bootstrapA
      }),
      createServer({
        id: "node-b",
        publicIp: "203.0.113.11",
        managedBootstrap: bootstrapB,
        nbde: {
          mode: "clustered",
          tangPort: 7654,
          recoverySecret: "test-recovery-secret",
          localBootKeyPresent: true,
          peerTangNodeIds: ["node-a"],
          installedAt: "2026-04-06T12:00:00.000Z"
        }
      }),
      createServer({
        id: "node-c",
        publicIp: "203.0.113.12",
        managedBootstrap: createManagedBootstrap("3", "node-c", "203.0.113.12")
      })
    ])
  )
  const runtime = new NodeDeploymentRuntime({ stateStore: stateStore as never })
  let requestedRemoval: boolean | undefined
  // Capture the removal flag from the reconcile request instead of executing it.
  const internals = runtime as unknown as RuntimeNbdeInternals
  internals.createDeploymentService = () => ({
    reconcileNbdeNode: async (request) => {
      requestedRemoval = request.nbde.removeLocalBootKey
    }
  })

  await runtime.reconcileNbde(["node-b"])

  assert.equal(requestedRemoval, true)
  assert.equal(stateStore.getStateOrThrow().trackedServers[1]?.nbde?.localBootKeyPresent, false)
})
// Reconfiguring a managed node must start at the post-install Tribes phase
// (no kexec/install/reboot phases) and finish as a succeeded "tribes-reconfigure".
test("reconfigureManagedNodes runs the post-install Tribes phases without kexec", async () => {
  const managedNode = createServer({
    sshPublicKey: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITest legion@test",
    managedBootstrap: {
      nodePrivateKey: "1".repeat(64),
      nodeName: "node-a",
      publicIpv4: "203.0.113.10",
      publicPort: 443,
      publicScheme: "https"
    }
  })
  const stateStore = new InMemoryStateStore(createState([managedNode]))
  const runtime = new TestNodeDeploymentRuntime({ stateStore: stateStore as never })

  await runtime.reconfigureManagedNodes(["node-a"])

  assert.deepEqual(runtime.phases, [
    "post-install-tribes",
    "waiting-ready",
    "reconciling-cluster",
    "clearing-publication-barrier",
    "nbde-reconciling"
  ])
  assert.equal(
    stateStore.getStateOrThrow().trackedServers[0].execution?.operation,
    "tribes-reconfigure"
  )
  assert.equal(stateStore.getStateOrThrow().trackedServers[0].execution?.status, "succeeded")
})
// Decommissions node-b from a state holding node-a, node-b and node-d (whose
// "add" execution failed), with persisted membership for node-a (active) and
// node-c (already decommissioned). The deployment service is stubbed so the
// sync request can be inspected. The test expects the synced membership to list
// node-b as decommissioned, node-a as active and node-c unchanged, the only
// cluster target to be node-a's address, preserveExistingMembers to be true,
// and the persisted membership to equal what was synced.
test("decommissionManagedNode syncs survivors and preserves decommissioned membership rows", async () => {
const state = createState([
createServer({
id: "node-a",
publicIp: "203.0.113.10",
managedBootstrap: {
nodePrivateKey: "1".repeat(64),
nodeName: "node-a",
publicIpv4: "203.0.113.10",
publicPort: 443,
publicScheme: "https",
installedAt: "2026-04-06T12:00:00.000Z"
}
}),
createServer({
id: "node-b",
publicIp: "203.0.113.11",
managedBootstrap: {
nodePrivateKey: "2".repeat(64),
nodeName: "node-b",
publicIpv4: "203.0.113.11",
publicPort: 443,
publicScheme: "https",
installedAt: "2026-04-07T12:00:00.000Z"
}
}),
// node-d carries a failed "add" execution and a failed deployment.
createServer({
id: "node-d",
publicIp: "203.0.113.13",
deployment: createDeploymentState("failed"),
execution: createExecutionState({
operation: "add",
phase: "waiting-ready",
status: "failed"
}),
managedBootstrap: {
nodePrivateKey: "4".repeat(64),
nodeName: "node-d",
publicIpv4: "203.0.113.13",
publicPort: 443,
publicScheme: "https",
installedAt: "2026-04-07T12:05:00.000Z"
}
})
])
state.tribe = {
name: "Demo Tribe",
domain: "example.com",
password: "bootstrap-secret"
}
state.clusterBootstrap = createTestClusterBootstrap()
// Persisted membership before the call: node-a active, node-c already decommissioned.
state.clusterMembership = [
{
nodeId: "node-a",
pubkey: deriveNodePublicKey("1".repeat(64)),
transportAddress: "wss://203.0.113.10:4413/relay",
scope: "all",
status: "active",
activatedAt: "2026-04-06T12:00:00.000Z"
},
{
nodeId: "node-c",
pubkey: "c".repeat(64),
transportAddress: "wss://203.0.113.12:4413/relay",
scope: "all",
status: "decommissioned",
activatedAt: "2026-04-05T12:00:00.000Z",
deactivatedAt: "2026-04-08T12:00:00.000Z"
}
]
const store = new InMemoryStateStore(state)
const runtime = new NodeDeploymentRuntime({
stateStore: store as never
})
let syncedMembership: StoredState["clusterMembership"] | null = null
let syncedClusterTargets: ManagedNodeBootstrapState[] | undefined
let syncedPreserveExistingMembers: boolean | undefined
// Replace the deployment service so the sync request is captured (as clones) instead of executed.
const runtimeInternals = runtime as unknown as RuntimeDecommissionInternals
runtimeInternals.createDeploymentService = () => ({
syncClusterMembership: async (request: {
desiredClusterMembership: StoredState["clusterMembership"]
clusterTargets?: ManagedNodeBootstrapState[]
preserveExistingMembers?: boolean
}) => {
syncedMembership = structuredClone(request.desiredClusterMembership)
syncedClusterTargets = structuredClone(request.clusterTargets)
syncedPreserveExistingMembers = request.preserveExistingMembers
}
})
await runtime.decommissionManagedNode("node-b")
assert.deepEqual(syncedMembership, [
{
nodeId: "node-b",
pubkey: deriveNodePublicKey("2".repeat(64)),
transportAddress: "wss://203.0.113.11:4413/relay",
scope: "all",
status: "decommissioned",
activatedAt: "2026-04-07T12:00:00.000Z",
// The deactivation timestamp is produced by the runtime, so the expected value is read back from the store.
deactivatedAt: store
.getStateOrThrow()
.clusterMembership.find((entry) => entry.nodeId === "node-b")?.deactivatedAt
},
{
nodeId: "node-a",
pubkey: deriveNodePublicKey("1".repeat(64)),
transportAddress: "wss://203.0.113.10:4413/relay",
scope: "all",
status: "active",
activatedAt: "2026-04-06T12:00:00.000Z",
deactivatedAt: undefined
},
{
nodeId: "node-c",
pubkey: "c".repeat(64),
transportAddress: "wss://203.0.113.12:4413/relay",
scope: "all",
status: "decommissioned",
activatedAt: "2026-04-05T12:00:00.000Z",
deactivatedAt: "2026-04-08T12:00:00.000Z"
}
])
// Only node-a is expected as a sync target (neither the decommissioned node-b nor node-d).
assert.deepEqual(
syncedClusterTargets?.map((target) => target.publicIpv4),
["203.0.113.10"]
)
assert.equal(syncedPreserveExistingMembers, true)
assert.deepEqual(store.getStateOrThrow().clusterMembership, syncedMembership)
})
// Decommissioning the only tracked node must not call syncClusterMembership at
// all and must leave the persisted cluster membership empty.
test("decommissionManagedNode clears local cluster membership when removing the last clustered node", async () => {
  const fixture = createState([
    createServer({
      managedBootstrap: {
        nodePrivateKey: "1".repeat(64),
        nodeName: "node-a",
        publicIpv4: "203.0.113.10",
        publicPort: 443,
        publicScheme: "https"
      }
    })
  ])
  fixture.tribe = {
    name: "Demo Tribe",
    domain: "example.com",
    password: "bootstrap-secret"
  }
  fixture.clusterBootstrap = createTestClusterBootstrap()
  fixture.clusterMembership = [
    {
      nodeId: "node-a",
      pubkey: deriveNodePublicKey("1".repeat(64)),
      transportAddress: "wss://203.0.113.10:4413/relay",
      scope: "all",
      status: "active",
      activatedAt: "2026-04-06T12:00:00.000Z"
    }
  ]
  const stateStore = new InMemoryStateStore(fixture)
  const runtime = new NodeDeploymentRuntime({ stateStore: stateStore as never })
  let syncCount = 0
  // Count sync attempts through a stubbed deployment service.
  const internals = runtime as unknown as RuntimeDecommissionInternals
  internals.createDeploymentService = () => ({
    syncClusterMembership: async () => {
      syncCount += 1
    }
  })

  await runtime.decommissionManagedNode("node-a")

  assert.equal(syncCount, 0)
  assert.deepEqual(stateStore.getStateOrThrow().clusterMembership, [])
})
// node-b is running its "add" in the waiting-ready phase while node-c is only
// "provisioned". Cluster targets and desired membership computed for node-b must
// contain node-a and node-b, but not node-c.
test("cluster install requests ignore planned peers until they have joined", () => {
  const bootstrapA = createManagedBootstrap("1", "node-a", "203.0.113.10")
  const bootstrapB = createManagedBootstrap("2", "node-b", "203.0.113.11")
  const bootstrapC = createManagedBootstrap("3", "node-c", "203.0.113.12")
  const fixture = createState([
    createServer({
      id: "node-a",
      publicIp: "203.0.113.10",
      managedBootstrap: bootstrapA
    }),
    createServer({
      id: "node-b",
      publicIp: "203.0.113.11",
      managedBootstrap: bootstrapB,
      deployment: createDeploymentState("running"),
      execution: createExecutionState({
        operation: "add",
        status: "running",
        phase: "waiting-ready",
        input: {
          kind: "add",
          install: createInstallInput()
        }
      })
    }),
    createServer({
      id: "node-c",
      publicIp: "203.0.113.12",
      managedBootstrap: bootstrapC,
      deployment: createDeploymentState("running"),
      execution: createExecutionState({
        operation: "add",
        status: "running",
        phase: "provisioned",
        input: {
          kind: "add",
          install: createInstallInput()
        }
      })
    })
  ])
  fixture.clusterMembership = [
    {
      nodeId: "node-a",
      pubkey: deriveNodePublicKey(bootstrapA.nodePrivateKey),
      transportAddress: "wss://203.0.113.10:4413/relay",
      scope: "all",
      status: "active",
      activatedAt: "2026-04-06T12:00:00.000Z"
    }
  ]
  const internals = new NodeDeploymentRuntime({
    stateStore: new InMemoryStateStore(fixture) as never
  }) as unknown as RuntimeClusterInternals

  const targetIds = internals
    .resolveClusterTargets("node-b", bootstrapB)
    .map((entry) => entry.server.id)
  assert.deepEqual(targetIds, ["node-a", "node-b"])

  const desiredIds = internals
    .buildDesiredClusterMembership(new Map([["node-b", bootstrapB]]))
    .map((entry) => entry.nodeId)
    .sort()
  assert.deepEqual(desiredIds, ["node-a", "node-b"])
})
// node-a and node-b are already active members; node-c is the node currently
// being added (waiting-ready). Targets and desired membership computed for
// node-c must contain all three nodes.
test("cluster membership includes a planned peer after it becomes the active install target", () => {
  const bootstrapA = createManagedBootstrap("1", "node-a", "203.0.113.10")
  const bootstrapB = createManagedBootstrap("2", "node-b", "203.0.113.11")
  const bootstrapC = createManagedBootstrap("3", "node-c", "203.0.113.12")
  const fixture = createState([
    createServer({
      id: "node-a",
      publicIp: "203.0.113.10",
      managedBootstrap: bootstrapA
    }),
    createServer({
      id: "node-b",
      publicIp: "203.0.113.11",
      managedBootstrap: bootstrapB
    }),
    createServer({
      id: "node-c",
      publicIp: "203.0.113.12",
      managedBootstrap: bootstrapC,
      deployment: createDeploymentState("running"),
      execution: createExecutionState({
        operation: "add",
        status: "running",
        phase: "waiting-ready",
        input: {
          kind: "add",
          install: createInstallInput()
        }
      })
    })
  ])
  fixture.clusterMembership = [
    {
      nodeId: "node-a",
      pubkey: deriveNodePublicKey(bootstrapA.nodePrivateKey),
      transportAddress: "wss://203.0.113.10:4413/relay",
      scope: "all",
      status: "active",
      activatedAt: "2026-04-06T12:00:00.000Z"
    },
    {
      nodeId: "node-b",
      pubkey: deriveNodePublicKey(bootstrapB.nodePrivateKey),
      transportAddress: "wss://203.0.113.11:4413/relay",
      scope: "all",
      status: "active",
      activatedAt: "2026-04-07T12:00:00.000Z"
    }
  ]
  const internals = new NodeDeploymentRuntime({
    stateStore: new InMemoryStateStore(fixture) as never
  }) as unknown as RuntimeClusterInternals

  const targetIds = internals
    .resolveClusterTargets("node-c", bootstrapC)
    .map((entry) => entry.server.id)
  assert.deepEqual(targetIds, ["node-a", "node-b", "node-c"])

  const desiredIds = internals
    .buildDesiredClusterMembership(new Map([["node-c", bootstrapC]]))
    .map((entry) => entry.nodeId)
    .sort()
  assert.deepEqual(desiredIds, ["node-a", "node-b", "node-c"])
})
// Only node-a is a persisted member. node-b is being added in parallel and has
// reached waiting-ready, while node-c (the current node) is in
// reconciling-cluster. Targets and desired membership computed for node-c must
// contain all three nodes.
test("cluster membership includes a parallel peer once it reaches bootstrap wait", () => {
  const bootstrapA = createManagedBootstrap("1", "node-a", "203.0.113.10")
  const bootstrapB = createManagedBootstrap("2", "node-b", "203.0.113.11")
  const bootstrapC = createManagedBootstrap("3", "node-c", "203.0.113.12")
  const fixture = createState([
    createServer({
      id: "node-a",
      publicIp: "203.0.113.10",
      managedBootstrap: bootstrapA
    }),
    createServer({
      id: "node-b",
      publicIp: "203.0.113.11",
      managedBootstrap: bootstrapB,
      deployment: createDeploymentState("running"),
      execution: createExecutionState({
        operation: "add",
        status: "running",
        phase: "waiting-ready",
        input: {
          kind: "add",
          install: createInstallInput()
        }
      })
    }),
    createServer({
      id: "node-c",
      publicIp: "203.0.113.12",
      managedBootstrap: bootstrapC,
      deployment: createDeploymentState("running"),
      execution: createExecutionState({
        operation: "add",
        status: "running",
        phase: "reconciling-cluster",
        input: {
          kind: "add",
          install: createInstallInput()
        }
      })
    })
  ])
  fixture.clusterMembership = [
    {
      nodeId: "node-a",
      pubkey: deriveNodePublicKey(bootstrapA.nodePrivateKey),
      transportAddress: "wss://203.0.113.10:4413/relay",
      scope: "all",
      status: "active",
      activatedAt: "2026-04-06T12:00:00.000Z"
    }
  ]
  const internals = new NodeDeploymentRuntime({
    stateStore: new InMemoryStateStore(fixture) as never
  }) as unknown as RuntimeClusterInternals

  const targetIds = internals
    .resolveClusterTargets("node-c", bootstrapC)
    .map((entry) => entry.server.id)
  assert.deepEqual(targetIds, ["node-a", "node-b", "node-c"])

  const desiredIds = internals
    .buildDesiredClusterMembership(new Map([["node-c", bootstrapC]]))
    .map((entry) => entry.nodeId)
    .sort()
  assert.deepEqual(desiredIds, ["node-a", "node-b", "node-c"])
})
// Runs the reconcile-cluster phase for node-c with a session request that lists
// only node-a and node-c, while the stored state already records node-b as an
// active member whose "add" execution has succeeded. Asserts that the request
// handed to the deployment service, and the membership left in the store, both
// contain node-a, node-b and node-c.
test("reconcile cluster phase recomputes membership after parallel peers finish", async () => {
const primaryBootstrap = createManagedBootstrap("1", "node-a", "203.0.113.10")
const joinedBootstrap = createManagedBootstrap("2", "node-b", "203.0.113.11")
const currentBootstrap = createManagedBootstrap("3", "node-c", "203.0.113.12")
// Tracked servers: node-a (fixture defaults), node-b (add succeeded, default
// "ready" deployment), node-c (add still running, phase "waiting-ready").
const state = createState([
createServer({
id: "node-a",
publicIp: "203.0.113.10",
managedBootstrap: primaryBootstrap
}),
createServer({
id: "node-b",
publicIp: "203.0.113.11",
managedBootstrap: joinedBootstrap,
execution: createExecutionState({
operation: "add",
status: "succeeded",
phase: "succeeded",
input: {
kind: "add",
install: createInstallInput()
}
})
}),
createServer({
id: "node-c",
publicIp: "203.0.113.12",
managedBootstrap: currentBootstrap,
deployment: createDeploymentState("running"),
execution: createExecutionState({
operation: "add",
status: "running",
phase: "waiting-ready",
input: {
kind: "add",
install: createInstallInput()
}
})
})
])
// Persisted membership already holds node-a and node-b, but not node-c.
state.clusterMembership = [
{
nodeId: "node-a",
pubkey: deriveNodePublicKey(primaryBootstrap.nodePrivateKey),
transportAddress: "wss://203.0.113.10:4413/relay",
scope: "all",
status: "active",
activatedAt: "2026-04-06T12:00:00.000Z"
},
{
nodeId: "node-b",
pubkey: deriveNodePublicKey(joinedBootstrap.nodePrivateKey),
transportAddress: "wss://203.0.113.11:4413/relay",
scope: "all",
status: "active",
activatedAt: "2026-04-07T12:00:00.000Z"
}
]
const store = new InMemoryStateStore(state)
const runtime = new PhaseRuntimeTestNodeDeploymentRuntime({
stateStore: store as never
})
// Holder for the request the stubbed deployment service receives.
const reconcileCapture: { request?: NodeDeploymentInstallSession["request"] } = {}
const runtimeInternals = runtime as unknown as RuntimeReconcileInternals
// Swap the deployment-service factory for a stub whose reconcile call only
// records a deep copy of the session request (structuredClone, so later
// mutation of the session cannot change what was captured).
runtimeInternals.createDeploymentService = () => ({
reconcileClusterMembership: async (session: NodeDeploymentInstallSession) => {
reconcileCapture.request = structuredClone(session.request)
}
})
await runtime.executeReconcileClusterForTest(
// First argument: operation context for adding node-c, with a no-op phase runtime.
{
operationId: "op-add-c",
phaseRuntime: createNoopPhaseRuntime(),
plan: {
kind: "add",
serverId: "node-c",
request: {
provider: "hetzner",
providerId: "provider-1",
offer: "default"
},
install: createInstallInput()
}
} as never,
// Second argument: install session whose membership and targets mention only
// node-a and node-c; node-b is absent from both lists.
{
request: {
server: state.trackedServers[2]!,
managedBootstrap: currentBootstrap,
desiredClusterMembership: [
{
nodeId: "node-a",
pubkey: deriveNodePublicKey(primaryBootstrap.nodePrivateKey),
transportAddress: "wss://203.0.113.10:4413/relay",
scope: "all",
status: "active",
activatedAt: "2026-04-06T12:00:00.000Z"
},
{
nodeId: "node-c",
pubkey: deriveNodePublicKey(currentBootstrap.nodePrivateKey),
transportAddress: "wss://203.0.113.12:4413/relay",
scope: "all",
status: "active",
activatedAt: "2026-04-06T12:00:00.000Z"
}
],
clusterTargets: [
{ server: state.trackedServers[0]!, managedBootstrap: primaryBootstrap },
{ server: state.trackedServers[2]!, managedBootstrap: currentBootstrap }
],
nbde: {
mode: "degraded",
tangPort: 7654,
recoverySecret: "test-recovery-secret",
localBootKeyPresent: true,
peerTangNodeIds: [],
installedAt: "2026-04-06T12:00:00.000Z",
peerTangUrls: []
},
acmeEmail: "hostmaster@example.com",
bootMode: "efi"
}
} as unknown as NodeDeploymentInstallSession
)
// The stub must have been invoked.
assert.ok(reconcileCapture.request)
// The request seen by the service lists all three servers as targets, in this order.
assert.deepEqual(
reconcileCapture.request.clusterTargets.map((entry) => entry.server.id),
["node-a", "node-b", "node-c"]
)
// The desired membership seen by the service includes node-b (order-insensitive).
assert.deepEqual(
reconcileCapture.request.desiredClusterMembership.map((entry) => entry.nodeId).sort(),
["node-a", "node-b", "node-c"]
)
// The membership left in the store after the phase contains all three nodes.
assert.deepEqual(
store
.getStateOrThrow()
.clusterMembership.map((entry) => entry.nodeId)
.sort(),
["node-a", "node-b", "node-c"]
)
})
// Runs the reconcile-cluster phase for node-c while node-b starts in phase
// "post-install-tribes". The stubbed service call moves node-b to
// "waiting-ready" part-way through. Asserts that the request captured at call
// time lists only node-a and node-c, yet the membership left in the store
// afterwards also contains node-b.
test("reconcile cluster phase persists peers that become ready while service reconciles", async () => {
const primaryBootstrap = createManagedBootstrap("1", "node-a", "203.0.113.10")
const parallelBootstrap = createManagedBootstrap("2", "node-b", "203.0.113.11")
const currentBootstrap = createManagedBootstrap("3", "node-c", "203.0.113.12")
// Tracked servers: node-a (fixture defaults), node-b (add running, phase
// "post-install-tribes"), node-c (add running, phase "waiting-ready").
const state = createState([
createServer({
id: "node-a",
publicIp: "203.0.113.10",
managedBootstrap: primaryBootstrap
}),
createServer({
id: "node-b",
publicIp: "203.0.113.11",
managedBootstrap: parallelBootstrap,
deployment: createDeploymentState("running"),
execution: createExecutionState({
operation: "add",
status: "running",
phase: "post-install-tribes",
input: {
kind: "add",
install: createInstallInput()
}
})
}),
createServer({
id: "node-c",
publicIp: "203.0.113.12",
managedBootstrap: currentBootstrap,
deployment: createDeploymentState("running"),
execution: createExecutionState({
operation: "add",
status: "running",
phase: "waiting-ready",
input: {
kind: "add",
install: createInstallInput()
}
})
})
])
// Persisted membership starts with node-a only.
state.clusterMembership = [
{
nodeId: "node-a",
pubkey: deriveNodePublicKey(primaryBootstrap.nodePrivateKey),
transportAddress: "wss://203.0.113.10:4413/relay",
scope: "all",
status: "active",
activatedAt: "2026-04-06T12:00:00.000Z"
}
]
const store = new InMemoryStateStore(state)
const runtime = new PhaseRuntimeTestNodeDeploymentRuntime({
stateStore: store as never
})
// Holder for the request the stubbed deployment service receives.
const reconcileCapture: { request?: NodeDeploymentInstallSession["request"] } = {}
const runtimeInternals = runtime as unknown as RuntimeReconcileInternals
// Stubbed service: first snapshot the incoming request, then change node-b's
// execution phase to "waiting-ready" through the store. The snapshot is taken
// before the update, so it reflects what the runtime computed beforehand.
runtimeInternals.createDeploymentService = () => ({
reconcileClusterMembership: async (session: NodeDeploymentInstallSession) => {
reconcileCapture.request = structuredClone(session.request)
await store.updateServer("node-b", (server) => ({
...server,
execution: server.execution
? {
...server.execution,
phase: "waiting-ready"
}
: server.execution
}))
}
})
await runtime.executeReconcileClusterForTest(
// First argument: operation context for adding node-c, with a no-op phase runtime.
{
operationId: "op-add-c",
phaseRuntime: createNoopPhaseRuntime(),
plan: {
kind: "add",
serverId: "node-c",
request: {
provider: "hetzner",
providerId: "provider-1",
offer: "default"
},
install: createInstallInput()
}
} as never,
// Second argument: install session with empty membership and target lists.
{
request: {
server: state.trackedServers[2]!,
managedBootstrap: currentBootstrap,
desiredClusterMembership: [],
clusterTargets: [],
nbde: {
mode: "degraded",
tangPort: 7654,
recoverySecret: "test-recovery-secret",
localBootKeyPresent: true,
peerTangNodeIds: [],
installedAt: "2026-04-06T12:00:00.000Z",
peerTangUrls: []
},
acmeEmail: "hostmaster@example.com",
bootMode: "efi"
}
} as unknown as NodeDeploymentInstallSession
)
// The stub must have been invoked.
assert.ok(reconcileCapture.request)
// At call time node-b was not part of the desired membership.
assert.deepEqual(
reconcileCapture.request.desiredClusterMembership.map((entry) => entry.nodeId).sort(),
["node-a", "node-c"]
)
// After the phase completes, the stored membership also includes node-b.
assert.deepEqual(
store
.getStateOrThrow()
.clusterMembership.map((entry) => entry.nodeId)
.sort(),
["node-a", "node-b", "node-c"]
)
})
/** Each listed provider's deployment profile is expected to default to boot mode "auto". */
test("provider deployment profiles define boot mode defaults", () => {
  const providerKinds = ["hetzner", "ovh", "scaleway"] as const
  for (const providerKind of providerKinds) {
    const profile = getProviderDeploymentProfile(providerKind)
    assert.equal(profile.defaultBootMode, "auto")
  }
})
/** Builds a fresh install-input fixture: a fixed kexec image path and EFI boot mode. */
function createInstallInput(): NodeExecutionInstallInput {
  const installInput: NodeExecutionInstallInput = {
    kexecImagePath: "/tmp/guix-kexec-installer.tar.gz",
    bootMode: "efi"
  }
  return installInput
}
/**
 * Builds a node execution-state fixture.
 *
 * `operation`, `status` and `phase` are required. Every other field may be
 * supplied through `overrides`; when omitted, `operationId` defaults to
 * "op-1", `retryable` to `true`, and both timestamps to the fixture instant.
 * `finishedAt`, `lastError` and `input` are passed through as given (possibly
 * `undefined`).
 *
 * @param overrides Required core fields plus any optional field to override.
 * @returns A new execution-state object; the key set is always the same.
 */
function createExecutionState(
  overrides: Partial<NodeExecutionState> &
    Pick<NodeExecutionState, "operation" | "phase" | "status">
): NodeExecutionState {
  const fixtureTimestamp = "2026-04-06T12:00:00.000Z"
  return {
    // `??` keeps the historical defaults when the caller does not override them.
    operationId: overrides.operationId ?? "op-1",
    operation: overrides.operation,
    status: overrides.status,
    phase: overrides.phase,
    retryable: overrides.retryable ?? true,
    startedAt: overrides.startedAt ?? fixtureTimestamp,
    updatedAt: overrides.updatedAt ?? fixtureTimestamp,
    finishedAt: overrides.finishedAt,
    lastError: overrides.lastError,
    input: overrides.input
  }
}
/**
 * Builds a managed-bootstrap fixture for one node.
 *
 * @param keySeed Text repeated 64 times to form `nodePrivateKey`.
 * @param nodeName Value stored as `nodeName`.
 * @param publicIpv4 Value stored as `publicIpv4`.
 * @returns A bootstrap record with port 443, scheme "https" and a fixed install time.
 */
function createManagedBootstrap(
  keySeed: string,
  nodeName: string,
  publicIpv4: string
): ManagedNodeBootstrapState {
  const nodePrivateKey = keySeed.repeat(64)
  const bootstrap: ManagedNodeBootstrapState = {
    nodePrivateKey,
    nodeName,
    publicIpv4,
    publicPort: 443,
    publicScheme: "https",
    installedAt: "2026-04-06T12:00:00.000Z"
  }
  return bootstrap
}
/** Builds a deployment-state fixture with the given status and an empty snippet list. */
function createDeploymentState(status: VpsRecord["deployment"]["status"]): VpsRecord["deployment"] {
  const deployment: VpsRecord["deployment"] = { status, snippets: [] }
  return deployment
}
/**
 * Builds a configured Hetzner provider fixture.
 *
 * @param computeReinstallFromImage Value of the matching capability flag; all
 *   other capability flags are fixed.
 */
function createProvider(computeReinstallFromImage: boolean): ProviderConfig {
  const capabilities: ProviderConfig["capabilities"] = {
    computeCatalog: true,
    computeProvisioning: true,
    computeReinstallFromImage,
    domainRegistration: false,
    standardDnsService: true,
    fallbackDnsService: false,
    dnsRecordManagement: true,
    firewallManagement: true
  }
  const provider: ProviderConfig = {
    id: "hetzner",
    kind: "hetzner",
    credentials: {
      kind: "hetzner",
      apiKey: "token"
    },
    capabilities,
    configured: true,
    createdAt: "2026-04-06T12:00:00.000Z"
  }
  return provider
}
/**
 * Wraps the given products in a Hetzner catalog fixture marked as the default
 * provider, with a fixed generation time and an empty disclaimer.
 */
function createProductCatalog(products: ProductCatalog["products"]): ProductCatalog {
  const catalog: ProductCatalog = {
    providerKind: "hetzner",
    isDefaultProvider: true,
    products,
    generatedAt: "2026-04-06T12:00:00.000Z",
    disclaimer: ""
  }
  return catalog
}
/**
 * Builds a single Hetzner catalog product fixture.
 *
 * @param planName Used as product id segment, provider product id, label,
 *   plan name and description.
 * @param image Used as product id segment and as the `image` field.
 * @returns A product priced identically in its default price and its only
 *   price entry (region "fsn1").
 */
function createCatalogProduct(planName: string, image: string): ProductCatalog["products"][number] {
  const productId = ["hetzner", planName, image].join(":")
  const product: ProductCatalog["products"][number] = {
    id: productId,
    providerKind: "hetzner",
    providerProductId: planName,
    label: planName,
    planName,
    description: planName,
    architecture: "x86_64",
    category: "shared",
    cores: 2,
    cpuType: "shared",
    memoryGb: 4,
    diskGb: 40,
    storageType: "ssd",
    image,
    bootMode: "auto",
    defaultRegion: "fsn1",
    availableRegions: ["fsn1"],
    // Written out twice on purpose: defaultPrice and prices[0] stay distinct objects.
    defaultPrice: {
      region: "fsn1",
      hourlyGrossEur: 0.006,
      hourlyNetEur: 0.005,
      monthlyGrossEur: 4,
      monthlyNetEur: 3.36
    },
    prices: [
      {
        region: "fsn1",
        hourlyGrossEur: 0.006,
        hourlyNetEur: 0.005,
        monthlyGrossEur: 4,
        monthlyNetEur: 3.36
      }
    ],
    commercialMetadata: {
      effectiveMonthlyGrossEur: 4,
      source: "provider",
      notes: []
    },
    recommended: false
  }
  return product
}
/**
 * Builds a stored-state fixture around the given tracked servers.
 *
 * @param trackedServers Servers placed in `trackedServers` (same array instance).
 * @param providers Providers placed in `providers`; defaults to an empty list.
 * @returns State at the current app state version with the test cluster
 *   bootstrap, an empty cluster membership and empty scheme/actual sections.
 */
function createState(trackedServers: VpsRecord[], providers: ProviderConfig[] = []): StoredState {
  const tribe: StoredState["tribe"] = {
    name: "Test Tribe",
    domain: "example.test",
    password: "test-tribe-password"
  }
  const emptyScheme: StoredState["scheme"] = {
    servers: [],
    domains: [],
    dnsHosts: [],
    dnsZones: []
  }
  const emptyActual: StoredState["actual"] = {
    servers: [],
    domains: [],
    dnsZones: []
  }
  const state: StoredState = {
    version: APP_STATE_VERSION,
    tribe,
    clusterBootstrap: createTestClusterBootstrap(),
    clusterMembership: [],
    profile: null,
    providers,
    bindings: [],
    pendingProvisioning: [],
    trackedServers,
    scheme: emptyScheme,
    actual: emptyActual,
    tutorial: { completed: false }
  }
  return state
}
/**
 * Builds a tracked-server fixture.
 *
 * Defaults describe a running Hetzner primary node "node-a" with a "ready"
 * deployment and degraded NBDE settings. Any key present in `overrides`
 * replaces the default wholesale (shallow merge, overrides win).
 *
 * @param overrides Fields to replace on the default record.
 */
function createServer(overrides: Partial<VpsRecord> = {}): VpsRecord {
  const fixtureTimestamp = "2026-04-06T12:00:00.000Z"
  const defaults: VpsRecord = {
    id: "node-a",
    providerId: "hetzner",
    providerKind: "hetzner",
    role: "primary",
    sshUsername: "root",
    sshPrivateKey: "private-key",
    status: "running",
    publicIp: "203.0.113.10",
    region: "fsn1",
    planName: "CAX11",
    commercialMetadata: {
      effectiveMonthlyGrossEur: 4,
      source: "provider",
      notes: []
    },
    deployment: {
      status: "ready",
      snippets: []
    },
    nbde: {
      mode: "degraded",
      tangPort: 7654,
      recoverySecret: "test-recovery-secret",
      localBootKeyPresent: true,
      peerTangNodeIds: [],
      installedAt: fixtureTimestamp
    },
    createdAt: fixtureTimestamp,
    updatedAt: fixtureTimestamp
  }
  return { ...defaults, ...overrides }
}