bench: Cloud firewall
Some checks failed
CI / Test (OTP 27.2 / Elixir 1.18.2) (push) Failing after 0s
CI / Test (OTP 28.4 / Elixir 1.19.4 + E2E) (push) Failing after 0s

This commit is contained in:
2026-03-20 00:19:30 +01:00
parent 046f80591b
commit be9d348660
3 changed files with 154 additions and 209 deletions

View File

@@ -54,6 +54,7 @@ const DEFAULTS = {
keep: false,
quick: false,
monitoring: true,
yes: false,
warmEvents: 25000,
hotEvents: 250000,
bench: {
@@ -82,7 +83,7 @@ bench/cloud_artifacts/<run_id>/, and appends metadata + pointers to
bench/history.jsonl.
Options:
--datacenter <name> Initial datacenter selection (default: ${DEFAULTS.datacenter})
--datacenter <name|auto> Initial datacenter selection (default: ${DEFAULTS.datacenter})
--server-type <name> (default: ${DEFAULTS.serverType})
--client-type <name> (default: ${DEFAULTS.clientType})
--image-base <name> (default: ${DEFAULTS.imageBase})
@@ -126,15 +127,17 @@ Options:
--artifacts-dir <path> (default: ${DEFAULTS.artifactsDir})
--keep Keep cloud resources (no cleanup)
--no-monitoring Skip Prometheus + node_exporter setup
--yes Skip interactive prompts and proceed immediately
-h, --help
Notes:
- Requires hcloud, ssh, scp, ssh-keygen, git.
- Before provisioning, checks all datacenters for type availability and estimates ${ESTIMATE_WINDOW_LABEL} cost.
- In interactive terminals, prompts you to pick + confirm the datacenter.
- In interactive terminals, prompts you to pick + confirm the datacenter unless --yes is set.
- Caches built nostr-bench at _build/bench/nostr-bench and reuses it when valid.
- Auto-tunes Postgres/Redis/app pool sizing from server RAM + CPU for DB-backed targets.
- Randomizes target order per run and wipes persisted target data directories on each start.
- Creates a Hetzner Cloud firewall restricting inbound access to benchmark ports from known IPs only.
- Handles Ctrl-C / SIGTERM with best-effort cloud cleanup.
- Tries nix .#nostrBenchStaticX86_64Musl first; falls back to docker-built portable nostr-bench.
- If --parrhesia-image is omitted, requires nix locally.
@@ -267,6 +270,9 @@ function parseArgs(argv) {
case "--no-monitoring":
opts.monitoring = false;
break;
case "--yes":
opts.yes = true;
break;
case "--warm-events":
opts.warmEvents = intOpt(arg, argv[++i]);
break;
@@ -557,10 +563,15 @@ async function chooseDatacenter(opts) {
printDatacenterChoices(choices, opts);
const defaultChoice = choices.find((choice) => choice.name === opts.datacenter) || choices[0];
const wantsAutoDatacenter = opts.datacenter === "auto";
const defaultChoice = wantsAutoDatacenter
? choices[0]
: choices.find((choice) => choice.name === opts.datacenter) || choices[0];
if (!process.stdin.isTTY || !process.stdout.isTTY) {
if (!choices.some((choice) => choice.name === opts.datacenter)) {
const nonInteractiveOrYes = !process.stdin.isTTY || !process.stdout.isTTY || opts.yes;
if (nonInteractiveOrYes) {
if (!wantsAutoDatacenter && !choices.some((choice) => choice.name === opts.datacenter)) {
throw new Error(
`Requested datacenter ${opts.datacenter} is not currently compatible. Compatible: ${choices
.map((choice) => choice.name)
@@ -568,8 +579,15 @@ async function chooseDatacenter(opts) {
);
}
const modeLabel = opts.yes && process.stdin.isTTY && process.stdout.isTTY
? "auto-confirm mode (--yes)"
: "non-interactive mode";
const selectionLabel = wantsAutoDatacenter
? "auto cheapest compatible datacenter"
: `requested datacenter ${defaultChoice.name}`;
console.log(
`[plan] non-interactive mode: using datacenter ${opts.datacenter} (${ESTIMATE_WINDOW_LABEL} est gross=${formatEuro(defaultChoice.estimatedTotal.gross)} net=${formatEuro(defaultChoice.estimatedTotal.net)})`,
`[plan] ${modeLabel}: using ${selectionLabel} (${ESTIMATE_WINDOW_LABEL} est gross=${formatEuro(defaultChoice.estimatedTotal.gross)} net=${formatEuro(defaultChoice.estimatedTotal.net)})`,
);
return defaultChoice;
}
@@ -1396,6 +1414,8 @@ async function main() {
const createdServers = [];
let sshKeyCreated = false;
let firewallName = null;
let firewallCreated = false;
let cleanupPromise = null;
const cleanup = async () => {
@@ -1424,6 +1444,17 @@ async function main() {
);
}
if (firewallCreated) {
console.log("[cleanup] deleting firewall...");
await runCommand("hcloud", ["firewall", "delete", firewallName])
.then(() => {
console.log(`[cleanup] deleted firewall: ${firewallName}`);
})
.catch((error) => {
console.warn(`[cleanup] failed to delete firewall ${firewallName}: ${error.message || error}`);
});
}
if (sshKeyCreated) {
console.log("[cleanup] deleting ssh key...");
await runCommand("hcloud", ["ssh-key", "delete", keyName])
@@ -1530,6 +1561,42 @@ async function main() {
...clientInfos.map((client) => waitForSsh(client.ip, keyPath)),
]);
// Detect orchestrator public IP from the server's perspective.
const orchestratorIp = (
await sshExec(serverIp, keyPath, "echo $SSH_CLIENT")
).stdout.trim().split(/\s+/)[0];
// Create a firewall restricting inbound access to known benchmark IPs only.
firewallName = `${runId}-fw`;
const allBenchIps = [orchestratorIp, serverIp, ...clientInfos.map((c) => c.ip)];
const sourceIps = [...new Set(allBenchIps)].map((ip) => `${ip}/32`);
const firewallRules = [
{ direction: "in", protocol: "tcp", port: "22", source_ips: sourceIps, description: "SSH" },
{ direction: "in", protocol: "tcp", port: "3355", source_ips: sourceIps, description: "Haven" },
{ direction: "in", protocol: "tcp", port: "4413", source_ips: sourceIps, description: "Parrhesia" },
{ direction: "in", protocol: "tcp", port: "7777", source_ips: sourceIps, description: "strfry" },
{ direction: "in", protocol: "tcp", port: "8008", source_ips: sourceIps, description: "Nostream" },
{ direction: "in", protocol: "tcp", port: "8080", source_ips: sourceIps, description: "nostr-rs-relay" },
{ direction: "in", protocol: "tcp", port: "9090", source_ips: sourceIps, description: "Prometheus" },
{ direction: "in", protocol: "tcp", port: "9100", source_ips: sourceIps, description: "node_exporter" },
{ direction: "in", protocol: "icmp", source_ips: ["0.0.0.0/0", "::/0"], description: "ICMP" },
];
const rulesPath = path.join(tmpDir, "firewall-rules.json");
fs.writeFileSync(rulesPath, JSON.stringify(firewallRules));
await runCommand("hcloud", ["firewall", "create", "--name", firewallName, "--rules-file", rulesPath]);
firewallCreated = true;
for (const name of createdServers) {
await runCommand("hcloud", [
"firewall", "apply-to-resource", firewallName,
"--type", "server", "--server", name,
]);
}
console.log(`[firewall] ${firewallName} applied (sources: ${sourceIps.join(", ")})`);
console.log("[phase] install runtime dependencies on server node");
const serverInstallCmd = [
"set -euo pipefail",

View File

@@ -60,6 +60,10 @@ Benchmark commands
just bench cloud [args...] Cloud benchmark wrapper
just bench cloud-quick Cloud smoke profile
Cloud tip:
just bench cloud --yes --datacenter auto
-> auto-pick cheapest compatible DC and skip interactive confirmation
Cloud defaults:
targets = parrhesia-pg,parrhesia-memory,strfry,nostr-rs-relay,nostream,haven

View File

@@ -9,87 +9,68 @@ usage() {
usage:
./scripts/run_bench_cloud.sh [options] [-- extra args for cloud_bench_orchestrate.mjs]
Friendly wrapper around scripts/cloud_bench_orchestrate.mjs.
Thin wrapper around scripts/cloud_bench_orchestrate.mjs.
The orchestrator checks datacenter availability for your server/client types,
shows estimated 30m pricing, and asks for selection/confirmation in interactive terminals.
Behavior:
- Forwards args directly to the orchestrator.
- Adds convenience aliases:
--image IMAGE -> --parrhesia-image IMAGE
- Adds smoke defaults when --quick is set (unless already provided):
--server-type cx23
--client-type cx23
--runs 1
--clients 1
--connect-count 20
--connect-rate 20
--echo-count 20
--echo-rate 20
--echo-size 512
--event-count 20
--event-rate 20
--req-count 20
--req-rate 20
--req-limit 10
--keepalive-seconds 2
Defaults:
Inherited from scripts/cloud_bench_orchestrate.mjs.
This wrapper only passes explicit overrides (flags/env), plus --quick profile overrides.
Flags:
--quick Quick smoke profile (cx23/cx23, 1 run, 1 client, lower load)
--clients N Override client count
--runs N Override run count
--targets CSV Override targets
--datacenter NAME Override datacenter
--server-type NAME Override server type
--client-type NAME Override client type
--image IMAGE Use remote Parrhesia image (e.g. ghcr.io/...)
--git-ref REF Build Parrhesia image from git ref (default: HEAD)
--nostream-repo URL Override nostream repo (default: Cameri/nostream)
--nostream-ref REF Override nostream ref (default: main)
--haven-image IMAGE Override Haven image
--threads N Override nostr-bench worker threads (0 = auto)
--keep Keep cloud resources after run
Flags handled by this wrapper:
--quick
--image IMAGE
-h, --help
Environment overrides (all optional):
PARRHESIA_CLOUD_DATACENTER
PARRHESIA_CLOUD_SERVER_TYPE
PARRHESIA_CLOUD_CLIENT_TYPE
PARRHESIA_CLOUD_CLIENTS
PARRHESIA_BENCH_RUNS
PARRHESIA_CLOUD_TARGETS
PARRHESIA_CLOUD_PARRHESIA_IMAGE
PARRHESIA_CLOUD_GIT_REF
PARRHESIA_CLOUD_NOSTREAM_REPO
PARRHESIA_CLOUD_NOSTREAM_REF
PARRHESIA_CLOUD_HAVEN_IMAGE
Bench knobs (forwarded):
PARRHESIA_BENCH_CONNECT_COUNT
PARRHESIA_BENCH_CONNECT_RATE
PARRHESIA_BENCH_ECHO_COUNT
PARRHESIA_BENCH_ECHO_RATE
PARRHESIA_BENCH_ECHO_SIZE
PARRHESIA_BENCH_EVENT_COUNT
PARRHESIA_BENCH_EVENT_RATE
PARRHESIA_BENCH_REQ_COUNT
PARRHESIA_BENCH_REQ_RATE
PARRHESIA_BENCH_REQ_LIMIT
PARRHESIA_BENCH_KEEPALIVE_SECONDS
PARRHESIA_BENCH_THREADS
Everything else is passed through unchanged.
Examples:
# Default full cloud run
./scripts/run_bench_cloud.sh
# Quick smoke
./scripts/run_bench_cloud.sh --quick
# Use a GHCR image
./scripts/run_bench_cloud.sh --image ghcr.io/owner/parrhesia:latest
just bench cloud
just bench cloud --quick
just bench cloud --clients 2 --runs 1 --targets parrhesia-memory
just bench cloud --image ghcr.io/owner/parrhesia:latest --threads 4
just bench cloud --no-monitoring
just bench cloud --yes --datacenter auto
EOF
}
DATACENTER="${PARRHESIA_CLOUD_DATACENTER:-}"
SERVER_TYPE="${PARRHESIA_CLOUD_SERVER_TYPE:-}"
CLIENT_TYPE="${PARRHESIA_CLOUD_CLIENT_TYPE:-}"
CLIENTS="${PARRHESIA_CLOUD_CLIENTS:-}"
RUNS="${PARRHESIA_BENCH_RUNS:-}"
TARGETS="${PARRHESIA_CLOUD_TARGETS:-}"
PARRHESIA_IMAGE="${PARRHESIA_CLOUD_PARRHESIA_IMAGE:-}"
GIT_REF="${PARRHESIA_CLOUD_GIT_REF:-}"
NOSTREAM_REPO="${PARRHESIA_CLOUD_NOSTREAM_REPO:-}"
NOSTREAM_REF="${PARRHESIA_CLOUD_NOSTREAM_REF:-}"
HAVEN_IMAGE="${PARRHESIA_CLOUD_HAVEN_IMAGE:-}"
THREADS="${PARRHESIA_BENCH_THREADS:-}"
KEEP=0
QUICK=0
# has_opt KEY [ARG...]
#
# Succeeds (status 0) when any ARG is exactly KEY or begins with "KEY=",
# i.e. the option was supplied either as "--flag value" or "--flag=value".
# Returns status 1 when no ARG matches.
has_opt() {
  local needle="$1"
  shift

  local candidate
  # A bare `for name; do` walks the remaining positional parameters.
  for candidate; do
    case "$candidate" in
      "$needle" | "$needle="*)
        return 0
        ;;
    esac
  done

  return 1
}
EXTRA_ARGS=()
# add_default_if_missing KEY VALUE
#
# Appends "KEY VALUE" to the global ORCH_ARGS array unless has_opt already
# finds KEY (as "KEY" or "KEY=...") among the arguments collected so far, so
# an option given explicitly is never overridden by a profile default.
add_default_if_missing() {
  local key="$1"
  local value="$2"

  # The ${arr[@]+...} guard expands to nothing when ORCH_ARGS is empty; a bare
  # "${ORCH_ARGS[@]}" aborts with "unbound variable" under `set -u` on bash
  # releases older than 4.4.
  if ! has_opt "$key" ${ORCH_ARGS[@]+"${ORCH_ARGS[@]}"}; then
    ORCH_ARGS+=("$key" "$value")
  fi
}
ORCH_ARGS=()
QUICK=0
while [[ $# -gt 0 ]]; do
case "$1" in
@@ -99,156 +80,49 @@ while [[ $# -gt 0 ]]; do
;;
--quick)
QUICK=1
ORCH_ARGS+=("--quick")
shift
;;
--clients)
CLIENTS="$2"
shift 2
;;
--runs)
RUNS="$2"
shift 2
;;
--targets)
TARGETS="$2"
shift 2
;;
--datacenter)
DATACENTER="$2"
shift 2
;;
--server-type)
SERVER_TYPE="$2"
shift 2
;;
--client-type)
CLIENT_TYPE="$2"
shift 2
;;
--image)
PARRHESIA_IMAGE="$2"
if [[ $# -lt 2 ]]; then
echo "Missing value for --image" >&2
exit 1
fi
ORCH_ARGS+=("--parrhesia-image" "$2")
shift 2
;;
--git-ref)
GIT_REF="$2"
shift 2
;;
--nostream-repo)
NOSTREAM_REPO="$2"
shift 2
;;
--nostream-ref)
NOSTREAM_REF="$2"
shift 2
;;
--haven-image)
HAVEN_IMAGE="$2"
shift 2
;;
--threads)
THREADS="$2"
shift 2
;;
--keep)
KEEP=1
shift
;;
--)
shift
EXTRA_ARGS+=("$@")
ORCH_ARGS+=("$@")
break
;;
*)
echo "Unknown argument: $1" >&2
usage
exit 1
ORCH_ARGS+=("$1")
shift
;;
esac
done
if [[ "$QUICK" == "1" ]]; then
: "${SERVER_TYPE:=cx23}"
: "${CLIENT_TYPE:=cx23}"
: "${RUNS:=1}"
: "${CLIENTS:=1}"
add_default_if_missing "--server-type" "cx23"
add_default_if_missing "--client-type" "cx23"
add_default_if_missing "--runs" "1"
add_default_if_missing "--clients" "1"
: "${PARRHESIA_BENCH_CONNECT_COUNT:=20}"
: "${PARRHESIA_BENCH_CONNECT_RATE:=20}"
: "${PARRHESIA_BENCH_ECHO_COUNT:=20}"
: "${PARRHESIA_BENCH_ECHO_RATE:=20}"
: "${PARRHESIA_BENCH_ECHO_SIZE:=512}"
: "${PARRHESIA_BENCH_EVENT_COUNT:=20}"
: "${PARRHESIA_BENCH_EVENT_RATE:=20}"
: "${PARRHESIA_BENCH_REQ_COUNT:=20}"
: "${PARRHESIA_BENCH_REQ_RATE:=20}"
: "${PARRHESIA_BENCH_REQ_LIMIT:=10}"
: "${PARRHESIA_BENCH_KEEPALIVE_SECONDS:=2}"
add_default_if_missing "--connect-count" "20"
add_default_if_missing "--connect-rate" "20"
add_default_if_missing "--echo-count" "20"
add_default_if_missing "--echo-rate" "20"
add_default_if_missing "--echo-size" "512"
add_default_if_missing "--event-count" "20"
add_default_if_missing "--event-rate" "20"
add_default_if_missing "--req-count" "20"
add_default_if_missing "--req-rate" "20"
add_default_if_missing "--req-limit" "10"
add_default_if_missing "--keepalive-seconds" "2"
fi
CMD=(node scripts/cloud_bench_orchestrate.mjs)
if [[ -n "$DATACENTER" ]]; then
CMD+=(--datacenter "$DATACENTER")
fi
if [[ -n "$SERVER_TYPE" ]]; then
CMD+=(--server-type "$SERVER_TYPE")
fi
if [[ -n "$CLIENT_TYPE" ]]; then
CMD+=(--client-type "$CLIENT_TYPE")
fi
if [[ -n "$CLIENTS" ]]; then
CMD+=(--clients "$CLIENTS")
fi
if [[ -n "$RUNS" ]]; then
CMD+=(--runs "$RUNS")
fi
if [[ -n "$TARGETS" ]]; then
CMD+=(--targets "$TARGETS")
fi
if [[ -n "$NOSTREAM_REPO" ]]; then
CMD+=(--nostream-repo "$NOSTREAM_REPO")
fi
if [[ -n "$NOSTREAM_REF" ]]; then
CMD+=(--nostream-ref "$NOSTREAM_REF")
fi
if [[ -n "$HAVEN_IMAGE" ]]; then
CMD+=(--haven-image "$HAVEN_IMAGE")
fi
if [[ -n "$THREADS" ]]; then
CMD+=(--threads "$THREADS")
fi
if [[ -n "$PARRHESIA_IMAGE" ]]; then
CMD+=(--parrhesia-image "$PARRHESIA_IMAGE")
elif [[ -n "$GIT_REF" ]]; then
CMD+=(--git-ref "$GIT_REF")
fi
if [[ "$KEEP" == "1" ]]; then
CMD+=(--keep)
fi
# Forward bench knob envs if set
for kv in \
PARRHESIA_BENCH_CONNECT_COUNT \
PARRHESIA_BENCH_CONNECT_RATE \
PARRHESIA_BENCH_ECHO_COUNT \
PARRHESIA_BENCH_ECHO_RATE \
PARRHESIA_BENCH_ECHO_SIZE \
PARRHESIA_BENCH_EVENT_COUNT \
PARRHESIA_BENCH_EVENT_RATE \
PARRHESIA_BENCH_REQ_COUNT \
PARRHESIA_BENCH_REQ_RATE \
PARRHESIA_BENCH_REQ_LIMIT \
PARRHESIA_BENCH_KEEPALIVE_SECONDS
do
if [[ -n "${!kv:-}" ]]; then
flag="--$(echo "$kv" | tr '[:upper:]' '[:lower:]' | sed -E 's/^parrhesia_bench_//' | tr '_' '-')"
CMD+=("$flag" "${!kv}")
fi
done
CMD+=("${EXTRA_ARGS[@]}")
CMD=(node scripts/cloud_bench_orchestrate.mjs "${ORCH_ARGS[@]}")
printf 'Running cloud bench:\n %q' "${CMD[0]}"
for ((i=1; i<${#CMD[@]}; i++)); do