Files
parrhesia/scripts/cloud_bench_monitoring.mjs
2026-03-19 23:45:57 +01:00

149 lines
4.8 KiB
JavaScript

// cloud_bench_monitoring.mjs — Prometheus + node_exporter setup and metrics collection.
//
// Installs monitoring on ephemeral benchmark VMs, collects all Prometheus
// metrics for a given time window via the HTTP API, and stores them as
// JSON artifacts.
// Generate the prometheus.yml scrape config and return it as a YAML string.
//
// Two jobs are always present: "node-server" (localhost:9100) and "relay"
// (localhost:4413, metrics path /metrics). When `clientIps` is non-empty a
// third job, "node-clients", scrapes `<ip>:9100` for every listed client.
//
// Params:
//   clientIps - array of client host/IP strings (default: []).
// Returns:
//   The config text, terminated by a trailing newline.
export function makePrometheusConfig({ clientIps = [] } = {}) {
  const scrapeConfigs = [
    {
      job_name: "node-server",
      static_configs: [{ targets: ["localhost:9100"] }],
    },
    {
      job_name: "relay",
      metrics_path: "/metrics",
      static_configs: [{ targets: ["localhost:4413"] }],
    },
  ];
  if (clientIps.length > 0) {
    scrapeConfigs.push({
      job_name: "node-clients",
      static_configs: [{ targets: clientIps.map((ip) => `${ip}:9100`) }],
    });
  }

  // Produce minimal YAML by hand (avoids adding a yaml dep).
  // Indentation is significant in YAML: every nesting level below is two
  // spaces deeper than its parent, and the keys of a list entry line up with
  // the first key after the "- " marker.
  const lines = [
    "global:",
    "  scrape_interval: 5s",
    "  evaluation_interval: 15s",
    "",
    "scrape_configs:",
  ];
  for (const sc of scrapeConfigs) {
    lines.push(`  - job_name: '${sc.job_name}'`);
    if (sc.metrics_path) {
      lines.push(`    metrics_path: '${sc.metrics_path}'`);
    }
    lines.push("    static_configs:");
    for (const st of sc.static_configs) {
      lines.push("      - targets:");
      for (const target of st.targets) {
        lines.push(`          - '${target}'`);
      }
    }
  }
  return `${lines.join("\n")}\n`;
}
// Install Prometheus + node_exporter on the server and node_exporter on clients.
// `ssh` is an async function matching the sshExec(ip, keyPath, cmd, opts) signature.
//
// Steps, in order:
//   1. apt-install prometheus and prometheus-node-exporter on the server.
//   2. Write the generated scrape config to /etc/prometheus/prometheus.yml.
//   3. Restart prometheus and enable/start the node exporter on the server.
//   4. If there are clients, install and start the node exporter on all of
//      them in parallel.
//   5. Poll the server's Prometheus HTTP API (up to 30 tries, 1s apart) until
//      it answers.
//
// Params:
//   serverIp  - address of the server VM.
//   clientIps - addresses of client VMs (default: []).
//   keyPath   - passed through unchanged as the second argument of `ssh`.
//   ssh       - remote command runner, see above.
// Any rejection from `ssh` propagates to the caller. Whether `ssh` rejects on
// a non-zero remote exit status depends on its implementation, which is not
// visible here.
export async function installMonitoring({ serverIp, clientIps = [], keyPath, ssh }) {
  const prometheusYml = makePrometheusConfig({ clientIps });

  // Server: install prometheus + node-exporter, write config, start
  console.log("[monitoring] installing prometheus + node-exporter on server");
  await ssh(serverIp, keyPath, [
    "export DEBIAN_FRONTEND=noninteractive",
    "apt-get update -qq",
    "apt-get install -y -qq prometheus prometheus-node-exporter >/dev/null 2>&1",
  ].join(" && "));

  // Write prometheus config. The heredoc delimiter is quoted ('PROMEOF'), so
  // the remote shell copies the body verbatim with no expansion and no extra
  // escaping is needed. The config text ends with a newline, which puts the
  // closing PROMEOF on its own line.
  await ssh(serverIp, keyPath, `cat > /etc/prometheus/prometheus.yml <<'PROMEOF'\n${prometheusYml}PROMEOF`);

  // Restart prometheus with the new config, ensure node-exporter is running
  await ssh(serverIp, keyPath, [
    "systemctl restart prometheus",
    "systemctl enable --now prometheus-node-exporter",
  ].join(" && "));

  // Clients: install node-exporter only (in parallel)
  if (clientIps.length > 0) {
    console.log(`[monitoring] installing node-exporter on ${clientIps.length} client(s)`);
    await Promise.all(
      clientIps.map((ip) =>
        ssh(ip, keyPath, [
          "export DEBIAN_FRONTEND=noninteractive",
          "apt-get update -qq",
          "apt-get install -y -qq prometheus-node-exporter >/dev/null 2>&1",
          "systemctl enable --now prometheus-node-exporter",
        ].join(" && "))
      )
    );
  }

  // Wait for Prometheus to start answering queries. The URL is quoted because
  // it contains `?`, which an unquoted shell word treats as a glob character.
  console.log("[monitoring] waiting for Prometheus to initialise");
  await ssh(serverIp, keyPath,
    'for i in $(seq 1 30); do curl -sf "http://localhost:9090/api/v1/query?query=up" >/dev/null 2>&1 && exit 0; sleep 1; done; echo "prometheus not ready" >&2; exit 1'
  );
  console.log("[monitoring] monitoring active");
}
// Fetch every Prometheus series for a time window through the range-query
// HTTP API on port 9090 of `serverIp`.
//
// Params:
//   serverIp  - host running Prometheus.
//   startTime - window start, forwarded as the `start` query parameter.
//   endTime   - window end, forwarded as the `end` query parameter.
//   step      - forwarded as the `step` query parameter (default: 5).
// Returns:
//   The `data` field of the API response on success. On any failure (non-OK
//   HTTP status, API status other than "success", network error, or the 60s
//   timeout) the problem is logged with console.error and null is returned.
export async function collectMetrics({ serverIp, startTime, endTime, step = 5 }) {
  // Single place for the "log and give up" path shared by all failure modes.
  const reportFailure = (detail) => {
    console.error(`[monitoring] ${detail}`);
    return null;
  };

  // The selector matches any series whose metric name is non-empty.
  const query = new URLSearchParams();
  query.set("query", '{__name__=~".+"}');
  query.set("start", startTime);
  query.set("end", endTime);
  query.set("step", String(step));
  const endpoint = `http://${serverIp}:9090/api/v1/query_range?${query}`;

  try {
    const response = await fetch(endpoint, { signal: AbortSignal.timeout(60_000) });
    if (!response.ok) {
      return reportFailure(`Prometheus query failed: ${response.status} ${response.statusText}`);
    }

    const payload = await response.json();
    return payload.status === "success"
      ? payload.data
      : reportFailure(`Prometheus query error: ${payload.error || "unknown"}`);
  } catch (err) {
    return reportFailure(`metrics collection failed: ${err.message}`);
  }
}
// Stop monitoring daemons on server and clients.
//
// Runs `systemctl stop prometheus prometheus-node-exporter` on the server and
// on every client in parallel. The remote command ends in `; true` and hides
// stderr, so a unit that is not present on a host does not fail the command.
//
// Teardown is best-effort: a failure of `ssh` for one host (rejection or
// synchronous throw) is logged as a warning and never propagates, so this
// function always resolves.
//
// Params:
//   serverIp  - address of the server VM.
//   clientIps - addresses of client VMs (default: []).
//   keyPath   - passed through unchanged as the second argument of `ssh`.
//   ssh       - async function matching the sshExec(ip, keyPath, cmd, opts) signature.
export async function stopMonitoring({ serverIp, clientIps = [], keyPath, ssh }) {
  const stopCommand = "systemctl stop prometheus prometheus-node-exporter 2>/dev/null; true";
  const allIps = [serverIp, ...clientIps];

  await Promise.all(
    allIps.map(async (ip) => {
      try {
        await ssh(ip, keyPath, stopCommand);
      } catch (err) {
        // Keep going: the remaining hosts should still be stopped.
        console.warn(`[monitoring] failed to stop monitoring on ${ip}: ${err?.message ?? err}`);
      }
    })
  );
  console.log("[monitoring] monitoring stopped");
}