Expand relay metrics and observability

This commit is contained in:
2026-03-18 17:39:13 +01:00
parent c377ed4b62
commit c30449b318
16 changed files with 663 additions and 43 deletions

View File

@@ -6,6 +6,7 @@ defmodule Parrhesia.ApplicationTest do
test "starts the core supervision tree" do
assert is_pid(Process.whereis(Parrhesia.Supervisor))
assert is_pid(Process.whereis(Parrhesia.Telemetry))
assert is_pid(Process.whereis(Parrhesia.ConnectionStats))
assert is_pid(Process.whereis(Parrhesia.Config))
assert is_pid(Process.whereis(Parrhesia.Web.EventIngestLimiter))
assert is_pid(Process.whereis(Parrhesia.Web.IPEventIngestLimiter))

View File

@@ -28,7 +28,7 @@ defmodule Parrhesia.FaultInjectionGroupFlowTest do
test "kind 445 commit recovers cleanly after storage outage", %{
previous_storage: previous_storage
} do
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
group_event =
build_event(%{
@@ -62,7 +62,7 @@ defmodule Parrhesia.FaultInjectionGroupFlowTest do
test "reordered group flow remains deterministic after outage recovery", %{
previous_storage: previous_storage
} do
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
group_id = String.duplicate("b", 64)
now = System.system_time(:second)

View File

@@ -26,7 +26,7 @@ defmodule Parrhesia.FaultInjectionTest do
end
test "EVENT responds with error prefix when storage is unavailable" do
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
event = valid_event()
assert {:push, {:text, response}, _next_state} =
@@ -36,7 +36,7 @@ defmodule Parrhesia.FaultInjectionTest do
end
test "REQ closes with storage error when query fails" do
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
payload = JSON.encode!(["REQ", "sub-db-down", %{"kinds" => [1]}])
assert {:push, {:text, response}, ^state} =

View File

@@ -6,7 +6,12 @@ defmodule Parrhesia.Performance.LoadSoakTest do
@tag :performance
test "fanout enqueue/drain stays within relaxed p95 budget" do
{:ok, state} =
Connection.init(subscription_index: nil, max_outbound_queue: 10_000, trap_exit?: false)
Connection.init(
subscription_index: nil,
max_outbound_queue: 10_000,
trap_exit?: false,
track_population?: false
)
req_payload = JSON.encode!(["REQ", "sub-load", %{"kinds" => [1]}])

View File

@@ -9,10 +9,25 @@ defmodule Parrhesia.TelemetryTest do
assert [:parrhesia, :ingest, :duration, :ms] in metric_names
assert [:parrhesia, :query, :duration, :ms] in metric_names
assert [:parrhesia, :fanout, :duration, :ms] in metric_names
assert [:parrhesia, :fanout, :events_enqueued, :count] in metric_names
assert [:parrhesia, :ingest, :events, :count] in metric_names
assert [:parrhesia, :query, :requests, :count] in metric_names
assert [:parrhesia, :query, :results, :count] in metric_names
assert [:parrhesia, :connection, :outbound_queue, :depth] in metric_names
assert [:parrhesia, :connection, :outbound_queue, :drained_frames, :count] in metric_names
assert [:parrhesia, :connection, :outbound_queue, :dropped_events, :count] in metric_names
assert [:parrhesia, :connection, :outbound_queue, :pressure] in metric_names
assert [:parrhesia, :connection, :outbound_queue, :pressure_events, :count] in metric_names
assert [:parrhesia, :listener, :connections, :active] in metric_names
assert [:parrhesia, :listener, :subscriptions, :active] in metric_names
assert [:parrhesia, :rate_limit, :hits, :count] in metric_names
assert [:parrhesia, :db, :query, :count] in metric_names
assert [:parrhesia, :process, :mailbox, :depth] in metric_names
assert [:parrhesia, :maintenance, :purge_expired, :events, :count] in metric_names
assert [:parrhesia, :maintenance, :partition_retention, :dropped_partitions, :count] in metric_names
assert [:parrhesia, :vm, :memory, :binary, :bytes] in metric_names
end
test "emit/3 accepts traffic-class metadata" do

View File

@@ -6,7 +6,7 @@ defmodule Parrhesia.Web.ConformanceTest do
alias Parrhesia.Web.Connection
test "REQ -> EOSE emitted once and CLOSE emits CLOSED" do
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
req_payload = JSON.encode!(["REQ", "sub-e2e", %{"kinds" => [1]}])
@@ -26,7 +26,7 @@ defmodule Parrhesia.Web.ConformanceTest do
end
test "EVENT accepted path returns canonical OK frame" do
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
event = valid_event()
@@ -37,7 +37,7 @@ defmodule Parrhesia.Web.ConformanceTest do
end
test "wrapped kind 1059 welcome delivery is recipient-gated" do
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
recipient = String.duplicate("9", 64)
wrapped_welcome =
@@ -90,7 +90,7 @@ defmodule Parrhesia.Web.ConformanceTest do
end
test "kind 445 commit ACK implies durable visibility before wrapped welcome ACK" do
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
commit_event =
valid_event(%{
@@ -161,7 +161,7 @@ defmodule Parrhesia.Web.ConformanceTest do
Application.put_env(:parrhesia, :policies, previous_policies)
end)
{:ok, state} = Connection.init(subscription_index: nil)
{:ok, state} = Connection.init(subscription_index: nil, track_population?: false)
relay_list_event =
valid_event(%{

View File

@@ -251,7 +251,12 @@ defmodule Parrhesia.Web.ConnectionNIP43Test do
end
# Test helper: initialises a connection via Connection.init/1 and returns the
# unwrapped state. Caller-supplied opts take precedence; :subscription_index
# falls back to nil and :track_population? falls back to false.
defp connection_state(opts \\ []) do
# NOTE(review): this first init call looks like the pre-change line kept by the
# diff view; its `state` binding is rebound by the second init call below — confirm.
{:ok, state} = Connection.init(Keyword.put_new(opts, :subscription_index, nil))
opts =
opts
|> Keyword.put_new(:subscription_index, nil)
|> Keyword.put_new(:track_population?, false)
{:ok, state} = Connection.init(opts)
state
end

View File

@@ -1003,6 +1003,7 @@ defmodule Parrhesia.Web.ConnectionTest do
opts
|> Keyword.put_new(:subscription_index, nil)
|> Keyword.put_new(:trap_exit?, false)
|> Keyword.put_new(:track_population?, false)
{:ok, state} = Connection.init(opts)
state

View File

@@ -6,6 +6,8 @@ defmodule Parrhesia.Web.RouterTest do
alias Parrhesia.API.Sync
alias Parrhesia.Protocol.EventValidator
alias Parrhesia.Repo
alias Parrhesia.Telemetry
alias Parrhesia.Web.Listener
alias Parrhesia.Web.Router
@@ -51,6 +53,47 @@ defmodule Parrhesia.Web.RouterTest do
assert get_resp_header(conn, "content-type") == ["text/plain; charset=utf-8"]
end
test "GET /metrics includes exported relay counters and gauges" do
  # Emit one sample per metric family so each has a value when /metrics is fetched.
  Telemetry.emit(
    [:parrhesia, :ingest, :result],
    %{count: 1},
    %{traffic_class: :generic, outcome: :accepted, reason: :accepted}
  )

  Telemetry.emit(
    [:parrhesia, :listener, :population],
    %{connections: 2, subscriptions: 3},
    %{listener_id: :public}
  )

  Telemetry.emit(
    [:parrhesia, :rate_limit, :hit],
    %{count: 1},
    %{scope: :event_ingest_per_ip, traffic_class: :generic}
  )

  Telemetry.emit_vm_memory()

  # Presumably this query is what produces the db query counter asserted below —
  # verify against the telemetry handlers.
  _ = Repo.query!("SELECT 1")

  request = conn(:get, "/metrics")

  metrics_listener =
    listener(%{
      features: %{metrics: %{enabled: true, access: %{private_networks_only: true}}}
    })

  response = route_conn(request, metrics_listener)

  assert response.status == 200

  body = response.resp_body

  # Metric names and label pairs that must each appear somewhere in the response body.
  expected_fragments = [
    "parrhesia_ingest_events_count",
    "parrhesia_listener_connections_active",
    "listener_id=\"public\"",
    "parrhesia_rate_limit_hits_count",
    "scope=\"event_ingest_per_ip\"",
    "parrhesia_db_query_count",
    "repo_role=\"write\"",
    "parrhesia_vm_memory_binary_bytes"
  ]

  Enum.each(expected_fragments, fn fragment ->
    assert body =~ fragment
  end)
end
test "GET /metrics denies public-network clients by default" do
conn = conn(:get, "/metrics")
conn = %{conn | remote_ip: {8, 8, 8, 8}}