Upgrade NIP-50 search to ranked Postgres FTS
This commit is contained in:
@@ -34,6 +34,8 @@ Current `supported_nips` list:
|
|||||||
|
|
||||||
`43` is advertised when the built-in NIP-43 relay access flow is enabled. Parrhesia generates relay-signed `28935` invite responses on `REQ`, validates join and leave requests locally, and publishes the resulting signed `8000`, `8001`, and `13534` relay membership events into its own local event store.
|
`43` is advertised when the built-in NIP-43 relay access flow is enabled. Parrhesia generates relay-signed `28935` invite responses on `REQ`, validates join and leave requests locally, and publishes the resulting signed `8000`, `8001`, and `13534` relay membership events into its own local event store.
|
||||||
|
|
||||||
|
`50` uses ranked PostgreSQL full-text search over event `content` by default. Parrhesia applies the filter `limit` after ordering by match quality, and falls back to trigram-backed substring matching for short or symbol-heavy queries such as search-as-you-type prefixes, domains, and punctuation-rich tokens.
|
||||||
|
|
||||||
`66` is advertised when the built-in NIP-66 publisher is enabled and has at least one relay target. The default config enables it for the `public` relay URL. Parrhesia probes those target relays, collects the resulting NIP-11 / websocket liveness data, and then publishes the signed `10166` and `30166` events locally on this relay.
|
`66` is advertised when the built-in NIP-66 publisher is enabled and has at least one relay target. The default config enables it for the `public` relay URL. Parrhesia probes those target relays, collects the resulting NIP-11 / websocket liveness data, and then publishes the signed `10166` and `30166` events locally on this relay.
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|||||||
@@ -9,6 +9,11 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
alias Parrhesia.Repo
|
alias Parrhesia.Repo
|
||||||
|
|
||||||
@behaviour Parrhesia.Storage.Events
|
@behaviour Parrhesia.Storage.Events
|
||||||
|
@trigram_fallback_max_single_term_length 4
|
||||||
|
@trigram_fallback_pattern ~r/[^\p{L}\p{N}\s"]/u
|
||||||
|
@fts_match_fragment "to_tsvector('simple', ?) @@ websearch_to_tsquery('simple', ?)"
|
||||||
|
@fts_rank_fragment "ts_rank_cd(to_tsvector('simple', ?), websearch_to_tsquery('simple', ?))"
|
||||||
|
@trigram_rank_fragment "word_similarity(lower(?), lower(?))"
|
||||||
|
|
||||||
@type normalized_event :: %{
|
@type normalized_event :: %{
|
||||||
id: binary(),
|
id: binary(),
|
||||||
@@ -85,7 +90,7 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
|> Repo.all()
|
|> Repo.all()
|
||||||
end)
|
end)
|
||||||
|> deduplicate_events()
|
|> deduplicate_events()
|
||||||
|> sort_persisted_events()
|
|> sort_persisted_events(filters)
|
||||||
|> maybe_apply_query_limit(opts)
|
|> maybe_apply_query_limit(opts)
|
||||||
|
|
||||||
{:ok, Enum.map(persisted_events, &to_nostr_event/1)}
|
{:ok, Enum.map(persisted_events, &to_nostr_event/1)}
|
||||||
@@ -607,11 +612,12 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
end
|
end
|
||||||
|
|
||||||
defp event_query_for_filter(filter, now, opts) do
|
defp event_query_for_filter(filter, now, opts) do
|
||||||
|
search_plan = search_plan(Map.get(filter, "search"))
|
||||||
{base_query, remaining_tag_filters} = event_source_query(filter, now)
|
{base_query, remaining_tag_filters} = event_source_query(filter, now)
|
||||||
|
|
||||||
base_query
|
base_query
|
||||||
|> apply_common_event_filters(filter, remaining_tag_filters, opts)
|
|> apply_common_event_filters(filter, remaining_tag_filters, opts, search_plan)
|
||||||
|> order_by([event: event], desc: event.created_at, asc: event.id)
|
|> maybe_order_by_search_rank(search_plan)
|
||||||
|> select([event: event], %{
|
|> select([event: event], %{
|
||||||
id: event.id,
|
id: event.id,
|
||||||
pubkey: event.pubkey,
|
pubkey: event.pubkey,
|
||||||
@@ -621,14 +627,16 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
content: event.content,
|
content: event.content,
|
||||||
sig: event.sig
|
sig: event.sig
|
||||||
})
|
})
|
||||||
|
|> maybe_select_search_score(search_plan)
|
||||||
|> maybe_limit_query(effective_filter_limit(filter, opts))
|
|> maybe_limit_query(effective_filter_limit(filter, opts))
|
||||||
end
|
end
|
||||||
|
|
||||||
defp event_id_query_for_filter(filter, now, opts) do
|
defp event_id_query_for_filter(filter, now, opts) do
|
||||||
|
search_plan = search_plan(Map.get(filter, "search"))
|
||||||
{base_query, remaining_tag_filters} = event_source_query(filter, now)
|
{base_query, remaining_tag_filters} = event_source_query(filter, now)
|
||||||
|
|
||||||
base_query
|
base_query
|
||||||
|> apply_common_event_filters(filter, remaining_tag_filters, opts)
|
|> apply_common_event_filters(filter, remaining_tag_filters, opts, search_plan)
|
||||||
|> select([event: event], event.id)
|
|> select([event: event], event.id)
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -647,10 +655,11 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
end
|
end
|
||||||
|
|
||||||
defp event_ref_query_for_filter(filter, now, opts) do
|
defp event_ref_query_for_filter(filter, now, opts) do
|
||||||
|
search_plan = search_plan(Map.get(filter, "search"))
|
||||||
{base_query, remaining_tag_filters} = event_source_query(filter, now)
|
{base_query, remaining_tag_filters} = event_source_query(filter, now)
|
||||||
|
|
||||||
base_query
|
base_query
|
||||||
|> apply_common_event_filters(filter, remaining_tag_filters, opts)
|
|> apply_common_event_filters(filter, remaining_tag_filters, opts, search_plan)
|
||||||
|> order_by([event: event], asc: event.created_at, asc: event.id)
|
|> order_by([event: event], asc: event.created_at, asc: event.id)
|
||||||
|> select([event: event], %{
|
|> select([event: event], %{
|
||||||
created_at: event.created_at,
|
created_at: event.created_at,
|
||||||
@@ -744,14 +753,14 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp apply_common_event_filters(query, filter, remaining_tag_filters, opts) do
|
defp apply_common_event_filters(query, filter, remaining_tag_filters, opts, search_plan) do
|
||||||
query
|
query
|
||||||
|> maybe_filter_ids(Map.get(filter, "ids"))
|
|> maybe_filter_ids(Map.get(filter, "ids"))
|
||||||
|> maybe_filter_authors(Map.get(filter, "authors"))
|
|> maybe_filter_authors(Map.get(filter, "authors"))
|
||||||
|> maybe_filter_kinds(Map.get(filter, "kinds"))
|
|> maybe_filter_kinds(Map.get(filter, "kinds"))
|
||||||
|> maybe_filter_since(Map.get(filter, "since"))
|
|> maybe_filter_since(Map.get(filter, "since"))
|
||||||
|> maybe_filter_until(Map.get(filter, "until"))
|
|> maybe_filter_until(Map.get(filter, "until"))
|
||||||
|> maybe_filter_search(Map.get(filter, "search"))
|
|> maybe_filter_search(search_plan)
|
||||||
|> filter_by_tag_filters(remaining_tag_filters)
|
|> filter_by_tag_filters(remaining_tag_filters)
|
||||||
|> maybe_restrict_giftwrap_access(filter, opts)
|
|> maybe_restrict_giftwrap_access(filter, opts)
|
||||||
end
|
end
|
||||||
@@ -792,13 +801,19 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
|
|
||||||
defp maybe_filter_search(query, nil), do: query
|
defp maybe_filter_search(query, nil), do: query
|
||||||
|
|
||||||
defp maybe_filter_search(query, search) when is_binary(search) and search != "" do
|
defp maybe_filter_search(query, %{mode: :fts, query: search}) do
|
||||||
|
where(
|
||||||
|
query,
|
||||||
|
[event: event],
|
||||||
|
fragment(@fts_match_fragment, event.content, ^search)
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp maybe_filter_search(query, %{mode: :trigram, query: search}) do
|
||||||
escaped_search = escape_like_pattern(search)
|
escaped_search = escape_like_pattern(search)
|
||||||
where(query, [event: event], ilike(event.content, ^"%#{escaped_search}%"))
|
where(query, [event: event], ilike(event.content, ^"%#{escaped_search}%"))
|
||||||
end
|
end
|
||||||
|
|
||||||
defp maybe_filter_search(query, _search), do: query
|
|
||||||
|
|
||||||
defp escape_like_pattern(search) do
|
defp escape_like_pattern(search) do
|
||||||
search
|
search
|
||||||
|> String.replace("\\", "\\\\")
|
|> String.replace("\\", "\\\\")
|
||||||
@@ -886,20 +901,90 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
defp maybe_limit_query(query, nil), do: query
|
defp maybe_limit_query(query, nil), do: query
|
||||||
defp maybe_limit_query(query, limit), do: limit(query, ^limit)
|
defp maybe_limit_query(query, limit), do: limit(query, ^limit)
|
||||||
|
|
||||||
|
# Picks the ORDER BY for a per-filter event query from its search plan.
#
# With no plan (`nil`) rows are ordered newest first, with `id` ascending as
# the tie-breaker. With a plan, the mode-specific score fragment
# (`@fts_rank_fragment` or `@trigram_rank_fragment`) becomes the leading,
# descending sort key and the chronological ordering breaks ties.
defp maybe_order_by_search_rank(query, nil), do: append_chronological_order(query)

defp maybe_order_by_search_rank(query, %{mode: :fts, query: term}) do
  query
  |> order_by([event: event], desc: fragment(@fts_rank_fragment, event.content, ^term))
  |> append_chronological_order()
end

defp maybe_order_by_search_rank(query, %{mode: :trigram, query: term}) do
  query
  |> order_by([event: event], desc: fragment(@trigram_rank_fragment, ^term, event.content))
  |> append_chronological_order()
end

# Successive `order_by/3` calls append to the existing ordering, so this adds
# the created_at/id keys after any score key that was set before it.
defp append_chronological_order(query) do
  order_by(query, [event: event], desc: event.created_at, asc: event.id)
end
|
||||||
|
|
||||||
|
# Merges a `:search_score` field into the selected row map when the filter
# has a search plan, using the same fragment as the ordering for that mode.
# Queries without a search plan are returned unchanged.
defp maybe_select_search_score(query, nil), do: query

defp maybe_select_search_score(query, %{mode: :fts, query: term}) do
  select_merge(query, [event: event], %{
    search_score: fragment(@fts_rank_fragment, event.content, ^term)
  })
end

defp maybe_select_search_score(query, %{mode: :trigram, query: term}) do
  select_merge(query, [event: event], %{
    search_score: fragment(@trigram_rank_fragment, ^term, event.content)
  })
end
|
||||||
|
|
||||||
|
# Builds the search plan for a filter's `"search"` value.
#
# Returns:
#   * `nil` when there is nothing to search for (missing, blank after
#     trimming, or not a string),
#   * `%{mode: :trigram, query: q}` when `trigram_fallback_search?/1` accepts
#     the trimmed query,
#   * `%{mode: :fts, query: q}` otherwise.
#
# `q` is always the trimmed search string.
defp search_plan(search) when is_binary(search) do
  case String.trim(search) do
    "" ->
      nil

    normalized_search ->
      mode = if trigram_fallback_search?(normalized_search), do: :trigram, else: :fts
      %{mode: mode, query: normalized_search}
  end
end

# Non-string values (including `nil`) carry no usable query. Treat them like
# a missing search instead of raising FunctionClauseError in the query path.
defp search_plan(_search), do: nil
|
||||||
|
|
||||||
|
# Returns true when the trimmed search string should use the trigram path:
# either it matches `@trigram_fallback_pattern`, or it is a single short term
# according to `short_single_term_search?/1`.
defp trigram_fallback_search?(search) do
  cond do
    String.match?(search, @trigram_fallback_pattern) -> true
    true -> short_single_term_search?(search)
  end
end
|
||||||
|
|
||||||
|
# Returns true when `search` consists of exactly one whitespace-delimited term
# whose length in graphemes is at most
# `@trigram_fallback_max_single_term_length`.
#
# The split regex carries the `u` modifier so that `\s` also matches Unicode
# whitespace (for example U+3000). That keeps term counting consistent with
# the `u`-flagged `@trigram_fallback_pattern`; without it, two terms joined by
# a non-ASCII space would be counted as one short term.
defp short_single_term_search?(search) do
  case String.split(search, ~r/\s+/u, trim: true) do
    [term] -> String.length(term) <= @trigram_fallback_max_single_term_length
    _other -> false
  end
end
|
||||||
|
|
||||||
defp deduplicate_events(events) do
|
defp deduplicate_events(events) do
|
||||||
events
|
events
|
||||||
|> Enum.reduce(%{}, fn event, acc -> Map.put_new(acc, event.id, event) end)
|
|> Enum.reduce(%{}, fn event, acc ->
|
||||||
|
Map.update(acc, event.id, event, fn existing -> preferred_event(existing, event) end)
|
||||||
|
end)
|
||||||
|> Map.values()
|
|> Map.values()
|
||||||
end
|
end
|
||||||
|
|
||||||
defp sort_persisted_events(events) do
|
defp sort_persisted_events(events, filters) do
|
||||||
Enum.sort(events, fn left, right ->
|
if Enum.any?(filters, &search_filter?/1) do
|
||||||
cond do
|
Enum.sort(events, &search_result_sorter/2)
|
||||||
left.created_at > right.created_at -> true
|
else
|
||||||
left.created_at < right.created_at -> false
|
Enum.sort(events, &chronological_sorter/2)
|
||||||
true -> left.id < right.id
|
end
|
||||||
end
|
|
||||||
end)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
defp maybe_apply_query_limit(events, opts) do
|
defp maybe_apply_query_limit(events, opts) do
|
||||||
@@ -921,6 +1006,50 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
|||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Chooses between two rows for the same event: `candidate` replaces
# `existing` only when `search_result_sorter/2` ranks the candidate first.
defp preferred_event(existing, candidate) do
  case search_result_sorter(candidate, existing) do
    true -> candidate
    false -> existing
  end
end
|
||||||
|
|
||||||
|
# True when the filter's "search" value produces a search plan.
defp search_filter?(filter) do
  plan = search_plan(Map.get(filter, "search"))
  not is_nil(plan)
end
|
||||||
|
|
||||||
|
# Sort function for search results: the higher `search_score/1` sorts first,
# and equal scores fall back to `chronological_sorter/2`.
defp search_result_sorter(left, right) do
  case {search_score(left), search_score(right)} do
    {left_score, right_score} when left_score > right_score -> true
    {left_score, right_score} when left_score < right_score -> false
    _equal_scores -> chronological_sorter(left, right)
  end
end
|
||||||
|
|
||||||
|
# Sort function placing the larger `created_at` first; when both timestamps
# are equal, the smaller `id` sorts first.
defp chronological_sorter(left, right) do
  if left.created_at == right.created_at do
    left.id < right.id
  else
    left.created_at > right.created_at
  end
end
|
||||||
|
|
||||||
|
# Reads the `:search_score` of a row as a float. Rows without the key, or
# with a value that is neither a float nor an integer, score 0.0.
defp search_score(event) do
  event
  |> Map.get(:search_score, 0.0)
  |> normalize_search_score()
end

defp normalize_search_score(score) when is_float(score), do: score
defp normalize_search_score(score) when is_integer(score), do: score / 1
defp normalize_search_score(_score), do: 0.0
|
||||||
|
|
||||||
defp normalize_persisted_tags(tags) when is_list(tags), do: tags
|
defp normalize_persisted_tags(tags) when is_list(tags), do: tags
|
||||||
defp normalize_persisted_tags(_tags), do: []
|
defp normalize_persisted_tags(_tags), do: []
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
defmodule Parrhesia.Repo.Migrations.AddNip50FtsAndTrigramSearch do
  # Adds the two partial GIN indexes on `events.content` (rows with
  # `deleted_at IS NULL`) used by search: a full-text index over
  # `to_tsvector('simple', content)` and a pg_trgm index for substring
  # matching.
  use Ecto.Migration

  def up do
    execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")

    # The indexed expression must stay textually identical to the expression
    # used in the query's match fragment, otherwise the planner cannot use it.
    execute("""
    CREATE INDEX events_content_fts_idx
    ON events
    USING GIN (to_tsvector('simple', content))
    WHERE deleted_at IS NULL
    """)

    execute("""
    CREATE INDEX events_content_trgm_idx
    ON events
    USING GIN (content gin_trgm_ops)
    WHERE deleted_at IS NULL
    """)
  end

  def down do
    execute("DROP INDEX IF EXISTS events_content_trgm_idx")
    execute("DROP INDEX IF EXISTS events_content_fts_idx")

    # pg_trgm is deliberately left installed. `up` uses IF NOT EXISTS, so the
    # extension may predate this migration or be used by other objects;
    # dropping it here could break them or make the rollback fail.
  end
end
|
||||||
@@ -264,6 +264,48 @@ defmodule Parrhesia.Storage.Adapters.Postgres.EventsQueryCountTest do
|
|||||||
assert {:ok, 0} = Events.count(%{}, filters, requester_pubkeys: [])
|
assert {:ok, 0} = Events.count(%{}, filters, requester_pubkeys: [])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "search ranks FTS matches by relevance and applies limit after ranking" do
|
||||||
|
stronger_match =
|
||||||
|
persist_event(%{
|
||||||
|
"kind" => 1,
|
||||||
|
"created_at" => 1_700_000_210,
|
||||||
|
"content" => "relay relay relay search ranking"
|
||||||
|
})
|
||||||
|
|
||||||
|
_newer_weaker_match =
|
||||||
|
persist_event(%{
|
||||||
|
"kind" => 1,
|
||||||
|
"created_at" => 1_700_000_211,
|
||||||
|
"content" => "relay only"
|
||||||
|
})
|
||||||
|
|
||||||
|
filters = [%{"kinds" => [1], "search" => "relay", "limit" => 1}]
|
||||||
|
|
||||||
|
assert {:ok, [result]} = Events.query(%{}, filters, [])
|
||||||
|
assert result["id"] == stronger_match["id"]
|
||||||
|
assert {:ok, 2} = Events.count(%{}, filters, [])
|
||||||
|
end
|
||||||
|
|
||||||
|
test "search falls back to trigram matching for short prefixes" do
|
||||||
|
matching =
|
||||||
|
persist_event(%{
|
||||||
|
"kind" => 1,
|
||||||
|
"content" => "alpha relay note"
|
||||||
|
})
|
||||||
|
|
||||||
|
_other =
|
||||||
|
persist_event(%{
|
||||||
|
"kind" => 1,
|
||||||
|
"content" => "omega relay note"
|
||||||
|
})
|
||||||
|
|
||||||
|
filters = [%{"kinds" => [1], "search" => "alph"}]
|
||||||
|
|
||||||
|
assert {:ok, [result]} = Events.query(%{}, filters, [])
|
||||||
|
assert result["id"] == matching["id"]
|
||||||
|
assert {:ok, 1} = Events.count(%{}, filters, [])
|
||||||
|
end
|
||||||
|
|
||||||
test "search treats % and _ as literals" do
|
test "search treats % and _ as literals" do
|
||||||
matching =
|
matching =
|
||||||
persist_event(%{
|
persist_event(%{
|
||||||
|
|||||||
Reference in New Issue
Block a user