Upgrade NIP-50 search to ranked Postgres FTS
This commit is contained in:
@@ -34,6 +34,8 @@ Current `supported_nips` list:
| `43` | `43` is advertised when the built-in NIP-43 relay access flow is enabled. Parrhesia generates relay-signed `28935` invite responses on `REQ`, validates join and leave requests locally, and publishes the resulting signed `8000`, `8001`, and `13534` relay membership events into its own local event store. |
| `50` | `50` uses ranked PostgreSQL full-text search over event `content` by default. Parrhesia applies the filter `limit` after ordering by match quality, and falls back to trigram-backed substring matching for short or symbol-heavy queries such as search-as-you-type prefixes, domains, and punctuation-rich tokens. |
| `66` | `66` is advertised when the built-in NIP-66 publisher is enabled and has at least one relay target. The default config enables it for the `public` relay URL. Parrhesia probes those target relays, collects the resulting NIP-11 / websocket liveness data, and then publishes the signed `10166` and `30166` events locally on this relay. |
## Requirements
|
||||
|
||||
@@ -9,6 +9,11 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
alias Parrhesia.Repo
|
||||
|
||||
@behaviour Parrhesia.Storage.Events
|
||||
@trigram_fallback_max_single_term_length 4
|
||||
@trigram_fallback_pattern ~r/[^\p{L}\p{N}\s"]/u
|
||||
@fts_match_fragment "to_tsvector('simple', ?) @@ websearch_to_tsquery('simple', ?)"
|
||||
@fts_rank_fragment "ts_rank_cd(to_tsvector('simple', ?), websearch_to_tsquery('simple', ?))"
|
||||
@trigram_rank_fragment "word_similarity(lower(?), lower(?))"
|
||||
|
||||
@type normalized_event :: %{
|
||||
id: binary(),
|
||||
@@ -85,7 +90,7 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
|> Repo.all()
|
||||
end)
|
||||
|> deduplicate_events()
|
||||
|> sort_persisted_events()
|
||||
|> sort_persisted_events(filters)
|
||||
|> maybe_apply_query_limit(opts)
|
||||
|
||||
{:ok, Enum.map(persisted_events, &to_nostr_event/1)}
|
||||
@@ -607,11 +612,12 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
end
|
||||
|
||||
defp event_query_for_filter(filter, now, opts) do
|
||||
search_plan = search_plan(Map.get(filter, "search"))
|
||||
{base_query, remaining_tag_filters} = event_source_query(filter, now)
|
||||
|
||||
base_query
|
||||
|> apply_common_event_filters(filter, remaining_tag_filters, opts)
|
||||
|> order_by([event: event], desc: event.created_at, asc: event.id)
|
||||
|> apply_common_event_filters(filter, remaining_tag_filters, opts, search_plan)
|
||||
|> maybe_order_by_search_rank(search_plan)
|
||||
|> select([event: event], %{
|
||||
id: event.id,
|
||||
pubkey: event.pubkey,
|
||||
@@ -621,14 +627,16 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
content: event.content,
|
||||
sig: event.sig
|
||||
})
|
||||
|> maybe_select_search_score(search_plan)
|
||||
|> maybe_limit_query(effective_filter_limit(filter, opts))
|
||||
end
|
||||
|
||||
# Builds a query returning only the ids of events matching `filter`.
#
# The search plan is resolved once here and threaded through so the shared
# filter stage applies the same NIP-50 search mode as the full event query.
defp event_id_query_for_filter(filter, now, opts) do
  search_plan = search_plan(Map.get(filter, "search"))
  {base_query, remaining_tag_filters} = event_source_query(filter, now)

  base_query
  |> apply_common_event_filters(filter, remaining_tag_filters, opts, search_plan)
  |> select([event: event], event.id)
end
|
||||
|
||||
@@ -647,10 +655,11 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
end
|
||||
|
||||
defp event_ref_query_for_filter(filter, now, opts) do
|
||||
search_plan = search_plan(Map.get(filter, "search"))
|
||||
{base_query, remaining_tag_filters} = event_source_query(filter, now)
|
||||
|
||||
base_query
|
||||
|> apply_common_event_filters(filter, remaining_tag_filters, opts)
|
||||
|> apply_common_event_filters(filter, remaining_tag_filters, opts, search_plan)
|
||||
|> order_by([event: event], asc: event.created_at, asc: event.id)
|
||||
|> select([event: event], %{
|
||||
created_at: event.created_at,
|
||||
@@ -744,14 +753,14 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
end
|
||||
end
|
||||
|
||||
# Applies the shared REQ-filter constraints (ids/authors/kinds/since/until,
# NIP-50 search, remaining tag filters, and gift-wrap access restrictions).
#
# `search_plan` is the precomputed plan from `search_plan/1`, so the search
# mode (FTS vs trigram fallback) is decided once per filter and reused by
# every query variant that shares this stage.
defp apply_common_event_filters(query, filter, remaining_tag_filters, opts, search_plan) do
  query
  |> maybe_filter_ids(Map.get(filter, "ids"))
  |> maybe_filter_authors(Map.get(filter, "authors"))
  |> maybe_filter_kinds(Map.get(filter, "kinds"))
  |> maybe_filter_since(Map.get(filter, "since"))
  |> maybe_filter_until(Map.get(filter, "until"))
  |> maybe_filter_search(search_plan)
  |> filter_by_tag_filters(remaining_tag_filters)
  |> maybe_restrict_giftwrap_access(filter, opts)
end
|
||||
@@ -792,13 +801,19 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
|
||||
# Applies the NIP-50 search constraint for the resolved search plan.
#
# A `nil` plan (missing or blank "search" value) leaves the query untouched.
# An `:fts` plan matches via Postgres websearch-style full-text search; a
# `:trigram` plan falls back to a case-insensitive substring match whose
# LIKE metacharacters are escaped so `%` and `_` in the query stay literal.
defp maybe_filter_search(query, nil), do: query

defp maybe_filter_search(query, %{mode: :fts, query: search}) do
  where(
    query,
    [event: event],
    fragment(@fts_match_fragment, event.content, ^search)
  )
end

defp maybe_filter_search(query, %{mode: :trigram, query: search}) do
  escaped_search = escape_like_pattern(search)
  where(query, [event: event], ilike(event.content, ^"%#{escaped_search}%"))
end

defp maybe_filter_search(query, _search), do: query
|
||||
|
||||
defp escape_like_pattern(search) do
|
||||
search
|
||||
|> String.replace("\\", "\\\\")
|
||||
@@ -886,20 +901,90 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
# Caps the number of returned rows, or leaves the query unbounded when no
# effective limit was computed for the filter.
defp maybe_limit_query(query, nil), do: query

defp maybe_limit_query(query, max_results) do
  limit(query, ^max_results)
end
|
||||
|
||||
# Orders results for the resolved search plan.
#
# Without a plan this is the default newest-first ordering with id as the
# tie-breaker. With a plan, rows are ordered by match quality first (FTS
# `ts_rank_cd` or trigram `word_similarity`), then recency, then id — so a
# filter `limit` applied afterwards keeps the best matches, not just the
# newest rows.
defp maybe_order_by_search_rank(query, nil) do
  order_by(query, [event: e], desc: e.created_at, asc: e.id)
end

defp maybe_order_by_search_rank(query, %{mode: :fts, query: search_text}) do
  order_by(
    query,
    [event: e],
    desc: fragment(@fts_rank_fragment, e.content, ^search_text),
    desc: e.created_at,
    asc: e.id
  )
end

defp maybe_order_by_search_rank(query, %{mode: :trigram, query: search_text}) do
  order_by(
    query,
    [event: e],
    desc: fragment(@trigram_rank_fragment, ^search_text, e.content),
    desc: e.created_at,
    asc: e.id
  )
end
||||
|
||||
# Merges the match score into the selected row under `:search_score` so the
# in-memory merge of per-partition results can rank search hits consistently.
# Non-search queries select no score and keep the plain row shape.
defp maybe_select_search_score(query, nil), do: query

defp maybe_select_search_score(query, %{mode: :fts, query: search_text}) do
  select_merge(
    query,
    [event: e],
    %{search_score: fragment(@fts_rank_fragment, e.content, ^search_text)}
  )
end

defp maybe_select_search_score(query, %{mode: :trigram, query: search_text}) do
  select_merge(
    query,
    [event: e],
    %{search_score: fragment(@trigram_rank_fragment, ^search_text, e.content)}
  )
end
|
||||
|
||||
# Normalizes a raw NIP-50 "search" value into a search plan.
#
# Returns `nil` for missing or blank input, `%{mode: :trigram, query: q}`
# for queries better served by substring matching (short single terms or
# symbol-heavy text), and `%{mode: :fts, query: q}` for everything else.
defp search_plan(nil), do: nil

defp search_plan(search) when is_binary(search) do
  case String.trim(search) do
    "" ->
      nil

    trimmed ->
      if trigram_fallback_search?(trimmed) do
        %{mode: :trigram, query: trimmed}
      else
        %{mode: :fts, query: trimmed}
      end
  end
end
|
||||
|
||||
# True when the query is unlikely to tokenize well for full-text search:
# either a single short term (e.g. a search-as-you-type prefix) or text
# containing symbols outside letters/digits/whitespace/double quotes.
defp trigram_fallback_search?(search) do
  short_single_term_search?(search) or String.match?(search, @trigram_fallback_pattern)
end
|
||||
|
||||
# True when the query is exactly one whitespace-delimited term at or below
# the trigram fallback length threshold.
defp short_single_term_search?(search) do
  with [only_term] <- String.split(search, ~r/\s+/, trim: true) do
    String.length(only_term) <= @trigram_fallback_max_single_term_length
  else
    _zero_or_many -> false
  end
end
|
||||
|
||||
# Collapses duplicate events (same id returned by multiple partition
# queries), keeping the copy preferred by `preferred_event/2` — i.e. the one
# that would sort first under search ordering — rather than whichever copy
# happened to arrive first.
defp deduplicate_events(events) do
  events
  |> Enum.reduce(%{}, fn event, acc ->
    Map.update(acc, event.id, event, fn existing -> preferred_event(existing, event) end)
  end)
  |> Map.values()
end
|
||||
|
||||
# Sorts the merged result set: rank-aware ordering when any filter carries a
# usable NIP-50 search (score first, then recency, then id), otherwise the
# default newest-first ordering with id as tie-breaker.
defp sort_persisted_events(events, filters) do
  if Enum.any?(filters, &search_filter?/1) do
    Enum.sort(events, &search_result_sorter/2)
  else
    Enum.sort(events, &chronological_sorter/2)
  end
end
|
||||
|
||||
defp maybe_apply_query_limit(events, opts) do
|
||||
@@ -921,6 +1006,50 @@ defmodule Parrhesia.Storage.Adapters.Postgres.Events do
|
||||
}
|
||||
end
|
||||
|
||||
# Picks which of two copies of the same event to keep: the candidate wins
# only when it would sort ahead of the existing copy under search ordering
# (higher score, then newer, then lower id).
defp preferred_event(existing, candidate) do
  case search_result_sorter(candidate, existing) do
    true -> candidate
    false -> existing
  end
end
|
||||
|
||||
# True when the filter's "search" value yields a usable search plan
# (i.e. it is a non-blank string after trimming).
defp search_filter?(filter) do
  not is_nil(search_plan(Map.get(filter, "search")))
end
|
||||
|
||||
# Sort predicate for search results: the higher-scored event sorts first;
# ties fall back to chronological ordering.
defp search_result_sorter(left, right) do
  case {search_score(left), search_score(right)} do
    {l, r} when l > r -> true
    {l, r} when l < r -> false
    _tie -> chronological_sorter(left, right)
  end
end
|
||||
|
||||
# Sort predicate for the default ordering: newer `created_at` first, with
# the lexicographically lower id winning exact-timestamp ties.
defp chronological_sorter(left, right) do
  if left.created_at == right.created_at do
    left.id < right.id
  else
    left.created_at > right.created_at
  end
end
|
||||
|
||||
# Extracts a float search score from a persisted row. Rows selected without
# a score (non-search partition queries) and any non-numeric value both
# default to 0.0; integers are coerced to float via `/`.
defp search_score(event) do
  case Map.get(event, :search_score) do
    score when is_float(score) -> score
    score when is_integer(score) -> score / 1
    _missing_or_other -> 0.0
  end
end
|
||||
|
||||
# Guards against rows whose persisted tags column is not a list, normalizing
# any other shape to an empty tag list.
defp normalize_persisted_tags(tags) when is_list(tags), do: tags
defp normalize_persisted_tags(_non_list), do: []
|
||||
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
defmodule Parrhesia.Repo.Migrations.AddNip50FtsAndTrigramSearch do
  @moduledoc """
  Adds the indexes backing ranked NIP-50 search: a GIN full-text index over
  event `content` plus a `pg_trgm` GIN index for the substring-fallback
  path. Both are partial indexes that skip soft-deleted rows.
  """

  use Ecto.Migration

  def up do
    execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")

    # IF NOT EXISTS keeps up/0 idempotent, mirroring the IF EXISTS guards
    # in down/0, so a partially-applied run can be safely retried.
    execute("""
    CREATE INDEX IF NOT EXISTS events_content_fts_idx
    ON events
    USING GIN (to_tsvector('simple', content))
    WHERE deleted_at IS NULL
    """)

    execute("""
    CREATE INDEX IF NOT EXISTS events_content_trgm_idx
    ON events
    USING GIN (content gin_trgm_ops)
    WHERE deleted_at IS NULL
    """)
  end

  def down do
    execute("DROP INDEX IF EXISTS events_content_trgm_idx")
    execute("DROP INDEX IF EXISTS events_content_fts_idx")
    # NOTE(review): DROP EXTENSION fails if other objects still depend on
    # pg_trgm; acceptable here since this migration introduced it.
    execute("DROP EXTENSION IF EXISTS pg_trgm")
  end
end
|
||||
@@ -264,6 +264,48 @@ defmodule Parrhesia.Storage.Adapters.Postgres.EventsQueryCountTest do
|
||||
assert {:ok, 0} = Events.count(%{}, filters, requester_pubkeys: [])
|
||||
end
|
||||
|
||||
test "search ranks FTS matches by relevance and applies limit after ranking" do
  higher_ranked =
    persist_event(%{
      "kind" => 1,
      "created_at" => 1_700_000_210,
      "content" => "relay relay relay search ranking"
    })

  _recent_but_weaker =
    persist_event(%{
      "kind" => 1,
      "created_at" => 1_700_000_211,
      "content" => "relay only"
    })

  filters = [%{"kinds" => [1], "search" => "relay", "limit" => 1}]

  # The limit must be applied after rank ordering, so the stronger (but
  # older) match survives while the count still sees both rows.
  assert {:ok, [only_result]} = Events.query(%{}, filters, [])
  assert only_result["id"] == higher_ranked["id"]
  assert {:ok, 2} = Events.count(%{}, filters, [])
end
|
||||
|
||||
test "search falls back to trigram matching for short prefixes" do
  prefix_match =
    persist_event(%{
      "kind" => 1,
      "content" => "alpha relay note"
    })

  _non_match =
    persist_event(%{
      "kind" => 1,
      "content" => "omega relay note"
    })

  # "alph" is a short single term, so the trigram substring path must match
  # it as a prefix of "alpha" where FTS token matching would not.
  filters = [%{"kinds" => [1], "search" => "alph"}]

  assert {:ok, [found]} = Events.query(%{}, filters, [])
  assert found["id"] == prefix_match["id"]
  assert {:ok, 1} = Events.count(%{}, filters, [])
end
|
||||
|
||||
test "search treats % and _ as literals" do
|
||||
matching =
|
||||
persist_event(%{
|
||||
|
||||
Reference in New Issue
Block a user