From 94e4f215896dc7976a54fd146daf3e286602925a Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Thu, 23 May 2024 14:38:30 +0400 Subject: [PATCH] QdrantSearch: Deal with actor restrictions --- lib/pleroma/search/qdrant_search.ex | 22 ++++- test/pleroma/search/qdrant_search_test.exs | 95 +++++++++++++++++++++- 2 files changed, 114 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex index 283c43075..9cb34ef71 100644 --- a/lib/pleroma/search/qdrant_search.ex +++ b/lib/pleroma/search/qdrant_search.ex @@ -43,23 +43,41 @@ def get_embedding(text) do end end + defp actor_from_activity(%{data: %{"actor" => actor}}) do + actor + end + + defp actor_from_activity(_), do: nil + defp build_index_payload(activity, embedding) do + actor = actor_from_activity(activity) + published_at = activity.data["published"] + %{ points: [ %{ id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(), - vector: embedding + vector: embedding, + payload: %{actor: actor, published_at: published_at} } ] } end defp build_search_payload(embedding, options) do - %{ + base = %{ vector: embedding, limit: options[:limit] || 20, offset: options[:offset] || 0 } + + if options[:actor] do + Map.put(base, :filter, %{ + must: [%{key: "actor", match: %{value: options[:actor].ap_id}}] + }) + else + base + end end @impl true diff --git a/test/pleroma/search/qdrant_search_test.exs b/test/pleroma/search/qdrant_search_test.exs index a2f9cc7ec..371074dcf 100644 --- a/test/pleroma/search/qdrant_search_test.exs +++ b/test/pleroma/search/qdrant_search_test.exs @@ -15,6 +15,94 @@ defmodule Pleroma.Search.QdrantSearchTest do alias Pleroma.Workers.SearchIndexingWorker describe "Qdrant search" do + test "searches for a term by encoding it and sending it to qdrant" do + user = insert(:user) + + {:ok, activity} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: "public" + }) + + Config + |> expect(:get, 3, fn + [Pleroma.Search, :module], nil -> + QdrantSearch + + [Pleroma.Search.QdrantSearch, key], nil -> + %{ + openai_model: "a_model", + openai_url: "https://openai.url", + qdrant_url: "https://qdrant.url" + }[key] + end) + + Tesla.Mock.mock(fn + %{url: "https://openai.url/v1/embeddings", method: :post} -> + Tesla.Mock.json(%{ + data: [%{embedding: [1, 2, 3]}] + }) + + %{url: "https://qdrant.url/collections/posts/points/search", method: :post, body: body} -> + data = Jason.decode!(body) + refute data["filter"] + + Tesla.Mock.json(%{ + result: [%{"id" => activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()}] + }) + end) + + results = QdrantSearch.search(nil, "guys i just don't wanna leave the swamp", %{}) + + assert results == [activity] + end + + test "for a given actor, ask for only relevant matches" do + user = insert(:user) + + {:ok, activity} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: "public" + }) + + Config + |> expect(:get, 3, fn + [Pleroma.Search, :module], nil -> + QdrantSearch + + [Pleroma.Search.QdrantSearch, key], nil -> + %{ + openai_model: "a_model", + openai_url: "https://openai.url", + qdrant_url: "https://qdrant.url" + }[key] + end) + + Tesla.Mock.mock(fn + %{url: "https://openai.url/v1/embeddings", method: :post} -> + Tesla.Mock.json(%{ + data: [%{embedding: [1, 2, 3]}] + }) + + %{url: "https://qdrant.url/collections/posts/points/search", method: :post, body: body} -> + data = Jason.decode!(body) + + assert data["filter"] == %{ + "must" => [%{"key" => "actor", "match" => %{"value" => user.ap_id}}] + } + + Tesla.Mock.json(%{ + result: [%{"id" => activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()}] + }) + end) + + results = + QdrantSearch.search(nil, "guys i just don't wanna leave the swamp", %{actor: user}) + + assert results == [activity] + end + test "indexes a public post on creation, deletes from the index on deletion" do user = insert(:user) @@ -29,7 +117,12 @@ test "indexes a public post on creation, deletes from the index on deletion" do %{method: :put, url: "https://qdrant.url/collections/posts/points", body: body} -> send(self(), "posted_to_qdrant") - assert match?(%{"points" => [%{"vector" => [1, 2, 3]}]}, Jason.decode!(body)) + data = Jason.decode!(body) + %{"points" => [%{"vector" => vector, "payload" => payload}]} = data + + assert vector == [1, 2, 3] + assert payload["actor"] + assert payload["published_at"] Tesla.Mock.json("ok")