Merge remote-tracking branch 'upstream/qdrant-search-2' into spc2

This commit is contained in:
Moon Man 2024-05-23 14:03:58 +00:00
commit 696b916c7c
2 changed files with 114 additions and 3 deletions

View File

@ -43,23 +43,41 @@ def get_embedding(text) do
end end
end end
# Extracts the "actor" entry from an activity's `data` map.
# Returns the stored value when the argument is a map (or struct) with a `:data`
# map containing the "actor" key; anything else yields nil.
defp actor_from_activity(%{data: %{"actor" => actor_id}}), do: actor_id
defp actor_from_activity(_other), do: nil
# Builds the body used to index one activity: a map with a single-element
# `points` list.
#
#   * `activity`  - must expose `id` and a `data` map; `data["published"]` is
#                   copied into the payload as `published_at` (nil when the key
#                   is absent).
#   * `embedding` - stored unchanged as the point's `vector`.
#
# The point id is the activity id run through `FlakeId.from_string/1` and
# `Ecto.UUID.cast!/1`; `cast!` raises if the converted value is not a valid UUID.
defp build_index_payload(activity, embedding) do
  actor = actor_from_activity(activity)
  published_at = activity.data["published"]

  %{
    points: [
      %{
        id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(),
        vector: embedding,
        # Stored next to the vector so searches can filter on the "actor" key.
        payload: %{actor: actor, published_at: published_at}
      }
    ]
  }
end
# Builds the body of a vector search request.
#
#   * `embedding` - the query vector, sent as `vector`.
#   * `options`   - any Access-compatible container (map or keyword list):
#       * `:limit`  - maximum number of hits, defaults to 20
#       * `:offset` - number of hits to skip, defaults to 0
#       * `:actor`  - optional; when truthy its `ap_id` is used to restrict
#                     hits to points whose "actor" payload matches it
#
# Returns a map with `:vector`, `:limit` and `:offset`, plus `:filter` only when
# an actor was supplied. Raises if a truthy `:actor` has no `ap_id` field.
defp build_search_payload(embedding, options) do
  base = %{
    vector: embedding,
    limit: options[:limit] || 20,
    offset: options[:offset] || 0
  }

  actor = options[:actor]

  if actor do
    Map.put(base, :filter, %{
      must: [%{key: "actor", match: %{value: actor.ap_id}}]
    })
  else
    base
  end
end
@impl true @impl true

View File

@ -15,6 +15,94 @@ defmodule Pleroma.Search.QdrantSearchTest do
alias Pleroma.Workers.SearchIndexingWorker alias Pleroma.Workers.SearchIndexingWorker
describe "Qdrant search" do describe "Qdrant search" do
# Searching with empty options: the embeddings endpoint and the qdrant search
# endpoint are both mocked, the search request must carry no "filter", and the
# activity whose id the mocked qdrant response returns is the only result.
test "searches for a term by encoding it and sending it to qdrant" do
user = insert(:user)
{:ok, activity} =
CommonAPI.post(user, %{
status: "guys i just don't wanna leave the swamp",
visibility: "public"
})
# Config lookups are answered from the fixed values below.
# NOTE(review): presumably `expect/4` is Mox's and 3 is the exact number of
# `get` calls the search is expected to make - confirm against the module setup.
Config
|> expect(:get, 3, fn
[Pleroma.Search, :module], nil ->
QdrantSearch
[Pleroma.Search.QdrantSearch, key], nil ->
%{
openai_model: "a_model",
openai_url: "https://openai.url",
qdrant_url: "https://qdrant.url"
}[key]
end)
Tesla.Mock.mock(fn
# Embedding request: always answers with the vector [1, 2, 3].
%{url: "https://openai.url/v1/embeddings", method: :post} ->
Tesla.Mock.json(%{
data: [%{embedding: [1, 2, 3]}]
})
%{url: "https://qdrant.url/collections/posts/points/search", method: :post, body: body} ->
data = Jason.decode!(body)
# No :actor option is passed below, so the request must not include a filter.
refute data["filter"]
# The single hit is the posted activity's id converted from FlakeId to UUID.
Tesla.Mock.json(%{
result: [%{"id" => activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()}]
})
end)
results = QdrantSearch.search(nil, "guys i just don't wanna leave the swamp", %{})
assert results == [activity]
end
# Searching with an `:actor` option: the qdrant search request must contain a
# "must" filter matching the "actor" key against that user's `ap_id`, and the
# activity whose id the mocked qdrant response returns is the only result.
test "for a given actor, ask for only relevant matches" do
user = insert(:user)
{:ok, activity} =
CommonAPI.post(user, %{
status: "guys i just don't wanna leave the swamp",
visibility: "public"
})
# Config lookups are answered from the fixed values below.
# NOTE(review): presumably `expect/4` is Mox's and 3 is the exact number of
# `get` calls the search is expected to make - confirm against the module setup.
Config
|> expect(:get, 3, fn
[Pleroma.Search, :module], nil ->
QdrantSearch
[Pleroma.Search.QdrantSearch, key], nil ->
%{
openai_model: "a_model",
openai_url: "https://openai.url",
qdrant_url: "https://qdrant.url"
}[key]
end)
Tesla.Mock.mock(fn
# Embedding request: always answers with the vector [1, 2, 3].
%{url: "https://openai.url/v1/embeddings", method: :post} ->
Tesla.Mock.json(%{
data: [%{embedding: [1, 2, 3]}]
})
%{url: "https://qdrant.url/collections/posts/points/search", method: :post, body: body} ->
data = Jason.decode!(body)
# Keys are strings here because the body was JSON-decoded.
assert data["filter"] == %{
"must" => [%{"key" => "actor", "match" => %{"value" => user.ap_id}}]
}
# The single hit is the posted activity's id converted from FlakeId to UUID.
Tesla.Mock.json(%{
result: [%{"id" => activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()}]
})
end)
results =
QdrantSearch.search(nil, "guys i just don't wanna leave the swamp", %{actor: user})
assert results == [activity]
end
test "indexes a public post on creation, deletes from the index on deletion" do test "indexes a public post on creation, deletes from the index on deletion" do
user = insert(:user) user = insert(:user)
@ -29,7 +117,12 @@ test "indexes a public post on creation, deletes from the index on deletion" do
%{method: :put, url: "https://qdrant.url/collections/posts/points", body: body} -> %{method: :put, url: "https://qdrant.url/collections/posts/points", body: body} ->
send(self(), "posted_to_qdrant") send(self(), "posted_to_qdrant")
assert match?(%{"points" => [%{"vector" => [1, 2, 3]}]}, Jason.decode!(body)) data = Jason.decode!(body)
%{"points" => [%{"vector" => vector, "payload" => payload}]} = data
assert vector == [1, 2, 3]
assert payload["actor"]
assert payload["published_at"]
Tesla.Mock.json("ok") Tesla.Mock.json("ok")