# spc-pleroma/lib/pleroma/search/qdrant_search.ex
defmodule Pleroma.Search.QdrantSearch do
  @moduledoc """
  Search backend that keeps post embeddings in a Qdrant collection.

  Text is turned into an embedding through `OpenAIClient` (`/v1/embeddings`)
  and the resulting vectors are written to and queried from the `posts`
  collection through `QdrantClient`. Settings are read at call time from the
  `Pleroma.Search.QdrantSearch` configuration namespace.
  """

  @behaviour Pleroma.Search.SearchBackend

  import Ecto.Query
  import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]

  alias Pleroma.Activity
  alias Pleroma.Config.Getting, as: Config
  alias __MODULE__.OpenAIClient
  alias __MODULE__.QdrantClient

  @doc """
  Creates the `posts` collection using the configured
  `:qdrant_index_configuration` as the request body.

  Returns `:ok` on an HTTP 200 response, otherwise `{:error, result}` where
  `result` is whatever the client call returned.
  """
  @impl true
  def create_index do
    payload = Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])

    with {:ok, %{status: 200}} <- QdrantClient.put("/collections/posts", payload) do
      :ok
    else
      e -> {:error, e}
    end
  end

  @doc """
  Deletes the `posts` collection.

  Returns `:ok` on an HTTP 200 response, otherwise `{:error, result}`.
  """
  @impl true
  def drop_index do
    with {:ok, %{status: 200}} <- QdrantClient.delete("/collections/posts") do
      :ok
    else
      e -> {:error, e}
    end
  end

  @doc """
  Requests an embedding for `text` using the configured `:openai_model`.

  Returns `{:ok, embedding}` when the response body carries exactly one entry
  under `"data"` with an `"embedding"` key; any other outcome (transport
  error, unexpected body shape) yields `{:error, "Failed to get embedding"}`.
  """
  def get_embedding(text) do
    with {:ok, %{body: %{"data" => [%{"embedding" => embedding}]}}} <-
           OpenAIClient.post("/v1/embeddings", %{
             input: text,
             model: Config.get([Pleroma.Search.QdrantSearch, :openai_model])
           }) do
      {:ok, embedding}
    else
      _ ->
        {:error, "Failed to get embedding"}
    end
  end

  # Extracts the "actor" field from the activity data, or nil when absent.
  defp actor_from_activity(%{data: %{"actor" => actor}}), do: actor
  defp actor_from_activity(_), do: nil

  # Converts an activity's FlakeId into the UUID string used as the Qdrant
  # point id. `search/3` reverses this with `Ecto.UUID.dump!/1`.
  defp point_id(activity) do
    activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()
  end

  # Builds the upsert body for a single point: the embedding plus the actor
  # and publication date as payload fields.
  defp build_index_payload(activity, embedding) do
    actor = actor_from_activity(activity)
    published_at = activity.data["published"]

    %{
      points: [
        %{
          id: point_id(activity),
          vector: embedding,
          payload: %{actor: actor, published_at: published_at}
        }
      ]
    }
  end

  # Builds the search body. `:limit` defaults to 20 and `:offset` to 0; when
  # an `:actor` option is present the search is restricted to points whose
  # "actor" payload field equals that actor's `ap_id`.
  #
  # NOTE(review): confirm that callers really pass the account filter under
  # `:actor` — if they use a different key (e.g. `:author`) this filter is
  # never applied.
  defp build_search_payload(embedding, options) do
    base = %{
      vector: embedding,
      limit: options[:limit] || 20,
      offset: options[:offset] || 0
    }

    if actor = options[:actor] do
      Map.put(base, :filter, %{
        must: [%{key: "actor", match: %{value: actor.ap_id}}]
      })
    else
      base
    end
  end

  @doc """
  Indexes the object of a `"Create"` activity.

  Activities of any other type, or whose object yields no search data, are
  skipped and `:ok` is returned. Otherwise the object's content is embedded
  and upserted into the `posts` collection; any failing step is returned as
  `{:error, result}`.
  """
  @impl true
  def add_to_index(activity) do
    # This will only index public or unlisted notes
    maybe_search_data = object_to_search_data(activity.object)

    if activity.data["type"] == "Create" and maybe_search_data do
      with {:ok, embedding} <- get_embedding(maybe_search_data.content),
           {:ok, %{status: 200}} <-
             QdrantClient.put(
               "/collections/posts/points",
               build_index_payload(activity, embedding)
             ) do
        :ok
      else
        e -> {:error, e}
      end
    else
      :ok
    end
  end

  @doc """
  Removes the point belonging to `object` from the `posts` collection.

  The point id is derived from the activity found for the object's `"id"`.
  When no activity is found there is nothing to delete and `:ok` is returned
  instead of crashing on a `nil` activity.
  """
  @impl true
  def remove_from_index(object) do
    case Activity.get_by_object_ap_id_with_object(object.data["id"]) do
      nil ->
        # No activity references this object, so no point was derived from it.
        :ok

      activity ->
        delete_point(point_id(activity))
    end
  end

  # Issues the delete request for a single point id.
  defp delete_point(id) do
    with {:ok, %{status: 200}} <-
           QdrantClient.post("/collections/posts/points/delete", %{"points" => [id]}) do
      :ok
    else
      e -> {:error, e}
    end
  end

  @doc """
  Searches indexed posts for `query`.

  The query is prefixed with a retrieval instruction, embedded, and sent to
  the Qdrant search endpoint together with the `:limit`, `:offset` and
  optional `:actor` options. Matching activities are loaded with their
  objects, activities of deactivated users are excluded, and rows are ordered
  by their position in the id list returned by Qdrant.

  The first argument (the searching user) is not used. If embedding or the
  Qdrant request fails, an empty list is returned.
  """
  @impl true
  def search(_user, query, options) do
    query = "Represent this sentence for searching relevant passages: #{query}"

    with {:ok, embedding} <- get_embedding(query),
         {:ok, %{body: %{"result" => result}}} <-
           QdrantClient.post(
             "/collections/posts/points/search",
             build_search_payload(embedding, options)
           ) do
      # Point ids are UUID strings; dump them back to the raw binary form so
      # they can be compared against activity ids in the query.
      ids = Enum.map(result, fn %{"id" => id} -> Ecto.UUID.dump!(id) end)

      from(a in Activity, where: a.id in ^ids)
      |> Activity.with_preloaded_object()
      |> Activity.restrict_deactivated_users()
      |> order_by([a], fragment("array_position(?, ?)", ^ids, a.id))
      |> Pleroma.Repo.all()
    else
      # Best effort: any failure degrades to "no results".
      _ ->
        []
    end
  end
end
defmodule Pleroma.Search.QdrantSearch.OpenAIClient do
  @moduledoc """
  Tesla client for the embeddings endpoint used by `Pleroma.Search.QdrantSearch`.

  The base URL comes from the `:openai_url` setting and every request carries
  an `Authorization: Bearer …` header built from `:openai_api_key` (both under
  the `Pleroma.Search.QdrantSearch` config namespace). Request and response
  bodies go through `Tesla.Middleware.JSON`.
  """

  use Tesla
  alias Pleroma.Config.Getting, as: Config

  plug(Tesla.Middleware.BaseUrl, Config.get([Pleroma.Search.QdrantSearch, :openai_url]))
  plug(Tesla.Middleware.JSON)

  plug(Tesla.Middleware.Headers, [
    {"Authorization",
     "Bearer #{Pleroma.Config.get([Pleroma.Search.QdrantSearch, :openai_api_key])}"}
  ])
end
defmodule Pleroma.Search.QdrantSearch.QdrantClient do
  @moduledoc """
  Tesla client for the Qdrant HTTP API used by `Pleroma.Search.QdrantSearch`.

  The base URL comes from the `:qdrant_url` setting and every request carries
  an `api-key` header taken from `:qdrant_api_key` (both under the
  `Pleroma.Search.QdrantSearch` config namespace). Request and response bodies
  go through `Tesla.Middleware.JSON`.
  """

  use Tesla
  alias Pleroma.Config.Getting, as: Config

  plug(Tesla.Middleware.BaseUrl, Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]))
  plug(Tesla.Middleware.JSON)

  plug(Tesla.Middleware.Headers, [
    {"api-key", Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_api_key])}
  ])
end