2024-05-14 10:13:37 +00:00
|
|
|
defmodule Pleroma.Search.QdrantSearch do
|
|
|
|
@behaviour Pleroma.Search.SearchBackend
|
|
|
|
import Ecto.Query
|
2024-05-18 11:02:22 +00:00
|
|
|
|
2024-05-14 10:13:37 +00:00
|
|
|
alias Pleroma.Activity
|
2024-05-18 11:02:22 +00:00
|
|
|
alias Pleroma.Config.Getting, as: Config
|
2024-05-14 10:13:37 +00:00
|
|
|
|
2024-05-19 08:17:46 +00:00
|
|
|
alias __MODULE__.OpenAIClient
|
2024-05-18 11:02:22 +00:00
|
|
|
alias __MODULE__.QdrantClient
|
2024-05-14 10:13:37 +00:00
|
|
|
|
|
|
|
import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
|
2024-05-27 10:35:08 +00:00
|
|
|
import Pleroma.Search.DatabaseSearch, only: [maybe_fetch: 3]
|
2024-05-14 10:13:37 +00:00
|
|
|
|
2024-05-14 13:19:36 +00:00
|
|
|
@impl true
|
2024-05-18 11:02:22 +00:00
|
|
|
@doc """
Sends the configured `:qdrant_index_configuration` to Qdrant with a `PUT`
on `/collections/posts`.

Returns `:ok` when the request succeeds with HTTP status 200. Any other
result is handed back wrapped as `{:error, result}`.
"""
def create_index do
  index_config = Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])

  case QdrantClient.put("/collections/posts", index_config) do
    {:ok, %{status: 200}} -> :ok
    failure -> {:error, failure}
  end
end
|
|
|
|
|
2024-05-16 06:47:24 +00:00
|
|
|
@impl true
|
2024-05-18 11:02:22 +00:00
|
|
|
@doc """
Issues a `DELETE` on `/collections/posts` through the Qdrant client.

Returns `:ok` when the request succeeds with HTTP status 200. Any other
result is handed back wrapped as `{:error, result}`.
"""
def drop_index do
  case QdrantClient.delete("/collections/posts") do
    {:ok, %{status: 200}} -> :ok
    failure -> {:error, failure}
  end
end
|
|
|
|
|
|
|
|
@doc """
Requests an embedding for `text` by posting to `/v1/embeddings` on the
OpenAI client, using the model configured under `:openai_model`.

Returns `{:ok, embedding}` when the response body holds exactly one entry
under `"data"` that carries an `"embedding"` key. Every other outcome
(transport error, unexpected body shape) collapses into
`{:error, "Failed to get embedding"}`.
"""
def get_embedding(text) do
  request = %{
    input: text,
    model: Config.get([Pleroma.Search.QdrantSearch, :openai_model])
  }

  case OpenAIClient.post("/v1/embeddings", request) do
    {:ok, %{body: %{"data" => [%{"embedding" => embedding}]}}} ->
      {:ok, embedding}

    _ ->
      {:error, "Failed to get embedding"}
  end
end
|
|
|
|
|
2024-05-23 10:38:30 +00:00
|
|
|
# Extracts the "actor" entry from an activity's `data` map.
# Anything that does not have that shape (no `data`, no "actor" key,
# not a map at all) yields nil instead of raising.
defp actor_from_activity(activity) do
  case activity do
    %{data: %{"actor" => actor}} -> actor
    _ -> nil
  end
end
|
|
|
|
|
2024-05-14 10:13:37 +00:00
|
|
|
# Builds the request body used to store a single point for `activity`.
#
# The point id is derived from the activity id via FlakeId.from_string/1 and
# Ecto.UUID.cast!/1, the vector is the supplied `embedding`, and the point
# payload records the activity's actor and its "published" value.
defp build_index_payload(activity, embedding) do
  point_payload = %{
    actor: actor_from_activity(activity),
    published_at: activity.data["published"]
  }

  point = %{
    id: Ecto.UUID.cast!(FlakeId.from_string(activity.id)),
    vector: embedding,
    payload: point_payload
  }

  %{points: [point]}
end
|
|
|
|
|
2024-05-19 10:41:05 +00:00
|
|
|
# Builds the request body for a vector search.
#
# `options[:limit]` and `options[:offset]` fall back to 20 and 0 when they
# are missing (or falsy). When `options[:author]` is set, a `must` filter on
# the "actor" payload key restricts results to that author's `ap_id`.
defp build_search_payload(embedding, options) do
  request = %{
    vector: embedding,
    limit: options[:limit] || 20,
    offset: options[:offset] || 0
  }

  case options[:author] do
    no_author when no_author in [nil, false] ->
      request

    author ->
      actor_filter = %{must: [%{key: "actor", match: %{value: author.ap_id}}]}
      Map.put(request, :filter, actor_filter)
  end
end
|
|
|
|
|
|
|
|
@impl true
|
|
|
|
@doc """
Indexes `activity` in the `posts` collection when it is a `"Create"`
activity whose object yields search data.

For such activities the object's content is embedded via `get_embedding/1`
and the resulting point is sent with a `PUT` to
`/collections/posts/points`. Returns `:ok` on HTTP status 200 and
`{:error, result}` when either step fails. Activities that are skipped
also return `:ok`.
"""
def add_to_index(activity) do
  # This will only index public or unlisted notes
  # The search data is computed before the activity type is inspected,
  # which is why it comes first in the tuple.
  case {object_to_search_data(activity.object), activity.data["type"]} do
    {search_data, "Create"} when search_data not in [nil, false] ->
      with {:ok, vector} <- get_embedding(search_data.content),
           {:ok, %{status: 200}} <-
             QdrantClient.put(
               "/collections/posts/points",
               build_index_payload(activity, vector)
             ) do
        :ok
      else
        failure -> {:error, failure}
      end

    _ ->
      :ok
  end
end
|
|
|
|
|
2024-05-18 10:04:32 +00:00
|
|
|
@impl true
|
|
|
|
@doc """
Removes the point belonging to `object` from the `posts` collection.

The point id is derived from the id of the activity that references the
object (looked up by the object's `"id"`), using the same
`FlakeId.from_string/1` and `Ecto.UUID.cast!/1` conversion as indexing.

Returns `:ok` when Qdrant answers with HTTP status 200, or when no activity
references the object (there is then no point id to delete). Any other
result of the delete request is returned as `{:error, result}`.
"""
def remove_from_index(object) do
  case Activity.get_by_object_ap_id_with_object(object.data["id"]) do
    nil ->
      # The lookup can come back empty; without an activity there is no id
      # to derive, so treat the removal as already done instead of crashing
      # on `nil.id`.
      :ok

    activity ->
      id = activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()

      with {:ok, %{status: 200}} <-
             QdrantClient.post("/collections/posts/points/delete", %{"points" => [id]}) do
        :ok
      else
        e -> {:error, e}
      end
  end
end
|
|
|
|
|
2024-05-14 10:13:37 +00:00
|
|
|
@impl true
|
2024-05-27 10:35:08 +00:00
|
|
|
@doc """
Runs a vector search for `original_query` and returns matching activities.

The query is wrapped in a retrieval prompt, embedded via `get_embedding/1`,
and posted to `/collections/posts/points/search` together with the payload
built from `options`. The ids in the response are loaded from the database
in the order the search returned them, and the list is finally passed
through `maybe_fetch/3` with `user` and the original query.

Returns `[]` when the embedding or the search request does not produce the
expected result shape.
"""
def search(user, original_query, options) do
  prompt = "Represent this sentence for searching relevant passages: #{original_query}"

  with {:ok, vector} <- get_embedding(prompt),
       {:ok, %{body: %{"result" => hits}}} <-
         QdrantClient.post(
           "/collections/posts/points/search",
           build_search_payload(vector, options)
         ) do
    hits
    |> Enum.map(fn %{"id" => uuid} -> Ecto.UUID.dump!(uuid) end)
    |> activities_in_result_order()
    |> maybe_fetch(user, original_query)
  else
    _ -> []
  end
end

# Loads the activities with the given ids (objects preloaded, deactivated
# users excluded), ordered by each id's position in `ids`.
defp activities_in_result_order(ids) do
  Activity
  |> where([a], a.id in ^ids)
  |> Activity.with_preloaded_object()
  |> Activity.restrict_deactivated_users()
  |> order_by([a], fragment("array_position(?, ?)", ^ids, a.id))
  |> Pleroma.Repo.all()
end
|
2024-05-27 09:57:42 +00:00
|
|
|
|
|
|
|
@impl true
|
|
|
|
@doc """
Returns the list of URLs to probe when checking the health of this backend.

The Qdrant endpoint is the configured `:qdrant_url` with its path replaced
by `/healthz`. The OpenAI endpoint is taken verbatim from
`:openai_healthcheck_url`. Endpoints that are not configured are left out
of the returned list.
"""
def healthcheck_endpoints do
  qdrant_health =
    case Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]) do
      url when is_binary(url) ->
        # Struct-update syntax keeps the %URI{} guarantees that Map.put/3
        # would bypass.
        URI.to_string(%{URI.parse(url) | path: "/healthz"})

      _ ->
        # URI.parse/1 raises on a missing URL; skip the endpoint instead so
        # it is filtered out below like an unset OpenAI healthcheck URL.
        nil
    end

  openai_health = Config.get([Pleroma.Search.QdrantSearch, :openai_healthcheck_url])

  Enum.filter([qdrant_health, openai_health], & &1)
end
|
2024-05-14 10:13:37 +00:00
|
|
|
end
|
|
|
|
|
2024-05-19 08:17:46 +00:00
|
|
|
defmodule Pleroma.Search.QdrantSearch.OpenAIClient do
  @moduledoc """
  Tesla client for the embeddings API used by `Pleroma.Search.QdrantSearch`.

  The base URL comes from the `:openai_url` setting, request and response
  bodies go through the JSON middleware, and every request carries an
  `Authorization: Bearer ...` header built from `:openai_api_key`.
  """

  use Tesla

  alias Pleroma.Config.Getting, as: Config
  alias Pleroma.Search.QdrantSearch

  plug Tesla.Middleware.BaseUrl, Config.get([QdrantSearch, :openai_url])
  plug Tesla.Middleware.JSON

  plug Tesla.Middleware.Headers, [
    {"Authorization", "Bearer #{Pleroma.Config.get([QdrantSearch, :openai_api_key])}"}
  ]
end
|
|
|
|
|
|
|
|
defmodule Pleroma.Search.QdrantSearch.QdrantClient do
  @moduledoc """
  Tesla client for the Qdrant instance used by `Pleroma.Search.QdrantSearch`.

  The base URL comes from the `:qdrant_url` setting, request and response
  bodies go through the JSON middleware, and every request carries an
  `api-key` header taken from `:qdrant_api_key`.
  """

  use Tesla

  alias Pleroma.Config.Getting, as: Config
  alias Pleroma.Search.QdrantSearch

  plug Tesla.Middleware.BaseUrl, Config.get([QdrantSearch, :qdrant_url])
  plug Tesla.Middleware.JSON

  plug Tesla.Middleware.Headers, [
    {"api-key", Pleroma.Config.get([QdrantSearch, :qdrant_api_key])}
  ]
end
|