Merge remote-tracking branch 'upstream/qdrant-search-2' into spc2
This commit is contained in:
commit
d83a15e879
|
@ -911,6 +911,15 @@
|
||||||
|
|
||||||
config :pleroma, Pleroma.Uploaders.Uploader, timeout: 30_000
|
config :pleroma, Pleroma.Uploaders.Uploader, timeout: 30_000
|
||||||
|
|
||||||
|
# Defaults for the Qdrant-backed vector search (Pleroma.Search.QdrantSearch).
# Both URLs point at 127.0.0.1, i.e. Qdrant and Ollama are expected to run on
# the same host as Pleroma unless overridden.
config :pleroma, Pleroma.Search.QdrantSearch,
  qdrant_url: "http://127.0.0.1:6333/",
  qdrant_api_key: nil,
  ollama_url: "http://127.0.0.1:11434",
  ollama_model: "snowflake-arctic-embed:xs",
  # Sent to Qdrant as the request body when the collection is created.
  # NOTE(review): `size` presumably has to match the embedding length
  # produced by `ollama_model` -- confirm when switching models.
  qdrant_index_configuration: %{vectors: %{size: 384, distance: "Cosine"}}
|
||||||
|
|
||||||
# Import environment specific config. This must remain at the bottom
|
# Import environment specific config. This must remain at the bottom
|
||||||
# of this file so it overrides the configuration defined above.
|
# of this file so it overrides the configuration defined above.
|
||||||
import_config "#{Mix.env()}.exs"
|
import_config "#{Mix.env()}.exs"
|
||||||
|
|
|
@ -10,6 +10,12 @@ To use built-in search that has no external dependencies, set the search module
|
||||||
|
|
||||||
While it has no external dependencies, it has problems with performance and relevancy.
|
While it has no external dependencies, it has problems with performance and relevancy.
|
||||||
|
|
||||||
|
## QdrantSearch
|
||||||
|
|
||||||
|
This uses the vector search engine [Qdrant](https://qdrant.tech) to search the posts in a vector space. This needs a way to generate embeddings; for now, only the [Ollama](https://ollama.com) API is supported.
|
||||||
|
|
||||||
|
The default settings will support a setup where both Ollama and Qdrant run on the same system as Pleroma. The embedding model used by Ollama will need to be pulled first (e.g. `ollama pull snowflake-arctic-embed:xs`) for the embedding to work.
|
||||||
|
|
||||||
## Meilisearch
|
## Meilisearch
|
||||||
|
|
||||||
Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million
|
Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Mix.Tasks.Pleroma.Search.Indexer do
  @moduledoc """
  Mix tasks for managing the index of the configured search backend.

  Supported invocations (arguments as received by `run/1`):

    * `create_index` - calls `create_index/0` on the module configured under
      `[Pleroma.Search, :module]`.
    * `index [--limit N]` - queues the `N` activities with the highest ids
      (default `100_000`) for indexing via `Pleroma.Workers.SearchIndexingWorker`.
  """

  import Mix.Pleroma
  import Ecto.Query

  alias Pleroma.Workers.SearchIndexingWorker

  # Number of activities queued when no --limit is given.
  @default_limit 100_000
  # Number of activity ids fetched per database query.
  @per_step 1000
  # Number of Oban jobs inserted per `Oban.insert_all/1` call.
  @jobs_per_insert 100

  def run(["create_index"]) do
    # Boot through the same helper the "index" task uses so both tasks start
    # Pleroma the same way.
    start_pleroma()

    Pleroma.Config.get([Pleroma.Search, :module]).create_index()
  end

  def run(["index" | options]) do
    # Assertive match: unknown switches or stray positional arguments crash
    # the task instead of being silently ignored.
    {options, [], []} = OptionParser.parse(options, strict: [limit: :integer])

    start_pleroma()

    limit = Keyword.get(options, :limit, @default_limit)

    # Walk the offsets 0, @per_step, 2 * @per_step, ... below `limit`. The
    # last step is trimmed so exactly `limit` rows are requested in total,
    # including when `limit` is not a multiple of @per_step or is smaller
    # than one step.
    0
    |> Stream.iterate(&(&1 + @per_step))
    |> Stream.take_while(&(&1 < limit))
    |> Enum.reduce_while(:ok, fn offset, :ok ->
      batch_size = min(@per_step, limit - offset)
      ids = fetch_activity_ids(offset, batch_size)

      IO.puts("Got #{length(ids)} activities, adding to indexer")
      enqueue(ids)

      # A short batch means there are no more rows to page through.
      if length(ids) < batch_size, do: {:halt, :ok}, else: {:cont, :ok}
    end)
  end

  # Returns up to `batch_size` activity ids, newest id first, skipping `offset` rows.
  defp fetch_activity_ids(offset, batch_size) do
    query =
      from(a in Pleroma.Activity,
        order_by: [desc: a.id],
        limit: ^batch_size,
        offset: ^offset,
        select: a.id
      )

    Pleroma.Repo.all(query, timeout: :infinity)
  end

  # Inserts one "add_to_index" job per id, @jobs_per_insert jobs at a time.
  defp enqueue(ids) do
    ids
    |> Enum.chunk_every(@jobs_per_insert)
    |> Enum.each(fn chunk ->
      IO.puts("Adding #{length(chunk)} activities to indexing queue")

      chunk
      |> Enum.map(&SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => &1}))
      |> Oban.insert_all()
    end)
  end
end
|
|
@ -0,0 +1,118 @@
|
||||||
|
defmodule Pleroma.Search.QdrantSearch do
  @moduledoc """
  Search backend that indexes posts as vectors in a Qdrant collection.

  Text is turned into an embedding by posting it to an Ollama server
  (`/api/embeddings`); the embedding is then stored in, or searched against,
  the `posts` collection through `QdrantClient`. Settings are read from the
  `Pleroma.Search.QdrantSearch` config key.
  """

  @behaviour Pleroma.Search.SearchBackend
  import Ecto.Query
  alias Pleroma.Activity

  alias __MODULE__.QdrantClient
  alias __MODULE__.OllamaClient

  import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]

  require Logger

  # Path of the Qdrant collection holding one point per indexed activity.
  @collection_path "/collections/posts"
  # Maximum number of hits requested from Qdrant per search.
  @search_limit 20

  @doc """
  Creates the Qdrant collection using the `:qdrant_index_configuration` setting
  as the request body.

  Returns `:ok` when Qdrant answers with HTTP 200, otherwise `{:error, result}`
  where `result` is whatever the HTTP client returned.
  """
  @impl true
  def create_index do
    payload = Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])

    # Same success criterion as add_to_index/1, so the return value matches
    # the `:ok | {:error, any()}` contract of the behaviour callback.
    case QdrantClient.put(@collection_path, payload) do
      {:ok, %{status: 200}} -> :ok
      error -> {:error, error}
    end
  end

  @doc """
  Deletes the Qdrant collection. Returns the raw result of the HTTP request.
  """
  def drop_index do
    QdrantClient.delete(@collection_path)
  end

  @doc """
  Requests an embedding for `text` from Ollama using the configured `:ollama_model`.

  Returns `{:ok, embedding}` when the response body contains an `"embedding"`
  key, otherwise `{:error, "Failed to get embedding"}`.
  """
  def get_embedding(text) do
    with {:ok, %{body: %{"embedding" => embedding}}} <-
           OllamaClient.post("/api/embeddings", %{
             prompt: text,
             model: Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_model])
           }) do
      {:ok, embedding}
    else
      _ ->
        {:error, "Failed to get embedding"}
    end
  end

  # Builds the upsert body for a single point. The activity's flake id is
  # converted to its UUID string form and used as the point id; search/3
  # reverses this with Ecto.UUID.dump!/1.
  defp build_index_payload(activity, embedding) do
    %{
      points: [
        %{
          id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(),
          vector: embedding
        }
      ]
    }
  end

  # Builds the body of a vector search request.
  defp build_search_payload(embedding) do
    %{
      vector: embedding,
      limit: @search_limit
    }
  end

  @doc """
  Embeds the content of a `Create` activity and stores it in Qdrant.

  Returns `:ok` when the point was stored or when the activity is not indexed
  at all (not a `Create`, or no search data for its object), and
  `{:error, reason}` when the embedding or the Qdrant request failed.
  """
  @impl true
  def add_to_index(activity) do
    # This will only index public or unlisted notes
    maybe_search_data = object_to_search_data(activity.object)

    if activity.data["type"] == "Create" and maybe_search_data do
      with {:ok, embedding} <- get_embedding(maybe_search_data.content),
           {:ok, %{status: 200}} <-
             QdrantClient.put(
               "#{@collection_path}/points",
               build_index_payload(activity, embedding)
             ) do
        :ok
      else
        e -> {:error, e}
      end
    else
      :ok
    end
  end

  @doc """
  Returns the activities closest to `query` in the vector space, best match first.

  The user and options arguments are currently ignored. Any failure while
  embedding the query or talking to Qdrant is logged and results in `[]`.
  """
  @impl true
  def search(_user, query, _options) do
    # NOTE(review): this looks like the query prefix expected by the default
    # embedding model -- confirm before using a different `:ollama_model`.
    query = "Represent this sentence for searching relevant passages: #{query}"

    with {:ok, embedding} <- get_embedding(query),
         {:ok, %{body: %{"result" => result}}} <-
           QdrantClient.post("#{@collection_path}/points/search", build_search_payload(embedding)) do
      # Turn the UUID strings used as point ids back into their dumped form
      # so they can be compared against activity ids in the query below.
      ids =
        Enum.map(result, fn %{"id" => id} ->
          Ecto.UUID.dump!(id)
        end)

      from(a in Activity, where: a.id in ^ids)
      |> Activity.with_preloaded_object()
      |> Activity.restrict_deactivated_users()
      # Preserve the relevance order returned by Qdrant.
      |> Ecto.Query.order_by([a], fragment("array_position(?, ?)", ^ids, a.id))
      |> Pleroma.Repo.all()
    else
      error ->
        # Keep returning an empty result, but do not hide the failure.
        Logger.error("QdrantSearch: search failed: #{inspect(error)}")
        []
    end
  end

  @doc """
  Does nothing and returns `:ok`.
  """
  # NOTE(review): no delete request is sent, so points of removed posts stay
  # in the Qdrant collection.
  @impl true
  def remove_from_index(_object) do
    :ok
  end
end
|
||||||
|
|
||||||
|
defmodule Pleroma.Search.QdrantSearch.OllamaClient do
  @moduledoc """
  Tesla client for the Ollama server configured under `:ollama_url`.

  Requests and responses go through the JSON middleware.
  """

  use Tesla

  # Middleware order is kept as-is: base URL first, then JSON.
  plug Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_url])
  plug Tesla.Middleware.JSON
end
|
||||||
|
|
||||||
|
defmodule Pleroma.Search.QdrantSearch.QdrantClient do
  @moduledoc """
  Tesla client for the Qdrant server configured under `:qdrant_url`.

  Requests and responses go through the JSON middleware, and every request
  carries an `api-key` header taken from `:qdrant_api_key`.
  """

  use Tesla

  # Middleware order is kept as-is: base URL, JSON, then headers.
  plug Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_url])
  plug Tesla.Middleware.JSON

  # NOTE(review): the header is added even when the configured key is nil;
  # verify how the HTTP adapter in use handles a nil header value.
  plug Tesla.Middleware.Headers, [
    {"api-key", Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_api_key])}
  ]
end
|
|
@ -21,4 +21,9 @@ defmodule Pleroma.Search.SearchBackend do
|
||||||
from index.
|
from index.
|
||||||
"""
|
"""
|
||||||
@callback remove_from_index(object :: Pleroma.Object.t()) :: :ok | {:error, any()}
|
@callback remove_from_index(object :: Pleroma.Object.t()) :: :ok | {:error, any()}
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Create the index
|
||||||
|
"""
|
||||||
|
@callback create_index() :: :ok | {:error, any()}
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue