Merge remote-tracking branch 'upstream/qdrant-search-2' into spc2

2024-05-19 14:15:06 +00:00 · 2024-05-19 14:15:06 +00:00 · a5b041c03b
parent 9bbf4c9238 1b4f1db9b2
commit a5b041c03b
14 changed files with 239 additions and 30 deletions
--- a/changelog.d/qdrant_search.add
+++ b/changelog.d/qdrant_search.add
@ -0,0 +1 @@
+Add Qdrant/OpenAI embedding search
--- a/config/config.exs
+++ b/config/config.exs
@ -913,9 +913,10 @@

 config :pleroma, Pleroma.Search.QdrantSearch,
  qdrant_url: "http://127.0.0.1:6333/",
-  qdrant_api_key: nil,
-  ollama_url: "http://127.0.0.1:11434",
-  ollama_model: "snowflake-arctic-embed:xs",
+  qdrant_api_key: "",
+  openai_url: "http://127.0.0.1:11345",
+  openai_model: "snowflake/snowflake-arctic-embed-xs",
+  openai_api_key: "",
  qdrant_index_configuration: %{
    vectors: %{size: 384, distance: "Cosine"}
  }
--- a/docs/configuration/search.md
+++ b/docs/configuration/search.md
@ -12,9 +12,27 @@ While it has no external dependencies, it has problems with performance and rele

 ## QdrantSearch

-This uses the vector search engine [Qdrant](https://qdrant.tech) to search the posts in a vector space. This needs a way to generate embeddings, for now only the [Ollama](Ollama) api is supported.
+This uses the vector search engine [Qdrant](https://qdrant.tech) to search the posts in a vector space. This needs a way to generate embeddings and uses the [OpenAI API](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings). This is implemented by several projects besides OpenAI itself, including the Python-based fastembed-server found in `supplemental/search/fastembed-api`.

-The default settings will support a setup where both Ollama and Qdrant run on the same system as pleroma. The embedding model used by Ollama will need to be pulled first (e.g. `ollama pull snowflake-arctic-embed:xs`) for the embedding to work.
+The default settings will support a setup where both the fastembed server and Qdrant run on the same system as pleroma. To use it, run the fastembed server (see the README in `supplemental/search/fastembed-api`) and set the search provider:
+
+> config :pleroma, Pleroma.Search, module: Pleroma.Search.QdrantSearch
+
+Then, start the Qdrant server, see [here](https://qdrant.tech/documentation/quick-start/) for instructions.
+
+You will also need to create the Qdrant index once by running `mix pleroma.search.indexer create_index`. Running `mix pleroma.search.indexer index` will retroactively index the last 100_000 activities.
+
+### Indexing and model options
+
+To see the available configuration options, check out the QdrantSearch section in `config/config.exs`.
+
+The default indexing options work for the default model (`snowflake/snowflake-arctic-embed-xs`). To optimize for a low memory footprint, adjust the index configuration as described in the [Qdrant docs](https://qdrant.tech/documentation/guides/optimize/). See also [this blog post](https://qdrant.tech/articles/memory-consumption/) that goes into detail.
+
+Different embedding models will need different vector size settings. You can see a list of the models supported by the fastembed server [here](https://qdrant.github.io/fastembed/examples/Supported_Models), including their vector dimensions. These vector dimensions need to be set in the `qdrant_index_configuration`. 
+
+E.g., if you want to use `sentence-transformers/all-MiniLM-L6-v2` as a model, you will not need to adjust things, because it and `snowflake-arctic-embed-xs` are both 384-dimensional models. If you want to use `snowflake/snowflake-arctic-embed-l`, you will need to adjust the `size` parameter in the `qdrant_index_configuration` to 1024, as it has a dimension of 1024.
+
+When using a different model, you will need to drop the index and recreate it (`mix pleroma.search.indexer drop_index` and `mix pleroma.search.indexer create_index`), as the different embeddings are not compatible with each other.

 ## Meilisearch

--- a/lib/mix/tasks/pleroma/search/indexer.ex
+++ b/lib/mix/tasks/pleroma/search/indexer.ex
@ -9,9 +9,23 @@ defmodule Mix.Tasks.Pleroma.Search.Indexer do
  alias Pleroma.Workers.SearchIndexingWorker

  def run(["create_index"]) do
-    Application.ensure_all_started(:pleroma)
+    start_pleroma()

-    Pleroma.Config.get([Pleroma.Search, :module]).create_index()
+    with :ok <- Pleroma.Config.get([Pleroma.Search, :module]).create_index() do
+      IO.puts("Index created")
+    else
+      e -> IO.puts("Could not create index: #{inspect(e)}")
+    end
+  end
+
+  def run(["drop_index"]) do
+    start_pleroma()
+
+    # Delegate to whichever search backend is configured and report the outcome.
+    search_module = Pleroma.Config.get([Pleroma.Search, :module])
+
+    case search_module.drop_index() do
+      :ok -> IO.puts("Index dropped")
+      error -> IO.puts("Could not drop index: #{inspect(error)}")
+    end
  end

  def run(["index" | options]) do
--- a/lib/pleroma/search/database_search.ex
+++ b/lib/pleroma/search/database_search.ex
@ -48,6 +48,12 @@ def add_to_index(_activity), do: :ok
  @impl true
  def remove_from_index(_object), do: :ok

+  # No-op implementation of the callback: always reports success.
+  @impl true
+  def create_index, do: :ok
+
+  # No-op implementation of the callback: always reports success.
+  @impl true
+  def drop_index, do: :ok
+
  def maybe_restrict_author(query, %User{} = author) do
    Activity.Queries.by_author(query, author)
  end
--- a/lib/pleroma/search/meilisearch.ex
+++ b/lib/pleroma/search/meilisearch.ex
@ -10,6 +10,12 @@ defmodule Pleroma.Search.Meilisearch do

  @behaviour Pleroma.Search.SearchBackend

+  # No-op implementation of the callback: always reports success.
+  @impl true
+  def create_index, do: :ok
+
+  # No-op implementation of the callback: always reports success.
+  @impl true
+  def drop_index, do: :ok
+
  defp meili_headers do
    private_key = Config.get([Pleroma.Search.Meilisearch, :private_key])

--- a/lib/pleroma/search/qdrant_search.ex
+++ b/lib/pleroma/search/qdrant_search.ex
@ -1,28 +1,40 @@
 defmodule Pleroma.Search.QdrantSearch do
  @behaviour Pleroma.Search.SearchBackend
  import Ecto.Query
-  alias Pleroma.Activity

+  alias Pleroma.Activity
+  alias Pleroma.Config.Getting, as: Config
+
+  alias __MODULE__.OpenAIClient
  alias __MODULE__.QdrantClient
-  alias __MODULE__.OllamaClient

  import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]

  @impl true
-  def create_index() do
-    payload = Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])
-    QdrantClient.put("/collections/posts", payload)
+  def create_index do
+    payload = Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])
+
+    with {:ok, %{status: 200}} <- QdrantClient.put("/collections/posts", payload) do
+      :ok
+    else
+      e -> {:error, e}
+    end
  end

-  def drop_index() do
-    QdrantClient.delete("/collections/posts")
+  @impl true
+  def drop_index do
+    with {:ok, %{status: 200}} <- QdrantClient.delete("/collections/posts") do
+      :ok
+    else
+      e -> {:error, e}
+    end
  end

  def get_embedding(text) do
-    with {:ok, %{body: %{"embedding" => embedding}}} <-
-           OllamaClient.post("/api/embeddings", %{
-             prompt: text,
-             model: Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_model])
+    with {:ok, %{body: %{"data" => [%{"embedding" => embedding}]}}} <-
+           OpenAIClient.post("/v1/embeddings", %{
+             input: text,
+             model: Config.get([Pleroma.Search.QdrantSearch, :openai_model])
           }) do
      {:ok, embedding}
    else
@ -42,10 +54,11 @@ defp build_index_payload(activity, embedding) do
    }
  end

-  defp build_search_payload(embedding) do
+  defp build_search_payload(embedding, options) do
    %{
      vector: embedding,
-      limit: 20
+      limit: options[:limit] || 20,
+      offset: options[:offset] || 0
    }
  end

@ -71,12 +84,28 @@ def add_to_index(activity) do
  end

  @impl true
-  def search(_user, query, _options) do
+  def remove_from_index(object) do
+    # The Qdrant point id is derived from the activity id. If no activity is found for the
+    # object, return an error tuple instead of raising on `nil.id`.
+    with %Activity{id: flake_id} <- Activity.get_by_object_ap_id_with_object(object.data["id"]),
+         id = flake_id |> FlakeId.from_string() |> Ecto.UUID.cast!(),
+         {:ok, %{status: 200}} <-
+           QdrantClient.post("/collections/posts/points/delete", %{"points" => [id]}) do
+      :ok
+    else
+      nil -> {:error, :activity_not_found}
+      e -> {:error, e}
+    end
+  end
+
+  @impl true
+  def search(_user, query, options) do
    query = "Represent this sentence for searching relevant passages: #{query}"

    with {:ok, embedding} <- get_embedding(query),
         {:ok, %{body: %{"result" => result}}} <-
-           QdrantClient.post("/collections/posts/points/search", build_search_payload(embedding)) do
+           QdrantClient.post(
+             "/collections/posts/points/search",
+             build_search_payload(embedding, options)
+           ) do
      ids =
        Enum.map(result, fn %{"id" => id} ->
          Ecto.UUID.dump!(id)
@ -92,24 +121,26 @@ def search(_user, query, _options) do
        []
    end
  end
-
-  @impl true
-  def remove_from_index(_object) do
-    :ok
-  end
 end

-defmodule Pleroma.Search.QdrantSearch.OllamaClient do
+defmodule Pleroma.Search.QdrantSearch.OpenAIClient do
  use Tesla
+  alias Pleroma.Config.Getting, as: Config

-  plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_url]))
+  plug(Tesla.Middleware.BaseUrl, Config.get([Pleroma.Search.QdrantSearch, :openai_url]))
  plug(Tesla.Middleware.JSON)
+
+  plug(Tesla.Middleware.Headers, [
+    {"Authorization",
+     "Bearer #{Pleroma.Config.get([Pleroma.Search.QdrantSearch, :openai_api_key])}"}
+  ])
 end

 defmodule Pleroma.Search.QdrantSearch.QdrantClient do
  use Tesla
+  alias Pleroma.Config.Getting, as: Config

-  plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]))
+  plug(Tesla.Middleware.BaseUrl, Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]))
  plug(Tesla.Middleware.JSON)

  plug(Tesla.Middleware.Headers, [
--- a/lib/pleroma/search/search_backend.ex
+++ b/lib/pleroma/search/search_backend.ex
@ -26,4 +26,9 @@ defmodule Pleroma.Search.SearchBackend do
  Create the index
  """
  @callback create_index() :: :ok | {:error, any()}
+
+  @doc """
+  Drop the index
+  """
+  @callback drop_index() :: :ok | {:error, any()}
 end
--- a/supplemental/search/fastembed-api/Dockerfile
+++ b/supplemental/search/fastembed-api/Dockerfile
@ -0,0 +1,9 @@
+FROM python:3.9
+
+WORKDIR /workdir
+# Install dependencies first so this layer stays cached when only the server script changes.
+COPY requirements.txt /workdir/requirements.txt
+RUN pip install -r /workdir/requirements.txt
+
+COPY fastembed-server.py /workdir/fastembed-server.py
+CMD ["python", "/workdir/fastembed-server.py"]
--- a/supplemental/search/fastembed-api/README.md
+++ b/supplemental/search/fastembed-api/README.md
@ -0,0 +1,6 @@
+# About
+This is a minimal implementation of the [OpenAI Embeddings API](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) meant to be used with the QdrantSearch backend. 
+
+# Usage
+
+The easiest way to run it is to just use docker compose with `docker compose up`. This starts the server on the default configured port. Different models can be used; for a full list of supported models, check the [fastembed documentation](https://qdrant.github.io/fastembed/examples/Supported_Models/). The first time a model is requested it will be downloaded, which can take a few seconds.
--- a/supplemental/search/fastembed-api/compose.yml
+++ b/supplemental/search/fastembed-api/compose.yml
@ -0,0 +1,5 @@
+services:
+  web:
+    build: .
+    ports:
+      - "11345:11345"
--- a/supplemental/search/fastembed-api/fastembed-server.py
+++ b/supplemental/search/fastembed-api/fastembed-server.py
@ -0,0 +1,23 @@
+from fastembed import TextEmbedding
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+models = {}
+
+app = FastAPI()
+
+class EmbeddingRequest(BaseModel):
+    model: str
+    input: str
+
+@app.post("/v1/embeddings")
+def embeddings(request: EmbeddingRequest):
+    model = models.get(request.model) or TextEmbedding(request.model)
+    models[request.model] = model
+    embeddings = next(model.embed(request.input)).tolist()
+    return {"data": [{"embedding": embeddings}]}
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=11345)
--- a/supplemental/search/fastembed-api/requirements.txt
+++ b/supplemental/search/fastembed-api/requirements.txt
@ -0,0 +1,4 @@
+fastapi==0.111.0
+fastembed==0.2.7
+pydantic==1.10.15
+uvicorn==0.29.0
--- a/test/pleroma/search/qdrant_search_test.exs
+++ b/test/pleroma/search/qdrant_search_test.exs
@ -0,0 +1,80 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Search.QdrantSearchTest do
+  use Pleroma.DataCase, async: true
+  use Oban.Testing, repo: Pleroma.Repo
+
+  import Pleroma.Factory
+  import Mox
+
+  alias Pleroma.Search.QdrantSearch
+  alias Pleroma.UnstubbedConfigMock, as: Config
+  alias Pleroma.Web.CommonAPI
+  alias Pleroma.Workers.SearchIndexingWorker
+
+  describe "Qdrant search" do
+    test "indexes a public post on creation, deletes from the index on deletion" do
+      user = insert(:user)
+
+      # Stub the three HTTP endpoints used in this test. Each stub sends a marker message
+      # to the process making the request, which the assert_received calls below check.
+      Tesla.Mock.mock(fn
+        %{method: :post, url: "https://openai.url/v1/embeddings"} ->
+          send(self(), "posted_to_openai")
+
+          Tesla.Mock.json(%{
+            data: [%{embedding: [1, 2, 3]}]
+          })
+
+        %{method: :put, url: "https://qdrant.url/collections/posts/points", body: body} ->
+          send(self(), "posted_to_qdrant")
+
+          # The vector returned by the embeddings stub must be what gets stored in Qdrant.
+          assert match?(%{"points" => [%{"vector" => [1, 2, 3]}]}, Jason.decode!(body))
+
+          Tesla.Mock.json("ok")
+
+        %{method: :post, url: "https://qdrant.url/collections/posts/points/delete"} ->
+          send(self(), "deleted_from_qdrant")
+          Tesla.Mock.json("ok")
+      end)
+
+      # Exactly six config lookups are expected through the mock. QdrantSearch is returned
+      # as the search module; any QdrantSearch key not in the map below resolves to nil.
+      Config
+      |> expect(:get, 6, fn
+        [Pleroma.Search, :module], nil ->
+          QdrantSearch
+
+        [Pleroma.Search.QdrantSearch, key], nil ->
+          %{
+            openai_model: "a_model",
+            openai_url: "https://openai.url",
+            qdrant_url: "https://qdrant.url"
+          }[key]
+      end)
+
+      # Posting publicly should enqueue an indexing job for the new activity.
+      {:ok, activity} =
+        CommonAPI.post(user, %{
+          status: "guys i just don't wanna leave the swamp",
+          visibility: "public"
+        })
+
+      args = %{"op" => "add_to_index", "activity" => activity.id}
+
+      assert_enqueued(
+        worker: SearchIndexingWorker,
+        args: args
+      )
+
+      # Running the job should hit the embeddings endpoint and then store the point.
+      assert :ok = perform_job(SearchIndexingWorker, args)
+      assert_received("posted_to_openai")
+      assert_received("posted_to_qdrant")
+
+      # Deleting the post should enqueue the matching removal job for its object.
+      {:ok, _} = CommonAPI.delete(activity.id, user)
+
+      delete_args = %{"op" => "remove_from_index", "object" => activity.object.id}
+      assert_enqueued(worker: SearchIndexingWorker, args: delete_args)
+      assert :ok = perform_job(SearchIndexingWorker, delete_args)
+
+      assert_received("deleted_from_qdrant")
+    end
+  end
+end