Make indexing stream objects in larger batches (100,000 per chunk, newest first) and show the running number of entries indexed

This commit is contained in:
Ekaterina Vaartis 2021-08-16 22:30:56 +03:00
parent 365024abec
commit ea6a6a1287
1 changed file with 39 additions and 26 deletions

View File

@ -28,17 +28,29 @@ def run(["index"]) do
]) ])
) )
Pleroma.Repo.chunk_stream( chunk_size = 100_000
Pleroma.Repo.transaction(
fn ->
Pleroma.Repo.stream(
from(Pleroma.Object, from(Pleroma.Object,
# Only index public posts which are notes and have some text # Only index public posts which are notes and have some text
where: where:
fragment("data->>'type' = 'Note'") and fragment("data->>'type' = 'Note'") and
fragment("LENGTH(data->>'source') > 0") and fragment("LENGTH(data->>'source') > 0") and
fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()),
order_by: fragment("data->'published' DESC")
), ),
200, timeout: :infinity
:batches
) )
|> Stream.chunk_every(chunk_size)
|> Stream.transform(0, fn objects, acc ->
new_acc = acc + Enum.count(objects)
IO.puts("Indexed #{new_acc} entries")
{[objects], new_acc}
end)
|> Stream.map(fn objects -> |> Stream.map(fn objects ->
Enum.map(objects, fn object -> Enum.map(objects, fn object ->
data = object.data data = object.data
@ -51,10 +63,11 @@ def run(["index"]) do
"#{endpoint}/indexes/objects/documents", "#{endpoint}/indexes/objects/documents",
Jason.encode!(objects) Jason.encode!(objects)
) )
IO.puts("Indexed #{Enum.count(objects)} entries")
end) end)
|> Stream.run() |> Stream.run()
end,
timeout: :infinity
)
end end
def run(["clear"]) do def run(["clear"]) do