Insert text representation of hashtags into object["hashtags"]

Includes a new mix task: pleroma.database fill_old_hashtags
This commit is contained in:
Haelwenn (lanodan) Monnier 2020-07-31 16:46:35 +02:00
parent 8e1f32e715
commit acb03d591b
No known key found for this signature in database
GPG Key ID: D5B7A8E43C997DEE
22 changed files with 139 additions and 51 deletions

View File

@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Changed ### Changed
- **Breaking:** Plain-text hashtags are now stored in `object->hashtags` instead of alongside the other tags. Run the [`pleroma.database fill_old_hashtags` mix task](docs/administration/CLI_tasks/database.md) to fill this field for old objects (it can be run while Pleroma is running).
- Polls now always return a `voters_count`, even if they are single-choice. - Polls now always return a `voters_count`, even if they are single-choice.
- Admin Emails: The ap id is used as the user link in emails now. - Admin Emails: The ap id is used as the user link in emails now.
- Improved registration workflow for email confirmation and account approval modes. - Improved registration workflow for email confirmation and account approval modes.
@ -432,7 +433,6 @@ switched to a new configuration mechanism, however it was not officially removed
- Static-FE: Fix remote posts not being sanitized - Static-FE: Fix remote posts not being sanitized
### Fixed ### Fixed
=======
- Rate limiter crashes when there is no explicitly specified ip in the config - Rate limiter crashes when there is no explicitly specified ip in the config
- 500 errors when no `Accept` header is present if Static-FE is enabled - 500 errors when no `Accept` header is present if Static-FE is enabled
- Instance panel not being updated immediately due to wrong `Cache-Control` headers - Instance panel not being updated immediately due to wrong `Cache-Control` headers

View File

@ -91,6 +91,16 @@ Can be safely re-run
mix pleroma.database fix_likes_collections mix pleroma.database fix_likes_collections
``` ```
## Fill hashtags for old objects
```sh tab="OTP"
./bin/pleroma_ctl database fill_old_hashtags
```
```sh tab="From Source"
mix pleroma.database fill_old_hashtags
```
## Vacuum the database ## Vacuum the database
### Analyze ### Analyze

View File

@ -128,6 +128,49 @@ def run(["fix_likes_collections"]) do
|> Stream.run() |> Stream.run()
end end
# Backfills `data["hashtags"]` on objects that do not have it yet.
#
# Selects every object whose data has no "hashtags" value and whose "tag"
# value is not the empty array, walks them in batches of 200 and, in one
# transaction per batch, stores the string entries of "tag" under "hashtags".
def run(["fill_old_hashtags"]) do
  import Ecto.Query

  start_pleroma()

  from(
    o in Object,
    where: fragment("(?)->>'hashtags' is null", o.data),
    where: fragment("(?)->>'tag' != '[]'", o.data),
    # `inserted_at` has to be selected because the progress log below prints it;
    # without it the first batch fails with a KeyError.
    select: %{id: o.id, inserted_at: o.inserted_at, tag: fragment("(?)->>'tag'", o.data)},
    # Keyword form is required: `[:desc, o.id]` would be read as "order by a
    # field named desc, then by id".
    # NOTE(review): confirm that chunk_stream's own pagination is compatible
    # with a descending id order.
    order_by: [desc: o.id]
  )
  |> Pleroma.Repo.chunk_stream(200, :batches)
  |> Stream.each(fn objects ->
    Repo.transaction(fn ->
      objects_first = List.first(objects)
      objects_last = List.last(objects)

      Logger.info(
        "fill_old_hashtags: #{objects_first.id} (#{objects_first.inserted_at}) -- " <>
          "#{objects_last.id} (#{objects_last.inserted_at})"
      )

      # Only the side effect matters here, so iterate with each/2 rather than
      # building a throwaway list of update results.
      Enum.each(objects, fn object ->
        # "tag" was selected as JSON text; keep only its plain string entries
        # (map entries are dropped).
        tags =
          object.tag
          |> Jason.decode!()
          |> Enum.filter(&is_binary/1)

        Object
        |> where([o], o.id == ^object.id)
        |> update([o],
          set: [data: fragment("safe_jsonb_set(?, '{hashtags}', ?, true)", o.data, ^tags)]
        )
        |> Repo.update_all([], timeout: :infinity)
      end)
    end)
  end)
  |> Stream.run()
end
def run(["vacuum", args]) do def run(["vacuum", args]) do
start_pleroma() start_pleroma()

View File

@ -48,6 +48,10 @@ defp item_creation_tags(tags, _, _) do
tags tags
end end
# Builds one "hashtag:<name>" topic per entry of the object's "hashtags" list.
# This clause is defined ahead of the "tag"-based clause, so it is the one
# that matches whenever the "hashtags" key is present.
defp hashtags_to_topics(%{data: %{"hashtags" => hashtags}}) do
  for hashtag <- hashtags, do: "hashtag:" <> hashtag
end
defp hashtags_to_topics(%{data: %{"tag" => tags}}) do defp hashtags_to_topics(%{data: %{"tag" => tags}}) do
tags tags
|> Enum.filter(&is_bitstring(&1)) |> Enum.filter(&is_bitstring(&1))

View File

@ -18,7 +18,8 @@ defmodule Pleroma.Constants do
"emoji", "emoji",
"context_id", "context_id",
"deleted_activity_id", "deleted_activity_id",
"pleroma_internal" "pleroma_internal",
"hashtags"
] ]
) )

View File

@ -666,7 +666,7 @@ defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true})
defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject) where: fragment("not (?)->'hashtags' \\?| (?)", object.data, ^tag_reject)
) )
end end
@ -679,7 +679,7 @@ defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) where: fragment("(?)->'hashtags' \\?& (?)", object.data, ^tag_all)
) )
end end
@ -692,14 +692,14 @@ defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do
defp restrict_tag(query, %{tag: tag}) when is_list(tag) do defp restrict_tag(query, %{tag: tag}) when is_list(tag) do
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag) where: fragment("(?)->'hashtags' \\?| (?)", object.data, ^tag)
) )
end end
defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("(?)->'tag' \\? (?)", object.data, ^tag) where: fragment("(?)->'hashtags' \\? (?)", object.data, ^tag)
) )
end end

View File

@ -74,9 +74,11 @@ defp check_media_nsfw(
object = object =
if MRF.subdomain_match?(media_nsfw, actor_host) do if MRF.subdomain_match?(media_nsfw, actor_host) do
tags = (child_object["tag"] || []) ++ ["nsfw"] child_object =
child_object = Map.put(child_object, "tag", tags) child_object
child_object = Map.put(child_object, "sensitive", true) |> Map.put("hashtags", (child_object["hashtags"] || []) ++ ["nsfw"])
|> Map.put("sensitive", true)
Map.put(object, "object", child_object) Map.put(object, "object", child_object)
else else
object object

View File

@ -312,16 +312,15 @@ def fix_emoji(%{"tag" => %{"type" => "Emoji"} = tag} = object) do
def fix_emoji(object), do: object def fix_emoji(object), do: object
def fix_tag(%{"tag" => tag} = object) when is_list(tag) do def fix_tag(%{"tag" => tag} = object) when is_list(tag) do
tags = hashtags =
tag tag
|> Enum.filter(fn data -> data["type"] == "Hashtag" and data["name"] end) |> Enum.filter(fn data -> data["type"] == "Hashtag" and data["name"] end)
|> Enum.map(fn %{"name" => name} -> |> Enum.map(fn
name %{"name" => "#" <> hashtag} -> String.downcase(hashtag)
|> String.slice(1..-1) %{"name" => hashtag} -> String.downcase(hashtag)
|> String.downcase()
end) end)
Map.put(object, "tag", tag ++ tags) Map.put(object, "hashtags", hashtags)
end end
def fix_tag(%{"tag" => %{} = tag} = object) do def fix_tag(%{"tag" => %{} = tag} = object) do
@ -865,7 +864,7 @@ def maybe_fix_object_url(data), do: data
def add_hashtags(object) do def add_hashtags(object) do
tags = tags =
(object["tag"] || []) ((object["hashtags"] || []) ++ (object["tag"] || []))
|> Enum.map(fn |> Enum.map(fn
# Expand internal representation tags into AS2 tags. # Expand internal representation tags into AS2 tags.
tag when is_binary(tag) -> tag when is_binary(tag) ->
@ -936,7 +935,7 @@ def set_sensitive(%{"sensitive" => _} = object) do
end end
def set_sensitive(object) do def set_sensitive(object) do
tags = object["tag"] || [] tags = object["hashtags"] || object["tag"] || []
Map.put(object, "sensitive", "nsfw" in tags) Map.put(object, "sensitive", "nsfw" in tags)
end end

View File

@ -310,7 +310,16 @@ def make_note_data(%ActivityDraft{} = draft) do
"context" => draft.context, "context" => draft.context,
"attachment" => draft.attachments, "attachment" => draft.attachments,
"actor" => draft.user.ap_id, "actor" => draft.user.ap_id,
"tag" => Keyword.values(draft.tags) |> Enum.uniq() "tag" => Enum.filter(draft.tags, &is_map(&1)) |> Enum.uniq(),
"hashtags" =>
draft.tags
|> Enum.reduce([], fn
# Why so many formats
{:name, x}, acc -> if is_bitstring(x), do: [x | acc], else: acc
{"#" <> _, x}, acc -> if is_bitstring(x), do: [x | acc], else: acc
x, acc -> if is_bitstring(x), do: [x | acc], else: acc
end)
|> Enum.uniq()
} }
|> add_in_reply_to(draft.in_reply_to) |> add_in_reply_to(draft.in_reply_to)
|> Map.merge(draft.extra) |> Map.merge(draft.extra)

View File

@ -347,7 +347,7 @@ def render("show.json", %{activity: %{data: %{"object" => _object}} = activity}
media_attachments: attachments, media_attachments: attachments,
poll: render(PollView, "show.json", object: object, for: opts[:for]), poll: render(PollView, "show.json", object: object, for: opts[:for]),
mentions: mentions, mentions: mentions,
tags: build_tags(tags), tags: build_tags(object.data["hashtags"] || tags),
application: %{ application: %{
name: "Web", name: "Web",
website: nil website: nil

View File

@ -22,8 +22,8 @@
<link type="text/html" href='<%= @data["external_url"] %>' rel="alternate"/> <link type="text/html" href='<%= @data["external_url"] %>' rel="alternate"/>
<% end %> <% end %>
<%= for tag <- @data["tag"] || [] do %> <%= for hashtag <- @data["hashtags"] || [] do %>
<category term="<%= tag %>"></category> <category term="<%= hashtag %>"></category>
<% end %> <% end %>
<%= for attachment <- @data["attachment"] || [] do %> <%= for attachment <- @data["attachment"] || [] do %>

View File

@ -21,8 +21,8 @@
<link><%= @data["external_url"] %></link> <link><%= @data["external_url"] %></link>
<% end %> <% end %>
<%= for tag <- @data["tag"] || [] do %> <%= for hashtag <- @data["hashtags"] || [] do %>
<category term="<%= tag %>"></category> <category term="<%= hashtag %>"></category>
<% end %> <% end %>
<%= for attachment <- @data["attachment"] || [] do %> <%= for attachment <- @data["attachment"] || [] do %>

View File

@ -41,8 +41,8 @@
<% end %> <% end %>
<% end %> <% end %>
<%= for tag <- @data["tag"] || [] do %> <%= for hashtag <- @data["hashtags"] || [] do %>
<category term="<%= tag %>"></category> <category term="<%= hashtag %>"></category>
<% end %> <% end %>
<%= for {emoji, file} <- @data["emoji"] || %{} do %> <%= for {emoji, file} <- @data["emoji"] || %{} do %>

View File

@ -0,0 +1,11 @@
defmodule Pleroma.Repo.Migrations.AddHashtagsIndexToObjects do
  use Ecto.Migration

  # Swaps the GIN index on objects: the one over data->'tag' is dropped (if it
  # exists) and one over data->'hashtags' is created (if it is missing).
  def change do
    tag_index = index(:objects, ["(data->'tag')"], using: :gin, name: :objects_tags)

    hashtags_index =
      index(:objects, ["(data->'hashtags')"], using: :gin, name: :objects_hashtags)

    drop_if_exists(tag_index)
    create_if_not_exists(hashtags_index)
  end
end

View File

@ -78,7 +78,7 @@ test "with no attachments doesn't produce public:media topics", %{activity: acti
end end
test "converts tags to hash tags", %{activity: %{object: %{data: data} = object} = activity} do test "converts tags to hash tags", %{activity: %{object: %{data: data} = object} = activity} do
tagged_data = Map.put(data, "tag", ["foo", "bar"]) tagged_data = Map.put(data, "hashtags", ["foo", "bar"])
activity = %{activity | object: %{object | data: tagged_data}} activity = %{activity | object: %{object | data: tagged_data}}
topics = Topics.get_activity_topics(activity) topics = Topics.get_activity_topics(activity)

View File

@ -78,7 +78,7 @@ test "has a matching host" do
assert SimplePolicy.filter(media_message) == assert SimplePolicy.filter(media_message) ==
{:ok, {:ok,
media_message media_message
|> put_in(["object", "tag"], ["foo", "nsfw"]) |> put_in(["object", "hashtags"], ["foo", "nsfw"])
|> put_in(["object", "sensitive"], true)} |> put_in(["object", "sensitive"], true)}
assert SimplePolicy.filter(local_message) == {:ok, local_message} assert SimplePolicy.filter(local_message) == {:ok, local_message}
@ -92,7 +92,7 @@ test "match with wildcard domain" do
assert SimplePolicy.filter(media_message) == assert SimplePolicy.filter(media_message) ==
{:ok, {:ok,
media_message media_message
|> put_in(["object", "tag"], ["foo", "nsfw"]) |> put_in(["object", "hashtags"], ["foo", "nsfw"])
|> put_in(["object", "sensitive"], true)} |> put_in(["object", "sensitive"], true)}
assert SimplePolicy.filter(local_message) == {:ok, local_message} assert SimplePolicy.filter(local_message) == {:ok, local_message}
@ -105,7 +105,7 @@ defp build_media_message do
"type" => "Create", "type" => "Create",
"object" => %{ "object" => %{
"attachment" => [%{}], "attachment" => [%{}],
"tag" => ["foo"], "hashtags" => ["foo"],
"sensitive" => false "sensitive" => false
} }
} }

View File

@ -39,7 +39,7 @@ test "it works for incoming notices with tag not being an array (kroeg)" do
{:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data)
object = Object.normalize(data["object"]) object = Object.normalize(data["object"])
assert "test" in object.data["tag"] assert ["test"] == object.data["hashtags"]
end end
test "it cleans up incoming notices which are not really DMs" do test "it cleans up incoming notices which are not really DMs" do
@ -220,7 +220,7 @@ test "it works for incoming notices with hashtags" do
{:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data)
object = Object.normalize(data["object"]) object = Object.normalize(data["object"])
assert Enum.at(object.data["tag"], 2) == "moo" assert object.data["hashtags"] == ["moo"]
end end
test "it works for incoming notices with contentMap" do test "it works for incoming notices with contentMap" do

View File

@ -204,30 +204,37 @@ test "it strips internal fields" do
{:ok, activity} = CommonAPI.post(user, %{status: "#2hu :firefox:"}) {:ok, activity} = CommonAPI.post(user, %{status: "#2hu :firefox:"})
{:ok, modified} = Transmogrifier.prepare_outgoing(activity.data) {:ok, %{"object" => modified_object}} = Transmogrifier.prepare_outgoing(activity.data)
assert length(modified["object"]["tag"]) == 2 assert [
%{"name" => "#2hu", "type" => "Hashtag"},
%{"name" => ":firefox:", "type" => "Emoji"}
] = modified_object["tag"]
assert is_nil(modified["object"]["emoji"]) refute Map.has_key?(modified_object, "hashtags")
assert is_nil(modified["object"]["like_count"]) refute Map.has_key?(modified_object, "emoji")
assert is_nil(modified["object"]["announcements"]) refute Map.has_key?(modified_object, "like_count")
assert is_nil(modified["object"]["announcement_count"]) refute Map.has_key?(modified_object, "announcements")
assert is_nil(modified["object"]["context_id"]) refute Map.has_key?(modified_object, "announcement_count")
refute Map.has_key?(modified_object, "context_id")
end end
test "it strips internal fields of article" do test "it strips internal fields of article" do
activity = insert(:article_activity) activity = insert(:article_activity)
{:ok, modified} = Transmogrifier.prepare_outgoing(activity.data) {:ok, %{"object" => modified_object}} = Transmogrifier.prepare_outgoing(activity.data)
assert length(modified["object"]["tag"]) == 2 assert [
%{"name" => "#2hu", "type" => "Hashtag"},
%{"name" => ":2hu:", "type" => "Emoji"}
] = modified_object["tag"]
assert is_nil(modified["object"]["emoji"]) refute Map.has_key?(modified_object, "hashtags")
assert is_nil(modified["object"]["like_count"]) refute Map.has_key?(modified_object, "emoji")
assert is_nil(modified["object"]["announcements"]) refute Map.has_key?(modified_object, "like_count")
assert is_nil(modified["object"]["announcement_count"]) refute Map.has_key?(modified_object, "announcements")
assert is_nil(modified["object"]["context_id"]) refute Map.has_key?(modified_object, "announcement_count")
assert is_nil(modified["object"]["likes"]) refute Map.has_key?(modified_object, "context_id")
end end
test "the directMessage flag is present" do test "the directMessage flag is present" do

View File

@ -591,7 +591,8 @@ test "returns note data" do
"context" => "2hu", "context" => "2hu",
"sensitive" => false, "sensitive" => false,
"summary" => "test summary", "summary" => "test summary",
"tag" => ["jimm"], "hashtags" => ["jimm"],
"tag" => [],
"to" => [user2.ap_id], "to" => [user2.ap_id],
"type" => "Note", "type" => "Note",
"custom_tag" => "test" "custom_tag" => "test"

View File

@ -493,7 +493,8 @@ test "it de-duplicates tags" do
object = Object.normalize(activity) object = Object.normalize(activity)
assert object.data["tag"] == ["2hu"] assert object.data["tag"] == []
assert object.data["hashtags"] == ["2hu"]
end end
test "it adds emoji in the object" do test "it adds emoji in the object" do

View File

@ -262,8 +262,8 @@ test "a note activity" do
mentions: [], mentions: [],
tags: [ tags: [
%{ %{
name: "#{object_data["tag"]}", name: "2hu",
url: "/tag/#{object_data["tag"]}" url: "/tag/2hu"
} }
], ],
application: %{ application: %{

View File

@ -93,7 +93,7 @@ def note_factory(attrs \\ %{}) do
"like_count" => 0, "like_count" => 0,
"context" => "2hu", "context" => "2hu",
"summary" => "2hu", "summary" => "2hu",
"tag" => ["2hu"], "hashtags" => ["2hu"],
"emoji" => %{ "emoji" => %{
"2hu" => "corndog.png" "2hu" => "corndog.png"
} }