Merge branch 'rich-media-cache' into 'develop'
Fix Rich Media Previews for updated activities See merge request pleroma/pleroma!4052
This commit is contained in:
commit
e957362779
|
@ -0,0 +1 @@
|
||||||
|
Rich Media Preview cache eviction when the activity is updated.
|
|
@ -28,7 +28,7 @@ defp get_cache_keys_for(activity_id) do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp add_cache_key_for(activity_id, additional_key) do
|
def add_cache_key_for(activity_id, additional_key) do
|
||||||
current = get_cache_keys_for(activity_id)
|
current = get_cache_keys_for(activity_id)
|
||||||
|
|
||||||
unless additional_key in current do
|
unless additional_key in current do
|
||||||
|
|
|
@ -6,8 +6,6 @@ defmodule Pleroma.HTML do
|
||||||
# Scrubbers are compiled on boot so they can be configured in OTP releases
|
# Scrubbers are compiled on boot so they can be configured in OTP releases
|
||||||
# @on_load :compile_scrubbers
|
# @on_load :compile_scrubbers
|
||||||
|
|
||||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
|
||||||
|
|
||||||
def compile_scrubbers do
|
def compile_scrubbers do
|
||||||
dir = Path.join(:code.priv_dir(:pleroma), "scrubbers")
|
dir = Path.join(:code.priv_dir(:pleroma), "scrubbers")
|
||||||
|
|
||||||
|
@ -67,27 +65,20 @@ def ensure_scrubbed_html(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def extract_first_external_url_from_object(%{data: %{"content" => content}} = object)
|
@spec extract_first_external_url_from_object(Pleroma.Object.t()) ::
|
||||||
|
{:ok, String.t()} | {:error, :no_content}
|
||||||
|
def extract_first_external_url_from_object(%{data: %{"content" => content}})
|
||||||
when is_binary(content) do
|
when is_binary(content) do
|
||||||
unless object.data["fake"] do
|
url =
|
||||||
key = "URL|#{object.id}"
|
|
||||||
|
|
||||||
@cachex.fetch!(:scrubber_cache, key, fn _key ->
|
|
||||||
{:commit, {:ok, extract_first_external_url(content)}}
|
|
||||||
end)
|
|
||||||
else
|
|
||||||
{:ok, extract_first_external_url(content)}
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def extract_first_external_url_from_object(_), do: {:error, :no_content}
|
|
||||||
|
|
||||||
def extract_first_external_url(content) do
|
|
||||||
content
|
content
|
||||||
|> Floki.parse_fragment!()
|
|> Floki.parse_fragment!()
|
||||||
|> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
|
|> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
|
||||||
|> Enum.take(1)
|
|> Enum.take(1)
|
||||||
|> Floki.attribute("href")
|
|> Floki.attribute("href")
|
||||||
|> Enum.at(0)
|
|> Enum.at(0)
|
||||||
|
|
||||||
|
{:ok, url}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def extract_first_external_url_from_object(_), do: {:error, :no_content}
|
||||||
end
|
end
|
||||||
|
|
|
@ -8,6 +8,8 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
||||||
alias Pleroma.Object
|
alias Pleroma.Object
|
||||||
alias Pleroma.Web.RichMedia.Parser
|
alias Pleroma.Web.RichMedia.Parser
|
||||||
|
|
||||||
|
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
||||||
|
|
||||||
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
|
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
|
||||||
|
|
||||||
@options [
|
@options [
|
||||||
|
@ -25,9 +27,11 @@ defp validate_page_url(page_url) when is_binary(page_url) do
|
||||||
|> parse_uri(page_url)
|
|> parse_uri(page_url)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
|
defp validate_page_url(%URI{host: host, scheme: "https"}) do
|
||||||
when is_binary(authority) do
|
|
||||||
cond do
|
cond do
|
||||||
|
Linkify.Parser.ip?(host) ->
|
||||||
|
:error
|
||||||
|
|
||||||
host in @config_impl.get([:rich_media, :ignore_hosts], []) ->
|
host in @config_impl.get([:rich_media, :ignore_hosts], []) ->
|
||||||
:error
|
:error
|
||||||
|
|
||||||
|
@ -71,7 +75,24 @@ def fetch_data_for_object(object) do
|
||||||
def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
|
def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
|
||||||
with true <- @config_impl.get([:rich_media, :enabled]),
|
with true <- @config_impl.get([:rich_media, :enabled]),
|
||||||
%Object{} = object <- Object.normalize(activity, fetch: false) do
|
%Object{} = object <- Object.normalize(activity, fetch: false) do
|
||||||
|
if object.data["fake"] do
|
||||||
fetch_data_for_object(object)
|
fetch_data_for_object(object)
|
||||||
|
else
|
||||||
|
key = "URL|#{activity.id}"
|
||||||
|
|
||||||
|
@cachex.fetch!(:scrubber_cache, key, fn _ ->
|
||||||
|
result = fetch_data_for_object(object)
|
||||||
|
|
||||||
|
cond do
|
||||||
|
match?(%{page_url: _, rich_media: _}, result) ->
|
||||||
|
Activity.HTML.add_cache_key_for(activity.id, key)
|
||||||
|
{:commit, result}
|
||||||
|
|
||||||
|
true ->
|
||||||
|
{:ignore, %{}}
|
||||||
|
end
|
||||||
|
end)
|
||||||
|
end
|
||||||
else
|
else
|
||||||
_ -> %{}
|
_ -> %{}
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
<meta property="og:url" content="https://google.com">
|
||||||
|
<meta property="og:type" content="website">
|
||||||
|
<meta property="og:title" content="Google">
|
||||||
|
<meta property="og:description" content="Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for.">
|
||||||
|
<meta property="og:image" content="">
|
||||||
|
|
||||||
|
<meta name="twitter:card" content="summary_large_image">
|
||||||
|
<meta property="twitter:domain" content="google.com">
|
||||||
|
<meta property="twitter:url" content="https://google.com">
|
||||||
|
<meta name="twitter:title" content="Google">
|
||||||
|
<meta name="twitter:description" content="Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for.">
|
||||||
|
<meta name="twitter:image" content="">
|
|
@ -0,0 +1,12 @@
|
||||||
|
<meta property="og:url" content="https://yahoo.com">
|
||||||
|
<meta property="og:type" content="website">
|
||||||
|
<meta property="og:title" content="Yahoo | Mail, Weather, Search, Politics, News, Finance, Sports & Videos">
|
||||||
|
<meta property="og:description" content="Latest news coverage, email, free stock quotes, live scores and video are just the beginning. Discover more every day at Yahoo!">
|
||||||
|
<meta property="og:image" content="https://s.yimg.com/cv/apiv2/social/images/yahoo_default_logo.png">
|
||||||
|
|
||||||
|
<meta name="twitter:card" content="summary_large_image">
|
||||||
|
<meta property="twitter:domain" content="yahoo.com">
|
||||||
|
<meta property="twitter:url" content="https://yahoo.com">
|
||||||
|
<meta name="twitter:title" content="Yahoo | Mail, Weather, Search, Politics, News, Finance, Sports & Videos">
|
||||||
|
<meta name="twitter:description" content="Latest news coverage, email, free stock quotes, live scores and video are just the beginning. Discover more every day at Yahoo!">
|
||||||
|
<meta name="twitter:image" content="https://s.yimg.com/cv/apiv2/social/images/yahoo_default_logo.png">
|
|
@ -83,8 +83,34 @@ test "crawls valid, complete URLs" do
|
||||||
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
||||||
end
|
end
|
||||||
|
|
||||||
# This does not seem to work. The urls are being fetched.
|
test "recrawls URLs on updates" do
|
||||||
@tag skip: true
|
original_url = "https://google.com/"
|
||||||
|
updated_url = "https://yahoo.com/"
|
||||||
|
|
||||||
|
Pleroma.StaticStubbedConfigMock
|
||||||
|
|> stub(:get, fn
|
||||||
|
[:rich_media, :enabled] -> true
|
||||||
|
path -> Pleroma.Test.StaticConfig.get(path)
|
||||||
|
end)
|
||||||
|
|
||||||
|
user = insert(:user)
|
||||||
|
{:ok, activity} = CommonAPI.post(user, %{status: "I like this site #{original_url}"})
|
||||||
|
|
||||||
|
assert match?(
|
||||||
|
%{page_url: ^original_url, rich_media: _},
|
||||||
|
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
||||||
|
)
|
||||||
|
|
||||||
|
{:ok, _} = CommonAPI.update(user, activity, %{status: "I like this site #{updated_url}"})
|
||||||
|
|
||||||
|
activity = Pleroma.Activity.get_by_id(activity.id)
|
||||||
|
|
||||||
|
assert match?(
|
||||||
|
%{page_url: ^updated_url, rich_media: _},
|
||||||
|
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
test "refuses to crawl URLs of private network from posts" do
|
test "refuses to crawl URLs of private network from posts" do
|
||||||
user = insert(:user)
|
user = insert(:user)
|
||||||
|
|
||||||
|
@ -102,10 +128,10 @@ test "refuses to crawl URLs of private network from posts" do
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
path -> Pleroma.Test.StaticConfig.get(path)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
assert %{} = Helpers.fetch_data_for_activity(activity)
|
assert %{} == Helpers.fetch_data_for_activity(activity)
|
||||||
assert %{} = Helpers.fetch_data_for_activity(activity2)
|
assert %{} == Helpers.fetch_data_for_activity(activity2)
|
||||||
assert %{} = Helpers.fetch_data_for_activity(activity3)
|
assert %{} == Helpers.fetch_data_for_activity(activity3)
|
||||||
assert %{} = Helpers.fetch_data_for_activity(activity4)
|
assert %{} == Helpers.fetch_data_for_activity(activity4)
|
||||||
assert %{} = Helpers.fetch_data_for_activity(activity5)
|
assert %{} == Helpers.fetch_data_for_activity(activity5)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1464,6 +1464,14 @@ def get("https://misskey.io/notes/8vs6wxufd0", _, _, _) do
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def get("https://google.com/", _, _, _) do
|
||||||
|
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/google.html")}}
|
||||||
|
end
|
||||||
|
|
||||||
|
def get("https://yahoo.com/", _, _, _) do
|
||||||
|
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/yahoo.html")}}
|
||||||
|
end
|
||||||
|
|
||||||
def get(url, query, body, headers) do
|
def get(url, query, body, headers) do
|
||||||
{:error,
|
{:error,
|
||||||
"Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}
|
"Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}
|
||||||
|
@ -1539,7 +1547,10 @@ def post(url, query, body, headers) do
|
||||||
@rich_media_mocks [
|
@rich_media_mocks [
|
||||||
"https://example.com/ogp",
|
"https://example.com/ogp",
|
||||||
"https://example.com/ogp-missing-data",
|
"https://example.com/ogp-missing-data",
|
||||||
"https://example.com/twitter-card"
|
"https://example.com/twitter-card",
|
||||||
|
"https://google.com/",
|
||||||
|
"https://yahoo.com/",
|
||||||
|
"https://pleroma.local/notice/9kCP7V"
|
||||||
]
|
]
|
||||||
def head(url, _query, _body, _headers) when url in @rich_media_mocks do
|
def head(url, _query, _body, _headers) when url in @rich_media_mocks do
|
||||||
{:ok, %Tesla.Env{status: 404, body: ""}}
|
{:ok, %Tesla.Env{status: 404, body: ""}}
|
||||||
|
|
Loading…
Reference in New Issue