diff --git a/changelog.d/rich_media_tests.skip b/changelog.d/rich_media_tests.skip new file mode 100644 index 000000000..e69de29bb diff --git a/lib/pleroma/caching.ex b/lib/pleroma/caching.ex index eb0588708..796a465af 100644 --- a/lib/pleroma/caching.ex +++ b/lib/pleroma/caching.ex @@ -8,10 +8,13 @@ defmodule Pleroma.Caching do @callback put(Cachex.cache(), any(), any(), Keyword.t()) :: {Cachex.status(), boolean()} @callback put(Cachex.cache(), any(), any()) :: {Cachex.status(), boolean()} @callback fetch!(Cachex.cache(), any(), function() | nil) :: any() + @callback fetch(Cachex.cache(), any(), function() | nil) :: + {atom(), any()} | {atom(), any(), any()} # @callback del(Cachex.cache(), any(), Keyword.t()) :: {Cachex.status(), boolean()} @callback del(Cachex.cache(), any()) :: {Cachex.status(), boolean()} @callback stream!(Cachex.cache(), any()) :: Enumerable.t() @callback expire_at(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()} + @callback expire(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()} @callback exists?(Cachex.cache(), any()) :: {Cachex.status(), boolean()} @callback execute!(Cachex.cache(), function()) :: any() @callback get_and_update(Cachex.cache(), any(), function()) :: diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 1501776d9..a711dc436 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -18,53 +18,10 @@ defmodule Pleroma.Web.RichMedia.Helpers do recv_timeout: 2_000 ] - @spec validate_page_url(URI.t() | binary()) :: :ok | :error - defp validate_page_url(page_url) when is_binary(page_url) do - validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld]) - - page_url - |> Linkify.Parser.url?(validate_tld: validate_tld) - |> parse_uri(page_url) - end - - defp validate_page_url(%URI{host: host, scheme: "https"}) do - cond do - Linkify.Parser.ip?(host) -> - :error - - host in @config_impl.get([:rich_media, :ignore_hosts], []) -> - :error - - get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) -> - :error - - true -> - :ok - end - end - - defp validate_page_url(_), do: :error - - defp parse_uri(true, url) do - url - |> URI.parse() - |> validate_page_url - end - - defp parse_uri(_, _), do: :error - - defp get_tld(host) do - host - |> String.split(".") - |> Enum.reverse() - |> hd - end - def fetch_data_for_object(object) do with true <- @config_impl.get([:rich_media, :enabled]), {:ok, page_url} <- HTML.extract_first_external_url_from_object(object), - :ok <- validate_page_url(page_url), {:ok, rich_media} <- Parser.parse(page_url) do %{page_url: page_url, rich_media: rich_media} else diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 837182d8a..a73fbc4b9 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -6,6 +6,7 @@ defmodule Pleroma.Web.RichMedia.Parser do require Logger @cachex Pleroma.Config.get([:cachex, :provider], Cachex) + @config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config) defp parsers do Pleroma.Config.get([:rich_media, :parsers]) @@ -13,70 +14,66 @@ defp parsers do def parse(nil), do: {:error, "No URL provided"} - if Pleroma.Config.get(:env) == :test do - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url), do: parse_url(url) - else - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url) do - with {:ok, data} <- get_cached_or_parse(url), - {:ok, _} <- set_ttl_based_on_image(data, url) do - {:ok, data} - end + @spec parse(String.t()) :: {:ok, map()} | {:error, any()} + def parse(url) do + with :ok <- validate_page_url(url), + {:ok, data} <- get_cached_or_parse(url), + {:ok, _} <- set_ttl_based_on_image(data, url) do + {:ok, data} end + end - defp get_cached_or_parse(url) do - case @cachex.fetch(:rich_media_cache, url, fn -> - case parse_url(url) do - {:ok, _} = res -> - {:commit, res} + defp get_cached_or_parse(url) do + case @cachex.fetch(:rich_media_cache, url, fn -> + case parse_url(url) do + {:ok, _} = res -> + {:commit, res} - {:error, reason} = e -> - # Unfortunately we have to log errors here, instead of doing that - # along with ttl setting at the bottom. Otherwise we can get log spam - # if more than one process was waiting for the rich media card - # while it was generated. Ideally we would set ttl here as well, - # so we don't override it number_of_waiters_on_generation - # times, but one, obviously, can't set ttl for not-yet-created entry - # and Cachex doesn't support returning ttl from the fetch callback. - log_error(url, reason) - {:commit, e} - end - end) do - {action, res} when action in [:commit, :ok] -> - case res do - {:ok, _data} = res -> - res + {:error, reason} = e -> + # Unfortunately we have to log errors here, instead of doing that + # along with ttl setting at the bottom. Otherwise we can get log spam + # if more than one process was waiting for the rich media card + # while it was generated. Ideally we would set ttl here as well, + # so we don't override it number_of_waiters_on_generation + # times, but one, obviously, can't set ttl for not-yet-created entry + # and Cachex doesn't support returning ttl from the fetch callback. + log_error(url, reason) + {:commit, e} + end + end) do + {action, res} when action in [:commit, :ok] -> + case res do + {:ok, _data} = res -> + res - {:error, reason} = e -> - if action == :commit, do: set_error_ttl(url, reason) - e - end + {:error, reason} = e -> + if action == :commit, do: set_error_ttl(url, reason) + e + end - {:error, e} -> - {:error, {:cachex_error, e}} - end + {:error, e} -> + {:error, {:cachex_error, e}} end + end - defp set_error_ttl(_url, :body_too_large), do: :ok - defp set_error_ttl(_url, {:content_type, _}), do: :ok + defp set_error_ttl(_url, :body_too_large), do: :ok + defp set_error_ttl(_url, {:content_type, _}), do: :ok - # The TTL is not set for the errors above, since they are unlikely to change - # with time + # The TTL is not set for the errors above, since they are unlikely to change + # with time - defp set_error_ttl(url, _reason) do - ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) - @cachex.expire(:rich_media_cache, url, ttl) - :ok - end + defp set_error_ttl(url, _reason) do + ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) + @cachex.expire(:rich_media_cache, url, ttl) + :ok + end - defp log_error(url, {:invalid_metadata, data}) do - Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) - end + defp log_error(url, {:invalid_metadata, data}) do + Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) + end - defp log_error(url, reason) do - Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) - end + defp log_error(url, reason) do + Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) end @doc """ @@ -166,4 +163,46 @@ defp clean_parsed_data(data) do end) |> Map.new() end + + @spec validate_page_url(URI.t() | binary()) :: :ok | :error + defp validate_page_url(page_url) when is_binary(page_url) do + validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld]) + + page_url + |> Linkify.Parser.url?(validate_tld: validate_tld) + |> parse_uri(page_url) + end + + defp validate_page_url(%URI{host: host, scheme: "https"}) do + cond do + Linkify.Parser.ip?(host) -> + :error + + host in @config_impl.get([:rich_media, :ignore_hosts], []) -> + :error + + get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) -> + :error + + true -> + :ok + end + end + + defp validate_page_url(_), do: :error + + defp parse_uri(true, url) do + url + |> URI.parse() + |> validate_page_url + end + + defp parse_uri(_, _), do: :error + + defp get_tld(host) do + host + |> String.split(".") + |> Enum.reverse() + |> hd + end end diff --git a/test/fixtures/rich_media/oembed.html b/test/fixtures/rich_media/oembed.html index 55f17004b..5429630d0 100644 --- a/test/fixtures/rich_media/oembed.html +++ b/test/fixtures/rich_media/oembed.html @@ -1,3 +1,3 @@ diff --git a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs index 7bbe46cbe..f95f15ec3 100644 --- a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs @@ -336,13 +336,7 @@ test "fake statuses' preview card is not cached", %{conn: conn} do path -> Pleroma.Test.StaticConfig.get(path) end) - Tesla.Mock.mock(fn - %{ - method: :get, - url: "https://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - + Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) diff --git a/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs b/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs index 44d40269c..c8b3cb391 100644 --- a/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs +++ b/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs @@ -49,6 +49,7 @@ test "it displays a chat message" do :chat_message_id_idempotency_key_cache, ^id -> {:ok, "123"} cache, key -> NullCache.get(cache, key) end) + |> stub(:fetch, fn :rich_media_cache, _, _ -> {:ok, {:ok, %{}}} end) chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref) diff --git a/test/pleroma/web/rich_media/helpers_test.exs b/test/pleroma/web/rich_media/helpers_test.exs index bf7372476..13d2341ad 100644 --- a/test/pleroma/web/rich_media/helpers_test.exs +++ b/test/pleroma/web/rich_media/helpers_test.exs @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.HelpersTest do - use Pleroma.DataCase, async: true + use Pleroma.DataCase, async: false alias Pleroma.StaticStubbedConfigMock, as: ConfigMock alias Pleroma.Web.CommonAPI @@ -14,7 +14,7 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do import Tesla.Mock setup do - mock(fn env -> apply(HttpRequestMock, :request, [env]) end) + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) ConfigMock |> stub(:get, fn diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs index 9064138a6..a05b89a2b 100644 --- a/test/pleroma/web/rich_media/parser_test.exs +++ b/test/pleroma/web/rich_media/parser_test.exs @@ -3,95 +3,26 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.ParserTest do - use ExUnit.Case, async: true + use Pleroma.DataCase, async: false alias Pleroma.Web.RichMedia.Parser + import Tesla.Mock + setup do - Tesla.Mock.mock(fn - %{ - method: :get, - url: "http://example.com/ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} - - %{ - method: :get, - url: "http://example.com/non-ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")} - - %{ - method: :get, - url: "http://example.com/ogp-missing-title" - } -> - %Tesla.Env{ - status: 200, - body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") - } - - %{ - method: :get, - url: "http://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - - %{ - method: :get, - url: "http://example.com/oembed" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")} - - %{ - method: :get, - url: "http://example.com/oembed.json" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")} - - %{method: :get, url: "http://example.com/empty"} -> - %Tesla.Env{status: 200, body: "hello"} - - %{method: :get, url: "http://example.com/malformed"} -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")} - - %{method: :get, url: "http://example.com/error"} -> - {:error, :overload} - - %{ - method: :head, - url: "http://example.com/huge-page" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] - } - - %{ - method: :head, - url: "http://example.com/pdf-file" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] - } - - %{method: :head} -> - %Tesla.Env{status: 404, body: "", headers: []} - end) - - :ok + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) end test "returns error when no metadata present" do - assert {:error, _} = Parser.parse("http://example.com/empty") + assert {:error, _} = Parser.parse("https://example.com/empty") end test "doesn't just add a title" do - assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp") + assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp") end test "parses ogp" do - assert Parser.parse("http://example.com/ogp") == + assert Parser.parse("https://example.com/ogp") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -99,12 +30,12 @@ test "parses ogp" do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp" + "url" => "https://example.com/ogp" }} end test "falls back to when ogp:title is missing" do - assert Parser.parse("http://example.com/ogp-missing-title") == + assert Parser.parse("https://example.com/ogp-missing-title") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -112,12 +43,12 @@ test "falls back to <title> when ogp:title is missing" do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp-missing-title" + "url" => "https://example.com/ogp-missing-title" }} end test "parses twitter card" do - assert Parser.parse("http://example.com/twitter-card") == + assert Parser.parse("https://example.com/twitter-card") == {:ok, %{ "card" => "summary", @@ -125,12 +56,12 @@ test "parses twitter card" do "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", "title" => "Small Island Developing States Photo Submission", "description" => "View the album on Flickr.", - "url" => "http://example.com/twitter-card" + "url" => "https://example.com/twitter-card" }} end test "parses OEmbed and filters HTML tags" do - assert Parser.parse("http://example.com/oembed") == + assert Parser.parse("https://example.com/oembed") == {:ok, %{ "author_name" => "\u202E\u202D\u202Cbees\u202C", @@ -150,7 +81,7 @@ test "parses OEmbed and filters HTML tags" do "thumbnail_width" => 150, "title" => "Bacon Lollys", "type" => "photo", - "url" => "http://example.com/oembed", + "url" => "https://example.com/oembed", "version" => "1.0", "web_page" => "https://www.flickr.com/photos/bees/2362225867/", "web_page_short_url" => "https://flic.kr/p/4AK2sc", @@ -159,18 +90,18 @@ test "parses OEmbed and filters HTML tags" do end test "rejects invalid OGP data" do - assert {:error, _} = Parser.parse("http://example.com/malformed") + assert {:error, _} = Parser.parse("https://example.com/malformed") end test "returns error if getting page was not successful" do - assert {:error, :overload} = Parser.parse("http://example.com/error") + assert {:error, :overload} = Parser.parse("https://example.com/error") end test "does a HEAD request to check if the body is too large" do - assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page") + assert {:error, :body_too_large} = Parser.parse("https://example.com/huge-page") end test "does a HEAD request to check if the body is html" do - assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file") + assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file") end end diff --git a/test/support/cachex_proxy.ex b/test/support/cachex_proxy.ex index 83ae5610f..8f27986a9 100644 --- a/test/support/cachex_proxy.ex +++ b/test/support/cachex_proxy.ex @@ -26,9 +26,15 @@ defmodule Pleroma.CachexProxy do @impl true defdelegate fetch!(cache, key, func), to: Cachex + @impl true + defdelegate fetch(cache, key, func), to: Cachex + @impl true defdelegate expire_at(cache, str, num), to: Cachex + @impl true + defdelegate expire(cache, str, num), to: Cachex + @impl true defdelegate exists?(cache, key), to: Cachex diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex index df3371a75..f4b6f1f9f 100644 --- a/test/support/http_request_mock.ex +++ b/test/support/http_request_mock.ex @@ -1059,7 +1059,7 @@ def get("https://example.com/ogp-missing-data", _, _, _) do }} end - def get("http://example.com/malformed", _, _, _) do + def get("https://example.com/malformed", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}} end @@ -1472,6 +1472,37 @@ def get("https://yahoo.com/", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/yahoo.html")}} end + def get("https://example.com/error", _, _, _), do: {:error, :overload} + + def get("https://example.com/ogp-missing-title", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") + }} + end + + def get("https://example.com/oembed", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}} + end + + def get("https://example.com/oembed.json", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}} + end + + def get("https://example.com/twitter-card", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}} + end + + def get("https://example.com/non-ogp", _, _, _) do + {:ok, + %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}} + end + + def get("https://example.com/empty", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: "hello"}} + end + def get(url, query, body, headers) do {:error, "Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} @@ -1545,17 +1576,41 @@ def post(url, query, body, headers) do # Most of the rich media mocks are missing HEAD requests, so we just return 404. @rich_media_mocks [ + "https://example.com/empty", + "https://example.com/error", + "https://example.com/malformed", + "https://example.com/non-ogp", + "https://example.com/oembed", + "https://example.com/oembed.json", "https://example.com/ogp", "https://example.com/ogp-missing-data", + "https://example.com/ogp-missing-title", "https://example.com/twitter-card", "https://google.com/", - "https://yahoo.com/", - "https://pleroma.local/notice/9kCP7V" + "https://pleroma.local/notice/9kCP7V", + "https://yahoo.com/" ] + def head(url, _query, _body, _headers) when url in @rich_media_mocks do {:ok, %Tesla.Env{status: 404, body: ""}} end + def head("https://example.com/pdf-file", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] + }} + end + + def head("https://example.com/huge-page", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] + }} + end + def head(url, query, body, headers) do {:error, "Mock response not implemented for HEAD #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} diff --git a/test/support/null_cache.ex b/test/support/null_cache.ex index 9f1d45f1d..47c84174e 100644 --- a/test/support/null_cache.ex +++ b/test/support/null_cache.ex @@ -28,6 +28,9 @@ def fetch!(_, key, func) do end end + @impl true + def fetch(_, key, func), do: func.(key) + @impl true def get_and_update(_, _, func) do func.(nil) @@ -36,6 +39,9 @@ def get_and_update(_, _, func) do @impl true def expire_at(_, _, _), do: {:ok, true} + @impl true + def expire(_, _, _), do: {:ok, true} + @impl true def exists?(_, _), do: {:ok, false}