add ignore hosts and TLDs for rich_media

This commit is contained in:
Maksim Pechnikov 2019-06-25 22:25:37 +03:00
parent 0cb8e710fb
commit 4ad15ad2a9
7 changed files with 67 additions and 21 deletions

View File

@ -330,7 +330,10 @@
config :pleroma, :mrf_subchain, match_actor: %{} config :pleroma, :mrf_subchain, match_actor: %{}
config :pleroma, :rich_media, enabled: true config :pleroma, :rich_media,
enabled: true,
ignore_hosts: [],
ignore_tld: ["local", "localdomain", "lan"]
config :pleroma, :media_proxy, config :pleroma, :media_proxy,
enabled: false, enabled: false,

View File

@ -43,7 +43,11 @@
config :pbkdf2_elixir, rounds: 1 config :pbkdf2_elixir, rounds: 1
config :tesla, adapter: Tesla.Mock config :tesla, adapter: Tesla.Mock
config :pleroma, :rich_media, enabled: false
config :pleroma, :rich_media,
enabled: false,
ignore_hosts: [],
ignore_tld: ["local", "localdomain", "lan"]
config :web_push_encryption, :vapid_details, config :web_push_encryption, :vapid_details,
subject: "mailto:administrator@example.com", subject: "mailto:administrator@example.com",

View File

@ -417,6 +417,8 @@ This config contains two queues: `federator_incoming` and `federator_outgoing`.
## :rich_media ## :rich_media
* `enabled`: if enabled the instance will parse metadata from attached links to generate link previews * `enabled`: if enabled the instance will parse metadata from attached links to generate link previews
* `ignore_hosts`: list host which will ignore for parse metadata. default is [].
* `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"]
## :fetch_initial_posts ## :fetch_initial_posts
* `enabled`: if enabled, when a new user is federated with, fetch some of their latest posts * `enabled`: if enabled, when a new user is federated with, fetch some of their latest posts

View File

@ -4,35 +4,53 @@
defmodule Pleroma.Web.RichMedia.Helpers do defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.Activity alias Pleroma.Activity
alias Pleroma.Config
alias Pleroma.HTML alias Pleroma.HTML
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser alias Pleroma.Web.RichMedia.Parser
@private_ip_regexp ~r/(127\.)|(10\.\d+\.\d+.\d+)|(192\.168\.) @validate_tld Application.get_env(:auto_linker, :opts)[:validate_tld]
|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(localhost)/
@spec validate_page_url(any()) :: :ok | :error
defp validate_page_url(page_url) when is_binary(page_url) do defp validate_page_url(page_url) when is_binary(page_url) do
validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld] page_url
|> AutoLinker.Parser.url?(scheme: true, validate_tld: @validate_tld)
|> parse_uri(page_url)
end
defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority})
when scheme == "https" and not is_nil(authority) do
cond do cond do
Regex.match?(@private_ip_regexp, page_url) -> host in Config.get([:rich_media, :ignore_hosts], []) ->
:error :error
AutoLinker.Parser.url?(page_url, scheme: true, validate_tld: validate_tld) -> get_tld(host) in Config.get([:rich_media, :ignore_tld], []) ->
URI.parse(page_url) |> validate_page_url :error
true -> true ->
:error :ok
end end
end end
defp validate_page_url(%URI{authority: nil}), do: :error
defp validate_page_url(%URI{scheme: nil}), do: :error
defp validate_page_url(%URI{}), do: :ok
defp validate_page_url(_), do: :error defp validate_page_url(_), do: :error
defp parse_uri(true, url) do
url
|> URI.parse()
|> validate_page_url
end
defp parse_uri(_, _), do: :error
defp get_tld(host) do
host
|> String.split(".")
|> Enum.reverse()
|> hd
end
def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
with true <- Pleroma.Config.get([:rich_media, :enabled]), with true <- Config.get([:rich_media, :enabled]),
%Object{} = object <- Object.normalize(activity), %Object{} = object <- Object.normalize(activity),
false <- object.data["sensitive"] || false, false <- object.data["sensitive"] || false,
{:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]), {:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]),

View File

@ -757,6 +757,14 @@ def get("http://example.com/ogp", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}} {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
end end
def get("https://example.com/ogp", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
end
def get("https://pleroma.local/notice/9kCP7V", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
end
def get("http://example.com/ogp-missing-data", _, _, _) do def get("http://example.com/ogp-missing-data", _, _, _) do
{:ok, {:ok,
%Tesla.Env{ %Tesla.Env{
@ -765,6 +773,14 @@ def get("http://example.com/ogp-missing-data", _, _, _) do
}} }}
end end
def get("https://example.com/ogp-missing-data", _, _, _) do
{:ok,
%Tesla.Env{
status: 200,
body: File.read!("test/fixtures/rich_media/ogp-missing-data.html")
}}
end
def get("http://example.com/malformed", _, _, _) do def get("http://example.com/malformed", _, _, _) do
{:ok, {:ok,
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}} %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}}

View File

@ -312,7 +312,7 @@ test "posting a status with OGP link preview", %{conn: conn} do
conn conn
|> assign(:user, user) |> assign(:user, user)
|> post("/api/v1/statuses", %{ |> post("/api/v1/statuses", %{
"status" => "http://example.com/ogp" "status" => "https://example.com/ogp"
}) })
assert %{"id" => id, "card" => %{"title" => "The Rock"}} = json_response(conn, 200) assert %{"id" => id, "card" => %{"title" => "The Rock"}} = json_response(conn, 200)
@ -2557,7 +2557,7 @@ test "max pinned statuses", %{conn: conn, user: user, activity: activity_one} do
end end
test "returns rich-media card", %{conn: conn, user: user} do test "returns rich-media card", %{conn: conn, user: user} do
{:ok, activity} = CommonAPI.post(user, %{"status" => "http://example.com/ogp"}) {:ok, activity} = CommonAPI.post(user, %{"status" => "https://example.com/ogp"})
card_data = %{ card_data = %{
"image" => "http://ia.media-imdb.com/images/rock.jpg", "image" => "http://ia.media-imdb.com/images/rock.jpg",
@ -2589,7 +2589,7 @@ test "returns rich-media card", %{conn: conn, user: user} do
# works with private posts # works with private posts
{:ok, activity} = {:ok, activity} =
CommonAPI.post(user, %{"status" => "http://example.com/ogp", "visibility" => "direct"}) CommonAPI.post(user, %{"status" => "https://example.com/ogp", "visibility" => "direct"})
response_two = response_two =
conn conn
@ -2601,7 +2601,8 @@ test "returns rich-media card", %{conn: conn, user: user} do
end end
test "replaces missing description with an empty string", %{conn: conn, user: user} do test "replaces missing description with an empty string", %{conn: conn, user: user} do
{:ok, activity} = CommonAPI.post(user, %{"status" => "http://example.com/ogp-missing-data"}) {:ok, activity} =
CommonAPI.post(user, %{"status" => "https://example.com/ogp-missing-data"})
response = response =
conn conn

View File

@ -50,13 +50,13 @@ test "crawls valid, complete URLs" do
{:ok, activity} = {:ok, activity} =
CommonAPI.post(user, %{ CommonAPI.post(user, %{
"status" => "[test](http://example.com/ogp)", "status" => "[test](https://example.com/ogp)",
"content_type" => "text/markdown" "content_type" => "text/markdown"
}) })
Config.put([:rich_media, :enabled], true) Config.put([:rich_media, :enabled], true)
assert %{page_url: "http://example.com/ogp", rich_media: _} = assert %{page_url: "https://example.com/ogp", rich_media: _} =
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end end
@ -97,19 +97,21 @@ test "refuses to crawl URLs from posts tagged NSFW" do
test "refuses to crawl URLs of private network from posts" do test "refuses to crawl URLs of private network from posts" do
user = insert(:user) user = insert(:user)
Config.put([:rich_media, :enabled], true)
{:ok, activity} = {:ok, activity} =
CommonAPI.post(user, %{"status" => "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"}) CommonAPI.post(user, %{"status" => "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"})
{:ok, activity2} = CommonAPI.post(user, %{"status" => "https://10.111.10.1/notice/9kCP7V"}) {:ok, activity2} = CommonAPI.post(user, %{"status" => "https://10.111.10.1/notice/9kCP7V"})
{:ok, activity3} = CommonAPI.post(user, %{"status" => "https://172.16.32.40/notice/9kCP7V"}) {:ok, activity3} = CommonAPI.post(user, %{"status" => "https://172.16.32.40/notice/9kCP7V"})
{:ok, activity4} = CommonAPI.post(user, %{"status" => "https://192.168.10.40/notice/9kCP7V"}) {:ok, activity4} = CommonAPI.post(user, %{"status" => "https://192.168.10.40/notice/9kCP7V"})
{:ok, activity5} = CommonAPI.post(user, %{"status" => "https://pleroma.local/notice/9kCP7V"})
Config.put([:rich_media, :enabled], true)
assert %{} = Helpers.fetch_data_for_activity(activity) assert %{} = Helpers.fetch_data_for_activity(activity)
assert %{} = Helpers.fetch_data_for_activity(activity2) assert %{} = Helpers.fetch_data_for_activity(activity2)
assert %{} = Helpers.fetch_data_for_activity(activity3) assert %{} = Helpers.fetch_data_for_activity(activity3)
assert %{} = Helpers.fetch_data_for_activity(activity4) assert %{} = Helpers.fetch_data_for_activity(activity4)
assert %{} = Helpers.fetch_data_for_activity(activity5)
end end
end end