Fix atom leak in Rich Media Parser
This commit is contained in:
parent
e2793744c5
commit
520367d6fd
|
@ -377,8 +377,8 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
|
||||||
page_url_data = URI.parse(page_url)
|
page_url_data = URI.parse(page_url)
|
||||||
|
|
||||||
page_url_data =
|
page_url_data =
|
||||||
if rich_media[:url] != nil do
|
if is_binary(rich_media["url"]) do
|
||||||
URI.merge(page_url_data, URI.parse(rich_media[:url]))
|
URI.merge(page_url_data, URI.parse(rich_media["url"]))
|
||||||
else
|
else
|
||||||
page_url_data
|
page_url_data
|
||||||
end
|
end
|
||||||
|
@ -386,11 +386,9 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
|
||||||
page_url = page_url_data |> to_string
|
page_url = page_url_data |> to_string
|
||||||
|
|
||||||
image_url =
|
image_url =
|
||||||
if rich_media[:image] != nil do
|
if is_binary(rich_media["image"]) do
|
||||||
URI.merge(page_url_data, URI.parse(rich_media[:image]))
|
URI.merge(page_url_data, URI.parse(rich_media["image"]))
|
||||||
|> to_string
|
|> to_string
|
||||||
else
|
|
||||||
nil
|
|
||||||
end
|
end
|
||||||
|
|
||||||
%{
|
%{
|
||||||
|
@ -399,8 +397,8 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
|
||||||
provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
|
provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
|
||||||
url: page_url,
|
url: page_url,
|
||||||
image: image_url |> MediaProxy.url(),
|
image: image_url |> MediaProxy.url(),
|
||||||
title: rich_media[:title] || "",
|
title: rich_media["title"] || "",
|
||||||
description: rich_media[:description] || "",
|
description: rich_media["description"] || "",
|
||||||
pleroma: %{
|
pleroma: %{
|
||||||
opengraph: rich_media
|
opengraph: rich_media
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,7 +9,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
||||||
alias Pleroma.Object
|
alias Pleroma.Object
|
||||||
alias Pleroma.Web.RichMedia.Parser
|
alias Pleroma.Web.RichMedia.Parser
|
||||||
|
|
||||||
@spec validate_page_url(any()) :: :ok | :error
|
@spec validate_page_url(URI.t() | binary()) :: :ok | :error
|
||||||
defp validate_page_url(page_url) when is_binary(page_url) do
|
defp validate_page_url(page_url) when is_binary(page_url) do
|
||||||
validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
|
validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
|
||||||
|
|
||||||
|
@ -18,8 +18,8 @@ defp validate_page_url(page_url) when is_binary(page_url) do
|
||||||
|> parse_uri(page_url)
|
|> parse_uri(page_url)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority})
|
defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
|
||||||
when scheme == "https" and not is_nil(authority) do
|
when is_binary(authority) do
|
||||||
cond do
|
cond do
|
||||||
host in Config.get([:rich_media, :ignore_hosts], []) ->
|
host in Config.get([:rich_media, :ignore_hosts], []) ->
|
||||||
:error
|
:error
|
||||||
|
|
|
@ -91,7 +91,7 @@ defp parse_url(url) do
|
||||||
html
|
html
|
||||||
|> parse_html()
|
|> parse_html()
|
||||||
|> maybe_parse()
|
|> maybe_parse()
|
||||||
|> Map.put(:url, url)
|
|> Map.put("url", url)
|
||||||
|> clean_parsed_data()
|
|> clean_parsed_data()
|
||||||
|> check_parsed_data()
|
|> check_parsed_data()
|
||||||
rescue
|
rescue
|
||||||
|
@ -111,8 +111,8 @@ defp maybe_parse(html) do
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp check_parsed_data(%{title: title} = data)
|
defp check_parsed_data(%{"title" => title} = data)
|
||||||
when is_binary(title) and byte_size(title) > 0 do
|
when is_binary(title) and title != "" do
|
||||||
{:ok, data}
|
{:ok, data}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -123,11 +123,7 @@ defp check_parsed_data(data) do
|
||||||
defp clean_parsed_data(data) do
|
defp clean_parsed_data(data) do
|
||||||
data
|
data
|
||||||
|> Enum.reject(fn {key, val} ->
|
|> Enum.reject(fn {key, val} ->
|
||||||
with {:ok, _} <- Jason.encode(%{key => val}) do
|
not match?({:ok, _}, Jason.encode(%{key => val}))
|
||||||
false
|
|
||||||
else
|
|
||||||
_ -> true
|
|
||||||
end
|
|
||||||
end)
|
end)
|
||||||
|> Map.new()
|
|> Map.new()
|
||||||
end
|
end
|
||||||
|
|
|
@ -29,19 +29,19 @@ defp normalize_attributes(html_node, prefix, key_name, value_name) do
|
||||||
{_tag, attributes, _children} = html_node
|
{_tag, attributes, _children} = html_node
|
||||||
|
|
||||||
data =
|
data =
|
||||||
Enum.into(attributes, %{}, fn {name, value} ->
|
Map.new(attributes, fn {name, value} ->
|
||||||
{name, String.trim_leading(value, "#{prefix}:")}
|
{name, String.trim_leading(value, "#{prefix}:")}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
%{String.to_atom(data[key_name]) => data[value_name]}
|
%{data[key_name] => data[value_name]}
|
||||||
end
|
end
|
||||||
|
|
||||||
defp maybe_put_title(%{title: _} = meta, _), do: meta
|
defp maybe_put_title(%{"title" => _} = meta, _), do: meta
|
||||||
|
|
||||||
defp maybe_put_title(meta, html) when meta != %{} do
|
defp maybe_put_title(meta, html) when meta != %{} do
|
||||||
case get_page_title(html) do
|
case get_page_title(html) do
|
||||||
"" -> meta
|
"" -> meta
|
||||||
title -> Map.put_new(meta, :title, title)
|
title -> Map.put_new(meta, "title", title)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
|
defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
|
||||||
def parse(html, _data) do
|
def parse(html, _data) do
|
||||||
with elements = [_ | _] <- get_discovery_data(html),
|
with elements = [_ | _] <- get_discovery_data(html),
|
||||||
{:ok, oembed_url} <- get_oembed_url(elements),
|
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
|
||||||
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
|
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
|
||||||
{:ok, oembed_data}
|
{:ok, oembed_data}
|
||||||
else
|
else
|
||||||
|
@ -17,19 +17,13 @@ defp get_discovery_data(html) do
|
||||||
html |> Floki.find("link[type='application/json+oembed']")
|
html |> Floki.find("link[type='application/json+oembed']")
|
||||||
end
|
end
|
||||||
|
|
||||||
defp get_oembed_url(nodes) do
|
defp get_oembed_url([{"link", attributes, _children} | _]) do
|
||||||
{"link", attributes, _children} = nodes |> hd()
|
Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end)
|
||||||
|
|
||||||
{:ok, Enum.into(attributes, %{})["href"]}
|
|
||||||
end
|
end
|
||||||
|
|
||||||
defp get_oembed_data(url) do
|
defp get_oembed_data(url) do
|
||||||
{:ok, %Tesla.Env{body: json}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media])
|
with {:ok, %Tesla.Env{body: json}} <- Pleroma.HTTP.get(url, [], adapter: [pool: :media]) do
|
||||||
|
Jason.decode(json)
|
||||||
{:ok, data} = Jason.decode(json)
|
end
|
||||||
|
|
||||||
data = data |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
|
|
||||||
|
|
||||||
{:ok, data}
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -60,19 +60,19 @@ test "returns error when no metadata present" do
|
||||||
test "doesn't just add a title" do
|
test "doesn't just add a title" do
|
||||||
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") ==
|
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") ==
|
||||||
{:error,
|
{:error,
|
||||||
"Found metadata was invalid or incomplete: %{url: \"http://example.com/non-ogp\"}"}
|
"Found metadata was invalid or incomplete: %{\"url\" => \"http://example.com/non-ogp\"}"}
|
||||||
end
|
end
|
||||||
|
|
||||||
test "parses ogp" do
|
test "parses ogp" do
|
||||||
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") ==
|
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
image: "http://ia.media-imdb.com/images/rock.jpg",
|
"image" => "http://ia.media-imdb.com/images/rock.jpg",
|
||||||
title: "The Rock",
|
"title" => "The Rock",
|
||||||
description:
|
"description" =>
|
||||||
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
|
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
|
||||||
type: "video.movie",
|
"type" => "video.movie",
|
||||||
url: "http://example.com/ogp"
|
"url" => "http://example.com/ogp"
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -80,12 +80,12 @@ test "falls back to <title> when ogp:title is missing" do
|
||||||
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") ==
|
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
image: "http://ia.media-imdb.com/images/rock.jpg",
|
"image" => "http://ia.media-imdb.com/images/rock.jpg",
|
||||||
title: "The Rock (1996)",
|
"title" => "The Rock (1996)",
|
||||||
description:
|
"description" =>
|
||||||
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
|
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
|
||||||
type: "video.movie",
|
"type" => "video.movie",
|
||||||
url: "http://example.com/ogp-missing-title"
|
"url" => "http://example.com/ogp-missing-title"
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -93,12 +93,12 @@ test "parses twitter card" do
|
||||||
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") ==
|
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
card: "summary",
|
"card" => "summary",
|
||||||
site: "@flickr",
|
"site" => "@flickr",
|
||||||
image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
|
"image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
|
||||||
title: "Small Island Developing States Photo Submission",
|
"title" => "Small Island Developing States Photo Submission",
|
||||||
description: "View the album on Flickr.",
|
"description" => "View the album on Flickr.",
|
||||||
url: "http://example.com/twitter-card"
|
"url" => "http://example.com/twitter-card"
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -106,27 +106,28 @@ test "parses OEmbed" do
|
||||||
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") ==
|
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
author_name: "bees",
|
"author_name" => "bees",
|
||||||
author_url: "https://www.flickr.com/photos/bees/",
|
"author_url" => "https://www.flickr.com/photos/bees/",
|
||||||
cache_age: 3600,
|
"cache_age" => 3600,
|
||||||
flickr_type: "photo",
|
"flickr_type" => "photo",
|
||||||
height: "768",
|
"height" => "768",
|
||||||
html:
|
"html" =>
|
||||||
"<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by bees, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
|
"<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by bees, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
|
||||||
license: "All Rights Reserved",
|
"license" => "All Rights Reserved",
|
||||||
license_id: 0,
|
"license_id" => 0,
|
||||||
provider_name: "Flickr",
|
"provider_name" => "Flickr",
|
||||||
provider_url: "https://www.flickr.com/",
|
"provider_url" => "https://www.flickr.com/",
|
||||||
thumbnail_height: 150,
|
"thumbnail_height" => 150,
|
||||||
thumbnail_url: "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
|
"thumbnail_url" =>
|
||||||
thumbnail_width: 150,
|
"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
|
||||||
title: "Bacon Lollys",
|
"thumbnail_width" => 150,
|
||||||
type: "photo",
|
"title" => "Bacon Lollys",
|
||||||
url: "http://example.com/oembed",
|
"type" => "photo",
|
||||||
version: "1.0",
|
"url" => "http://example.com/oembed",
|
||||||
web_page: "https://www.flickr.com/photos/bees/2362225867/",
|
"version" => "1.0",
|
||||||
web_page_short_url: "https://flic.kr/p/4AK2sc",
|
"web_page" => "https://www.flickr.com/photos/bees/2362225867/",
|
||||||
width: "1024"
|
"web_page_short_url" => "https://flic.kr/p/4AK2sc",
|
||||||
|
"width" => "1024"
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -19,11 +19,11 @@ test "parses twitter card with only name attributes" do
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
"app:id:googleplay": "com.nytimes.android",
|
"app:id:googleplay" => "com.nytimes.android",
|
||||||
"app:name:googleplay": "NYTimes",
|
"app:name:googleplay" => "NYTimes",
|
||||||
"app:url:googleplay": "nytimes://reader/id/100000006583622",
|
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
|
||||||
site: nil,
|
"site" => nil,
|
||||||
title:
|
"title" =>
|
||||||
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
|
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
@ -36,15 +36,15 @@ test "parses twitter card with only property attributes" do
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
card: "summary_large_image",
|
"card" => "summary_large_image",
|
||||||
description:
|
"description" =>
|
||||||
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
||||||
image:
|
"image" =>
|
||||||
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
|
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
|
||||||
"image:alt": "",
|
"image:alt" => "",
|
||||||
title:
|
"title" =>
|
||||||
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
||||||
url:
|
"url" =>
|
||||||
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
|
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
@ -57,19 +57,19 @@ test "parses twitter card with name & property attributes" do
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
"app:id:googleplay": "com.nytimes.android",
|
"app:id:googleplay" => "com.nytimes.android",
|
||||||
"app:name:googleplay": "NYTimes",
|
"app:name:googleplay" => "NYTimes",
|
||||||
"app:url:googleplay": "nytimes://reader/id/100000006583622",
|
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
|
||||||
card: "summary_large_image",
|
"card" => "summary_large_image",
|
||||||
description:
|
"description" =>
|
||||||
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
||||||
image:
|
"image" =>
|
||||||
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
|
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
|
||||||
"image:alt": "",
|
"image:alt" => "",
|
||||||
site: nil,
|
"site" => nil,
|
||||||
title:
|
"title" =>
|
||||||
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
||||||
url:
|
"url" =>
|
||||||
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
|
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
@ -86,11 +86,11 @@ test "respect only first title tag on the page" do
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
site: "@atlasobscura",
|
"site" => "@atlasobscura",
|
||||||
title:
|
"title" =>
|
||||||
"The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
|
"The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
|
||||||
card: "summary_large_image",
|
"card" => "summary_large_image",
|
||||||
image: image_path
|
"image" => image_path
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -102,12 +102,12 @@ test "takes first founded title in html head if there is html markup error" do
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
{:ok,
|
||||||
%{
|
%{
|
||||||
site: nil,
|
"site" => nil,
|
||||||
title:
|
"title" =>
|
||||||
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
|
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
|
||||||
"app:id:googleplay": "com.nytimes.android",
|
"app:id:googleplay" => "com.nytimes.android",
|
||||||
"app:name:googleplay": "NYTimes",
|
"app:name:googleplay" => "NYTimes",
|
||||||
"app:url:googleplay": "nytimes://reader/id/100000006583622"
|
"app:url:googleplay" => "nytimes://reader/id/100000006583622"
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue