Merge branch 'merge-ogp-twitter-parsers' into 'develop'
Merge OGP parser with TwitterCard. Closes #1835. See merge request pleroma/pleroma!2642
This commit is contained in:
commit
1e49bfa9ac
|
@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- MFR policy to set global expiration for all local Create activities
|
- MFR policy to set global expiration for all local Create activities
|
||||||
|
- OGP rich media parser merged with TwitterCard
|
||||||
<details>
|
<details>
|
||||||
<summary>API Changes</summary>
|
<summary>API Changes</summary>
|
||||||
- **Breaking:** Emoji API: changed methods and renamed routes.
|
- **Breaking:** Emoji API: changed methods and renamed routes.
|
||||||
|
|
|
@ -387,7 +387,6 @@
|
||||||
ignore_tld: ["local", "localdomain", "lan"],
|
ignore_tld: ["local", "localdomain", "lan"],
|
||||||
parsers: [
|
parsers: [
|
||||||
Pleroma.Web.RichMedia.Parsers.TwitterCard,
|
Pleroma.Web.RichMedia.Parsers.TwitterCard,
|
||||||
Pleroma.Web.RichMedia.Parsers.OGP,
|
|
||||||
Pleroma.Web.RichMedia.Parsers.OEmbed
|
Pleroma.Web.RichMedia.Parsers.OEmbed
|
||||||
],
|
],
|
||||||
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
|
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
|
||||||
|
|
|
@ -2104,9 +2104,7 @@
|
||||||
description:
|
description:
|
||||||
"List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
|
"List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
|
||||||
suggestions: [
|
suggestions: [
|
||||||
Pleroma.Web.RichMedia.Parsers.MetaTagsParser,
|
|
||||||
Pleroma.Web.RichMedia.Parsers.OEmbed,
|
Pleroma.Web.RichMedia.Parsers.OEmbed,
|
||||||
Pleroma.Web.RichMedia.Parsers.OGP,
|
|
||||||
Pleroma.Web.RichMedia.Parsers.TwitterCard
|
Pleroma.Web.RichMedia.Parsers.TwitterCard
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@ -105,8 +105,8 @@ defp parse_html(html), do: Floki.parse_document!(html)
|
||||||
defp maybe_parse(html) do
|
defp maybe_parse(html) do
|
||||||
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
|
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
|
||||||
case parser.parse(html, acc) do
|
case parser.parse(html, acc) do
|
||||||
{:ok, data} -> {:halt, data}
|
data when data != %{} -> {:halt, data}
|
||||||
{:error, _msg} -> {:cont, acc}
|
_ -> {:cont, acc}
|
||||||
end
|
end
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,22 +3,15 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
|
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
|
||||||
def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
|
def parse(data, html, prefix, key_name, value_name \\ "content") do
|
||||||
meta_data =
|
html
|
||||||
html
|
|> get_elements(key_name, prefix)
|
||||||
|> get_elements(key_name, prefix)
|
|> Enum.reduce(data, fn el, acc ->
|
||||||
|> Enum.reduce(data, fn el, acc ->
|
attributes = normalize_attributes(el, prefix, key_name, value_name)
|
||||||
attributes = normalize_attributes(el, prefix, key_name, value_name)
|
|
||||||
|
|
||||||
Map.merge(acc, attributes)
|
Map.merge(acc, attributes)
|
||||||
end)
|
end)
|
||||||
|> maybe_put_title(html)
|
|> maybe_put_title(html)
|
||||||
|
|
||||||
if Enum.empty?(meta_data) do
|
|
||||||
{:error, error_message}
|
|
||||||
else
|
|
||||||
{:ok, meta_data}
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
defp get_elements(html, key_name, prefix) do
|
defp get_elements(html, key_name, prefix) do
|
||||||
|
|
|
@ -7,9 +7,9 @@ def parse(html, _data) do
|
||||||
with elements = [_ | _] <- get_discovery_data(html),
|
with elements = [_ | _] <- get_discovery_data(html),
|
||||||
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
|
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
|
||||||
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
|
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
|
||||||
{:ok, oembed_data}
|
oembed_data
|
||||||
else
|
else
|
||||||
_e -> {:error, "No OEmbed data found"}
|
_e -> %{}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -3,13 +3,8 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
|
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
|
||||||
def parse(html, data) do
|
@deprecated "OGP parser is deprecated. Use TwitterCard instead."
|
||||||
Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
|
def parse(_html, _data) do
|
||||||
html,
|
%{}
|
||||||
data,
|
|
||||||
"og",
|
|
||||||
"No OGP metadata found",
|
|
||||||
"property"
|
|
||||||
)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -5,18 +5,11 @@
|
||||||
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
|
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
|
||||||
alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
|
alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
|
||||||
|
|
||||||
@spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
|
@spec parse(list(), map()) :: map()
|
||||||
def parse(html, data) do
|
def parse(html, data) do
|
||||||
data
|
data
|
||||||
|> parse_name_attrs(html)
|
|> MetaTagsParser.parse(html, "og", "property")
|
||||||
|> parse_property_attrs(html)
|
|> MetaTagsParser.parse(html, "twitter", "name")
|
||||||
end
|
|> MetaTagsParser.parse(html, "twitter", "property")
|
||||||
|
|
||||||
defp parse_name_attrs(data, html) do
|
|
||||||
MetaTagsParser.parse(html, data, "twitter", %{}, "name")
|
|
||||||
end
|
|
||||||
|
|
||||||
defp parse_property_attrs({_, data}, html) do
|
|
||||||
MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|
||||||
alias Pleroma.Web.RichMedia.Parsers.TwitterCard
|
alias Pleroma.Web.RichMedia.Parsers.TwitterCard
|
||||||
|
|
||||||
test "returns error when html not contains twitter card" do
|
test "returns error when html not contains twitter card" do
|
||||||
assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) ==
|
assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
|
||||||
{:error, "No twitter card metadata found"}
|
|
||||||
end
|
end
|
||||||
|
|
||||||
test "parses twitter card with only name attributes" do
|
test "parses twitter card with only name attributes" do
|
||||||
|
@ -17,15 +16,21 @@ test "parses twitter card with only name attributes" do
|
||||||
|> Floki.parse_document!()
|
|> Floki.parse_document!()
|
||||||
|
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
%{
|
||||||
%{
|
"app:id:googleplay" => "com.nytimes.android",
|
||||||
"app:id:googleplay" => "com.nytimes.android",
|
"app:name:googleplay" => "NYTimes",
|
||||||
"app:name:googleplay" => "NYTimes",
|
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
|
||||||
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
|
"site" => nil,
|
||||||
"site" => nil,
|
"description" =>
|
||||||
"title" =>
|
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
||||||
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
|
"image" =>
|
||||||
}}
|
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
|
||||||
|
"type" => "article",
|
||||||
|
"url" =>
|
||||||
|
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
|
||||||
|
"title" =>
|
||||||
|
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
|
||||||
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
test "parses twitter card with only property attributes" do
|
test "parses twitter card with only property attributes" do
|
||||||
|
@ -34,19 +39,19 @@ test "parses twitter card with only property attributes" do
|
||||||
|> Floki.parse_document!()
|
|> Floki.parse_document!()
|
||||||
|
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
%{
|
||||||
%{
|
"card" => "summary_large_image",
|
||||||
"card" => "summary_large_image",
|
"description" =>
|
||||||
"description" =>
|
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
||||||
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
"image" =>
|
||||||
"image" =>
|
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
|
||||||
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
|
"image:alt" => "",
|
||||||
"image:alt" => "",
|
"title" =>
|
||||||
"title" =>
|
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
||||||
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
"url" =>
|
||||||
"url" =>
|
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
|
||||||
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
|
"type" => "article"
|
||||||
}}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
test "parses twitter card with name & property attributes" do
|
test "parses twitter card with name & property attributes" do
|
||||||
|
@ -55,23 +60,23 @@ test "parses twitter card with name & property attributes" do
|
||||||
|> Floki.parse_document!()
|
|> Floki.parse_document!()
|
||||||
|
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
%{
|
||||||
%{
|
"app:id:googleplay" => "com.nytimes.android",
|
||||||
"app:id:googleplay" => "com.nytimes.android",
|
"app:name:googleplay" => "NYTimes",
|
||||||
"app:name:googleplay" => "NYTimes",
|
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
|
||||||
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
|
"card" => "summary_large_image",
|
||||||
"card" => "summary_large_image",
|
"description" =>
|
||||||
"description" =>
|
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
||||||
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
"image" =>
|
||||||
"image" =>
|
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
|
||||||
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
|
"image:alt" => "",
|
||||||
"image:alt" => "",
|
"site" => nil,
|
||||||
"site" => nil,
|
"title" =>
|
||||||
"title" =>
|
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
||||||
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
"url" =>
|
||||||
"url" =>
|
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
|
||||||
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
|
"type" => "article"
|
||||||
}}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
test "respect only first title tag on the page" do
|
test "respect only first title tag on the page" do
|
||||||
|
@ -84,14 +89,17 @@ test "respect only first title tag on the page" do
|
||||||
File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
|
File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
|
||||||
|
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
%{
|
||||||
%{
|
"site" => "@atlasobscura",
|
||||||
"site" => "@atlasobscura",
|
"title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
|
||||||
"title" =>
|
"card" => "summary_large_image",
|
||||||
"The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
|
"image" => image_path,
|
||||||
"card" => "summary_large_image",
|
"description" =>
|
||||||
"image" => image_path
|
"She's the only woman veteran honored with a monument at West Point. But where was she buried?",
|
||||||
}}
|
"site_name" => "Atlas Obscura",
|
||||||
|
"type" => "article",
|
||||||
|
"url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
|
||||||
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
test "takes first founded title in html head if there is html markup error" do
|
test "takes first founded title in html head if there is html markup error" do
|
||||||
|
@ -100,14 +108,20 @@ test "takes first founded title in html head if there is html markup error" do
|
||||||
|> Floki.parse_document!()
|
|> Floki.parse_document!()
|
||||||
|
|
||||||
assert TwitterCard.parse(html, %{}) ==
|
assert TwitterCard.parse(html, %{}) ==
|
||||||
{:ok,
|
%{
|
||||||
%{
|
"site" => nil,
|
||||||
"site" => nil,
|
"title" =>
|
||||||
"title" =>
|
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
|
||||||
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
|
"app:id:googleplay" => "com.nytimes.android",
|
||||||
"app:id:googleplay" => "com.nytimes.android",
|
"app:name:googleplay" => "NYTimes",
|
||||||
"app:name:googleplay" => "NYTimes",
|
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
|
||||||
"app:url:googleplay" => "nytimes://reader/id/100000006583622"
|
"description" =>
|
||||||
}}
|
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
|
||||||
|
"image" =>
|
||||||
|
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
|
||||||
|
"type" => "article",
|
||||||
|
"url" =>
|
||||||
|
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
|
||||||
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue