Merge branch 'merge-ogp-twitter-parsers' into 'develop'

Merge OGP parser with TwitterCard. Closes #1835. See merge request pleroma/pleroma!2642
2020-06-15 12:41:48 +00:00 · 2020-06-15 12:41:48 +00:00 · 1e49bfa9ac
parent 448e93ce2c bd63089a63
commit 1e49bfa9ac
9 changed files with 92 additions and 99 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Changed
 - MFR policy to set global expiration for all local Create activities
+- OGP rich media parser merged with TwitterCard
 <details>
  <summary>API Changes</summary>
 - **Breaking:** Emoji API: changed methods and renamed routes.
--- a/config/config.exs
+++ b/config/config.exs
@ -387,7 +387,6 @@
  ignore_tld: ["local", "localdomain", "lan"],
  parsers: [
    Pleroma.Web.RichMedia.Parsers.TwitterCard,
    Pleroma.Web.RichMedia.Parsers.OGP,
    Pleroma.Web.RichMedia.Parsers.OEmbed
  ],
  ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
--- a/config/description.exs
+++ b/config/description.exs
@ -2104,9 +2104,7 @@
        description:
          "List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
        suggestions: [
          Pleroma.Web.RichMedia.Parsers.MetaTagsParser,
          Pleroma.Web.RichMedia.Parsers.OEmbed,
          Pleroma.Web.RichMedia.Parsers.OGP,
          Pleroma.Web.RichMedia.Parsers.TwitterCard
        ]
      },
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@ -105,8 +105,8 @@ defp parse_html(html), do: Floki.parse_document!(html)
  defp maybe_parse(html) do
    Enum.reduce_while(parsers(), %{}, fn parser, acc ->
      case parser.parse(html, acc) do
-        {:ok, data} -> {:halt, data}
+        data when data != %{} -> {:halt, data}
-        {:error, _msg} -> {:cont, acc}
+        _ -> {:cont, acc}
      end
    end)
  end
--- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@ -3,22 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
-  def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
+  def parse(data, html, prefix, key_name, value_name \\ "content") do
-    meta_data =
+    html
-      html
+    |> get_elements(key_name, prefix)
-      |> get_elements(key_name, prefix)
+    |> Enum.reduce(data, fn el, acc ->
-      |> Enum.reduce(data, fn el, acc ->
+      attributes = normalize_attributes(el, prefix, key_name, value_name)
        attributes = normalize_attributes(el, prefix, key_name, value_name)
-        Map.merge(acc, attributes)
+      Map.merge(acc, attributes)
-      end)
+    end)
-      |> maybe_put_title(html)
+    |> maybe_put_title(html)
    if Enum.empty?(meta_data) do
      {:error, error_message}
    else
      {:ok, meta_data}
    end
  end
  defp get_elements(html, key_name, prefix) do
--- a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
@ -7,9 +7,9 @@ def parse(html, _data) do
    with elements = [_ | _] <- get_discovery_data(html),
         oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
         {:ok, oembed_data} <- get_oembed_data(oembed_url) do
-      {:ok, oembed_data}
+      oembed_data
    else
-      _e -> {:error, "No OEmbed data found"}
+      _e -> %{}
    end
  end
--- a/lib/pleroma/web/rich_media/parsers/ogp.ex
+++ b/lib/pleroma/web/rich_media/parsers/ogp.ex
@ -3,13 +3,8 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 defmodule Pleroma.Web.RichMedia.Parsers.OGP do
-  def parse(html, data) do
+  @deprecated "OGP parser is deprecated. Use TwitterCard instead."
-    Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
+  def parse(_html, _data) do
-      html,
+    %{}
      data,
      "og",
      "No OGP metadata found",
      "property"
    )
  end
 end
--- a/lib/pleroma/web/rich_media/parsers/twitter_card.ex
+++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
@ -5,18 +5,11 @@
 defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
  alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
-  @spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
+  @spec parse(list(), map()) :: map()
  def parse(html, data) do
    data
-    |> parse_name_attrs(html)
+    |> MetaTagsParser.parse(html, "og", "property")
-    |> parse_property_attrs(html)
+    |> MetaTagsParser.parse(html, "twitter", "name")
-  end
+    |> MetaTagsParser.parse(html, "twitter", "property")
  defp parse_name_attrs(data, html) do
    MetaTagsParser.parse(html, data, "twitter", %{}, "name")
  end
  defp parse_property_attrs({_, data}, html) do
    MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
  end
 end
--- a/test/web/rich_media/parsers/twitter_card_test.exs
+++ b/test/web/rich_media/parsers/twitter_card_test.exs
@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
  alias Pleroma.Web.RichMedia.Parsers.TwitterCard
  test "returns error when html not contains twitter card" do
-    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) ==
+    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
             {:error, "No twitter card metadata found"}
  end
  test "parses twitter card with only name attributes" do
@ -17,15 +16,21 @@ test "parses twitter card with only name attributes" do
      |> Floki.parse_document!()
    assert TwitterCard.parse(html, %{}) ==
-             {:ok,
+             %{
-              %{
+               "app:id:googleplay" => "com.nytimes.android",
-                "app:id:googleplay" => "com.nytimes.android",
+               "app:name:googleplay" => "NYTimes",
-                "app:name:googleplay" => "NYTimes",
+               "app:url:googleplay" => "nytimes://reader/id/100000006583622",
-                "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+               "site" => nil,
-                "site" => nil,
+               "description" =>
-                "title" =>
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
+               "image" =>
-              }}
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
               "type" => "article",
               "url" =>
                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
               "title" =>
                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
             }
  end
  test "parses twitter card with only property attributes" do
@ -34,19 +39,19 @@ test "parses twitter card with only property attributes" do
      |> Floki.parse_document!()
    assert TwitterCard.parse(html, %{}) ==
-             {:ok,
+             %{
-              %{
+               "card" => "summary_large_image",
-                "card" => "summary_large_image",
+               "description" =>
-                "description" =>
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                  "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               "image" =>
-                "image" =>
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                  "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+               "image:alt" => "",
-                "image:alt" => "",
+               "title" =>
-                "title" =>
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+               "url" =>
-                "url" =>
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
-                  "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+               "type" => "article"
-              }}
+             }
  end
  test "parses twitter card with name & property attributes" do
@ -55,23 +60,23 @@ test "parses twitter card with name & property attributes" do
      |> Floki.parse_document!()
    assert TwitterCard.parse(html, %{}) ==
-             {:ok,
+             %{
-              %{
+               "app:id:googleplay" => "com.nytimes.android",
-                "app:id:googleplay" => "com.nytimes.android",
+               "app:name:googleplay" => "NYTimes",
-                "app:name:googleplay" => "NYTimes",
+               "app:url:googleplay" => "nytimes://reader/id/100000006583622",
-                "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+               "card" => "summary_large_image",
-                "card" => "summary_large_image",
+               "description" =>
-                "description" =>
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                  "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               "image" =>
-                "image" =>
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                  "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+               "image:alt" => "",
-                "image:alt" => "",
+               "site" => nil,
-                "site" => nil,
+               "title" =>
-                "title" =>
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+               "url" =>
-                "url" =>
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
-                  "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+               "type" => "article"
-              }}
+             }
  end
  test "respect only first title tag on the page" do
@ -84,14 +89,17 @@ test "respect only first title tag on the page" do
      File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
    assert TwitterCard.parse(html, %{}) ==
-             {:ok,
+             %{
-              %{
+               "site" => "@atlasobscura",
-                "site" => "@atlasobscura",
+               "title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
-                "title" =>
+               "card" => "summary_large_image",
-                  "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
+               "image" => image_path,
-                "card" => "summary_large_image",
+               "description" =>
-                "image" => image_path
+                 "She's the only woman veteran honored with a monument at West Point. But where was she buried?",
-              }}
+               "site_name" => "Atlas Obscura",
               "type" => "article",
               "url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
             }
  end
  test "takes first founded title in html head if there is html markup error" do
@ -100,14 +108,20 @@ test "takes first founded title in html head if there is html markup error" do
      |> Floki.parse_document!()
    assert TwitterCard.parse(html, %{}) ==
-             {:ok,
+             %{
-              %{
+               "site" => nil,
-                "site" => nil,
+               "title" =>
-                "title" =>
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
+               "app:id:googleplay" => "com.nytimes.android",
-                "app:id:googleplay" => "com.nytimes.android",
+               "app:name:googleplay" => "NYTimes",
-                "app:name:googleplay" => "NYTimes",
+               "app:url:googleplay" => "nytimes://reader/id/100000006583622",
-                "app:url:googleplay" => "nytimes://reader/id/100000006583622"
+               "description" =>
-              }}
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
               "image" =>
                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
               "type" => "article",
               "url" =>
                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
             }
  end
 end