title parse improvement

This commit is contained in:
Alexander Strizhakov 2020-01-29 11:13:34 +03:00
parent f1d5c0f079
commit 1f4fbe9d98
No known key found for this signature in database
GPG Key ID: 022896A53AEF1381
3 changed files with 244 additions and 1 deletions

View File

@ -48,6 +48,6 @@ defp maybe_put_title(meta, html) when meta != %{} do
defp maybe_put_title(meta, _), do: meta defp maybe_put_title(meta, _), do: meta
# Returns the text of the first title element that Floki finds in `html`.
# Each line of this hunk carries two copies of the code (side-by-side diff
# rendering): one looks up the bare "title" selector, the other scopes the
# lookup to "html head title".
# NOTE(review): presumably the left copy is the pre-commit version and the
# right copy the post-commit version — confirm against the raw diff.
defp get_page_title(html) do defp get_page_title(html) do
Floki.find(html, "title") |> List.first() |> Floki.text() Floki.find(html, "html head title") |> List.first() |> Floki.text()
end end
end end

File diff suppressed because one or more lines are too long

View File

@ -85,4 +85,19 @@ test "respect only first title tag on the page" do
image: image_path image: image_path
}} }}
end end
test "takes first founded title in html head if there is html markup error" do
  # Per the test name, the fixture contains an HTML markup error; the parser
  # is still expected to report the title shown below along with the
  # Google Play app metadata.
  expected_card = %{
    site: nil,
    title:
      "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
    "app:id:googleplay": "com.nytimes.android",
    "app:name:googleplay": "NYTimes",
    "app:url:googleplay": "nytimes://reader/id/100000006583622"
  }

  # Read the fixture and feed it to the parser with an empty starting map.
  parsed =
    "test/fixtures/nypd-facial-recognition-children-teenagers4.html"
    |> File.read!()
    |> TwitterCard.parse(%{})

  assert parsed == {:ok, expected_card}
end
end end