Merge branch 'feature/richmedia-ttl' into 'develop'

add the rich media ttl based on image exp time

See merge request pleroma/pleroma!1438
This commit is contained in:
kaniini 2019-07-19 21:36:36 +00:00
commit 33729bbb28
8 changed files with 222 additions and 1 deletions

View File

@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Configuration: Filter.AnonymizeFilename added ability to retain file extension with custom text
- Admin API: changed json structure for saving config settings.
- RichMedia: parsers and their order are configured in `rich_media` config.
- RichMedia: add the rich media ttl based on image expiration time.
## [1.0.1] - 2019-07-14
### Security

View File

@ -345,7 +345,8 @@
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.OEmbed
]
],
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
config :pleroma, :media_proxy,
enabled: false,

View File

@ -0,0 +1,33 @@
# How to set rich media cache ttl based on image ttl
## Explanation
Richmedia are cached without the ttl but the rich media may have image which can expire, like aws signed url.
In such cases the old image url (expired) is returned from the media cache.
So to avoid such situation we can define a module that will set ttl based on image.
The module must adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL`
### Example
```exs
defmodule MyModule do
@behaviour Pleroma.Web.RichMedia.Parser.TTL
@impl Pleroma.Web.RichMedia.Parser.TTL
def ttl(data, url) do
image_url = Map.get(data, :image)
# do some parsing in the url and get the ttl of the image
# return ttl is unix time
parse_ttl_from_url(image_url)
end
end
```
And update the config
```exs
config :pleroma, :rich_media,
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl, MyModule]
```
> For reference there is a parser for AWS signed URL `Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl`, it's enabled by default.

View File

@ -24,6 +24,7 @@ def parse(url) do
Cachex.fetch!(:rich_media_cache, url, fn _ ->
{:commit, parse_url(url)}
end)
|> set_ttl_based_on_image(url)
rescue
e ->
{:error, "Cachex error: #{inspect(e)}"}
@ -31,6 +32,50 @@ def parse(url) do
end
end
@doc """
Set the rich media cache based on the expiration time of image.
Adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL`
## Example
defmodule MyModule do
@behaviour Pleroma.Web.RichMedia.Parser.TTL
def ttl(data, url) do
image_url = Map.get(data, :image)
# do some parsing in the url and get the ttl of the image
# and return ttl is unix time
parse_ttl_from_url(image_url)
end
end
Define the module in the config
config :pleroma, :rich_media,
ttl_setters: [MyModule]
"""
def set_ttl_based_on_image({:ok, data}, url) do
with {:ok, nil} <- Cachex.ttl(:rich_media_cache, url) do
ttl = get_ttl_from_image(data, url)
Cachex.expire_at(:rich_media_cache, url, ttl * 1000)
{:ok, data}
else
_ ->
{:ok, data}
end
end
defp get_ttl_from_image(data, url) do
Pleroma.Config.get([:rich_media, :ttl_setters])
|> Enum.reduce({:ok, nil}, fn
module, {:ok, _ttl} ->
module.ttl(data, url)
_, error ->
error
end)
end
defp parse_url(url) do
try do
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)

View File

@ -0,0 +1,52 @@
defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do
@behaviour Pleroma.Web.RichMedia.Parser.TTL
@impl Pleroma.Web.RichMedia.Parser.TTL
def ttl(data, _url) do
image = Map.get(data, :image)
if is_aws_signed_url(image) do
image
|> parse_query_params()
|> format_query_params()
|> get_expiration_timestamp()
end
end
defp is_aws_signed_url(""), do: nil
defp is_aws_signed_url(nil), do: nil
defp is_aws_signed_url(image) when is_binary(image) do
%URI{host: host, query: query} = URI.parse(image)
if String.contains?(host, "amazonaws.com") and
String.contains?(query, "X-Amz-Expires") do
image
else
nil
end
end
defp is_aws_signed_url(_), do: nil
defp parse_query_params(image) do
%URI{query: query} = URI.parse(image)
query
end
defp format_query_params(query) do
query
|> String.split(~r/&|=/)
|> Enum.chunk_every(2)
|> Map.new(fn [k, v] -> {k, v} end)
end
defp get_expiration_timestamp(params) when is_map(params) do
{:ok, date} =
params
|> Map.get("X-Amz-Date")
|> Timex.parse("{ISO:Basic:Z}")
Timex.to_unix(date) + String.to_integer(Map.get(params, "X-Amz-Expires"))
end
end

View File

@ -0,0 +1,3 @@
defmodule Pleroma.Web.RichMedia.Parser.TTL do
@callback ttl(Map.t(), String.t()) :: {:ok, Integer.t()} | {:error, String.t()}
end

5
test/fixtures/rich_media/amz.html vendored Normal file
View File

@ -0,0 +1,5 @@
<meta name="twitter:card" content="summary" />
<meta name="twitter:site" content="@flickr" />
<meta name="twitter:title" content="Small Island Developing States Photo Submission" />
<meta name="twitter:description" content="View the album on Flickr." />
<meta name="twitter:image" content="https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=20190716T175105Z&X-Amz-Expires=300000&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host" />

View File

@ -0,0 +1,81 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.TTL.AwsSignedUrlTest do
use ExUnit.Case, async: true
test "s3 signed url is parsed correct for expiration time" do
url = "https://pleroma.social/amz"
{:ok, timestamp} =
Timex.now()
|> DateTime.truncate(:second)
|> Timex.format("{ISO:Basic:Z}")
# in seconds
valid_till = 30
metadata = construct_metadata(timestamp, valid_till, url)
expire_time =
Timex.parse!(timestamp, "{ISO:Basic:Z}") |> Timex.to_unix() |> Kernel.+(valid_till)
assert expire_time == Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl.ttl(metadata, url)
end
test "s3 signed url is parsed and correct ttl is set for rich media" do
url = "https://pleroma.social/amz"
{:ok, timestamp} =
Timex.now()
|> DateTime.truncate(:second)
|> Timex.format("{ISO:Basic:Z}")
# in seconds
valid_till = 30
metadata = construct_metadata(timestamp, valid_till, url)
body = """
<meta name="twitter:card" content="Pleroma" />
<meta name="twitter:site" content="Pleroma" />
<meta name="twitter:title" content="Pleroma" />
<meta name="twitter:description" content="Pleroma" />
<meta name="twitter:image" content="#{Map.get(metadata, :image)}" />
"""
Tesla.Mock.mock(fn
%{
method: :get,
url: "https://pleroma.social/amz"
} ->
%Tesla.Env{status: 200, body: body}
end)
Cachex.put(:rich_media_cache, url, metadata)
Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image({:ok, metadata}, url)
{:ok, cache_ttl} = Cachex.ttl(:rich_media_cache, url)
# as there is delay in setting and pulling the data from cache we ignore 1 second
assert_in_delta(valid_till * 1000, cache_ttl, 1000)
end
defp construct_s3_url(timestamp, valid_till) do
"https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=#{
timestamp
}&X-Amz-Expires=#{valid_till}&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host"
end
defp construct_metadata(timestamp, valid_till, url) do
%{
image: construct_s3_url(timestamp, valid_till),
site: "Pleroma",
title: "Pleroma",
description: "Pleroma",
url: url
}
end
end