Dedupe uploads

This commit is contained in:
Sir_Boops 2018-04-15 17:37:51 -06:00
parent b5d8213e70
commit 3f0440ac3c
No known key found for this signature in database
GPG Key ID: 6DF2B9312201B66B
4 changed files with 117 additions and 49 deletions

View File

@ -8,7 +8,8 @@ config :pleroma, :instance,
name: "<%= name %>", name: "<%= name %>",
email: "<%= email %>", email: "<%= email %>",
limit: 5000, limit: 5000,
registrations_open: true registrations_open: true,
dedupe_media: true
config :pleroma, :media_proxy, config :pleroma, :media_proxy,
enabled: false, enabled: false,

View File

@ -2,20 +2,21 @@ defmodule Pleroma.Upload do
alias Ecto.UUID alias Ecto.UUID
alias Pleroma.Web alias Pleroma.Web
def store(%Plug.Upload{} = file) do def store(%Plug.Upload{} = file, should_dedupe) do
uuid = UUID.generate() content_type = get_content_type(file.path)
upload_folder = Path.join(upload_path(), uuid) uuid = get_uuid(file, should_dedupe)
File.mkdir_p!(upload_folder) name = get_name(file, uuid, content_type, should_dedupe)
result_file = Path.join(upload_folder, file.filename) upload_folder = get_upload_path(uuid, should_dedupe)
File.cp!(file.path, result_file) url_path = get_url(name, uuid, should_dedupe)
# fix content type on some image uploads File.mkdir_p!(upload_folder)
content_type = result_file = Path.join(upload_folder, name)
if file.content_type in [nil, "application/octet-stream"] do
get_content_type(file.path) if File.exists?(result_file) do
else File.rm!(file.path)
file.content_type else
end File.cp!(file.path, result_file)
end
%{ %{
"type" => "Image", "type" => "Image",
@ -23,26 +24,48 @@ def store(%Plug.Upload{} = file) do
%{ %{
"type" => "Link", "type" => "Link",
"mediaType" => content_type, "mediaType" => content_type,
"href" => url_for(Path.join(uuid, :cow_uri.urlencode(file.filename))) "href" => url_path
} }
], ],
"name" => file.filename, "name" => name
"uuid" => uuid
} }
end end
def store(%{"img" => "data:image/" <> image_data}) do def store(%{"img" => "data:image/" <> image_data}, should_dedupe) do
parsed = Regex.named_captures(~r/(?<filetype>jpeg|png|gif);base64,(?<data>.*)/, image_data) parsed = Regex.named_captures(~r/(?<filetype>jpeg|png|gif);base64,(?<data>.*)/, image_data)
data = Base.decode64!(parsed["data"]) data = Base.decode64!(parsed["data"], ignore: :whitespace)
uuid = UUID.generate() uuid = UUID.generate()
upload_folder = Path.join(upload_path(), uuid) uuidpath = Path.join(upload_path(), uuid)
uuid = UUID.generate()
File.mkdir_p!(upload_path())
File.write!(uuidpath, data)
content_type = get_content_type(uuidpath)
name =
create_name(
String.downcase(Base.encode16(:crypto.hash(:sha256, data))),
parsed["filetype"],
content_type
)
upload_folder = get_upload_path(uuid, should_dedupe)
url_path = get_url(name, uuid, should_dedupe)
File.mkdir_p!(upload_folder) File.mkdir_p!(upload_folder)
filename = Base.encode16(:crypto.hash(:sha256, data)) <> ".#{parsed["filetype"]}" result_file = Path.join(upload_folder, name)
result_file = Path.join(upload_folder, filename)
File.write!(result_file, data) if should_dedupe do
if !File.exists?(result_file) do
content_type = "image/#{parsed["filetype"]}" File.rename(uuidpath, result_file)
else
File.rm!(uuidpath)
end
else
File.rename(uuidpath, result_file)
end
%{ %{
"type" => "Image", "type" => "Image",
@ -50,11 +73,10 @@ def store(%{"img" => "data:image/" <> image_data}) do
%{ %{
"type" => "Link", "type" => "Link",
"mediaType" => content_type, "mediaType" => content_type,
"href" => url_for(Path.join(uuid, :cow_uri.urlencode(filename))) "href" => url_path
} }
], ],
"name" => filename, "name" => name
"uuid" => uuid
} }
end end
@ -63,6 +85,46 @@ def upload_path do
Keyword.fetch!(settings, :uploads) Keyword.fetch!(settings, :uploads)
end end
# Builds the stored file name as "<uuid>.<extension>", lowercased as a whole.
#
# When the content type is the generic "application/octet-stream", the
# caller-supplied `ext` is used as the extension. For any other content
# type the extension is whatever follows the last "/" in `type`.
defp create_name(uuid, ext, "application/octet-stream") do
  String.downcase("#{uuid}.#{ext}")
end

defp create_name(uuid, _ext, type) do
  subtype =
    type
    |> String.split("/")
    |> List.last()

  String.downcase("#{uuid}.#{subtype}")
end
# Picks the identifier for an upload.
#
# With deduplication on (any truthy `should_dedupe`), the identifier is the
# hex-encoded SHA-256 digest of the file's contents, read from `file.path`.
# Otherwise a fresh value from `UUID.generate/0` is returned.
defp get_uuid(file, should_dedupe) do
  cond do
    should_dedupe ->
      contents = File.read!(file.path)
      digest = :crypto.hash(:sha256, contents)
      Base.encode16(digest)

    true ->
      UUID.generate()
  end
end
# Determines the name under which an upload is stored.
#
# With deduplication on, the name is built by `create_name/3` from `uuid`,
# the text after the last "." in the uploaded filename, and the content
# type. Without deduplication the uploaded filename is kept unchanged.
defp get_name(file, uuid, type, should_dedupe) do
  if should_dedupe do
    extension =
      file.filename
      |> String.split(".")
      |> List.last()

    create_name(uuid, extension, type)
  else
    file.filename
  end
end
# Returns the directory an upload is written to.
#
# With deduplication on, files go directly into the directory returned by
# `upload_path/0`. Otherwise each upload gets its own sub-directory named
# after `uuid`.
defp get_upload_path(uuid, should_dedupe) do
  base_dir = upload_path()

  if should_dedupe, do: base_dir, else: Path.join(base_dir, uuid)
end
# Builds the URL for a stored upload via `url_for/1`.
#
# The file name is always URL-encoded. With deduplication on, the URL path
# is just the encoded name. Otherwise the encoded name is placed under a
# `uuid` path segment.
defp get_url(name, uuid, should_dedupe) do
  encoded_name = :cow_uri.urlencode(name)

  relative_path =
    if should_dedupe do
      encoded_name
    else
      Path.join(uuid, encoded_name)
    end

  url_for(relative_path)
end
defp url_for(file) do defp url_for(file) do
"#{Web.base_url()}/media/#{file}" "#{Web.base_url()}/media/#{file}"
end end

View File

@ -492,7 +492,7 @@ def fetch_activities(recipients, opts \\ %{}) do
end end
def upload(file) do def upload(file) do
data = Upload.store(file) data = Upload.store(file, Application.get_env(:pleroma, :instance)[:dedupe_media])
Repo.insert(%Object{data: data}) Repo.insert(%Object{data: data})
end end

View File

@ -3,40 +3,45 @@ defmodule Pleroma.UploadTest do
use Pleroma.DataCase use Pleroma.DataCase
describe "Storing a file" do describe "Storing a file" do
test "copies the file to the configured folder" do test "copies the file to the configured folder with deduping" do
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
file = %Plug.Upload{ file = %Plug.Upload{
content_type: "image/jpg", content_type: "image/jpg",
path: Path.absname("test/fixtures/image.jpg"), path: Path.absname("test/fixtures/image_tmp.jpg"),
filename: "an [image.jpg" filename: "an [image.jpg"
} }
data = Upload.store(file) data = Upload.store(file, true)
assert data["name"] == "an [image.jpg"
assert List.first(data["url"])["href"] == assert data["name"] ==
"http://localhost:4001/media/#{data["uuid"]}/an%20%5Bimage.jpg" "e7a6d0cf595bff76f14c9a98b6c199539559e8b844e02e51e5efcfd1f614a2df.jpeg"
end end
test "fixes an incorrect content type" do test "copies the file to the configured folder without deduping" do
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
file = %Plug.Upload{
content_type: "image/jpg",
path: Path.absname("test/fixtures/image_tmp.jpg"),
filename: "an [image.jpg"
}
data = Upload.store(file, false)
assert data["name"] == "an [image.jpg"
end
test "fixes incorrect content type" do
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
file = %Plug.Upload{ file = %Plug.Upload{
content_type: "application/octet-stream", content_type: "application/octet-stream",
path: Path.absname("test/fixtures/image.jpg"), path: Path.absname("test/fixtures/image_tmp.jpg"),
filename: "an [image.jpg" filename: "an [image.jpg"
} }
data = Upload.store(file) data = Upload.store(file, true)
assert hd(data["url"])["mediaType"] == "image/jpeg" assert hd(data["url"])["mediaType"] == "image/jpeg"
end end
test "does not modify a valid content type" do
file = %Plug.Upload{
content_type: "image/png",
path: Path.absname("test/fixtures/image.jpg"),
filename: "an [image.jpg"
}
data = Upload.store(file)
assert hd(data["url"])["mediaType"] == "image/png"
end
end end
end end