refactor processor architecture

This commit is contained in:
byte[] 2019-12-07 00:49:20 -05:00
parent e00b16ab74
commit 8a8281eaba
12 changed files with 424 additions and 242 deletions

View file

@ -23,11 +23,15 @@ services:
image: postgres:12.1
volumes:
- postgres_data:/var/lib/postgresql/data
logging:
driver: "none"
elasticsearch:
image: elasticsearch:6.8.5
volumes:
- elastic_data:/var/lib/elasticsearch
logging:
driver: "none"
ulimits:
nofile:
soft: 65536
@ -35,6 +39,8 @@ services:
redis:
image: redis:5.0.7
logging:
driver: "none"
web:
build:
@ -42,6 +48,8 @@ services:
dockerfile: ./docker/web/Dockerfile
volumes:
- .:/srv/philomena
logging:
driver: "none"
depends_on:
- app
ports:

View file

@ -0,0 +1,55 @@
defmodule Philomena.Analyzers do
  @moduledoc """
  Format detection and attribute extraction for uploaded files.
  """

  alias Philomena.Mime
  alias Philomena.Analyzers.Gif
  alias Philomena.Analyzers.Jpeg
  alias Philomena.Analyzers.Png
  alias Philomena.Analyzers.Svg
  alias Philomena.Analyzers.Webm

  # Content type => analyzer module. Anything not listed here is unsupported.
  @analyzers %{
    "image/gif" => Gif,
    "image/jpeg" => Jpeg,
    "image/png" => Png,
    "image/svg+xml" => Svg,
    "video/webm" => Webm
  }

  @doc """
  Looks up the analyzer module responsible for the given content type.

  Returns `{:ok, analyzer}` when the content type is supported and `:error`
  otherwise.

  An analyzer is used by calling its `analyze/1` function with the path to the
  file. It returns a map such as the following:

      %{
        animated?: false,
        dimensions: {800, 600},
        duration: 0.0,
        extension: "png",
        mime_type: "image/png"
      }
  """
  @spec analyzer(binary()) :: {:ok, module()} | :error
  def analyzer(content_type), do: Map.fetch(@analyzers, content_type)

  @doc """
  Runs a mime check followed by analysis on the given pathname or Plug.Upload.

  Returns `{:ok, analysis}` on success. When the mime check or the analyzer
  lookup does not succeed, their result (`:error`) is returned unchanged.
  Arguments that are neither a Plug.Upload nor a binary yield `:error`.
  """
  @spec analyze(Plug.Upload.t() | String.t()) :: {:ok, map()} | :error
  def analyze(%Plug.Upload{path: path}), do: analyze(path)

  def analyze(path) when is_binary(path) do
    # A non-matching step falls through `with` and is returned as-is.
    with {:ok, mime} <- Mime.file(path),
         {:ok, module} <- analyzer(mime) do
      {:ok, module.analyze(path)}
    end
  end

  def analyze(_path), do: :error
end

35
lib/philomena/filename.ex Normal file
View file

@ -0,0 +1,35 @@
defmodule Philomena.Filename do
  @moduledoc """
  Helpers for generating storage filenames for uploaded files.
  """

  @doc """
  Builds a relative filename of the form
  `year/month/day/<unix microseconds><pid digits>.<extension>`.

  The date part comes from the current UTC time; the basename combines the
  current UTC time in microseconds with the digits of the calling process'
  pid.
  """
  @spec build(String.t()) :: String.t()
  def build(extension) do
    "#{time_identifier(DateTime.utc_now())}/#{usec_identifier()}#{pid_identifier()}.#{extension}"
  end

  # Slash-separated year/month/day, without zero padding.
  defp time_identifier(time), do: "#{time.year}/#{time.month}/#{time.day}"

  # Current UTC time as a unix timestamp in microseconds, rendered as a string.
  defp usec_identifier do
    Integer.to_string(DateTime.to_unix(DateTime.utc_now(), :microsecond))
  end

  # Textual form of the calling pid with every non-digit character removed.
  defp pid_identifier do
    pid_text = List.to_string(:erlang.pid_to_list(self()))
    String.replace(pid_text, ~r/[^0-9]/, "")
  end
end

View file

@ -9,11 +9,11 @@ defmodule Philomena.Images do
alias Philomena.Repo
alias Philomena.Images.Image
alias Philomena.Images.Uploader
alias Philomena.SourceChanges.SourceChange
alias Philomena.TagChanges.TagChange
alias Philomena.Tags
alias Philomena.Tags.Tag
alias Philomena.Processors
alias Philomena.Notifications
@doc """
@ -53,7 +53,7 @@ defmodule Philomena.Images do
%Image{}
|> Image.creation_changeset(attrs, attribution)
|> Image.tag_changeset(attrs, [], tags)
|> Processors.after_upload(attrs)
|> Uploader.analyze_upload(attrs)
Multi.new
|> Multi.insert(:image, image)
@ -74,7 +74,7 @@ defmodule Philomena.Images do
create_subscription(image, attribution[:user])
end)
|> Multi.run(:after, fn _repo, %{image: image} ->
Processors.after_insert(image)
Uploader.persist_upload(image)
{:ok, nil}
end)

View file

@ -53,6 +53,7 @@ defmodule Philomena.Images.Image do
field :image_format, :string
field :image_mime_type, :string
field :image_aspect_ratio, :float
field :image_is_animated, :boolean, source: :is_animated
field :ip, EctoNetwork.INET
field :fingerprint, :string
field :user_agent, :string, default: ""
@ -77,7 +78,6 @@ defmodule Philomena.Images.Image do
field :tag_editing_allowed, :boolean, default: true
field :description_editing_allowed, :boolean, default: true
field :commenting_allowed, :boolean, default: true
field :is_animated, :boolean
field :first_seen_at, :naive_datetime
field :destroyed_content, :boolean
field :hidden_image_key, :string
@ -129,13 +129,13 @@ defmodule Philomena.Images.Image do
:image, :image_name, :image_width, :image_height, :image_size,
:image_format, :image_mime_type, :image_aspect_ratio,
:image_orig_sha512_hash, :image_sha512_hash, :uploaded_image,
:is_animated
:image_is_animated
])
|> validate_required([
:image, :image_width, :image_height, :image_size,
:image_format, :image_mime_type, :image_aspect_ratio,
:image_orig_sha512_hash, :image_sha512_hash, :uploaded_image,
:is_animated
:image_is_animated
])
|> validate_number(:image_size, greater_than: 0, less_than_or_equal_to: 26214400)
|> validate_number(:image_width, greater_than: 0, less_than_or_equal_to: 32767)

View file

@ -0,0 +1,122 @@
defmodule Philomena.Images.Thumbnailer do
  @moduledoc """
  Previewing and thumbnailing logic for Images.
  """

  alias Philomena.DuplicateReports
  alias Philomena.ImageIntensities
  alias Philomena.Images.Image
  alias Philomena.Processors
  alias Philomena.Analyzers
  alias Philomena.Sha512
  alias Philomena.Repo

  # Named versions handed to Processors.process/3. Each entry is presumably a
  # {width, height} pair, with nil meaning "no resize" -- confirm against the
  # individual processors.
  @versions [
    thumb_tiny: {50, 50},
    thumb_small: {150, 150},
    thumb: {250, 250},
    small: {320, 240},
    medium: {800, 600},
    large: {1280, 1024},
    tall: {1024, 4096},
    full: nil
  ]

  @doc """
  Generates all thumbnail versions for the image with the given id.

  Steps, in order:

    1. load the image and analyze its stored file
    2. apply the edit script produced by `Processors.process/3`
    3. store the recomputed SHA-512 through `Image.thumbnail_changeset/2`
    4. generate duplicate reports
    5. apply the edit script produced by `Processors.post_process/2`
    6. store the recomputed SHA-512 through `Image.process_changeset/2`

  Returns the result of the final `Repo.update!/1`. Raises when the image does
  not exist, the file cannot be analyzed, or an update fails.
  """
  def generate_thumbnails(image_id) do
    image = Repo.get!(Image, image_id)
    file = image_file(image)
    {:ok, analysis} = Analyzers.analyze(file)

    apply_edit_script(image, Processors.process(analysis, file, @versions))

    # Keep the updated struct so the following steps do not work from stale
    # data (in particular the hash written by the first update).
    image = recompute_sha512(image, file, &Image.thumbnail_changeset/2)
    generate_dupe_reports(image)

    apply_edit_script(image, Processors.post_process(analysis, file))
    recompute_sha512(image, file, &Image.process_changeset/2)
  end

  # Applies every change of an edit script; only the side effects matter.
  defp apply_edit_script(image, changes),
    do: Enum.each(changes, &apply_change(image, &1))

  defp apply_change(image, {:intensities, intensities}),
    do: ImageIntensities.create_image_intensity(image, intensities)

  defp apply_change(image, {:replace_original, new_file}),
    do: copy(new_file, image_file(image))

  defp apply_change(image, {:thumbnails, thumbnails}) do
    thumb_dir = image_thumb_dir(image)
    Enum.each(thumbnails, &apply_thumbnail(image, thumb_dir, &1))
  end

  defp apply_thumbnail(_image, thumb_dir, {:copy, new_file, destination}),
    do: copy(new_file, Path.join(thumb_dir, destination))

  defp apply_thumbnail(image, thumb_dir, {:symlink_original, destination}),
    do: symlink(image_file(image), Path.join(thumb_dir, destination))

  # Hashes the file on disk and persists the digest via the given changeset
  # function. Returns the updated image.
  defp recompute_sha512(image, file, changeset_fn),
    do: Repo.update!(changeset_fn.(image, %{"image_sha512_hash" => Sha512.file(file)}))

  defp generate_dupe_reports(image),
    do: DuplicateReports.generate_reports(image)

  # Copy from source to destination, creating parent directories along
  # the way and setting the appropriate permission bits when necessary.
  defp copy(source, destination) do
    prepare_dir(destination)

    # Best effort: the destination may simply not exist yet.
    File.rm(destination)
    File.cp!(source, destination)

    set_perms(destination)
  end

  # Try to handle filesystems that don't support symlinks
  # by falling back to a copy.
  defp symlink(source, destination) do
    source = Path.absname(source)

    prepare_dir(destination)

    # Remove a link left behind by a previous run. Otherwise File.ln_s/2
    # fails with :eexist and the fallback below would replace the link with
    # a full copy every time an image is re-processed.
    File.rm(destination)

    case File.ln_s(source, destination) do
      :ok ->
        set_perms(destination)

      _err ->
        copy(source, destination)
    end
  end

  # 0o644 = (S_IRUSR | S_IWUSR) | S_IRGRP | S_IROTH
  defp set_perms(destination),
    do: File.chmod(destination, 0o644)

  # Prepare the directory by creating it if it does not yet exist.
  defp prepare_dir(destination) do
    destination
    |> Path.dirname()
    |> File.mkdir_p!()
  end

  defp image_file(%Image{image: image}),
    do: Path.join(image_file_root(), image)

  defp image_thumb_dir(%Image{created_at: created_at, id: id}),
    do: Path.join([image_thumbnail_root(), time_identifier(created_at), to_string(id)])

  # Slash-separated year/month/day, without zero padding.
  defp time_identifier(time),
    do: Enum.join([time.year, time.month, time.day], "/")

  defp image_file_root,
    do: Application.get_env(:philomena, :image_file_root)

  defp image_thumbnail_root,
    do: image_file_root() <> "/thumbs"
end

View file

@ -0,0 +1,20 @@
defmodule Philomena.Images.Uploader do
  @moduledoc """
  Image-specific upload callbacks, delegating to `Philomena.Uploader`.
  """

  alias Philomena.Images.Image
  alias Philomena.Uploader

  @doc """
  Analyzes the upload found under the "image" key of `params` and applies the
  result to `image` through `Image.image_changeset/2`.
  """
  def analyze_upload(image, params),
    do: Uploader.analyze_upload(image, "image", params["image"], &Image.image_changeset/2)

  @doc """
  Writes the uploaded file of `image` below the configured
  `:image_file_root` of the `:philomena` application.
  """
  def persist_upload(image) do
    root = Application.get_env(:philomena, :image_file_root)
    Uploader.persist_upload(image, root, "image")
  end
end

View file

@ -1,16 +1,37 @@
defmodule Philomena.Mime do
@type mime :: String.t()
@doc """
Gets the MIME type of the given pathname.
Gets the mime type of the given pathname.
"""
@spec file(String.t()) :: {:ok, binary()} | :error
@spec file(String.t()) :: {:ok, mime()} | :error
def file(path) do
System.cmd("file", ["-b", "--mime-type", path])
|> case do
{output, 0} ->
{:ok, String.trim(output)}
true_mime(String.trim(output))
_error ->
:error
end
end
@doc """
Provides the "true" content type of this file.

Some files are identified incorrectly as a mime type they should not be.
These incorrect mime types (and their "corrected") versions are:

  - image/svg -> image/svg+xml
  - audio/webm -> video/webm

Returns `{:ok, mime}` for the corrected types above and for the supported
types (image/gif, image/jpeg, image/png, image/svg+xml, video/webm), and
`:error` for every other mime type.
"""
@spec true_mime(String.t()) :: {:ok, mime()} | :error
def true_mime("image/svg"), do: {:ok, "image/svg+xml"}
def true_mime("audio/webm"), do: {:ok, "video/webm"}

def true_mime(mime)
    when mime in ~W(image/gif image/jpeg image/png image/svg+xml video/webm),
    do: {:ok, mime}

def true_mime(_mime), do: :error
end

View file

@ -1,225 +1,69 @@
defmodule Philomena.Processors do
alias Philomena.Images.Image
alias Philomena.DuplicateReports
alias Philomena.ImageIntensities
alias Philomena.Repo
alias Philomena.Mime
alias Philomena.Sha512
@moduledoc """
Utilities for processing uploads.
def mimes(type) do
%{
"image/gif" => "image/gif",
"image/jpeg" => "image/jpeg",
"image/png" => "image/png",
"image/svg+xml" => "image/svg+xml",
"video/webm" => "video/webm",
"image/svg" => "image/svg+xml",
"audio/webm" => "video/webm"
}
|> Map.get(type)
Processors have 3 methods available:
- process/3:
Takes an analysis, file path, and version list and generates an
"edit script" that represents how to store this file according to the
given version list. See Philomena.Images.Thumbnailer for more
information on how this works.
- post_process/2:
Takes an analysis and file path and performs optimizations on the
upload. See Philomena.Images.Thumbnailer for more information on how this
works.
- intensities/2:
Takes an analysis and file path and generates an intensities map
appropriate for use by Philomena.DuplicateReports.
"""
alias Philomena.Processors.Gif
alias Philomena.Processors.Jpeg
alias Philomena.Processors.Png
alias Philomena.Processors.Svg
alias Philomena.Processors.Webm
@doc """
Looks up the processor module for the given content type.

Returns the module capable of processing that content type, or nil when the
content type is not supported.
"""
@spec processor(String.t()) :: module() | nil
def processor(content_type) do
  case content_type do
    "image/gif" -> Gif
    "image/jpeg" -> Jpeg
    "image/png" -> Png
    "image/svg+xml" -> Svg
    "video/webm" -> Webm
    _unsupported -> nil
  end
end
@doc """
Takes an analysis, file path, and version list and runs process/3 of the
processor selected by the analysis' mime type.

No nil check is performed on the processor lookup, so an analysis with an
unsupported mime type raises.
"""
@spec process(map(), String.t(), keyword) :: map()
def process(analysis, file, versions) do
  handler = processor(analysis.mime_type)
  handler.process(analysis, file, versions)
end
def analyzers(type) do
%{
"image/gif" => Philomena.Analyzers.Gif,
"image/jpeg" => Philomena.Analyzers.Jpeg,
"image/png" => Philomena.Analyzers.Png,
"image/svg+xml" => Philomena.Analyzers.Svg,
"video/webm" => Philomena.Analyzers.Webm
}
|> Map.get(type)
@doc """
Takes an analysis and file path and runs post_process/2 of the processor
selected by the analysis' mime type.

No nil check is performed on the processor lookup, so an analysis with an
unsupported mime type raises.
"""
@spec post_process(map(), String.t()) :: map()
def post_process(analysis, file) do
  handler = processor(analysis.mime_type)
  handler.post_process(analysis, file)
end
def processors(type) do
%{
"image/gif" => Philomena.Processors.Gif,
"image/jpeg" => Philomena.Processors.Jpeg,
"image/png" => Philomena.Processors.Png,
"image/svg+xml" => Philomena.Processors.Svg,
"video/webm" => Philomena.Processors.Webm
}
|> Map.get(type)
end
@versions [
thumb_tiny: {50, 50},
thumb_small: {150, 150},
thumb: {250, 250},
small: {320, 240},
medium: {800, 600},
large: {1280, 1024},
tall: {1024, 4096},
full: nil
]
def after_upload(image, params) do
with upload when not is_nil(upload) <- params["image"],
file <- upload.path,
{:ok, mime} <- Mime.file(file),
mime <- mimes(mime),
analyzer when not is_nil(analyzer) <- analyzers(mime),
analysis <- analyzer.analyze(file),
changes <- analysis_to_changes(analysis, file, upload.filename)
do
image
|> Image.image_changeset(changes)
else
_ ->
image
|> Image.image_changeset(%{})
end
end
def after_insert(image) do
file = image_file(image)
dir = Path.dirname(file)
File.mkdir_p!(dir)
File.cp!(image.uploaded_image, file)
end
def process_image(image_id) do
image = Repo.get!(Image, image_id)
mime = image.image_mime_type
file = image_file(image)
analyzer = analyzers(mime)
analysis = analyzer.analyze(file)
processor = processors(mime)
process = processor.process(analysis, file, @versions)
apply_edit_script(image, process)
sha512 = Sha512.file(file)
changeset = Image.thumbnail_changeset(image, %{"image_sha512_hash" => sha512})
image = Repo.update!(changeset)
spawn fn -> DuplicateReports.generate_reports(image) end
process = processor.post_process(analysis, file)
apply_edit_script(image, process)
sha512 = Sha512.file(file)
changeset = Image.process_changeset(image, %{"image_sha512_hash" => sha512})
Repo.update!(changeset)
end
defp apply_edit_script(image, changes) do
for change <- changes do
apply_change(image, change)
end
end
defp apply_change(image, {:intensities, intensities}) do
ImageIntensities.create_image_intensity(image, intensities)
end
defp apply_change(image, {:replace_original, new_file}) do
file = image_file(image)
File.cp(new_file, file)
File.chmod(file, 0o755)
end
defp apply_change(image, {:thumbnails, thumbnails}) do
thumb_dir = image_thumb_dir(image)
for thumbnail <- thumbnails do
apply_thumbnail(image, thumb_dir, thumbnail)
end
end
defp apply_thumbnail(_image, thumb_dir, {:copy, new_file, destination}) do
new_destination = Path.join([thumb_dir, destination])
dir = Path.dirname(new_destination)
File.mkdir_p!(dir)
File.cp!(new_file, new_destination)
File.chmod!(new_destination, 0o755)
end
defp apply_thumbnail(image, thumb_dir, {:symlink_original, destination}) do
file = Path.absname(image_file(image))
new_destination = Path.join([thumb_dir, destination])
dir = Path.dirname(new_destination)
File.mkdir_p!(dir)
File.rm(new_destination)
platform_symlink(file, new_destination)
File.chmod(new_destination, 0o755)
end
defp platform_symlink(source, destination) do
case File.ln_s(source, destination) do
:ok -> :ok
_err -> File.cp!(source, destination)
end
end
defp analysis_to_changes(analysis, file, upload_name) do
{width, height} = analysis.dimensions
{:ok, %{size: size}} = File.stat(file)
sha512 = Sha512.file(file)
filename = build_filename(analysis.extension)
%{
"image" => filename,
"image_name" => upload_name,
"image_width" => width,
"image_height" => height,
"image_size" => size,
"image_format" => analysis.extension,
"image_mime_type" => analysis.mime_type,
"image_aspect_ratio" => aspect_ratio(width, height),
"image_orig_sha512_hash" => sha512,
"image_sha512_hash" => sha512,
"is_animated" => analysis.animated?,
"uploaded_image" => file
}
end
defp aspect_ratio(_, 0), do: 0.0
defp aspect_ratio(w, h), do: w / h
defp image_file(image) do
Path.join([image_file_root(), image.image])
end
defp image_thumb_dir(image) do
Path.join([image_thumbnail_root(), time_identifier(image.created_at), to_string(image.id)])
end
defp build_filename(extension) do
[
time_identifier(DateTime.utc_now()),
"/",
usec_identifier(),
pid_identifier(),
".",
extension
]
|> Enum.join()
end
defp time_identifier(time) do
Enum.join([time.year, time.month, time.day], "/")
end
defp usec_identifier do
DateTime.utc_now()
|> DateTime.to_unix(:microsecond)
|> to_string()
end
defp pid_identifier do
self()
|> :erlang.pid_to_list()
|> to_string()
|> String.replace(~r/[^0-9]/, "")
end
defp image_file_root do
Application.get_env(:philomena, :image_file_root)
end
defp image_thumbnail_root do
image_file_root() <> "/thumbs"
@doc """
Takes an analysis and file path and runs intensities/2 of the processor
selected by the analysis' mime type.

No nil check is performed on the processor lookup, so an analysis with an
unsupported mime type raises.
"""
@spec intensities(map(), String.t()) :: map()
def intensities(analysis, file) do
  handler = processor(analysis.mime_type)
  handler.intensities(analysis, file)
end
end

View file

@ -32,7 +32,7 @@ defmodule Philomena.Servers.ImageProcessor do
end
defp process(image_id) do
Philomena.Processors.process_image(image_id)
Philomena.Images.Thumbnailer.generate_thumbnails(image_id)
rescue
_ ->
nil

86
lib/philomena/uploader.ex Normal file
View file

@ -0,0 +1,86 @@
defmodule Philomena.Uploader do
  @moduledoc """
  Generic upload handling: analysis of an incoming file and copying it to
  permanent storage.
  """

  alias Philomena.Filename
  alias Philomena.Analyzers
  alias Philomena.Sha512

  @doc """
  Analyzes the passed Plug.Upload and invokes `changeset_fn` with the model
  (or changeset) and a map of attributes derived from the analysis.

  Attribute keys are strings prefixed with `field_name` (for example
  `"image_width"` for the field `"image"`). In addition, `field_name` itself
  holds the generated storage name and `"uploaded_<field_name>"` holds the
  path of the uploaded temporary file.

  When analysis does not succeed, `changeset_fn` is invoked with an empty
  attribute map instead.
  """
  @spec analyze_upload(any(), String.t(), Plug.Upload.t(), (any(), map() -> Ecto.Changeset.t())) ::
          Ecto.Changeset.t()
  def analyze_upload(model_or_changeset, field_name, upload_parameter, changeset_fn) do
    case Analyzers.analyze(upload_parameter) do
      {:ok, analysis} ->
        details = extra_attributes(analysis, upload_parameter)

        unprefixed = %{
          "name" => details.name,
          "width" => details.width,
          "height" => details.height,
          "size" => details.size,
          "format" => details.extension,
          "mime_type" => details.mime_type,
          "aspect_ratio" => details.aspect_ratio,
          "orig_sha512_hash" => details.sha512,
          "sha512_hash" => details.sha512,
          "is_animated" => details.animated?
        }

        attributes =
          unprefixed
          |> prefix_attributes(field_name)
          |> Map.put(field_name, details.new_name)
          |> Map.put(upload_key(field_name), upload_parameter.path)

        changeset_fn.(model_or_changeset, attributes)

      _error ->
        changeset_fn.(model_or_changeset, %{})
    end
  end

  @doc """
  Writes the file to permanent storage. This should be the last step in the
  transaction.

  The source path is read from the `uploaded_<field_name>` key of `model` and
  the destination, relative to `file_root`, from its `field_name` key.
  """
  @spec persist_upload(any(), String.t(), String.t()) :: any()
  def persist_upload(model, file_root, field_name) do
    source = Map.get(model, String.to_existing_atom(upload_key(field_name)))
    relative_name = Map.get(model, String.to_existing_atom(field_name))
    target = Path.join(file_root, relative_name)

    # Create the target directory if it doesn't exist yet,
    # then write the file.
    target |> Path.dirname() |> File.mkdir_p!()
    File.cp!(source, target)
  end

  # Enriches the analysis with values derived from the upload itself: file
  # size, original filename, split dimensions, digest, generated storage name
  # and aspect ratio.
  defp extra_attributes(analysis, %Plug.Upload{path: path, filename: filename}) do
    {width, height} = analysis.dimensions
    ratio = aspect_ratio(width, height)
    %File.Stat{size: size} = File.stat!(path)
    digest = Sha512.file(path)
    stored_name = Filename.build(analysis.extension)

    Map.merge(analysis, %{
      size: size,
      name: filename,
      width: width,
      height: height,
      sha512: digest,
      new_name: stored_name,
      aspect_ratio: ratio
    })
  end

  # A zero height would divide by zero; report a ratio of 0.0 instead.
  defp aspect_ratio(_width, 0), do: 0.0
  defp aspect_ratio(width, height), do: width / height

  # Rewrites every key to "<prefix>_<key>".
  defp prefix_attributes(map, prefix) do
    for {key, value} <- map, into: %{}, do: {"#{prefix}_#{key}", value}
  end

  defp upload_key(field_name), do: "uploaded_#{field_name}"
end

View file

@ -1,4 +1,5 @@
defmodule PhilomenaWeb.ImageReverse do
alias Philomena.Analyzers
alias Philomena.Processors
alias Philomena.DuplicateReports
alias Philomena.Repo
@ -7,8 +8,6 @@ defmodule PhilomenaWeb.ImageReverse do
def images(image_params) do
image_params
|> Map.get("image")
|> Map.get(:path)
|> mime()
|> analyze()
|> intensities()
|> case do
@ -26,22 +25,14 @@ defmodule PhilomenaWeb.ImageReverse do
end
end
defp mime(file) do
{:ok, mime} = Philomena.Mime.file(file)
{mime, file}
end
defp analyze({mime, file}) do
case Processors.analyzers(mime) do
nil -> :error
a -> {a.analyze(file), mime, file}
end
# Runs mime detection and analysis on the uploaded file.
#
# Returns {analysis, path} on success. Unsupported or unreadable uploads
# yield :error, which the intensities(:error) clause passes through, instead
# of raising a MatchError on untrusted input.
defp analyze(%Plug.Upload{path: path}) do
  case Analyzers.analyze(path) do
    {:ok, analysis} -> {analysis, path}
    _error -> :error
  end
end
defp intensities(:error), do: :error
defp intensities({analysis, mime, file}) do
{analysis, Processors.processors(mime).intensities(analysis, file)}
defp intensities({analysis, path}) do
{analysis, Processors.intensities(analysis, path)}
end
# The distance metric is taxicab distance, not Euclidean,