From c8d696540f7d1d6fb322ab5522647456ccef8078 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 2 Jun 2024 00:15:09 -0400 Subject: [PATCH] Split out media processing tools into PhilomenaMedia namespace --- lib/mix/tasks/upload_to_s3.ex | 2 +- lib/philomena/adverts/uploader.ex | 2 +- lib/philomena/analyzers.ex | 56 --- lib/philomena/badges/uploader.ex | 2 +- lib/philomena/filename.ex | 21 - lib/philomena/image_intensities.ex | 6 +- lib/philomena/images/thumbnailer.ex | 11 +- lib/philomena/images/uploader.ex | 2 +- lib/philomena/intensities.ex | 23 -- lib/philomena/mime.ex | 37 -- lib/philomena/objects.ex | 148 ------- lib/philomena/processors.ex | 78 ---- lib/philomena/tags/uploader.ex | 2 +- lib/philomena/uploader.ex | 125 ------ lib/philomena/users/uploader.ex | 2 +- lib/philomena_media/analyzers.ex | 71 ++++ lib/philomena_media/analyzers/analyzer.ex | 5 + .../analyzers/gif.ex | 12 +- .../analyzers/jpeg.ex | 12 +- .../analyzers/png.ex | 12 +- lib/philomena_media/analyzers/result.ex | 36 ++ .../analyzers/svg.ex | 12 +- .../analyzers/webm.ex | 12 +- lib/philomena_media/filename.ex | 36 ++ lib/philomena_media/intensities.ex | 68 ++++ lib/philomena_media/mime.ex | 67 ++++ lib/philomena_media/objects.ex | 236 ++++++++++++ lib/philomena_media/processors.ex | 202 ++++++++++ .../processors/gif.ex | 21 +- .../processors/jpeg.ex | 21 +- .../processors/png.ex | 23 +- lib/philomena_media/processors/processor.ex | 21 + .../processors/svg.ex | 21 +- .../processors/webm.ex | 21 +- lib/{philomena => philomena_media}/sha512.ex | 17 +- lib/philomena_media/uploader.ex | 360 ++++++++++++++++++ lib/philomena_web/image_reverse.ex | 4 +- 37 files changed, 1266 insertions(+), 541 deletions(-) delete mode 100644 lib/philomena/analyzers.ex delete mode 100644 lib/philomena/filename.ex delete mode 100644 lib/philomena/intensities.ex delete mode 100644 lib/philomena/mime.ex delete mode 100644 lib/philomena/objects.ex delete mode 100644 lib/philomena/processors.ex delete mode 100644 
lib/philomena/uploader.ex create mode 100644 lib/philomena_media/analyzers.ex create mode 100644 lib/philomena_media/analyzers/analyzer.ex rename lib/{philomena => philomena_media}/analyzers/gif.ex (74%) rename lib/{philomena => philomena_media}/analyzers/jpeg.ex (73%) rename lib/{philomena => philomena_media}/analyzers/png.ex (74%) create mode 100644 lib/philomena_media/analyzers/result.ex rename lib/{philomena => philomena_media}/analyzers/svg.ex (71%) rename lib/{philomena => philomena_media}/analyzers/webm.ex (74%) create mode 100644 lib/philomena_media/filename.ex create mode 100644 lib/philomena_media/intensities.ex create mode 100644 lib/philomena_media/mime.ex create mode 100644 lib/philomena_media/objects.ex create mode 100644 lib/philomena_media/processors.ex rename lib/{philomena => philomena_media}/processors/gif.ex (83%) rename lib/{philomena => philomena_media}/processors/jpeg.ex (79%) rename lib/{philomena => philomena_media}/processors/png.ex (74%) create mode 100644 lib/philomena_media/processors/processor.ex rename lib/{philomena => philomena_media}/processors/svg.ex (66%) rename lib/{philomena => philomena_media}/processors/webm.ex (90%) rename lib/{philomena => philomena_media}/sha512.ex (63%) create mode 100644 lib/philomena_media/uploader.ex diff --git a/lib/mix/tasks/upload_to_s3.ex b/lib/mix/tasks/upload_to_s3.ex index ae6b22fd..87ab48d3 100644 --- a/lib/mix/tasks/upload_to_s3.ex +++ b/lib/mix/tasks/upload_to_s3.ex @@ -10,7 +10,7 @@ defmodule Mix.Tasks.UploadToS3 do } alias Philomena.Images.Thumbnailer - alias Philomena.Objects + alias PhilomenaMedia.Objects alias PhilomenaQuery.Batch import Ecto.Query diff --git a/lib/philomena/adverts/uploader.ex b/lib/philomena/adverts/uploader.ex index 5260e415..d575ebe4 100644 --- a/lib/philomena/adverts/uploader.ex +++ b/lib/philomena/adverts/uploader.ex @@ -4,7 +4,7 @@ defmodule Philomena.Adverts.Uploader do """ alias Philomena.Adverts.Advert - alias Philomena.Uploader + alias PhilomenaMedia.Uploader 
def analyze_upload(advert, params) do Uploader.analyze_upload(advert, "image", params["image"], &Advert.image_changeset/2) diff --git a/lib/philomena/analyzers.ex b/lib/philomena/analyzers.ex deleted file mode 100644 index 1a3961ec..00000000 --- a/lib/philomena/analyzers.ex +++ /dev/null @@ -1,56 +0,0 @@ -defmodule Philomena.Analyzers do - @moduledoc """ - Utilities for analyzing the format and various attributes of uploaded files. - """ - - alias Philomena.Mime - - alias Philomena.Analyzers.Gif - alias Philomena.Analyzers.Jpeg - alias Philomena.Analyzers.Png - alias Philomena.Analyzers.Svg - alias Philomena.Analyzers.Webm - - @doc """ - Returns an {:ok, analyzer} tuple, with the analyzer being a module capable - of analyzing this content type, or :error. - - To use an analyzer, call the analyze/1 method on it with the path to the - file. It will return a map such as the following: - - %{ - animated?: false, - dimensions: {800, 600}, - duration: 0.0, - extension: "png", - mime_type: "image/png" - } - """ - @spec analyzer(binary()) :: {:ok, module()} | :error - def analyzer(content_type) - - def analyzer("image/gif"), do: {:ok, Gif} - def analyzer("image/jpeg"), do: {:ok, Jpeg} - def analyzer("image/png"), do: {:ok, Png} - def analyzer("image/svg+xml"), do: {:ok, Svg} - def analyzer("video/webm"), do: {:ok, Webm} - def analyzer(_content_type), do: :error - - @doc """ - Attempts a mime check and analysis on the given pathname or Plug.Upload. 
- """ - @spec analyze(Plug.Upload.t() | String.t()) :: {:ok, map()} | :error - def analyze(%Plug.Upload{path: path}), do: analyze(path) - - def analyze(path) when is_binary(path) do - with {:ok, mime} <- Mime.file(path), - {:ok, analyzer} <- analyzer(mime) do - {:ok, analyzer.analyze(path)} - else - error -> - error - end - end - - def analyze(_path), do: :error -end diff --git a/lib/philomena/badges/uploader.ex b/lib/philomena/badges/uploader.ex index 8ab44f9f..6410bae9 100644 --- a/lib/philomena/badges/uploader.ex +++ b/lib/philomena/badges/uploader.ex @@ -4,7 +4,7 @@ defmodule Philomena.Badges.Uploader do """ alias Philomena.Badges.Badge - alias Philomena.Uploader + alias PhilomenaMedia.Uploader def analyze_upload(badge, params) do Uploader.analyze_upload(badge, "image", params["image"], &Badge.image_changeset/2) diff --git a/lib/philomena/filename.ex b/lib/philomena/filename.ex deleted file mode 100644 index ea0d230d..00000000 --- a/lib/philomena/filename.ex +++ /dev/null @@ -1,21 +0,0 @@ -defmodule Philomena.Filename do - @moduledoc """ - Utilities for building arbitrary filenames for uploaded files. 
- """ - - @spec build(String.t()) :: String.t() - def build(extension) do - [ - time_identifier(DateTime.utc_now()), - "/", - UUID.uuid1(), - ".", - extension - ] - |> Enum.join() - end - - defp time_identifier(time) do - Enum.join([time.year, time.month, time.day], "/") - end -end diff --git a/lib/philomena/image_intensities.ex b/lib/philomena/image_intensities.ex index 5a4d130b..34c311d5 100644 --- a/lib/philomena/image_intensities.ex +++ b/lib/philomena/image_intensities.ex @@ -36,9 +36,9 @@ defmodule Philomena.ImageIntensities do {:error, %Ecto.Changeset{}} """ - def create_image_intensity(image, attrs \\ %{}) do + def create_image_intensity(image, attrs \\ %PhilomenaMedia.Intensities{}) do %ImageIntensity{image_id: image.id} - |> ImageIntensity.changeset(attrs) + |> ImageIntensity.changeset(Map.from_struct(attrs)) |> Repo.insert() end @@ -56,7 +56,7 @@ defmodule Philomena.ImageIntensities do """ def update_image_intensity(%ImageIntensity{} = image_intensity, attrs) do image_intensity - |> ImageIntensity.changeset(attrs) + |> ImageIntensity.changeset(Map.from_struct(attrs)) |> Repo.update() end diff --git a/lib/philomena/images/thumbnailer.ex b/lib/philomena/images/thumbnailer.ex index d6d5cc8e..8c566135 100644 --- a/lib/philomena/images/thumbnailer.ex +++ b/lib/philomena/images/thumbnailer.ex @@ -3,15 +3,16 @@ defmodule Philomena.Images.Thumbnailer do Prevewing and thumbnailing logic for Images. 
""" + alias PhilomenaMedia.Processors + alias PhilomenaMedia.Analyzers + alias PhilomenaMedia.Uploader + alias PhilomenaMedia.Objects + alias PhilomenaMedia.Sha512 + alias Philomena.DuplicateReports alias Philomena.ImageIntensities alias Philomena.ImagePurgeWorker alias Philomena.Images.Image - alias Philomena.Processors - alias Philomena.Analyzers - alias Philomena.Uploader - alias Philomena.Objects - alias Philomena.Sha512 alias Philomena.Repo @versions [ diff --git a/lib/philomena/images/uploader.ex b/lib/philomena/images/uploader.ex index 39111e11..3c54a7db 100644 --- a/lib/philomena/images/uploader.ex +++ b/lib/philomena/images/uploader.ex @@ -5,7 +5,7 @@ defmodule Philomena.Images.Uploader do alias Philomena.Images.Thumbnailer alias Philomena.Images.Image - alias Philomena.Uploader + alias PhilomenaMedia.Uploader def analyze_upload(image, params) do Uploader.analyze_upload(image, "image", params["image"], &Image.image_changeset/2) diff --git a/lib/philomena/intensities.ex b/lib/philomena/intensities.ex deleted file mode 100644 index 250d0fea..00000000 --- a/lib/philomena/intensities.ex +++ /dev/null @@ -1,23 +0,0 @@ -defmodule Philomena.Intensities do - @doc """ - Gets the corner intensities of the given image file. - The image file must be in the PNG or JPEG format. - """ - @spec file(String.t()) :: {:ok, map()} | :error - def file(input) do - System.cmd("image-intensities", [input]) - |> case do - {output, 0} -> - [nw, ne, sw, se] = - output - |> String.trim() - |> String.split("\t") - |> Enum.map(&String.to_float/1) - - {:ok, %{nw: nw, ne: ne, sw: sw, se: se}} - - _error -> - :error - end - end -end diff --git a/lib/philomena/mime.ex b/lib/philomena/mime.ex deleted file mode 100644 index 08d5dfc1..00000000 --- a/lib/philomena/mime.ex +++ /dev/null @@ -1,37 +0,0 @@ -defmodule Philomena.Mime do - @type mime :: String.t() - - @doc """ - Gets the mime type of the given pathname. 
- """ - @spec file(String.t()) :: {:ok, mime()} | :error - def file(path) do - System.cmd("file", ["-b", "--mime-type", path]) - |> case do - {output, 0} -> - true_mime(String.trim(output)) - - _error -> - :error - end - end - - @doc """ - Provides the "true" content type of this file. - - Some files are identified incorrectly as a mime type they should not be. - These incorrect mime types (and their "corrected") versions are: - - - image/svg -> image/svg+xml - - audio/webm -> video/webm - """ - @spec true_mime(String.t()) :: {:ok, mime()} - def true_mime("image/svg"), do: {:ok, "image/svg+xml"} - def true_mime("audio/webm"), do: {:ok, "video/webm"} - - def true_mime(mime) - when mime in ~W(image/gif image/jpeg image/png image/svg+xml video/webm), - do: {:ok, mime} - - def true_mime(mime), do: {:unsupported_mime, mime} -end diff --git a/lib/philomena/objects.ex b/lib/philomena/objects.ex deleted file mode 100644 index 3df94142..00000000 --- a/lib/philomena/objects.ex +++ /dev/null @@ -1,148 +0,0 @@ -defmodule Philomena.Objects do - @moduledoc """ - Replication wrapper for object storage backends. - """ - alias Philomena.Mime - require Logger - - # - # Fetch a key from the storage backend and - # write it into the destination file. - # - # sobelow_skip ["Traversal.FileModule"] - @spec download_file(String.t(), String.t()) :: any() - def download_file(key, file_path) do - contents = - backends() - |> Enum.find_value(fn opts -> - ExAws.S3.get_object(opts[:bucket], key) - |> ExAws.request(opts[:config_overrides]) - |> case do - {:ok, result} -> result - _ -> nil - end - end) - - File.write!(file_path, contents.body) - end - - # - # Upload a file using a single API call, writing the - # contents from the given path to storage. 
- # - # sobelow_skip ["Traversal.FileModule"] - @spec put(String.t(), String.t()) :: any() - def put(key, file_path) do - {_, mime} = Mime.file(file_path) - contents = File.read!(file_path) - - run_all(fn opts -> - ExAws.S3.put_object(opts[:bucket], key, contents, content_type: mime) - |> ExAws.request!(opts[:config_overrides]) - end) - end - - # - # Upload a file using multiple API calls, writing the - # contents from the given path to storage. - # - @spec upload(String.t(), String.t()) :: any() - def upload(key, file_path) do - # Workaround for API rate limit issues on R2 - put(key, file_path) - end - - # - # Copies a key from the source to the destination, - # overwriting the destination object if its exists. - # - @spec copy(String.t(), String.t()) :: any() - def copy(source_key, dest_key) do - # Potential workaround for inconsistent PutObjectCopy on R2 - # - # run_all(fn opts-> - # ExAws.S3.put_object_copy(opts[:bucket], dest_key, opts[:bucket], source_key) - # |> ExAws.request!(opts[:config_overrides]) - # end) - - try do - file_path = Briefly.create!() - download_file(source_key, file_path) - upload(dest_key, file_path) - catch - _kind, _value -> Logger.warning("Failed to copy #{source_key} -> #{dest_key}") - end - end - - # - # Removes the key from storage. - # - @spec delete(String.t()) :: any() - def delete(key) do - run_all(fn opts -> - ExAws.S3.delete_object(opts[:bucket], key) - |> ExAws.request!(opts[:config_overrides]) - end) - end - - # - # Removes all given keys from storage. 
- # - @spec delete_multiple([String.t()]) :: any() - def delete_multiple(keys) do - run_all(fn opts -> - ExAws.S3.delete_multiple_objects(opts[:bucket], keys) - |> ExAws.request!(opts[:config_overrides]) - end) - end - - defp run_all(wrapped) do - fun = fn opts -> - try do - wrapped.(opts) - :ok - catch - _kind, _value -> :error - end - end - - backends() - |> Task.async_stream(fun, timeout: :infinity) - |> Enum.any?(fn {_, v} -> v == :error end) - |> case do - true -> - Logger.warning("Failed to operate on all backends") - - _ -> - :ok - end - end - - defp backends do - primary_opts() ++ replica_opts() - end - - defp primary_opts do - [ - %{ - config_overrides: Application.fetch_env!(:philomena, :s3_primary_options), - bucket: Application.fetch_env!(:philomena, :s3_primary_bucket) - } - ] - end - - defp replica_opts do - replica_bucket = Application.get_env(:philomena, :s3_secondary_bucket) - - if not is_nil(replica_bucket) do - [ - %{ - config_overrides: Application.fetch_env!(:philomena, :s3_secondary_options), - bucket: replica_bucket - } - ] - else - [] - end - end -end diff --git a/lib/philomena/processors.ex b/lib/philomena/processors.ex deleted file mode 100644 index 202da1d4..00000000 --- a/lib/philomena/processors.ex +++ /dev/null @@ -1,78 +0,0 @@ -defmodule Philomena.Processors do - @moduledoc """ - Utilities for processing uploads. - - Processors have 3 methods available: - - - process/3: - Takes an analysis, file path, and version list and generates an - "edit script" that represents how to store this file according to the - given version list. See Philomena.Images.Thumbnailer for more - information on how this works. - - - post_process/2: - Takes an analysis and file path and performs optimizations on the - upload. See Philomena.Images.Thumbnailer for more information on how this - works. - - - intensities/2: - Takes an analysis and file path and generates an intensities map - appropriate for use by Philomena.DuplicateReports. 
- """ - - alias Philomena.Processors.Gif - alias Philomena.Processors.Jpeg - alias Philomena.Processors.Png - alias Philomena.Processors.Svg - alias Philomena.Processors.Webm - - @doc """ - Returns a processor, with the processor being a module capable - of processing this content type, or nil. - """ - @spec processor(String.t()) :: module() | nil - def processor(content_type) - - def processor("image/gif"), do: Gif - def processor("image/jpeg"), do: Jpeg - def processor("image/png"), do: Png - def processor("image/svg+xml"), do: Svg - def processor("video/webm"), do: Webm - def processor(_content_type), do: nil - - @doc """ - Takes a MIME type and version list and generates a list of versions to be - generated (e.g., ["thumb.png"]). List contents differ based on file type. - """ - @spec versions(String.t(), keyword) :: [String.t()] - def versions(mime_type, valid_sizes) do - processor(mime_type).versions(valid_sizes) - end - - @doc """ - Takes an analyzer, file path, and version list and runs the appropriate - processor's process/3. - """ - @spec process(map(), String.t(), keyword) :: map() - def process(analysis, file, versions) do - processor(analysis.mime_type).process(analysis, file, versions) - end - - @doc """ - Takes an analyzer and file path and runs the appropriate processor's - post_process/2. - """ - @spec post_process(map(), String.t()) :: map() - def post_process(analysis, file) do - processor(analysis.mime_type).post_process(analysis, file) - end - - @doc """ - Takes an analyzer and file path and runs the appropriate processor's - intensities/2. 
- """ - @spec intensities(map(), String.t()) :: map() - def intensities(analysis, file) do - processor(analysis.mime_type).intensities(analysis, file) - end -end diff --git a/lib/philomena/tags/uploader.ex b/lib/philomena/tags/uploader.ex index d6149221..d7ab63d6 100644 --- a/lib/philomena/tags/uploader.ex +++ b/lib/philomena/tags/uploader.ex @@ -4,7 +4,7 @@ defmodule Philomena.Tags.Uploader do """ alias Philomena.Tags.Tag - alias Philomena.Uploader + alias PhilomenaMedia.Uploader def analyze_upload(tag, params) do Uploader.analyze_upload(tag, "image", params["image"], &Tag.image_changeset/2) diff --git a/lib/philomena/uploader.ex b/lib/philomena/uploader.ex deleted file mode 100644 index df982897..00000000 --- a/lib/philomena/uploader.ex +++ /dev/null @@ -1,125 +0,0 @@ -defmodule Philomena.Uploader do - @moduledoc """ - Upload and processing callback logic for image files. - """ - - alias Philomena.Filename - alias Philomena.Analyzers - alias Philomena.Objects - alias Philomena.Sha512 - import Ecto.Changeset - - @doc """ - Performs analysis of the passed Plug.Upload, and invokes a changeset - callback on the model or changeset passed in with attributes set on - the field_name. 
- """ - @spec analyze_upload(any(), String.t(), Plug.Upload.t(), (any(), map() -> Ecto.Changeset.t())) :: - Ecto.Changeset.t() - def analyze_upload(model_or_changeset, field_name, upload_parameter, changeset_fn) do - with {:ok, analysis} <- Analyzers.analyze(upload_parameter), - analysis <- extra_attributes(analysis, upload_parameter) do - removed = - model_or_changeset - |> change() - |> get_field(field(field_name)) - - attributes = - %{ - "name" => analysis.name, - "width" => analysis.width, - "height" => analysis.height, - "size" => analysis.size, - "format" => analysis.extension, - "mime_type" => analysis.mime_type, - "duration" => analysis.duration, - "aspect_ratio" => analysis.aspect_ratio, - "orig_sha512_hash" => analysis.sha512, - "sha512_hash" => analysis.sha512, - "is_animated" => analysis.animated? - } - |> prefix_attributes(field_name) - |> Map.put(field_name, analysis.new_name) - |> Map.put(upload_key(field_name), upload_parameter.path) - |> Map.put(remove_key(field_name), removed) - - changeset_fn.(model_or_changeset, attributes) - else - {:unsupported_mime, mime} -> - attributes = prefix_attributes(%{"mime_type" => mime}, field_name) - changeset_fn.(model_or_changeset, attributes) - - _error -> - changeset_fn.(model_or_changeset, %{}) - end - end - - @doc """ - Writes the file to permanent storage. This should be the second-to-last step - in the transaction. - """ - @spec persist_upload(any(), String.t(), String.t()) :: any() - def persist_upload(model, file_root, field_name) do - source = Map.get(model, field(upload_key(field_name))) - dest = Map.get(model, field(field_name)) - target = Path.join(file_root, dest) - - persist_file(target, source) - end - - @doc """ - Persist an arbitrary file to storage at the given path with the correct - content type and permissions. - """ - def persist_file(path, file) do - Objects.upload(path, file) - end - - @doc """ - Removes the old file from permanent storage. 
This should be the last step in - the transaction. - """ - @spec unpersist_old_upload(any(), String.t(), String.t()) :: any() - def unpersist_old_upload(model, file_root, field_name) do - model - |> Map.get(field(remove_key(field_name))) - |> try_remove(file_root) - end - - defp extra_attributes(analysis, %Plug.Upload{path: path, filename: filename}) do - {width, height} = analysis.dimensions - aspect_ratio = aspect_ratio(width, height) - - stat = File.stat!(path) - sha512 = Sha512.file(path) - new_name = Filename.build(analysis.extension) - - analysis - |> Map.put(:size, stat.size) - |> Map.put(:name, filename) - |> Map.put(:width, width) - |> Map.put(:height, height) - |> Map.put(:sha512, sha512) - |> Map.put(:new_name, new_name) - |> Map.put(:aspect_ratio, aspect_ratio) - end - - defp aspect_ratio(_, 0), do: 0.0 - defp aspect_ratio(w, h), do: w / h - - defp try_remove("", _file_root), do: nil - defp try_remove(nil, _file_root), do: nil - - defp try_remove(file, file_root) do - Objects.delete(Path.join(file_root, file)) - end - - defp prefix_attributes(map, prefix), - do: Map.new(map, fn {key, value} -> {"#{prefix}_#{key}", value} end) - - defp upload_key(field_name), do: "uploaded_#{field_name}" - - defp remove_key(field_name), do: "removed_#{field_name}" - - defp field(field_name), do: String.to_existing_atom(field_name) -end diff --git a/lib/philomena/users/uploader.ex b/lib/philomena/users/uploader.ex index 9d8cb1cb..7e6f0ced 100644 --- a/lib/philomena/users/uploader.ex +++ b/lib/philomena/users/uploader.ex @@ -4,7 +4,7 @@ defmodule Philomena.Users.Uploader do """ alias Philomena.Users.User - alias Philomena.Uploader + alias PhilomenaMedia.Uploader def analyze_upload(user, params) do Uploader.analyze_upload(user, "avatar", params["avatar"], &User.avatar_changeset/2) diff --git a/lib/philomena_media/analyzers.ex b/lib/philomena_media/analyzers.ex new file mode 100644 index 00000000..a010916f --- /dev/null +++ b/lib/philomena_media/analyzers.ex @@ -0,0 +1,71 @@ 
+defmodule PhilomenaMedia.Analyzers do + @moduledoc """ + Utilities for analyzing the format and various attributes of uploaded files. + """ + + alias PhilomenaMedia.Analyzers.{Gif, Jpeg, Png, Svg, Webm} + alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Mime + + @doc """ + Returns an `{:ok, analyzer}` tuple, with the analyzer being a module capable + of analyzing this media type, or `:error`. + + The allowed MIME types are: + - `image/gif` + - `image/jpeg` + - `image/png` + - `image/svg+xml` + - `video/webm` + + > #### Info {: .info} + > + > This is an interface intended for use when the MIME type is already known. + > Using an analyzer not matched to the file may cause unexpected results. + + ## Examples + + {:ok, analyzer} = PhilomenaMedia.Analyzers.analyzer("image/png") + :error = PhilomenaMedia.Analyzers.analyzer("application/octet-stream") + + """ + @spec analyzer(Mime.t()) :: {:ok, module()} | :error + def analyzer(content_type) + + def analyzer("image/gif"), do: {:ok, Gif} + def analyzer("image/jpeg"), do: {:ok, Jpeg} + def analyzer("image/png"), do: {:ok, Png} + def analyzer("image/svg+xml"), do: {:ok, Svg} + def analyzer("video/webm"), do: {:ok, Webm} + def analyzer(_content_type), do: :error + + @doc """ + Attempts a MIME type check and analysis on the given path or `m:Plug.Upload`. 
+ + ## Examples + + file = "image_file.png" + {:ok, %Result{...}} = Analyzers.analyze(file) + + file = %Plug.Upload{...} + {:ok, %Result{...}} = Analyzers.analyze(file) + + file = "text_file.txt" + {:unsupported_mime, "text/plain"} = Analyzers.analyze(file) + + """ + @spec analyze(Plug.Upload.t() | Path.t()) :: {:ok, Result.t()} | :error + def analyze(%Plug.Upload{path: path}), do: analyze(path) + + def analyze(path) when is_binary(path) do + with {:ok, mime} <- Mime.file(path), + {:ok, analyzer} <- analyzer(mime) do + {:ok, analyzer.analyze(path)} + else + error -> + error + end + end + + def analyze(_path), do: :error +end diff --git a/lib/philomena_media/analyzers/analyzer.ex b/lib/philomena_media/analyzers/analyzer.ex new file mode 100644 index 00000000..cf3b28ec --- /dev/null +++ b/lib/philomena_media/analyzers/analyzer.ex @@ -0,0 +1,5 @@ +defmodule PhilomenaMedia.Analyzers.Analyzer do + @moduledoc false + + @callback analyze(Path.t()) :: PhilomenaMedia.Analyzers.Result.t() +end diff --git a/lib/philomena/analyzers/gif.ex b/lib/philomena_media/analyzers/gif.ex similarity index 74% rename from lib/philomena/analyzers/gif.ex rename to lib/philomena_media/analyzers/gif.ex index 2253ac04..982d7a31 100644 --- a/lib/philomena/analyzers/gif.ex +++ b/lib/philomena_media/analyzers/gif.ex @@ -1,8 +1,16 @@ -defmodule Philomena.Analyzers.Gif do +defmodule PhilomenaMedia.Analyzers.Gif do + @moduledoc false + + alias PhilomenaMedia.Analyzers.Analyzer + alias PhilomenaMedia.Analyzers.Result + + @behaviour Analyzer + + @spec analyze(Path.t()) :: Result.t() def analyze(file) do stats = stats(file) - %{ + %Result{ extension: "gif", mime_type: "image/gif", animated?: stats.animated?, diff --git a/lib/philomena/analyzers/jpeg.ex b/lib/philomena_media/analyzers/jpeg.ex similarity index 73% rename from lib/philomena/analyzers/jpeg.ex rename to lib/philomena_media/analyzers/jpeg.ex index c73564b7..60b29e04 100644 --- a/lib/philomena/analyzers/jpeg.ex +++ b/lib/philomena_media/analyzers/jpeg.ex @@ -1,8 +1,16 
@@ -defmodule Philomena.Analyzers.Jpeg do +defmodule PhilomenaMedia.Analyzers.Jpeg do + @moduledoc false + + alias PhilomenaMedia.Analyzers.Analyzer + alias PhilomenaMedia.Analyzers.Result + + @behaviour Analyzer + + @spec analyze(Path.t()) :: Result.t() def analyze(file) do stats = stats(file) - %{ + %Result{ extension: "jpg", mime_type: "image/jpeg", animated?: false, diff --git a/lib/philomena/analyzers/png.ex b/lib/philomena_media/analyzers/png.ex similarity index 74% rename from lib/philomena/analyzers/png.ex rename to lib/philomena_media/analyzers/png.ex index 30d3bc9b..83cb506f 100644 --- a/lib/philomena/analyzers/png.ex +++ b/lib/philomena_media/analyzers/png.ex @@ -1,8 +1,16 @@ -defmodule Philomena.Analyzers.Png do +defmodule PhilomenaMedia.Analyzers.Png do + @moduledoc false + + alias PhilomenaMedia.Analyzers.Analyzer + alias PhilomenaMedia.Analyzers.Result + + @behaviour Analyzer + + @spec analyze(Path.t()) :: Result.t() def analyze(file) do stats = stats(file) - %{ + %Result{ extension: "png", mime_type: "image/png", animated?: stats.animated?, diff --git a/lib/philomena_media/analyzers/result.ex b/lib/philomena_media/analyzers/result.ex new file mode 100644 index 00000000..57dc77c0 --- /dev/null +++ b/lib/philomena_media/analyzers/result.ex @@ -0,0 +1,36 @@ +defmodule PhilomenaMedia.Analyzers.Result do + @moduledoc """ + The analysis result. 
+ + - `:animated?` - whether the media file is animated + - `:dimensions` - the maximum dimensions of the media file, as `{width, height}` + - `:duration` - the maximum duration of the media file, or 0 if not applicable + - `:extension` - the file extension the media file should take, based on its contents + - `:mime_type` - the MIME type the media file should take, based on its contents + + ## Example + + %Result{ + animated?: false, + dimensions: {800, 600}, + duration: 0.0, + extension: "png", + mime_type: "image/png" + } + + """ + + @type t :: %__MODULE__{ + animated?: boolean(), + dimensions: {integer(), integer()}, + duration: float(), + extension: String.t(), + mime_type: String.t() + } + + defstruct animated?: false, + dimensions: {0, 0}, + duration: 0.0, + extension: "", + mime_type: "application/octet-stream" +end diff --git a/lib/philomena/analyzers/svg.ex b/lib/philomena_media/analyzers/svg.ex similarity index 71% rename from lib/philomena/analyzers/svg.ex rename to lib/philomena_media/analyzers/svg.ex index d76aab4b..f83a55f0 100644 --- a/lib/philomena/analyzers/svg.ex +++ b/lib/philomena_media/analyzers/svg.ex @@ -1,8 +1,16 @@ -defmodule Philomena.Analyzers.Svg do +defmodule PhilomenaMedia.Analyzers.Svg do + @moduledoc false + + alias PhilomenaMedia.Analyzers.Analyzer + alias PhilomenaMedia.Analyzers.Result + + @behaviour Analyzer + + @spec analyze(Path.t()) :: Result.t() def analyze(file) do stats = stats(file) - %{ + %Result{ extension: "svg", mime_type: "image/svg+xml", animated?: false, diff --git a/lib/philomena/analyzers/webm.ex b/lib/philomena_media/analyzers/webm.ex similarity index 74% rename from lib/philomena/analyzers/webm.ex rename to lib/philomena_media/analyzers/webm.ex index 236978b1..b215e01e 100644 --- a/lib/philomena/analyzers/webm.ex +++ b/lib/philomena_media/analyzers/webm.ex @@ -1,8 +1,16 @@ -defmodule Philomena.Analyzers.Webm do +defmodule PhilomenaMedia.Analyzers.Webm do + @moduledoc false + + alias 
PhilomenaMedia.Analyzers.Analyzer + alias PhilomenaMedia.Analyzers.Result + + @behaviour Analyzer + + @spec analyze(Path.t()) :: Result.t() def analyze(file) do stats = stats(file) - %{ + %Result{ extension: "webm", mime_type: "video/webm", animated?: stats.animated?, diff --git a/lib/philomena_media/filename.ex b/lib/philomena_media/filename.ex new file mode 100644 index 00000000..ba169fe4 --- /dev/null +++ b/lib/philomena_media/filename.ex @@ -0,0 +1,36 @@ +defmodule PhilomenaMedia.Filename do + @moduledoc """ + Utilities for building arbitrary filenames for uploaded files. + """ + + @type extension :: String.t() + + @doc """ + This function builds a replacement "filename key" based on the supplied file extension. + + Names are generated in the form `year/month/day/uuid.ext`. It is recommended to avoid + providing user-controlled file-extensions to this function; select them from a list of + known extensions instead. + + ## Example + + iex> PhilomenaMedia.Filename.build("png") + "2024/1/1/0bce8eea-17e0-11ef-b7d4-0242ac120006.png" + + """ + @spec build(extension()) :: String.t() + def build(extension) do + [ + time_identifier(DateTime.utc_now()), + "/", + UUID.uuid1(), + ".", + extension + ] + |> Enum.join() + end + + defp time_identifier(time) do + Enum.join([time.year, time.month, time.day], "/") + end +end diff --git a/lib/philomena_media/intensities.ex b/lib/philomena_media/intensities.ex new file mode 100644 index 00000000..5abd71e0 --- /dev/null +++ b/lib/philomena_media/intensities.ex @@ -0,0 +1,68 @@ +defmodule PhilomenaMedia.Intensities do + @moduledoc """ + Corner intensities are a simple mechanism for automatic image deduplication, + designed for a time when computer vision was an expensive technology and + resources were scarce. + + Each image is divided into quadrants; image with odd numbers of pixels + on either dimension overlap quadrants by one pixel. 
The luma (brightness) + value corresponding to each pixel is computed according to BT.709 primaries, + and its value is added to a sum for each quadrant. Finally, the value is divided + by the number of pixels in the quadrant to produce an average. The minimum luma + value of any pixel is 0, and the maximum is 255, so an average will be between + these values. Transparent pixels are composited on black before processing. + + By using a range search in the database, this produces a reverse image search which + suffers no dimensionality issues, is exceptionally fast to evaluate, and is independent + of image dimensions, with poor precision and a poor-to-fair accuracy. + """ + + @type t :: %__MODULE__{ + nw: float(), + ne: float(), + sw: float(), + se: float() + } + + defstruct nw: 0.0, + ne: 0.0, + sw: 0.0, + se: 0.0 + + @doc """ + Gets the corner intensities of the given image file. + + The image file must be in the PNG or JPEG format. + + > #### Info {: .info} + > + > Clients should prefer to use `PhilomenaMedia.Processors.intensities/2`, as it handles + > media files of any type supported by this library, not just PNG or JPEG. + + ## Examples + + iex> Intensities.file("image.png") + {:ok, %Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064}} + + iex> Intensities.file("nonexistent.jpg") + :error + + """ + @spec file(Path.t()) :: {:ok, t()} | :error + def file(input) do + System.cmd("image-intensities", [input]) + |> case do + {output, 0} -> + [nw, ne, sw, se] = + output + |> String.trim() + |> String.split("\t") + |> Enum.map(&String.to_float/1) + + {:ok, %__MODULE__{nw: nw, ne: ne, sw: sw, se: se}} + + _error -> + :error + end + end +end diff --git a/lib/philomena_media/mime.ex b/lib/philomena_media/mime.ex new file mode 100644 index 00000000..1b29aa75 --- /dev/null +++ b/lib/philomena_media/mime.ex @@ -0,0 +1,67 @@ +defmodule PhilomenaMedia.Mime do + @moduledoc """ + Utilities for determining the MIME type of a file via parsing. 
+ + Many MIME type libraries assume the MIME type of the file by reading file extensions. + This is inherently unreliable, as many websites disguise the content types of files with + specific names for cost or bandwidth saving reasons. As processing depends on correctly + identifying the type of a file, parsing the file contents is necessary. + """ + + @type t :: String.t() + + @doc """ + Gets the MIME type of the given pathname. + + ## Examples + + iex> PhilomenaMedia.Mime.file("image.png") + {:ok, "image/png"} + + iex> PhilomenaMedia.Mime.file("file.txt") + {:unsupported_mime, "text/plain"} + + iex> PhilomenaMedia.Mime.file("nonexistent.file") + :error + + """ + @spec file(Path.t()) :: {:ok, t()} | {:unsupported_mime, t()} | :error + def file(path) do + System.cmd("file", ["-b", "--mime-type", path]) + |> case do + {output, 0} -> + true_mime(String.trim(output)) + + _error -> + :error + end + end + + @doc """ + Provides the "true" MIME type of this file. + + Some files are identified as a type they should not be based on how they are used by + this library. 
These MIME types (and their "corrected" versions) are: + + - `image/svg` -> `image/svg+xml` + - `audio/webm` -> `video/webm` + + ## Examples + + iex> PhilomenaMedia.Mime.true_mime("image/svg") + {:ok, "image/svg+xml"} + + iex> PhilomenaMedia.Mime.true_mime("audio/webm") + {:ok, "video/webm"} + + """ + @spec true_mime(String.t()) :: {:ok, t()} | {:unsupported_mime, t()} + def true_mime("image/svg"), do: {:ok, "image/svg+xml"} + def true_mime("audio/webm"), do: {:ok, "video/webm"} + + def true_mime(mime) + when mime in ~W(image/gif image/jpeg image/png image/svg+xml video/webm), + do: {:ok, mime} + + def true_mime(mime), do: {:unsupported_mime, mime} +end diff --git a/lib/philomena_media/objects.ex b/lib/philomena_media/objects.ex new file mode 100644 index 00000000..27a92732 --- /dev/null +++ b/lib/philomena_media/objects.ex @@ -0,0 +1,236 @@ +defmodule PhilomenaMedia.Objects do + @moduledoc """ + Replication wrapper for object storage backends. + + While cloud services can be an inexpensive way to access large amounts of storage, they + are inherently less available than local file-based storage. For this reason, it is generally + recommended to maintain a secondary storage provider, such as in the + [3-2-1 backup strategy](https://www.backblaze.com/blog/the-3-2-1-backup-strategy/). + + Functions in this module replicate operations on both the primary and secondary storage + providers. Alternatively, a mode with only a primary storage provider is supported. + + This module assumes storage endpoints are S3-compatible and can be communicated with via the + `m:ExAws` module. This does not preclude the usage of local file-based storage, which can be + accomplished with the [`s3proxy` project](https://github.com/gaul/s3proxy). The development + repository provides an example of `s3proxy` in use. + + Bucket names should be set with configuration on `s3_primary_bucket` and `s3_secondary_bucket`. + If `s3_secondary_bucket` is not set, then only the primary will be used.
However, the primary + bucket name must always be set. + + These are read from environment variables at runtime by Philomena. + + # S3/Object store config + config :philomena, :s3_primary_bucket, System.fetch_env!("S3_BUCKET") + config :philomena, :s3_secondary_bucket, System.get_env("ALT_S3_BUCKET") + + Additional options (e.g. controlling the remote endpoint used) may be set with + `s3_primary_options` and `s3_secondary_options` keys. This allows you to use a provider other + than AWS, like [Cloudflare R2](https://developers.cloudflare.com/r2/). + + These are read from environment variables at runtime by Philomena. + + config :philomena, :s3_primary_options, + region: System.get_env("S3_REGION", "us-east-1"), + scheme: System.fetch_env!("S3_SCHEME"), + host: System.fetch_env!("S3_HOST"), + port: System.fetch_env!("S3_PORT"), + access_key_id: System.fetch_env!("AWS_ACCESS_KEY_ID"), + secret_access_key: System.fetch_env!("AWS_SECRET_ACCESS_KEY"), + http_opts: [timeout: 180_000, recv_timeout: 180_000] + + """ + alias PhilomenaMedia.Mime + require Logger + + @type key :: String.t() + + @doc """ + Fetch a key from the storage backend and write it into the destination path. + + ## Example + + key = "2024/1/1/5/full.png" + Objects.download_file(key, file_path) + + """ + # sobelow_skip ["Traversal.FileModule"] + @spec download_file(key(), Path.t()) :: :ok + def download_file(key, file_path) do + contents = + backends() + |> Enum.find_value(fn opts -> + ExAws.S3.get_object(opts[:bucket], key) + |> ExAws.request(opts[:config_overrides]) + |> case do + {:ok, result} -> result + _ -> nil + end + end) + + File.write!(file_path, contents.body) + end + + @doc """ + Upload a file using a single API call, writing the contents from the given path to storage. 
+ + ## Example + + key = "2024/1/1/5/full.png" + Objects.put(key, file_path) + + """ + # sobelow_skip ["Traversal.FileModule"] + @spec put(key(), Path.t()) :: :ok + def put(key, file_path) do + {_, mime} = Mime.file(file_path) + contents = File.read!(file_path) + + run_all(fn opts -> + ExAws.S3.put_object(opts[:bucket], key, contents, content_type: mime) + |> ExAws.request!(opts[:config_overrides]) + end) + end + + @doc """ + Upload a file using multiple API calls, writing the contents from the given path to storage. + + ## Example + + key = "2024/1/1/5/full.png" + Objects.upload(key, file_path) + + """ + @spec upload(key(), Path.t()) :: :ok + def upload(key, file_path) do + # Workaround for API rate limit issues on R2 + put(key, file_path) + end + + @doc """ + Copies a key from the source to the destination, overwriting the destination object if it exists. + + > #### Warning {: .warning} + > + > `copy/2` does not use the `PutObjectCopy` S3 request. It downloads the file and uploads it again. + > This may use more disk space than expected if the file is large. + + ## Example + + source_key = "2024/1/1/5/full.png" + dest_key = "2024/1/1/5-a5323e542e0f/full.png" + Objects.copy(source_key, dest_key) + + """ + @spec copy(key(), key()) :: :ok + def copy(source_key, dest_key) do + # Potential workaround for inconsistent PutObjectCopy on R2 + # + # run_all(fn opts-> + # ExAws.S3.put_object_copy(opts[:bucket], dest_key, opts[:bucket], source_key) + # |> ExAws.request!(opts[:config_overrides]) + # end) + + try do + file_path = Briefly.create!() + download_file(source_key, file_path) + upload(dest_key, file_path) + catch + _kind, _value -> Logger.warning("Failed to copy #{source_key} -> #{dest_key}") + end + + :ok + end + + @doc """ + Removes the key from storage.
+ + ## Example + + key = "2024/1/1/5/full.png" + Objects.delete(key) + + """ + @spec delete(key()) :: :ok + def delete(key) do + run_all(fn opts -> + ExAws.S3.delete_object(opts[:bucket], key) + |> ExAws.request!(opts[:config_overrides]) + end) + end + + @doc """ + Removes all given keys from storage. + + ## Example + + keys = [ + "2024/1/1/5/full.png", + "2024/1/1/5/small.png", + "2024/1/1/5/thumb.png", + "2024/1/1/5/thumb_tiny.png" + ] + Objects.delete_multiple(keys) + + """ + @spec delete_multiple([key()]) :: :ok + def delete_multiple(keys) do + run_all(fn opts -> + ExAws.S3.delete_multiple_objects(opts[:bucket], keys) + |> ExAws.request!(opts[:config_overrides]) + end) + end + + defp run_all(wrapped) do + fun = fn opts -> + try do + wrapped.(opts) + :ok + catch + _kind, _value -> :error + end + end + + backends() + |> Task.async_stream(fun, timeout: :infinity) + |> Enum.any?(fn {_, v} -> v == :error end) + |> case do + true -> + Logger.warning("Failed to operate on all backends") + + _ -> + :ok + end + + :ok + end + + defp backends do + primary_opts() ++ replica_opts() + end + + defp primary_opts do + [ + %{ + config_overrides: Application.fetch_env!(:philomena, :s3_primary_options), + bucket: Application.fetch_env!(:philomena, :s3_primary_bucket) + } + ] + end + + defp replica_opts do + replica_bucket = Application.get_env(:philomena, :s3_secondary_bucket) + + if not is_nil(replica_bucket) do + [ + %{ + config_overrides: Application.fetch_env!(:philomena, :s3_secondary_options), + bucket: replica_bucket + } + ] + else + [] + end + end +end diff --git a/lib/philomena_media/processors.ex b/lib/philomena_media/processors.ex new file mode 100644 index 00000000..23c49dcf --- /dev/null +++ b/lib/philomena_media/processors.ex @@ -0,0 +1,202 @@ +defmodule PhilomenaMedia.Processors do + @moduledoc """ + Utilities for processing uploads. 
+ + Processors have 4 functions available: + + - `versions/1`: + Takes a version list and generates a list of files which the processor will generate + during the scope of `process/3`. + + - `process/3`: + Takes an analysis result, file path, and version list and generates an "edit script" that + represents how to store this file according to the given version list. See + `m:Philomena.Images.Thumbnailer` for a usage example. + + - `post_process/2`: + Takes an analysis result and file path and performs optimizations on the upload. See + `m:Philomena.Images.Thumbnailer` for a usage example. + + - `intensities/2`: + Takes an analysis result and file path and generates corner intensities, performing + any conversion necessary before processing. See `m:PhilomenaMedia.Intensities` + for more information. + + ## Version lists + + `versions/1` and `process/3` take _version lists_ as input. A version list is a structure + like the following, which contains pairs of _version names_ and _dimensions_: + + [ + thumb_tiny: {50, 50}, + thumb_small: {150, 150}, + thumb: {250, 250}, + small: {320, 240}, + medium: {800, 600}, + large: {1280, 1024}, + tall: {1024, 4096} + ] + + When calling these functions, it is recommended to prefilter the version list based on the media + dimensions to avoid generating unnecessary versions which are larger than the original file. + See `m:Philomena.Images.Thumbnailer` for an example. + + ## Edit scripts + + `process/3` and `post_process/2` return _edit scripts_. An edit script is a list where each + entry may be one of the following: + + {:thumbnails, [copy_requests]} + {:replace_original, path} + {:intensities, intensities} + + Within the thumbnail request, a copy request is defined with the following structure: + + {:copy, path, version_filename} + + See the respective functions for more information about their return values.
+ """ + + alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Intensities + alias PhilomenaMedia.Processors.{Gif, Jpeg, Png, Svg, Webm} + alias PhilomenaMedia.Mime + + # The name of a version, like :large + @type version_name :: atom() + + @type dimensions :: {integer(), integer()} + @type version_list :: [{version_name(), dimensions()}] + + # The file name of a processed version, like "large.png" + @type version_filename :: String.t() + + # A single file to be copied to satisfy a request for a version name + @type copy_request :: {:copy, Path.t(), version_filename()} + + # A list of thumbnail versions to copy into place + @type thumbnails :: {:thumbnails, [copy_request()]} + + # Replace the original file to strip metadata or losslessly optimize + @type replace_original :: {:replace_original, Path.t()} + + # Apply the computed corner intensities + @type intensities :: {:intensities, Intensities.t()} + + # An edit script, representing the changes to apply to the storage backend + # after successful processing + @type edit_script :: [thumbnails() | replace_original() | intensities()] + + @doc """ + Returns a processor, with the processor being a module capable + of processing this content type, or nil. + + The allowed MIME types are: + - `image/gif` + - `image/jpeg` + - `image/png` + - `image/svg+xml` + - `video/webm` + + > #### Info {: .info} + > + > This is an interface intended for use when the MIME type is already known. + > Using a processor not matched to the file may cause unexpected results. 
+ + ## Examples + + iex> PhilomenaMedia.Processors.processor("image/png") + PhilomenaMedia.Processors.Png + + iex> PhilomenaMedia.Processors.processor("application/octet-stream") + nil + + """ + @spec processor(Mime.t()) :: module() | nil + def processor(content_type) + + def processor("image/gif"), do: Gif + def processor("image/jpeg"), do: Jpeg + def processor("image/png"), do: Png + def processor("image/svg+xml"), do: Svg + def processor("video/webm"), do: Webm + def processor(_content_type), do: nil + + @doc """ + Takes a MIME type and filtered version list and generates a list of version files to be + generated by `process/3`. List contents may differ based on file type. + + ## Examples + + iex> PhilomenaMedia.Processors.versions("image/png", [thumb_tiny: {50, 50}]) + ["thumb_tiny.png"] + + iex> PhilomenaMedia.Processors.versions("video/webm", [thumb_tiny: {50, 50}]) + ["full.mp4", "rendered.png", "thumb_tiny.webm", "thumb_tiny.mp4", "thumb_tiny.gif"] + + """ + @spec versions(Mime.t(), version_list()) :: [version_name()] + def versions(mime_type, valid_sizes) do + processor(mime_type).versions(valid_sizes) + end + + @doc """ + Takes an analyzer result, file path, and version list and runs the appropriate processor's + `process/3`, processing the media. + + Returns an edit script to apply changes. Depending on the media type, this may take a long + time to execute.
+ + ## Example + + iex> PhilomenaMedia.Processors.process(%Result{...}, "image.png", [thumb_tiny: {50, 50}]) + [ + intensities: %Intensities{...}, + thumbnails: [ + {:copy, "/tmp/briefly-5764/vSHsM3kn7k4yvrvZH.png", "thumb_tiny.png"} + ] + ] + + """ + @spec process(Result.t(), Path.t(), version_list()) :: edit_script() + def process(analysis, file, versions) do + processor(analysis.mime_type).process(analysis, file, versions) + end + + @doc """ + Takes an analyzer result and file path and runs the appropriate processor's `post_process/2`, + performing long-running optimizations on the media source file. + + Returns an edit script to apply changes. Depending on the media type, this may take a long + time to execute. This may also be an empty list, if there are no changes to perform. + + ## Example + + iex> PhilomenaMedia.Processors.post_process(%Result{...}, "image.gif") + [replace_original: "/tmp/briefly-5764/cyZSQnmL59XDRoPoaDxr.gif"] + + """ + @spec post_process(Result.t(), Path.t()) :: edit_script() + def post_process(analysis, file) do + processor(analysis.mime_type).post_process(analysis, file) + end + + @doc """ + Takes an analyzer result and file path and runs the appropriate processor's `intensities/2`, + returning the corner intensities. + + This allows for generating intensities for file types that are not directly supported by + `m:PhilomenaMedia.Intensities`, and should be the preferred function to call when intensities + are needed.
+ + ## Example + + iex> PhilomenaMedia.Processors.intensities(%Result{...}, "video.webm") + %Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064} + + """ + @spec intensities(Result.t(), Path.t()) :: Intensities.t() + def intensities(analysis, file) do + processor(analysis.mime_type).intensities(analysis, file) + end +end diff --git a/lib/philomena/processors/gif.ex b/lib/philomena_media/processors/gif.ex similarity index 83% rename from lib/philomena/processors/gif.ex rename to lib/philomena_media/processors/gif.ex index 8b7557e5..6e185f9f 100644 --- a/lib/philomena/processors/gif.ex +++ b/lib/philomena_media/processors/gif.ex @@ -1,12 +1,21 @@ -defmodule Philomena.Processors.Gif do - alias Philomena.Intensities +defmodule PhilomenaMedia.Processors.Gif do + @moduledoc false + alias PhilomenaMedia.Intensities + alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Processors.Processor + alias PhilomenaMedia.Processors + + @behaviour Processor + + @spec versions(Processors.version_list()) :: [Processors.version_filename()] def versions(sizes) do sizes |> Enum.map(fn {name, _} -> "#{name}.gif" end) |> Kernel.++(["full.webm", "full.mp4", "rendered.png"]) end + @spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script() def process(analysis, file, versions) do duration = analysis.duration preview = preview(duration, file) @@ -17,16 +26,18 @@ defmodule Philomena.Processors.Gif do scaled = Enum.flat_map(versions, &scale(palette, file, &1)) videos = generate_videos(file) - %{ + [ intensities: intensities, thumbnails: scaled ++ videos ++ [{:copy, preview, "rendered.png"}] - } + ] end + @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() def post_process(_analysis, file) do - %{replace_original: optimize(file)} + [replace_original: optimize(file)] end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(analysis, file) do {:ok, intensities} = 
Intensities.file(preview(analysis.duration, file)) intensities diff --git a/lib/philomena/processors/jpeg.ex b/lib/philomena_media/processors/jpeg.ex similarity index 79% rename from lib/philomena/processors/jpeg.ex rename to lib/philomena_media/processors/jpeg.ex index 24cf65e1..6e9f728e 100644 --- a/lib/philomena/processors/jpeg.ex +++ b/lib/philomena_media/processors/jpeg.ex @@ -1,10 +1,19 @@ -defmodule Philomena.Processors.Jpeg do - alias Philomena.Intensities +defmodule PhilomenaMedia.Processors.Jpeg do + @moduledoc false + alias PhilomenaMedia.Intensities + alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Processors.Processor + alias PhilomenaMedia.Processors + + @behaviour Processor + + @spec versions(Processors.version_list()) :: [Processors.version_filename()] def versions(sizes) do Enum.map(sizes, fn {name, _} -> "#{name}.jpg" end) end + @spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script() def process(_analysis, file, versions) do stripped = optimize(strip(file)) @@ -12,15 +21,17 @@ defmodule Philomena.Processors.Jpeg do scaled = Enum.flat_map(versions, &scale(stripped, &1)) - %{ + [ replace_original: stripped, intensities: intensities, thumbnails: scaled - } + ] end - def post_process(_analysis, _file), do: %{} + @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() + def post_process(_analysis, _file), do: [] + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(file) intensities diff --git a/lib/philomena/processors/png.ex b/lib/philomena_media/processors/png.ex similarity index 74% rename from lib/philomena/processors/png.ex rename to lib/philomena_media/processors/png.ex index 373ede0d..0fc4c50d 100644 --- a/lib/philomena/processors/png.ex +++ b/lib/philomena_media/processors/png.ex @@ -1,10 +1,19 @@ -defmodule Philomena.Processors.Png do - alias Philomena.Intensities +defmodule 
PhilomenaMedia.Processors.Png do + @moduledoc false + alias PhilomenaMedia.Intensities + alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Processors.Processor + alias PhilomenaMedia.Processors + + @behaviour Processor + + @spec versions(Processors.version_list()) :: [Processors.version_filename()] def versions(sizes) do Enum.map(sizes, fn {name, _} -> "#{name}.png" end) end + @spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script() def process(analysis, file, versions) do animated? = analysis.animated? @@ -12,21 +21,23 @@ defmodule Philomena.Processors.Png do scaled = Enum.flat_map(versions, &scale(file, animated?, &1)) - %{ + [ intensities: intensities, thumbnails: scaled - } + ] end + @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() def post_process(analysis, file) do if analysis.animated? do # libpng has trouble with animations, so skip optimization - %{} + [] else - %{replace_original: optimize(file)} + [replace_original: optimize(file)] end end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(file) intensities diff --git a/lib/philomena_media/processors/processor.ex b/lib/philomena_media/processors/processor.ex new file mode 100644 index 00000000..2c3acc0b --- /dev/null +++ b/lib/philomena_media/processors/processor.ex @@ -0,0 +1,21 @@ +defmodule PhilomenaMedia.Processors.Processor do + @moduledoc false + + alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Processors + alias PhilomenaMedia.Intensities + + # Generate a list of version filenames for the given version list. 
+ @callback versions(Processors.version_list()) :: [Processors.version_filename()] + + # Process the media at the given path against the given version list, and return an + # edit script with the resulting files + @callback process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script() + + # Perform post-processing optimization tasks on the file, to reduce its size + # and strip non-essential metadata + @callback post_process(Result.t(), Path.t()) :: Processors.edit_script() + + # Generate corner intensities for the given path + @callback intensities(Result.t(), Path.t()) :: Intensities.t() +end diff --git a/lib/philomena/processors/svg.ex b/lib/philomena_media/processors/svg.ex similarity index 66% rename from lib/philomena/processors/svg.ex rename to lib/philomena_media/processors/svg.ex index 7f45b893..aaa3dd5c 100644 --- a/lib/philomena/processors/svg.ex +++ b/lib/philomena_media/processors/svg.ex @@ -1,12 +1,21 @@ -defmodule Philomena.Processors.Svg do - alias Philomena.Intensities +defmodule PhilomenaMedia.Processors.Svg do + @moduledoc false + alias PhilomenaMedia.Intensities + alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Processors.Processor + alias PhilomenaMedia.Processors + + @behaviour Processor + + @spec versions(Processors.version_list()) :: [Processors.version_filename()] def versions(sizes) do sizes |> Enum.map(fn {name, _} -> "#{name}.png" end) |> Kernel.++(["rendered.png", "full.png"]) end + @spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script() def process(_analysis, file, versions) do preview = preview(file) @@ -15,14 +24,16 @@ defmodule Philomena.Processors.Svg do scaled = Enum.flat_map(versions, &scale(preview, &1)) full = [{:copy, preview, "full.png"}] - %{ + [ intensities: intensities, thumbnails: scaled ++ full ++ [{:copy, preview, "rendered.png"}] - } + ] end - def post_process(_analysis, _file), do: %{} + @spec post_process(Result.t(), Path.t()) :: 
Processors.edit_script() + def post_process(_analysis, _file), do: [] + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(preview(file)) intensities diff --git a/lib/philomena/processors/webm.ex b/lib/philomena_media/processors/webm.ex similarity index 90% rename from lib/philomena/processors/webm.ex rename to lib/philomena_media/processors/webm.ex index 8459ef8b..c86e1969 100644 --- a/lib/philomena/processors/webm.ex +++ b/lib/philomena_media/processors/webm.ex @@ -1,7 +1,15 @@ -defmodule Philomena.Processors.Webm do - alias Philomena.Intensities +defmodule PhilomenaMedia.Processors.Webm do + @moduledoc false + + alias PhilomenaMedia.Intensities + alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Processors.Processor + alias PhilomenaMedia.Processors import Bitwise + @behaviour Processor + + @spec versions(Processors.version_list()) :: [Processors.version_filename()] def versions(sizes) do webm_versions = Enum.map(sizes, fn {name, _} -> "#{name}.webm" end) mp4_versions = Enum.map(sizes, fn {name, _} -> "#{name}.mp4" end) @@ -14,6 +22,7 @@ defmodule Philomena.Processors.Webm do ["full.mp4", "rendered.png"] ++ webm_versions ++ mp4_versions ++ gif_versions end + @spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script() def process(analysis, file, versions) do dimensions = analysis.dimensions duration = analysis.duration @@ -27,15 +36,17 @@ defmodule Philomena.Processors.Webm do scaled = Enum.flat_map(versions, &scale(stripped, palette, duration, dimensions, &1)) mp4 = [{:copy, mp4, "full.mp4"}] - %{ + [ replace_original: stripped, intensities: intensities, thumbnails: scaled ++ mp4 ++ [{:copy, preview, "rendered.png"}] - } + ] end - def post_process(_analysis, _file), do: %{} + @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() + def post_process(_analysis, _file), do: [] + @spec intensities(Result.t(), Path.t()) :: 
Intensities.t() def intensities(analysis, file) do {:ok, intensities} = Intensities.file(preview(analysis.duration, file)) intensities diff --git a/lib/philomena/sha512.ex b/lib/philomena_media/sha512.ex similarity index 63% rename from lib/philomena/sha512.ex rename to lib/philomena_media/sha512.ex index 03c9645e..ba12cfed 100644 --- a/lib/philomena/sha512.ex +++ b/lib/philomena_media/sha512.ex @@ -1,6 +1,21 @@ -defmodule Philomena.Sha512 do +defmodule PhilomenaMedia.Sha512 do + @moduledoc """ + Streaming SHA-512 processor. + """ + @chunk_size 10_485_760 + @doc """ + Generate the SHA2-512 hash of the file at the given path as a string. + + The file is processed in 10MiB chunks. + + ## Example + + iex> Sha512.file("image.png") + "97fd5243cd39e225f1478097acae71fbbff7f3027b24f0e6a8e06a0d7d3e6861cd05691d7470c76e7dfc4eb30459a906918d5ba0d144184fff02b8e34bd9ecf8" + + """ @spec file(Path.t()) :: String.t() def file(path) do hash_ref = :crypto.hash_init(:sha512) diff --git a/lib/philomena_media/uploader.ex b/lib/philomena_media/uploader.ex new file mode 100644 index 00000000..3df61945 --- /dev/null +++ b/lib/philomena_media/uploader.ex @@ -0,0 +1,360 @@ +defmodule PhilomenaMedia.Uploader do + @moduledoc """ + Upload and processing callback logic for media files. + + To use the uploader, the target schema must be modified to add at least the + following fields, assuming the name of the field to write to the database is `foo`: + + field :foo, :string + field :uploaded_foo, :string, virtual: true + field :removed_foo, :string, virtual: true + + The schema should also define a changeset function which casts the file parameters. This may be + the default changeset function, or a function specialized to accept only the file parameters. 
A + minimal schema must cast at least the following to successfully upload and replace files: + + def foo_changeset(schema, attrs) do + cast(schema, attrs, [:foo, :uploaded_foo, :removed_foo]) + end + + Additional fields may be added to perform validations. For example, specifying a field name + `foo_mime_type` allows the creation of a MIME type filter in the changeset: + + def foo_changeset(schema, attrs) do + schema + |> cast(attrs, [:foo, :foo_mime_type, :uploaded_foo, :removed_foo]) + |> validate_required([:foo, :foo_mime_type]) + |> validate_inclusion(:foo_mime_type, ["image/svg+xml"]) + end + + See `analyze_upload/4` for more information about what fields may be validated in this + fashion. + + Generally, you should expect to create a `Schemas.Uploader` module, which defines functions as + follows, pointing to `m:PhilomenaMedia.Uploader`. Assuming the target field name is `"foo"`, then: + + defmodule Philomena.Schemas.Uploader do + alias Philomena.Schemas.Schema + alias PhilomenaMedia.Uploader + + @field_name "foo" + + def analyze_upload(schema, params) do + Uploader.analyze_upload(schema, @field_name, params[@field_name], &Schema.foo_changeset/2) + end + + def persist_upload(schema) do + Uploader.persist_upload(schema, schema_file_root(), @field_name) + end + + def unpersist_old_upload(schema) do + Uploader.unpersist_old_upload(schema, schema_file_root(), @field_name) + end + + defp schema_file_root do + Application.get_env(:philomena, :schema_file_root) + end + end + + A typical context usage may then look like: + + alias Philomena.Schemas.Schema + alias Philomena.Schemas.Uploader + + @spec create_schema(map()) :: {:ok, Schema.t()} | {:error, Ecto.Changeset.t()} + def create_schema(attrs) do + %Schema{} + |> Uploader.analyze_upload(attrs) + |> Repo.insert() + |> case do + {:ok, schema} -> + Uploader.persist_upload(schema) + + {:ok, schema} + + error -> + error + end + end + + @spec update_schema(Schema.t(), map()) :: {:ok, Schema.t()} | {:error, 
Ecto.Changeset.t()} + def update_schema(%Schema{} = schema, attrs) do + schema + |> Uploader.analyze_upload(attrs) + |> Repo.update() + |> case do + {:ok, schema} -> + Uploader.persist_upload(schema) + Uploader.unpersist_old_upload(schema) + + {:ok, schema} + + error -> + error + end + end + + This forwards to the core `m:PhilomenaMedia.Uploader` logic with information about the file root. + + The file root is the location at which files of the given schema type are located under + the storage path. For example, the file root for the Adverts schema may be + `/srv/philomena/priv/s3/philomena/adverts` in development with the file backend, + and just `adverts` in production with the S3 backend. + + It is not recommended to perform persist or unpersist operations in the scope of an `m:Ecto.Multi`, + as they may block indefinitely. + """ + + alias PhilomenaMedia.Analyzers + alias PhilomenaMedia.Filename + alias PhilomenaMedia.Objects + alias PhilomenaMedia.Sha512 + import Ecto.Changeset + + @type schema :: struct() + @type schema_or_changeset :: struct() | Ecto.Changeset.t() + + @type field_name :: String.t() + @type file_root :: String.t() + + @doc """ + Performs analysis of the specified `m:Plug.Upload`, and invokes a changeset callback on the schema + or changeset passed in. + + The file name which will be written to is set by the assignment to the schema's `field_name`, and + the below attributes are prefixed by the `field_name`. 
+ + Assuming the file is successfully parsed, this will attempt to cast the following + attributes into the specified changeset function: + * `name` (String) - the name of the file + * `width` (integer) - the width of the file + * `height` (integer) - the height of the file + * `size` (integer) - the size of the file, in bytes + * `format` (String) - the file extension, one of `~w(gif jpg png svg webm)`, determined by reading the file + * `mime_type` (String) - the file's sniffed MIME type, determined by reading the file + * `duration` (float) - the duration of the media file + * `aspect_ratio` (float) - width divided by height. + * `orig_sha512_hash` (String) - the SHA-512 hash of the file + * `sha512_hash` (String) - the SHA-512 hash of the file + * `is_animated` (boolean) - whether the file contains animation + + You may design your changeset callback to accept any of these. Here is an example which accepts + all of them: + + def foo_changeset(schema, attrs) do + cast(schema, attrs, [ + :foo, + :foo_name, + :foo_width, + :foo_height, + :foo_size, + :foo_format, + :foo_mime_type, + :foo_duration, + :foo_aspect_ratio, + :foo_orig_sha512_hash, + :foo_sha512_hash, + :foo_is_animated, + :uploaded_foo, + :removed_foo + ]) + end + + Attributes are prefixed, so assuming a `field_name` of `"foo"`, this would result in + the changeset function receiving attributes `"foo_name"`, `"foo_width"`, ... etc. + + Validations on the uploaded media are also possible in the changeset callback.
For example, + `m:Philomena.Adverts.Advert` performs validations on MIME type and width of its field, named + `image`: + + def image_changeset(advert, attrs) do + advert + |> cast(attrs, [ + :image, + :image_mime_type, + :image_size, + :image_width, + :image_height, + :uploaded_image, + :removed_image + ]) + |> validate_required([:image]) + |> validate_inclusion(:image_mime_type, ["image/png", "image/jpeg", "image/gif"]) + |> validate_inclusion(:image_width, 699..729) + end + + The key (location to write the persisted file) is passed with the `field_name` attribute into the + changeset callback. The key is calculated using the current date, a UUID, and the computed + extension. An uploaded file may therefore be given a key such as + `2024/1/1/0bce8eea-17e0-11ef-b7d4-0242ac120006.png`. See `PhilomenaMedia.Filename.build/1` for + the actual construction. + + This function does not persist an upload to storage. + + See the module documentation for a complete example. + + ## Example + + @spec analyze_upload(Uploader.schema_or_changeset(), map()) :: Ecto.Changeset.t() + def analyze_upload(schema, params) do + Uploader.analyze_upload(schema, "foo", params["foo"], &Schema.foo_changeset/2) + end + + """ + @spec analyze_upload( + schema_or_changeset(), + field_name(), + Plug.Upload.t(), + (schema_or_changeset(), map() -> Ecto.Changeset.t()) + ) :: Ecto.Changeset.t() + def analyze_upload(schema_or_changeset, field_name, upload_parameter, changeset_fn) do + with {:ok, analysis} <- Analyzers.analyze(upload_parameter), + analysis <- extra_attributes(analysis, upload_parameter) do + removed = + schema_or_changeset + |> change() + |> get_field(field(field_name)) + + attributes = + %{ + "name" => analysis.name, + "width" => analysis.width, + "height" => analysis.height, + "size" => analysis.size, + "format" => analysis.extension, + "mime_type" => analysis.mime_type, + "duration" => analysis.duration, + "aspect_ratio" => analysis.aspect_ratio, + "orig_sha512_hash" => 
analysis.sha512, + "sha512_hash" => analysis.sha512, + "is_animated" => analysis.animated? + } + |> prefix_attributes(field_name) + |> Map.put(field_name, analysis.new_name) + |> Map.put(upload_key(field_name), upload_parameter.path) + |> Map.put(remove_key(field_name), removed) + + changeset_fn.(schema_or_changeset, attributes) + else + {:unsupported_mime, mime} -> + attributes = prefix_attributes(%{"mime_type" => mime}, field_name) + changeset_fn.(schema_or_changeset, attributes) + + _error -> + changeset_fn.(schema_or_changeset, %{}) + end + end + + @doc """ + Writes the file to permanent storage. This should be the second-to-last step + before completing a file operation. + + The key (location to write the persisted file) is fetched from the schema by `field_name`. + This is then prefixed with the `file_root` specified by the caller. Finally, the file is + written to storage. + + See the module documentation for a complete example. + + ## Example + + @spec persist_upload(Schema.t()) :: :ok + def persist_upload(schema) do + Uploader.persist_upload(schema, schema_file_root(), "foo") + end + + """ + @spec persist_upload(schema(), file_root(), field_name()) :: :ok + def persist_upload(schema, file_root, field_name) do + source = Map.get(schema, field(upload_key(field_name))) + dest = Map.get(schema, field(field_name)) + target = Path.join(file_root, dest) + + persist_file(target, source) + end + + @doc """ + Persists an arbitrary file to storage with the given key. + + > #### Warning {: .warning} + > + > This is exposed for schemas which do not store their files at an offset from a file root, + > to allow overriding the key. If you do not need to override the key, use + > `persist_upload/3` instead. + + The key (location to write the persisted file) and the file path to upload are passed through + to `PhilomenaMedia.Objects.upload/2` without modification. See the definition of that function for + additional details. 
+ + ## Example + + key = "2024/1/1/5/full.png" + Uploader.persist_file(key, file_path) + + """ + @spec persist_file(Objects.key(), Path.t()) :: :ok + def persist_file(key, file_path) do + Objects.upload(key, file_path) + end + + @doc """ + Removes the old file from permanent storage. This should be the last step in + completing a file operation. + + The key (location of the previously persisted file) is fetched from the schema's `removed_<field_name>` field. + This is then prefixed with the `file_root` specified by the caller. Finally, the file is + purged from storage. A `nil` or empty key is a no-op. + + See the module documentation for a complete example. + + ## Example + + @spec unpersist_old_upload(Schema.t()) :: :ok + def unpersist_old_upload(schema) do + Uploader.unpersist_old_upload(schema, schema_file_root(), "foo") + end + + """ + @spec unpersist_old_upload(schema(), file_root(), field_name()) :: :ok + def unpersist_old_upload(schema, file_root, field_name) do + schema + |> Map.get(field(remove_key(field_name))) + |> try_remove(file_root) + end + + defp extra_attributes(analysis, %Plug.Upload{path: path, filename: filename}) do + {width, height} = analysis.dimensions + aspect_ratio = aspect_ratio(width, height) + + stat = File.stat!(path) + sha512 = Sha512.file(path) + new_name = Filename.build(analysis.extension) + + analysis + |> Map.put(:size, stat.size) + |> Map.put(:name, filename) + |> Map.put(:width, width) + |> Map.put(:height, height) + |> Map.put(:sha512, sha512) + |> Map.put(:new_name, new_name) + |> Map.put(:aspect_ratio, aspect_ratio) + end + + defp aspect_ratio(_, 0), do: 0.0 + defp aspect_ratio(w, h), do: w / h + + defp try_remove("", _file_root), do: :ok + defp try_remove(nil, _file_root), do: :ok + + defp try_remove(file, file_root) do + Objects.delete(Path.join(file_root, file)) + end + + defp prefix_attributes(map, prefix), + do: Map.new(map, fn {key, value} -> {"#{prefix}_#{key}", value} end) + + defp upload_key(field_name), do: 
"uploaded_#{field_name}" + + defp remove_key(field_name), 
do: "removed_#{field_name}" + + defp field(field_name), do: String.to_existing_atom(field_name) +end diff --git a/lib/philomena_web/image_reverse.ex b/lib/philomena_web/image_reverse.ex index 4fa5f459..161ebad3 100644 --- a/lib/philomena_web/image_reverse.ex +++ b/lib/philomena_web/image_reverse.ex @@ -1,6 +1,6 @@ defmodule PhilomenaWeb.ImageReverse do - alias Philomena.Analyzers - alias Philomena.Processors + alias PhilomenaMedia.Analyzers + alias PhilomenaMedia.Processors alias Philomena.DuplicateReports alias Philomena.Repo import Ecto.Query