Add feature extraction and importing pipeline to Philomena
parent 2bd7ddf9d1
commit 12d3809d37
18 changed files with 517 additions and 5 deletions
lib/philomena/image_vectors.ex (new file, 91 lines)
@@ -0,0 +1,91 @@
defmodule Philomena.ImageVectors do
  @moduledoc """
  The ImageVectors context.
  """

  import Ecto.Query, warn: false
  alias Philomena.Repo

  alias Philomena.ImageVectors.ImageVector

  @doc """
  Gets a single image_vector.

  Raises `Ecto.NoResultsError` if the Image vector does not exist.

  ## Examples

      iex> get_image_vector!(123)
      %ImageVector{}

      iex> get_image_vector!(456)
      ** (Ecto.NoResultsError)

  """
  def get_image_vector!(id), do: Repo.get!(ImageVector, id)

  @doc """
  Creates an image_vector.

  ## Examples

      iex> create_image_vector(%{field: value})
      {:ok, %ImageVector{}}

      iex> create_image_vector(%{field: bad_value})
      {:error, %Ecto.Changeset{}}

  """
  def create_image_vector(image, attrs \\ %PhilomenaMedia.Features{}) do
    %ImageVector{image_id: image.id}
    |> ImageVector.changeset(Map.from_struct(attrs))
    |> Repo.insert()
  end

  @doc """
  Updates an image_vector.

  ## Examples

      iex> update_image_vector(image_vector, %{field: new_value})
      {:ok, %ImageVector{}}

      iex> update_image_vector(image_vector, %{field: bad_value})
      {:error, %Ecto.Changeset{}}

  """
  def update_image_vector(%ImageVector{} = image_vector, attrs) do
    image_vector
    |> ImageVector.changeset(attrs)
    |> Repo.update()
  end

  @doc """
  Deletes an image_vector.

  ## Examples

      iex> delete_image_vector(image_vector)
      {:ok, %ImageVector{}}

      iex> delete_image_vector(image_vector)
      {:error, %Ecto.Changeset{}}

  """
  def delete_image_vector(%ImageVector{} = image_vector) do
    Repo.delete(image_vector)
  end

  @doc """
  Returns an `%Ecto.Changeset{}` for tracking image_vector changes.

  ## Examples

      iex> change_image_vector(image_vector)
      %Ecto.Changeset{data: %ImageVector{}}

  """
  def change_image_vector(%ImageVector{} = image_vector, attrs \\ %{}) do
    ImageVector.changeset(image_vector, attrs)
  end
end
lib/philomena/image_vectors/batch_processor.ex (new file, 88 lines)
@@ -0,0 +1,88 @@
defmodule Philomena.ImageVectors.BatchProcessor do
  @moduledoc """
  Batch processing interface for Philomena. See the module documentation
  in `m:Philomena.ImageVectors.Importer` for more information about how to
  use the functions in this module during maintenance.
  """

  alias Philomena.Images
  alias Philomena.Images.Image
  alias Philomena.Images.Thumbnailer
  alias Philomena.ImageVectors.ImageVector
  alias Philomena.Maintenance
  alias Philomena.Repo

  alias PhilomenaMedia.Analyzers
  alias PhilomenaMedia.Processors
  alias PhilomenaQuery.Batch
  alias PhilomenaQuery.Search

  alias Philomena.Repo
  import Ecto.Query

  @spec all_missing(String.t(), Keyword.t()) :: :ok
  def all_missing(type \\ "full", opts \\ []) do
    Image
    |> from(as: :image)
    |> where(not exists(where(ImageVector, [iv], iv.image_id == parent_as(:image).id)))
    |> by_image_query(type, opts)
  end

  @spec by_image_query(Ecto.Query.t(), String.t(), Keyword.t()) :: :ok
  defp by_image_query(query, type, opts) do
    max_concurrency = Keyword.get(opts, :max_concurrency, 4)
    min = Repo.one(limit(order_by(query, asc: :id), 1)).id
    max = Repo.one(limit(order_by(query, desc: :id), 1)).id

    query
    |> Batch.query_batches(opts)
    |> Task.async_stream(
      fn query -> process_query(query, type, opts) end,
      timeout: :infinity,
      max_concurrency: max_concurrency
    )
    |> Maintenance.log_progress("BatchProcessor/#{type}", min, max)
  end

  @spec process_query(Ecto.Query.t(), String.t(), Keyword.t()) ::
          Enumerable.t({:ok, integer()})
  defp process_query(query, type, batch_opts) do
    images = Repo.all(query)
    last_id = Enum.max_by(images, & &1.id).id

    values =
      Enum.flat_map(images, fn image ->
        try do
          [process_image(image, type)]
        rescue
          ex ->
            IO.puts("While processing #{image.id}: #{inspect(ex)}")
            IO.puts(Exception.format_stacktrace(__STACKTRACE__))
            []
        end
      end)

    {_count, nil} = Repo.insert_all(ImageVector, values, on_conflict: :nothing)

    :ok =
      query
      |> preload(^Images.indexing_preloads())
      |> Search.reindex(Image, batch_opts)

    last_id
  end

  @spec process_image(%Image{}, String.t()) :: map()
  defp process_image(image = %Image{}, type) do
    file = Thumbnailer.download_image_file(image)

    {:ok, analysis} = Analyzers.analyze_path(file)
    features = Processors.features(analysis, file)

    %{
      image_id: image.id,
      type: type,
      features: features.features
    }
  end
end
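As a maintenance usage illustration (the same invocation as step 3 of the Importer workflow further down), backfilling vectors for every image that does not have one yet is a one-liner from a release shell; the option values here are examples rather than defaults taken from this commit:

# Run inside a long-lived terminal (screen/tmux). Batches of images are
# processed concurrently via Task.async_stream, and each batch is reindexed
# in the search index after its vectors are inserted.
philomena eval 'Philomena.ImageVectors.BatchProcessor.all_missing("full", batch_size: 32, max_concurrency: 4)'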
lib/philomena/image_vectors/image_vector.ex (new file, 19 lines)
@@ -0,0 +1,19 @@
defmodule Philomena.ImageVectors.ImageVector do
  use Ecto.Schema
  import Ecto.Changeset

  alias Philomena.Images.Image

  schema "image_vectors" do
    belongs_to :image, Image
    field :type, :string
    field :features, {:array, :float}
  end

  @doc false
  def changeset(image_vector, attrs) do
    image_vector
    |> cast(attrs, [:type, :features])
    |> validate_required([:type, :features])
  end
end
lib/philomena/image_vectors/importer.ex (new file, 86 lines)
@@ -0,0 +1,86 @@
defmodule Philomena.ImageVectors.Importer do
  @moduledoc """
  Import logic for binary files produced by the export function of
  https://github.com/philomena-dev/philomena-ris-inference-toolkit.

  Run the following commands in a long-running terminal, like screen or tmux.
  The workflow for using the importer is as follows:

  1. Use the batch inference toolkit to get the `features.bin`.
  2. Run `philomena eval 'Philomena.ImageVectors.Importer.import_from("/path/to/features.bin")'`.
  3. Backfill the remaining images:
     `philomena eval 'Philomena.ImageVectors.BatchProcessor.all_missing("full", batch_size: 32)'`
  4. During downtime, delete and recreate the images index:
     `philomena eval 'Philomena.SearchIndexer.recreate_reindex_schema_destructive!(Philomena.Images.Image)'`
  """

  alias Philomena.ImageVectors.ImageVector
  alias Philomena.Maintenance
  alias Philomena.Repo

  # 4 bytes unsigned id + 768 floats per feature vector * 4 bytes per float
  @row_size 4 + 768 * 4

  @typedoc "A single feature row."
  @type row :: %{
          image_id: integer(),
          type: String.t(),
          features: [float()]
        }

  @spec import_from(Path.t(), String.t(), pos_integer()) :: :ok
  def import_from(batch_inference_file, type \\ "full", max_concurrency \\ 4) do
    {min, max} = get_min_and_max_id(batch_inference_file, type)

    batch_inference_file
    |> File.stream!(@row_size)
    |> Stream.chunk_every(1024)
    |> Task.async_stream(
      &process_chunk(&1, type),
      timeout: :infinity,
      max_concurrency: max_concurrency
    )
    |> Maintenance.log_progress("Importer/#{type}", min, max)
  end

  @spec process_chunk([binary()], String.t()) :: integer()
  defp process_chunk(chunk, type) do
    data = Enum.map(chunk, &unpack(&1, type))
    last_id = Enum.max_by(data, & &1.image_id).image_id

    {_count, nil} = Repo.insert_all(ImageVector, data, on_conflict: :nothing)

    last_id
  end

  @spec unpack(binary(), String.t()) :: row()
  defp unpack(row, type) do
    <<image_id::little-unsigned-integer-size(32), rest::binary-size(3072)>> = row
    features = for <<v::little-float-size(32) <- rest>>, do: v

    %{
      image_id: image_id,
      type: type,
      features: features
    }
  end

  @spec get_min_and_max_id(Path.t(), String.t()) :: {integer(), integer()}
  defp get_min_and_max_id(path, type) do
    stat = File.stat!(path)
    last_row = stat.size - @row_size

    %{image_id: min} = get_single_row(path, 0, type)
    %{image_id: max} = get_single_row(path, last_row, type)

    {min, max}
  end

  @spec get_single_row(Path.t(), integer(), String.t()) :: row()
  defp get_single_row(path, offset, type) do
    path
    |> File.stream!(@row_size, read_offset: offset)
    |> Enum.at(0)
    |> unpack(type)
  end
end
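For reference, a minimal sketch (not part of this commit) of how one row of `features.bin` could be produced on the exporter side, mirroring `unpack/2` above; the module and `pack_row/2` names are invented for illustration:

# Illustrative only: the binary layout consumed by unpack/2 above is a 32-bit
# little-endian unsigned image id followed by 768 little-endian 32-bit floats,
# 4 + 768 * 4 = 3076 bytes per row.
defmodule FeaturesBinExample do
  @row_size 4 + 768 * 4

  @spec pack_row(non_neg_integer(), [float()]) :: binary()
  def pack_row(image_id, features) when length(features) == 768 do
    floats = for v <- features, into: <<>>, do: <<v::little-float-size(32)>>
    row = <<image_id::little-unsigned-integer-size(32), floats::binary>>

    # Sanity check: each row must be exactly @row_size bytes.
    true = byte_size(row) == @row_size
    row
  end
end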
lib/philomena/images.ex
@@ -858,6 +858,7 @@ defmodule Philomena.Images do
     [
       :gallery_interactions,
+      :vectors,
       sources: sources_query,
       user: user_query,
       favers: user_query,
lib/philomena/images/image.ex
@@ -7,6 +7,7 @@ defmodule Philomena.Images.Image do
   alias Philomena.ImageVotes.ImageVote
   alias Philomena.ImageFaves.ImageFave
   alias Philomena.ImageHides.ImageHide
+  alias Philomena.ImageVectors.ImageVector
   alias Philomena.Images.Source
   alias Philomena.Images.Subscription
   alias Philomena.Users.User
@@ -35,6 +36,7 @@ defmodule Philomena.Images.Image do
     has_many :subscriptions, Subscription
     has_many :source_changes, SourceChange, on_replace: :delete
     has_many :tag_changes, TagChange
+    has_many :vectors, ImageVector
     has_many :upvoters, through: [:upvotes, :user]
     has_many :downvoters, through: [:downvotes, :user]
     has_many :favers, through: [:faves, :user]
lib/philomena/images/search_index.ex
@@ -11,6 +11,7 @@ defmodule Philomena.Images.SearchIndex do
     %{
       settings: %{
         index: %{
+          knn: true,
           number_of_shards: 5,
           max_result_window: 10_000_000
         }
@@ -89,6 +90,26 @@ defmodule Philomena.Images.SearchIndex do
             namespace: %{type: "keyword"}
           }
         },
+        vectors: %{
+          type: "nested",
+          properties: %{
+            f: %{
+              type: "knn_vector",
+              dimension: 768,
+              data_type: "float",
+              mode: "on_disk",
+              method: %{
+                name: "hnsw",
+                engine: "faiss",
+                space_type: "l2",
+                parameters: %{
+                  ef_construction: 128,
+                  m: 16
+                }
+              }
+            }
+          }
+        },
         approved: %{type: "boolean"},
         error_tag_count: %{type: "integer"},
         rating_tag_count: %{type: "integer"},
@@ -160,6 +181,7 @@ defmodule Philomena.Images.SearchIndex do
       },
       gallery_id: Enum.map(image.gallery_interactions, & &1.gallery_id),
       gallery_position: Map.new(image.gallery_interactions, &{&1.gallery_id, &1.position}),
+      vectors: image.vectors |> Enum.map(&%{f: &1.features}),
       favourited_by_users: image.favers |> Enum.map(&String.downcase(&1.name)),
       hidden_by_users: image.hiders |> Enum.map(&String.downcase(&1.name)),
       upvoters: image.upvoters |> Enum.map(&String.downcase(&1.name)),
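To illustrate what this mapping enables (not part of the commit), a nested k-NN query against the `vectors.f` field in an OpenSearch cluster with the k-NN plugin typically has the following shape, written here as the kind of Elixir map the search layer sends; `query_features`, `k`, and the `PhilomenaQuery.Search` call site mentioned in the comment are assumptions for illustration, while the `vectors`/`f` names come from the mapping above:

# Illustrative sketch: find images whose stored vectors are nearest to
# query_features by L2 distance (equivalently, highest cosine similarity,
# since stored vectors are normalized).
query_features = List.duplicate(0.0, 768)

knn_query = %{
  nested: %{
    path: "vectors",
    query: %{
      knn: %{
        "vectors.f" => %{
          vector: query_features,
          k: 10
        }
      }
    }
  }
}

# This map would then be embedded in a search request issued through
# PhilomenaQuery.Search; the exact call site is an assumption, not shown here.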
lib/philomena/images/thumbnailer.ex
@@ -12,6 +12,7 @@ defmodule Philomena.Images.Thumbnailer do
   alias Philomena.DuplicateReports
   alias Philomena.ImageIntensities
   alias Philomena.ImagePurgeWorker
+  alias Philomena.ImageVectors
   alias Philomena.Images.Image
   alias Philomena.Repo

@@ -105,6 +106,9 @@ defmodule Philomena.Images.Thumbnailer do
   defp apply_change(image, {:intensities, intensities}),
     do: ImageIntensities.create_image_intensity(image, intensities)

+  defp apply_change(image, {:features, features}),
+    do: ImageVectors.create_image_vector(image, features)
+
   defp apply_change(image, {:replace_original, new_file}) do
     full = "full.#{image.image_format}"
     upload_file(image, new_file, full)
@@ -139,7 +143,7 @@ defmodule Philomena.Images.Thumbnailer do
     |> Repo.update!()
   end

-  defp download_image_file(image) do
+  def download_image_file(image) do
     tempfile = Briefly.create!(extname: ".#{image.image_format}")
     path = Path.join(image_thumb_prefix(image), "full.#{image.image_format}")
lib/philomena_media/features.ex (new file, 51 lines)
@@ -0,0 +1,51 @@
defmodule PhilomenaMedia.Features do
  @moduledoc """
  Features are a set of 768 weighted classification outputs produced from a
  vision transformer (ViT). The individual classifications are arbitrary and
  not meaningful to analyze, but the vectors can be used to compare similarity
  between images using the cosine similarity measurement.

  Since cosine similarity is not a metric, the feature extractor substitutes
  normalized L2 distance for it; every vector that it returns is normalized,
  so traversing the k nearest neighbors in a vector space index iterates
  vectors in the same order as their cosine similarity.
  """

  alias PhilomenaMedia.Remote

  @type t :: %__MODULE__{
          features: [float()]
        }

  defstruct [:features]

  @doc """
  Gets the features of the given image file.

  The image file must be in the PNG or JPEG format.

  > #### Info {: .info}
  >
  > Clients should prefer to use `PhilomenaMedia.Processors.features/2`, as it handles
  > media files of any type supported by this library, not just PNG or JPEG.

  ## Examples

      iex> Features.file("image.png")
      {:ok, %Features{features: [0.03156396001577377, -0.04559657722711563, ...]}}

      iex> Features.file("nonexistent.jpg")
      :error

  """
  @spec file(Path.t()) :: {:ok, t()} | :error
  def file(input) do
    case Remote.get_features(input) do
      {:ok, features} ->
        {:ok, %__MODULE__{features: features}}

      _error ->
        :error
    end
  end
end
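The moduledoc's ordering claim follows from a standard identity: for unit vectors, squared L2 distance and cosine similarity are monotonically related, so ascending-distance and descending-similarity rankings coincide. A small, purely illustrative check (not part of the commit):

# Illustrative only: for L2-normalized vectors a and b,
#   ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * (a . b) = 2 - 2 * cos(a, b),
# so k-NN by L2 distance over normalized vectors ranks results exactly like
# cosine similarity would.
defmodule NormalizedDistanceCheck do
  def dot(a, b), do: Enum.zip_with(a, b, fn x, y -> x * y end) |> Enum.sum()

  def normalize(a) do
    n = :math.sqrt(dot(a, a))
    Enum.map(a, &(&1 / n))
  end

  def l2_squared(a, b), do: Enum.zip_with(a, b, fn x, y -> (x - y) * (x - y) end) |> Enum.sum()

  # Returns {squared L2 distance, 2 - 2 * cosine similarity}; the two agree
  # up to floating point error for any pair of non-zero input vectors.
  def compare(a, b) do
    {a, b} = {normalize(a), normalize(b)}
    {l2_squared(a, b), 2 - 2 * dot(a, b)}
  end
end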
lib/philomena_media/processors.ex
@@ -58,6 +58,7 @@ defmodule PhilomenaMedia.Processors do
   """

   alias PhilomenaMedia.Analyzers.Result
+  alias PhilomenaMedia.Features
   alias PhilomenaMedia.Intensities
   alias PhilomenaMedia.Processors.{Gif, Jpeg, Png, Svg, Webm}
   alias PhilomenaMedia.Mime
@@ -185,6 +186,25 @@ defmodule PhilomenaMedia.Processors do
     processor(analysis.mime_type).post_process(analysis, file)
   end

+  @doc """
+  Takes an analyzer result and file path and runs the appropriate processor's `features/2`,
+  returning the feature vector.
+
+  This allows for generating feature vectors for file types that are not directly supported by
+  `m:PhilomenaMedia.Features`, and should be the preferred function to call when feature vectors
+  are needed.
+
+  ## Example
+
+      iex> PhilomenaMedia.Processors.features(%Result{...}, "video.webm")
+      %Features{features: [0.03156396001577377, -0.04559657722711563, ...]}
+
+  """
+  @spec features(Result.t(), Path.t()) :: Features.t()
+  def features(analysis, file) do
+    processor(analysis.mime_type).features(analysis, file)
+  end
+
   @doc """
   Takes an analyzer result and file path and runs the appropriate processor's `intensities/2`,
   returning the corner intensities.
lib/philomena_media/processors/gif.ex
@@ -1,6 +1,7 @@
 defmodule PhilomenaMedia.Processors.Gif do
   @moduledoc false

+  alias PhilomenaMedia.Features
   alias PhilomenaMedia.Intensities
   alias PhilomenaMedia.Analyzers.Result
   alias PhilomenaMedia.Remote
@@ -23,12 +24,14 @@ defmodule PhilomenaMedia.Processors.Gif do
     palette = palette(file)

     {:ok, intensities} = Intensities.file(preview)
+    {:ok, features} = Features.file(preview)

     scaled = Enum.flat_map(versions, &scale(palette, file, &1))
     videos = generate_videos(file)

     [
       intensities: intensities,
+      features: features,
       thumbnails: scaled ++ videos ++ [{:copy, preview, "rendered.png"}]
     ]
   end
@@ -38,6 +41,12 @@ defmodule PhilomenaMedia.Processors.Gif do
     [replace_original: optimize(file)]
   end

+  @spec features(Result.t(), Path.t()) :: Features.t()
+  def features(analysis, file) do
+    {:ok, features} = Features.file(preview(analysis.duration, file))
+    features
+  end
+
   @spec intensities(Result.t(), Path.t()) :: Intensities.t()
   def intensities(analysis, file) do
     {:ok, intensities} = Intensities.file(preview(analysis.duration, file))
lib/philomena_media/processors/jpeg.ex
@@ -1,6 +1,7 @@
 defmodule PhilomenaMedia.Processors.Jpeg do
   @moduledoc false

+  alias PhilomenaMedia.Features
   alias PhilomenaMedia.Intensities
   alias PhilomenaMedia.Analyzers.Result
   alias PhilomenaMedia.Remote
@@ -22,12 +23,14 @@ defmodule PhilomenaMedia.Processors.Jpeg do
     stripped = optimize(strip(file))

     {:ok, intensities} = Intensities.file(stripped)
+    {:ok, features} = Features.file(stripped)

     scaled = Enum.flat_map(versions, &scale(stripped, &1))

     [
       replace_original: stripped,
       intensities: intensities,
+      features: features,
       thumbnails: scaled
     ]
   end
@@ -35,6 +38,12 @@ defmodule PhilomenaMedia.Processors.Jpeg do
   @spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
   def post_process(_analysis, _file), do: []

+  @spec features(Result.t(), Path.t()) :: Features.t()
+  def features(_analysis, file) do
+    {:ok, features} = Features.file(file)
+    features
+  end
+
   @spec intensities(Result.t(), Path.t()) :: Intensities.t()
   def intensities(_analysis, file) do
     {:ok, intensities} = Intensities.file(file)
lib/philomena_media/processors/png.ex
@@ -1,6 +1,7 @@
 defmodule PhilomenaMedia.Processors.Png do
   @moduledoc false

+  alias PhilomenaMedia.Features
   alias PhilomenaMedia.Intensities
   alias PhilomenaMedia.Analyzers.Result
   alias PhilomenaMedia.Remote
@@ -19,11 +20,13 @@ defmodule PhilomenaMedia.Processors.Png do
     animated? = analysis.animated?

     {:ok, intensities} = Intensities.file(file)
+    {:ok, features} = Features.file(file)

     scaled = Enum.flat_map(versions, &scale(file, animated?, &1))

     [
       intensities: intensities,
+      features: features,
       thumbnails: scaled
     ]
   end
@@ -38,6 +41,12 @@ defmodule PhilomenaMedia.Processors.Png do
     end
   end

+  @spec features(Result.t(), Path.t()) :: Features.t()
+  def features(_analysis, file) do
+    {:ok, features} = Features.file(file)
+    features
+  end
+
   @spec intensities(Result.t(), Path.t()) :: Intensities.t()
   def intensities(_analysis, file) do
     {:ok, intensities} = Intensities.file(file)
lib/philomena_media/processors/processor.ex
@@ -2,6 +2,7 @@ defmodule PhilomenaMedia.Processors.Processor do
   @moduledoc false

   alias PhilomenaMedia.Analyzers.Result
+  alias PhilomenaMedia.Features
   alias PhilomenaMedia.Processors
   alias PhilomenaMedia.Intensities

@@ -22,6 +23,11 @@ defmodule PhilomenaMedia.Processors.Processor do
   """
   @callback post_process(Result.t(), Path.t()) :: Processors.edit_script()

+  @doc """
+  Generate a feature vector for the given path.
+  """
+  @callback features(Result.t(), Path.t()) :: Features.t()
+
   @doc """
   Generate corner intensities for the given path.
   """
lib/philomena_media/processors/svg.ex
@@ -1,6 +1,7 @@
 defmodule PhilomenaMedia.Processors.Svg do
   @moduledoc false

+  alias PhilomenaMedia.Features
   alias PhilomenaMedia.Intensities
   alias PhilomenaMedia.Analyzers.Result
   alias PhilomenaMedia.Remote
@@ -21,12 +22,14 @@ defmodule PhilomenaMedia.Processors.Svg do
     preview = preview(file)

     {:ok, intensities} = Intensities.file(preview)
+    {:ok, features} = Features.file(preview)

     scaled = Enum.flat_map(versions, &scale(preview, &1))
     full = [{:copy, preview, "full.png"}]

     [
       intensities: intensities,
+      features: features,
       thumbnails: scaled ++ full ++ [{:copy, preview, "rendered.png"}]
     ]
   end
@@ -34,6 +37,12 @@ defmodule PhilomenaMedia.Processors.Svg do
   @spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
   def post_process(_analysis, _file), do: []

+  @spec features(Result.t(), Path.t()) :: Features.t()
+  def features(_analysis, file) do
+    {:ok, features} = Features.file(preview(file))
+    features
+  end
+
   @spec intensities(Result.t(), Path.t()) :: Intensities.t()
   def intensities(_analysis, file) do
     {:ok, intensities} = Intensities.file(preview(file))
lib/philomena_media/processors/webm.ex
@@ -1,6 +1,7 @@
 defmodule PhilomenaMedia.Processors.Webm do
   @moduledoc false

+  alias PhilomenaMedia.Features
   alias PhilomenaMedia.Intensities
   alias PhilomenaMedia.Analyzers.Result
   alias PhilomenaMedia.Remote
@@ -34,6 +35,7 @@ defmodule PhilomenaMedia.Processors.Webm do
     mp4 = scale_mp4_only(decoder, stripped, dimensions, dimensions)

     {:ok, intensities} = Intensities.file(preview)
+    {:ok, features} = Features.file(preview)

     scaled = Enum.flat_map(versions, &scale(decoder, stripped, duration, dimensions, &1))
     mp4 = [{:copy, mp4, "full.mp4"}]
@@ -41,6 +43,7 @@ defmodule PhilomenaMedia.Processors.Webm do
     [
       replace_original: stripped,
       intensities: intensities,
+      features: features,
       thumbnails: scaled ++ mp4 ++ [{:copy, preview, "rendered.png"}]
     ]
   end
@@ -48,6 +51,12 @@ defmodule PhilomenaMedia.Processors.Webm do
   @spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
   def post_process(_analysis, _file), do: []

+  @spec features(Result.t(), Path.t()) :: Features.t()
+  def features(analysis, file) do
+    {:ok, features} = Features.file(preview(analysis.duration, file))
+    features
+  end
+
   @spec intensities(Result.t(), Path.t()) :: Intensities.t()
   def intensities(analysis, file) do
     {:ok, intensities} = Intensities.file(preview(analysis.duration, file))
priv/repo/migrations/20250109155442_create_image_vectors.exs (new file, 14 lines)
@@ -0,0 +1,14 @@
defmodule Philomena.Repo.Migrations.CreateImageVectors do
  use Ecto.Migration

  def change do
    # NB: this is normalized, the float array is not divisible
    create table(:image_vectors) do
      add :image_id, references(:images, on_delete: :delete_all), null: false
      add :type, :string, null: false
      add :features, {:array, :float}, null: false
    end

    create unique_index(:image_vectors, [:image_id, :type])
  end
end
priv/repo/structure.sql
@@ -2,12 +2,13 @@
 -- PostgreSQL database dump
 --

--- Dumped from database version 16.4
--- Dumped by pg_dump version 16.6
+-- Dumped from database version 17.2
+-- Dumped by pg_dump version 17.2

 SET statement_timeout = 0;
 SET lock_timeout = 0;
 SET idle_in_transaction_session_timeout = 0;
+SET transaction_timeout = 0;
 SET client_encoding = 'UTF8';
 SET standard_conforming_strings = on;
 SELECT pg_catalog.set_config('search_path', '', false);
@@ -959,6 +960,37 @@ CREATE TABLE public.image_taggings (
 );


+--
+-- Name: image_vectors; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.image_vectors (
+    id bigint NOT NULL,
+    image_id bigint NOT NULL,
+    type character varying(255) NOT NULL,
+    features double precision[] NOT NULL
+);
+
+
+--
+-- Name: image_vectors_id_seq; Type: SEQUENCE; Schema: public; Owner: -
+--
+
+CREATE SEQUENCE public.image_vectors_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: image_vectors_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
+--
+
+ALTER SEQUENCE public.image_vectors_id_seq OWNED BY public.image_vectors.id;
+
+
 --
 -- Name: image_votes; Type: TABLE; Schema: public; Owner: -
 --
@@ -2365,6 +2397,13 @@ ALTER TABLE ONLY public.image_features ALTER COLUMN id SET DEFAULT nextval('publ
 ALTER TABLE ONLY public.image_intensities ALTER COLUMN id SET DEFAULT nextval('public.image_intensities_id_seq'::regclass);


+--
+-- Name: image_vectors id; Type: DEFAULT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.image_vectors ALTER COLUMN id SET DEFAULT nextval('public.image_vectors_id_seq'::regclass);
+
+
 --
 -- Name: images id; Type: DEFAULT; Schema: public; Owner: -
 --
@@ -2727,6 +2766,14 @@ ALTER TABLE ONLY public.image_intensities
     ADD CONSTRAINT image_intensities_pkey PRIMARY KEY (id);


+--
+-- Name: image_vectors image_vectors_pkey; Type: CONSTRAINT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.image_vectors
+    ADD CONSTRAINT image_vectors_pkey PRIMARY KEY (id);
+
+
 --
 -- Name: images images_pkey; Type: CONSTRAINT; Schema: public; Owner: -
 --
@@ -3192,6 +3239,13 @@ CREATE UNIQUE INDEX image_tag_locks_image_id_tag_id_index ON public.image_tag_lo
 CREATE INDEX image_tag_locks_tag_id_index ON public.image_tag_locks USING btree (tag_id);


+--
+-- Name: image_vectors_image_id_type_index; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE UNIQUE INDEX image_vectors_image_id_type_index ON public.image_vectors USING btree (image_id, type);
+
+
 --
 -- Name: images_hidden_from_users_approved_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -5381,6 +5435,14 @@ ALTER TABLE ONLY public.image_tag_locks
     ADD CONSTRAINT image_tag_locks_tag_id_fkey FOREIGN KEY (tag_id) REFERENCES public.tags(id) ON DELETE CASCADE;


+--
+-- Name: image_vectors image_vectors_image_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.image_vectors
+    ADD CONSTRAINT image_vectors_image_id_fkey FOREIGN KEY (image_id) REFERENCES public.images(id) ON DELETE CASCADE;
+
+
 --
 -- Name: moderation_logs moderation_logs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: -
 --
@@ -5448,3 +5510,4 @@ INSERT INTO public."schema_migrations" (version) VALUES (20220321173359);
 INSERT INTO public."schema_migrations" (version) VALUES (20240723122759);
 INSERT INTO public."schema_migrations" (version) VALUES (20240728191353);
 INSERT INTO public."schema_migrations" (version) VALUES (20241216165826);
+INSERT INTO public."schema_migrations" (version) VALUES (20250109155442);