mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-01-31 19:36:44 +01:00
Add feature extraction and importing pipeline to Philomena
This commit is contained in:
parent
2bd7ddf9d1
commit
12d3809d37
18 changed files with 517 additions and 5 deletions
91
lib/philomena/image_vectors.ex
Normal file
91
lib/philomena/image_vectors.ex
Normal file
|
@ -0,0 +1,91 @@
|
|||
defmodule Philomena.ImageVectors do
|
||||
@moduledoc """
|
||||
The ImageVectors context.
|
||||
"""
|
||||
|
||||
import Ecto.Query, warn: false
|
||||
alias Philomena.Repo
|
||||
|
||||
alias Philomena.ImageVectors.ImageVector
|
||||
|
||||
@doc """
Fetches the image vector row with the given primary key.

The lookup goes through `Repo.get!/2`, so a missing row raises
`Ecto.NoResultsError` rather than returning `nil`.

## Examples

    iex> get_image_vector!(123)
    %ImageVector{}

    iex> get_image_vector!(456)
    ** (Ecto.NoResultsError)

"""
def get_image_vector!(id) do
  ImageVector
  |> Repo.get!(id)
end
|
||||
|
||||
@doc """
Stores the feature vector for `image`.

`attrs` is a `PhilomenaMedia.Features` struct; it is converted to a plain map
and cast through `ImageVector.changeset/2`. The row is stored with type
`"full"`, the same default type used by the batch processor and the importer.

If a vector of that type already exists for the image, its `features` are
replaced rather than raising on the unique `(image_id, type)` index, so
re-processing an image is safe.

## Examples

    iex> create_image_vector(image, %PhilomenaMedia.Features{features: [0.03, -0.04]})
    {:ok, %ImageVector{}}

    iex> create_image_vector(image)
    {:error, %Ecto.Changeset{}}

"""
def create_image_vector(image, attrs \\ %PhilomenaMedia.Features{}) do
  # `Features` carries no `:type`, yet the changeset requires one. Without a
  # preset value every changeset was invalid and no row was ever inserted.
  %ImageVector{image_id: image.id, type: "full"}
  |> ImageVector.changeset(Map.from_struct(attrs))
  |> Repo.insert(
    on_conflict: {:replace, [:features]},
    conflict_target: [:image_id, :type]
  )
end
|
||||
|
||||
@doc """
Applies `attrs` to an existing image vector and persists the result.

The attributes are cast and validated by `ImageVector.changeset/2` before
`Repo.update/1` is called.

## Examples

    iex> update_image_vector(image_vector, %{field: new_value})
    {:ok, %ImageVector{}}

    iex> update_image_vector(image_vector, %{field: bad_value})
    {:error, %Ecto.Changeset{}}

"""
def update_image_vector(%ImageVector{} = image_vector, attrs) do
  changeset = ImageVector.changeset(image_vector, attrs)
  Repo.update(changeset)
end
|
||||
|
||||
@doc """
Removes the given image vector row via `Repo.delete/1`.

## Examples

    iex> delete_image_vector(image_vector)
    {:ok, %ImageVector{}}

    iex> delete_image_vector(image_vector)
    {:error, %Ecto.Changeset{}}

"""
def delete_image_vector(%ImageVector{} = image_vector), do: Repo.delete(image_vector)
|
||||
|
||||
@doc """
Builds an `%Ecto.Changeset{}` describing the changes `attrs` would make to
`image_vector`, without touching the database.

## Examples

    iex> change_image_vector(image_vector)
    %Ecto.Changeset{data: %ImageVector{}}

"""
def change_image_vector(%ImageVector{} = image_vector, attrs \\ %{}),
  do: ImageVector.changeset(image_vector, attrs)
|
||||
end
|
88
lib/philomena/image_vectors/batch_processor.ex
Normal file
88
lib/philomena/image_vectors/batch_processor.ex
Normal file
|
@ -0,0 +1,88 @@
|
|||
defmodule Philomena.ImageVectors.BatchProcessor do
|
||||
@moduledoc """
|
||||
Batch processing interface for Philomena. See the module documentation
|
||||
in `m:Philomena.ImageVectors.Importer` for more information about how to
|
||||
use the functions in this module during maintenance.
|
||||
"""
|
||||
|
||||
alias Philomena.Images
|
||||
alias Philomena.Images.Image
|
||||
alias Philomena.Images.Thumbnailer
|
||||
alias Philomena.ImageVectors.ImageVector
|
||||
alias Philomena.Maintenance
|
||||
alias Philomena.Repo
|
||||
|
||||
alias PhilomenaMedia.Analyzers
|
||||
alias PhilomenaMedia.Processors
|
||||
alias PhilomenaQuery.Batch
|
||||
alias PhilomenaQuery.Search
|
||||
|
||||
alias Philomena.Repo
|
||||
import Ecto.Query
|
||||
|
||||
@doc """
Generates feature vectors for every image that has no row in
`image_vectors` yet.

Note that the "missing" check looks for any vector belonging to the image,
regardless of its type; `type` only labels the rows that get inserted.
`opts` is forwarded to the batching code and may also carry
`:max_concurrency` (default 4).
"""
@spec all_missing(String.t(), Keyword.t()) :: :ok
def all_missing(type \\ "full", opts \\ []) do
  missing =
    from(Image, as: :image)
    |> where(not exists(where(ImageVector, [iv], iv.image_id == parent_as(:image).id)))

  by_image_query(missing, type, opts)
end
|
||||
|
||||
# Processes every image selected by `query` in concurrent batches, reporting
# progress between the smallest and largest matching image id.
#
# Returns `:ok` immediately when the query selects no images; previously the
# min/max lookups dereferenced `nil` and crashed in that case.
@spec by_image_query(Ecto.Query.t(), String.t(), Keyword.t()) :: :ok
defp by_image_query(query, type, opts) do
  max_concurrency = Keyword.get(opts, :max_concurrency, 4)

  case Repo.one(limit(order_by(query, asc: :id), 1)) do
    nil ->
      # Nothing to do, e.g. every image already has a vector.
      :ok

    %{id: min} ->
      max = Repo.one(limit(order_by(query, desc: :id), 1)).id

      query
      |> Batch.query_batches(opts)
      |> Task.async_stream(
        fn query -> process_query(query, type, opts) end,
        # Feature extraction of a whole batch can take arbitrarily long.
        timeout: :infinity,
        max_concurrency: max_concurrency
      )
      |> Maintenance.log_progress("BatchProcessor/#{type}", min, max)
  end
end
|
||||
|
||||
# Processes one batch: loads its images, extracts a feature vector for each,
# bulk-inserts the vectors and reindexes the images.
#
# Returns the largest image id in the batch, which the caller uses for
# progress reporting. Images whose processing raises are logged and skipped
# so one broken file does not abort the whole batch.
@spec process_query(Ecto.Query.t(), String.t(), Keyword.t()) :: integer()
defp process_query(query, type, batch_opts) do
  images = Repo.all(query)
  ids = Enum.map(images, & &1.id)
  last_id = Enum.max(ids)

  values =
    Enum.flat_map(images, fn image ->
      try do
        [process_image(image, type)]
      rescue
        ex ->
          IO.puts("While processing #{image.id}: #{inspect(ex)}")
          IO.puts(Exception.format_stacktrace(__STACKTRACE__))
          []
      end
    end)

  # Vectors that already exist (unique on image_id + type) are left untouched.
  {_count, nil} = Repo.insert_all(ImageVector, values, on_conflict: :nothing)

  # Reindex by explicit id instead of re-running `query`. The batch query
  # descends from a "has no vector yet" filter, which presumably no longer
  # matches the images that just received a vector once it is evaluated again.
  :ok =
    Image
    |> where([i], i.id in ^ids)
    |> preload(^Images.indexing_preloads())
    |> Search.reindex(Image, batch_opts)

  last_id
end
|
||||
|
||||
# Downloads the image's full file, analyzes it and extracts its feature
# vector, returning a row map suitable for `Repo.insert_all/3`.
# Raises (MatchError) when the file cannot be analyzed.
@spec process_image(%Image{}, String.t()) :: map()
defp process_image(image = %Image{}, type) do
  path = Thumbnailer.download_image_file(image)
  {:ok, analysis} = Analyzers.analyze_path(path)
  extracted = Processors.features(analysis, path)

  %{image_id: image.id, type: type, features: extracted.features}
end
|
||||
end
|
19
lib/philomena/image_vectors/image_vector.ex
Normal file
19
lib/philomena/image_vectors/image_vector.ex
Normal file
|
@ -0,0 +1,19 @@
|
|||
defmodule Philomena.ImageVectors.ImageVector do
  @moduledoc """
  Schema for a feature vector attached to an image.

  Each row belongs to one image and stores the vector (`features`) together
  with a `type` label. The database enforces one row per `(image_id, type)`.
  """

  use Ecto.Schema
  import Ecto.Changeset

  alias Philomena.Images.Image

  schema "image_vectors" do
    belongs_to :image, Image
    field :type, :string
    field :features, {:array, :float}
  end

  @doc false
  def changeset(image_vector, attrs) do
    image_vector
    |> cast(attrs, [:type, :features])
    |> validate_required([:type, :features])
    # Turn violations of the database constraints into changeset errors
    # instead of raising `Ecto.ConstraintError` from the Repo call. The
    # inferred names match the migration's defaults
    # (`image_vectors_image_id_fkey`, `image_vectors_image_id_type_index`).
    |> foreign_key_constraint(:image_id)
    |> unique_constraint([:image_id, :type])
  end
end
|
86
lib/philomena/image_vectors/importer.ex
Normal file
86
lib/philomena/image_vectors/importer.ex
Normal file
|
@ -0,0 +1,86 @@
|
|||
defmodule Philomena.ImageVectors.Importer do
|
||||
@moduledoc """
|
||||
Import logic for binary files produced by the export function of
|
||||
https://github.com/philomena-dev/philomena-ris-inference-toolkit.
|
||||
|
||||
Run the following commands in a long-running terminal, like screen or tmux.
|
||||
The workflow for using the importer is as follows:
|
||||
|
||||
1. Use the batch inference toolkit to get the `features.bin`.
|
||||
2. Run `philomena eval 'Philomena.ImageVectors.Importer.import_from("/path/to/features.bin")'`.
|
||||
3. Backfill the remaining images:
|
||||
`philomena eval 'Philomena.ImageVectors.BatchProcessor.all_missing("full", batch_size: 32)'`
|
||||
4. Downtime, delete and recreate the images index:
|
||||
`philomena eval 'Philomena.SearchIndexer.recreate_reindex_schema_destructive!(Philomena.Images.Image)'`.
|
||||
"""
|
||||
|
||||
alias Philomena.ImageVectors.ImageVector
|
||||
alias Philomena.Maintenance
|
||||
alias Philomena.Repo
|
||||
|
||||
# 4 bytes unsigned id + 768 floats per feature vector * 4 bytes per float
|
||||
@row_size 4 + 768 * 4
|
||||
|
||||
@typedoc "A single feature row."
|
||||
@type row :: %{
|
||||
image_id: integer(),
|
||||
type: String.t(),
|
||||
features: [float()]
|
||||
}
|
||||
|
||||
@doc """
Imports every feature row from the binary file at `batch_inference_file`.

The file is read as fixed-size rows, grouped into chunks of 1024 rows, and
the chunks are inserted concurrently (`max_concurrency` tasks at a time).
Each inserted row is labelled with `type`. Rows that already exist are
skipped by the insert.

Progress is reported using the ids found in the first and last row of the
file.
"""
@spec import_from(Path.t(), String.t(), pos_integer()) :: :ok
def import_from(batch_inference_file, type \\ "full", max_concurrency \\ 4) do
  {min, max} = get_min_and_max_id(batch_inference_file, type)

  batch_inference_file
  |> File.stream!(@row_size)
  |> Stream.chunk_every(1024)
  |> Task.async_stream(
    &process_chunk(&1, type),
    # A chunk insert has no meaningful upper bound on duration.
    timeout: :infinity,
    max_concurrency: max_concurrency
  )
  |> Maintenance.log_progress("Importer/#{type}", min, max)
end
|
||||
|
||||
# Decodes a chunk of raw rows and bulk-inserts them, skipping rows that
# conflict with existing ones. Returns the largest image id in the chunk
# (not `:ok`), which the caller feeds into progress reporting.
@spec process_chunk([binary()], String.t()) :: integer()
defp process_chunk(chunk, type) do
  data = Enum.map(chunk, &unpack(&1, type))
  last_id = Enum.max_by(data, & &1.image_id).image_id

  {_count, nil} = Repo.insert_all(ImageVector, data, on_conflict: :nothing)

  last_id
end
|
||||
|
||||
# Decodes one binary row: a little-endian unsigned 32-bit image id followed
# by 3072 bytes of little-endian 32-bit floats. Raises MatchError if the row
# does not have exactly that layout.
@spec unpack(binary(), String.t()) :: row()
defp unpack(row, type) do
  <<id::little-unsigned-integer-size(32), payload::binary-size(3072)>> = row
  vector = for <<component::little-float-size(32) <- payload>>, do: component

  %{image_id: id, type: type, features: vector}
end
|
||||
|
||||
# Reads the image ids stored in the first and the last row of the file and
# returns them as `{min, max}`.
# NOTE(review): this treats the first row as the minimum and the last row as
# the maximum, i.e. it assumes the file is ordered by image id — confirm
# against the exporter.
@spec get_min_and_max_id(Path.t(), String.t()) :: {integer(), integer()}
defp get_min_and_max_id(path, type) do
  %File.Stat{size: size} = File.stat!(path)

  first = get_single_row(path, 0, type)
  last = get_single_row(path, size - @row_size, type)

  {first.image_id, last.image_id}
end
|
||||
|
||||
# Reads and decodes the single row that starts at byte `offset` of the file.
@spec get_single_row(Path.t(), integer(), String.t()) :: row()
defp get_single_row(path, offset, type) do
  rows = File.stream!(path, @row_size, read_offset: offset)

  rows
  |> Enum.at(0)
  |> unpack(type)
end
|
||||
end
|
|
@ -858,6 +858,7 @@ defmodule Philomena.Images do
|
|||
|
||||
[
|
||||
:gallery_interactions,
|
||||
:vectors,
|
||||
sources: sources_query,
|
||||
user: user_query,
|
||||
favers: user_query,
|
||||
|
|
|
@ -7,6 +7,7 @@ defmodule Philomena.Images.Image do
|
|||
alias Philomena.ImageVotes.ImageVote
|
||||
alias Philomena.ImageFaves.ImageFave
|
||||
alias Philomena.ImageHides.ImageHide
|
||||
alias Philomena.ImageVectors.ImageVector
|
||||
alias Philomena.Images.Source
|
||||
alias Philomena.Images.Subscription
|
||||
alias Philomena.Users.User
|
||||
|
@ -35,6 +36,7 @@ defmodule Philomena.Images.Image do
|
|||
has_many :subscriptions, Subscription
|
||||
has_many :source_changes, SourceChange, on_replace: :delete
|
||||
has_many :tag_changes, TagChange
|
||||
has_many :vectors, ImageVector
|
||||
has_many :upvoters, through: [:upvotes, :user]
|
||||
has_many :downvoters, through: [:downvotes, :user]
|
||||
has_many :favers, through: [:faves, :user]
|
||||
|
|
|
@ -11,6 +11,7 @@ defmodule Philomena.Images.SearchIndex do
|
|||
%{
|
||||
settings: %{
|
||||
index: %{
|
||||
knn: true,
|
||||
number_of_shards: 5,
|
||||
max_result_window: 10_000_000
|
||||
}
|
||||
|
@ -89,6 +90,26 @@ defmodule Philomena.Images.SearchIndex do
|
|||
namespace: %{type: "keyword"}
|
||||
}
|
||||
},
|
||||
vectors: %{
|
||||
type: "nested",
|
||||
properties: %{
|
||||
f: %{
|
||||
type: "knn_vector",
|
||||
dimension: 768,
|
||||
data_type: "float",
|
||||
mode: "on_disk",
|
||||
method: %{
|
||||
name: "hnsw",
|
||||
engine: "faiss",
|
||||
space_type: "l2",
|
||||
parameters: %{
|
||||
ef_construction: 128,
|
||||
m: 16
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
approved: %{type: "boolean"},
|
||||
error_tag_count: %{type: "integer"},
|
||||
rating_tag_count: %{type: "integer"},
|
||||
|
@ -160,6 +181,7 @@ defmodule Philomena.Images.SearchIndex do
|
|||
},
|
||||
gallery_id: Enum.map(image.gallery_interactions, & &1.gallery_id),
|
||||
gallery_position: Map.new(image.gallery_interactions, &{&1.gallery_id, &1.position}),
|
||||
vectors: image.vectors |> Enum.map(&%{f: &1.features}),
|
||||
favourited_by_users: image.favers |> Enum.map(&String.downcase(&1.name)),
|
||||
hidden_by_users: image.hiders |> Enum.map(&String.downcase(&1.name)),
|
||||
upvoters: image.upvoters |> Enum.map(&String.downcase(&1.name)),
|
||||
|
|
|
@ -12,6 +12,7 @@ defmodule Philomena.Images.Thumbnailer do
|
|||
alias Philomena.DuplicateReports
|
||||
alias Philomena.ImageIntensities
|
||||
alias Philomena.ImagePurgeWorker
|
||||
alias Philomena.ImageVectors
|
||||
alias Philomena.Images.Image
|
||||
alias Philomena.Repo
|
||||
|
||||
|
@ -105,6 +106,9 @@ defmodule Philomena.Images.Thumbnailer do
|
|||
defp apply_change(image, {:intensities, intensities}),
|
||||
do: ImageIntensities.create_image_intensity(image, intensities)
|
||||
|
||||
# Persist the feature vector produced by the media processor for this image.
defp apply_change(image, {:features, features}) do
  ImageVectors.create_image_vector(image, features)
end
|
||||
|
||||
defp apply_change(image, {:replace_original, new_file}) do
|
||||
full = "full.#{image.image_format}"
|
||||
upload_file(image, new_file, full)
|
||||
|
@ -139,7 +143,7 @@ defmodule Philomena.Images.Thumbnailer do
|
|||
|> Repo.update!()
|
||||
end
|
||||
|
||||
defp download_image_file(image) do
|
||||
def download_image_file(image) do
|
||||
tempfile = Briefly.create!(extname: ".#{image.image_format}")
|
||||
path = Path.join(image_thumb_prefix(image), "full.#{image.image_format}")
|
||||
|
||||
|
|
51
lib/philomena_media/features.ex
Normal file
51
lib/philomena_media/features.ex
Normal file
|
@ -0,0 +1,51 @@
|
|||
defmodule PhilomenaMedia.Features do
  @moduledoc """
  Features are a set of 768 weighted classification outputs produced by a
  vision transformer (ViT). The individual outputs are arbitrary and carry no
  meaning on their own, but whole vectors can be compared to measure how
  similar two images are, using cosine similarity.

  Cosine similarity is not a metric, so the feature extractor substitutes
  normalized L2 distance for it: every vector it returns is normalized, and
  walking the k nearest neighbors in a vector space index visits vectors in
  the same order as their cosine similarity.
  """

  alias PhilomenaMedia.Remote

  @type t :: %__MODULE__{
          features: [float()]
        }

  defstruct [:features]

  @doc """
  Extracts the feature vector of the given image file.

  The file must be a PNG or JPEG image. Any failure reported by the extractor
  is collapsed into a bare `:error`.

  > #### Info {: .info}
  >
  > Clients should prefer to use `PhilomenaMedia.Processors.features/2`, as it handles
  > media files of any type supported by this library, not just PNG or JPEG.

  ## Examples

      iex> Features.file("image.png")
      {:ok, %Features{features: [0.03156396001577377, -0.04559657722711563, ...]}}

      iex> Features.file("nonexistent.jpg")
      :error

  """
  @spec file(Path.t()) :: {:ok, t()} | :error
  def file(input) do
    with {:ok, features} <- Remote.get_features(input) do
      {:ok, %__MODULE__{features: features}}
    else
      _error -> :error
    end
  end
end
|
|
@ -58,6 +58,7 @@ defmodule PhilomenaMedia.Processors do
|
|||
"""
|
||||
|
||||
alias PhilomenaMedia.Analyzers.Result
|
||||
alias PhilomenaMedia.Features
|
||||
alias PhilomenaMedia.Intensities
|
||||
alias PhilomenaMedia.Processors.{Gif, Jpeg, Png, Svg, Webm}
|
||||
alias PhilomenaMedia.Mime
|
||||
|
@ -185,6 +186,25 @@ defmodule PhilomenaMedia.Processors do
|
|||
processor(analysis.mime_type).post_process(analysis, file)
|
||||
end
|
||||
|
||||
@doc """
Computes the feature vector for a media file by delegating to the
`features/2` function of the processor selected for the analysis result's
MIME type.

Because each processor knows how to handle its own format, this works for
file types that `m:PhilomenaMedia.Features` cannot read directly, and it
should be the preferred function to call when feature vectors are needed.

## Example

    iex> PhilomenaMedia.Processors.features(%Result{...}, "video.webm")
    %Features{features: [0.03156396001577377, -0.04559657722711563, ...]}

"""
@spec features(Result.t(), Path.t()) :: Features.t()
def features(analysis, file) do
  handler = processor(analysis.mime_type)
  handler.features(analysis, file)
end
|
||||
|
||||
@doc """
|
||||
Takes an analyzer result and file path and runs the appropriate processor's `intensities/2`,
|
||||
returning the corner intensities.
|
||||
|
@ -195,8 +215,8 @@ defmodule PhilomenaMedia.Processors do
|
|||
|
||||
## Example
|
||||
|
||||
iex> PhilomenaMedia.Processors.intensities(%Result{...}, "video.webm")
|
||||
%Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064}
|
||||
iex> PhilomenaMedia.Processors.intensities(%Result{...}, "video.webm")
|
||||
%Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064}
|
||||
|
||||
"""
|
||||
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
defmodule PhilomenaMedia.Processors.Gif do
|
||||
@moduledoc false
|
||||
|
||||
alias PhilomenaMedia.Features
|
||||
alias PhilomenaMedia.Intensities
|
||||
alias PhilomenaMedia.Analyzers.Result
|
||||
alias PhilomenaMedia.Remote
|
||||
|
@ -23,12 +24,14 @@ defmodule PhilomenaMedia.Processors.Gif do
|
|||
palette = palette(file)
|
||||
|
||||
{:ok, intensities} = Intensities.file(preview)
|
||||
{:ok, features} = Features.file(preview)
|
||||
|
||||
scaled = Enum.flat_map(versions, &scale(palette, file, &1))
|
||||
videos = generate_videos(file)
|
||||
|
||||
[
|
||||
intensities: intensities,
|
||||
features: features,
|
||||
thumbnails: scaled ++ videos ++ [{:copy, preview, "rendered.png"}]
|
||||
]
|
||||
end
|
||||
|
@ -38,6 +41,12 @@ defmodule PhilomenaMedia.Processors.Gif do
|
|||
[replace_original: optimize(file)]
|
||||
end
|
||||
|
||||
# Feature extraction runs on a preview image rendered from the GIF rather
# than on the GIF itself. Raises MatchError if extraction fails.
@spec features(Result.t(), Path.t()) :: Features.t()
def features(analysis, file) do
  preview_path = preview(analysis.duration, file)
  {:ok, vector} = Features.file(preview_path)
  vector
end
|
||||
|
||||
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
|
||||
def intensities(analysis, file) do
|
||||
{:ok, intensities} = Intensities.file(preview(analysis.duration, file))
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
defmodule PhilomenaMedia.Processors.Jpeg do
|
||||
@moduledoc false
|
||||
|
||||
alias PhilomenaMedia.Features
|
||||
alias PhilomenaMedia.Intensities
|
||||
alias PhilomenaMedia.Analyzers.Result
|
||||
alias PhilomenaMedia.Remote
|
||||
|
@ -22,12 +23,14 @@ defmodule PhilomenaMedia.Processors.Jpeg do
|
|||
stripped = optimize(strip(file))
|
||||
|
||||
{:ok, intensities} = Intensities.file(stripped)
|
||||
{:ok, features} = Features.file(stripped)
|
||||
|
||||
scaled = Enum.flat_map(versions, &scale(stripped, &1))
|
||||
|
||||
[
|
||||
replace_original: stripped,
|
||||
intensities: intensities,
|
||||
features: features,
|
||||
thumbnails: scaled
|
||||
]
|
||||
end
|
||||
|
@ -35,6 +38,12 @@ defmodule PhilomenaMedia.Processors.Jpeg do
|
|||
@spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
|
||||
def post_process(_analysis, _file), do: []
|
||||
|
||||
# The JPEG file is passed to the feature extractor as-is.
# Raises MatchError if extraction fails.
@spec features(Result.t(), Path.t()) :: Features.t()
def features(_analysis, file) do
  {:ok, vector} = Features.file(file)
  vector
end
|
||||
|
||||
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
|
||||
def intensities(_analysis, file) do
|
||||
{:ok, intensities} = Intensities.file(file)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
defmodule PhilomenaMedia.Processors.Png do
|
||||
@moduledoc false
|
||||
|
||||
alias PhilomenaMedia.Features
|
||||
alias PhilomenaMedia.Intensities
|
||||
alias PhilomenaMedia.Analyzers.Result
|
||||
alias PhilomenaMedia.Remote
|
||||
|
@ -19,11 +20,13 @@ defmodule PhilomenaMedia.Processors.Png do
|
|||
animated? = analysis.animated?
|
||||
|
||||
{:ok, intensities} = Intensities.file(file)
|
||||
{:ok, features} = Features.file(file)
|
||||
|
||||
scaled = Enum.flat_map(versions, &scale(file, animated?, &1))
|
||||
|
||||
[
|
||||
intensities: intensities,
|
||||
features: features,
|
||||
thumbnails: scaled
|
||||
]
|
||||
end
|
||||
|
@ -38,6 +41,12 @@ defmodule PhilomenaMedia.Processors.Png do
|
|||
end
|
||||
end
|
||||
|
||||
# The PNG file is passed to the feature extractor as-is.
# Raises MatchError if extraction fails.
@spec features(Result.t(), Path.t()) :: Features.t()
def features(_analysis, file) do
  {:ok, vector} = Features.file(file)
  vector
end
|
||||
|
||||
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
|
||||
def intensities(_analysis, file) do
|
||||
{:ok, intensities} = Intensities.file(file)
|
||||
|
|
|
@ -2,6 +2,7 @@ defmodule PhilomenaMedia.Processors.Processor do
|
|||
@moduledoc false
|
||||
|
||||
alias PhilomenaMedia.Analyzers.Result
|
||||
alias PhilomenaMedia.Features
|
||||
alias PhilomenaMedia.Processors
|
||||
alias PhilomenaMedia.Intensities
|
||||
|
||||
|
@ -22,6 +23,11 @@ defmodule PhilomenaMedia.Processors.Processor do
|
|||
"""
|
||||
@callback post_process(Result.t(), Path.t()) :: Processors.edit_script()
|
||||
|
||||
@doc """
|
||||
Generate a feature vector for the given path.
|
||||
"""
|
||||
@callback features(Result.t(), Path.t()) :: Features.t()
|
||||
|
||||
@doc """
|
||||
Generate corner intensities for the given path.
|
||||
"""
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
defmodule PhilomenaMedia.Processors.Svg do
|
||||
@moduledoc false
|
||||
|
||||
alias PhilomenaMedia.Features
|
||||
alias PhilomenaMedia.Intensities
|
||||
alias PhilomenaMedia.Analyzers.Result
|
||||
alias PhilomenaMedia.Remote
|
||||
|
@ -21,12 +22,14 @@ defmodule PhilomenaMedia.Processors.Svg do
|
|||
preview = preview(file)
|
||||
|
||||
{:ok, intensities} = Intensities.file(preview)
|
||||
{:ok, features} = Features.file(preview)
|
||||
|
||||
scaled = Enum.flat_map(versions, &scale(preview, &1))
|
||||
full = [{:copy, preview, "full.png"}]
|
||||
|
||||
[
|
||||
intensities: intensities,
|
||||
features: features,
|
||||
thumbnails: scaled ++ full ++ [{:copy, preview, "rendered.png"}]
|
||||
]
|
||||
end
|
||||
|
@ -34,6 +37,12 @@ defmodule PhilomenaMedia.Processors.Svg do
|
|||
@spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
|
||||
def post_process(_analysis, _file), do: []
|
||||
|
||||
# Feature extraction runs on the preview generated from the SVG rather than
# on the SVG itself. Raises MatchError if extraction fails.
@spec features(Result.t(), Path.t()) :: Features.t()
def features(_analysis, file) do
  preview_path = preview(file)
  {:ok, vector} = Features.file(preview_path)
  vector
end
|
||||
|
||||
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
|
||||
def intensities(_analysis, file) do
|
||||
{:ok, intensities} = Intensities.file(preview(file))
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
defmodule PhilomenaMedia.Processors.Webm do
|
||||
@moduledoc false
|
||||
|
||||
alias PhilomenaMedia.Features
|
||||
alias PhilomenaMedia.Intensities
|
||||
alias PhilomenaMedia.Analyzers.Result
|
||||
alias PhilomenaMedia.Remote
|
||||
|
@ -34,6 +35,7 @@ defmodule PhilomenaMedia.Processors.Webm do
|
|||
mp4 = scale_mp4_only(decoder, stripped, dimensions, dimensions)
|
||||
|
||||
{:ok, intensities} = Intensities.file(preview)
|
||||
{:ok, features} = Features.file(preview)
|
||||
|
||||
scaled = Enum.flat_map(versions, &scale(decoder, stripped, duration, dimensions, &1))
|
||||
mp4 = [{:copy, mp4, "full.mp4"}]
|
||||
|
@ -41,6 +43,7 @@ defmodule PhilomenaMedia.Processors.Webm do
|
|||
[
|
||||
replace_original: stripped,
|
||||
intensities: intensities,
|
||||
features: features,
|
||||
thumbnails: scaled ++ mp4 ++ [{:copy, preview, "rendered.png"}]
|
||||
]
|
||||
end
|
||||
|
@ -48,6 +51,12 @@ defmodule PhilomenaMedia.Processors.Webm do
|
|||
@spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
|
||||
def post_process(_analysis, _file), do: []
|
||||
|
||||
# Feature extraction runs on a preview image rendered from the video rather
# than on the video itself. Raises MatchError if extraction fails.
@spec features(Result.t(), Path.t()) :: Features.t()
def features(analysis, file) do
  preview_path = preview(analysis.duration, file)
  {:ok, vector} = Features.file(preview_path)
  vector
end
|
||||
|
||||
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
|
||||
def intensities(analysis, file) do
|
||||
{:ok, intensities} = Intensities.file(preview(analysis.duration, file))
|
||||
|
|
14
priv/repo/migrations/20250109155442_create_image_vectors.exs
Normal file
14
priv/repo/migrations/20250109155442_create_image_vectors.exs
Normal file
|
@ -0,0 +1,14 @@
|
|||
defmodule Philomena.Repo.Migrations.CreateImageVectors do
  use Ecto.Migration

  def change do
    # NB: the table still counts as normalized; the float array is stored and
    # treated as one indivisible value.
    create table(:image_vectors) do
      add(:image_id, references(:images, on_delete: :delete_all), null: false)
      add(:type, :string, null: false)
      add(:features, {:array, :float}, null: false)
    end

    # At most one vector per image and type.
    create(index(:image_vectors, [:image_id, :type], unique: true))
  end
end
|
|
@ -2,12 +2,13 @@
|
|||
-- PostgreSQL database dump
|
||||
--
|
||||
|
||||
-- Dumped from database version 16.4
|
||||
-- Dumped by pg_dump version 16.6
|
||||
-- Dumped from database version 17.2
|
||||
-- Dumped by pg_dump version 17.2
|
||||
|
||||
SET statement_timeout = 0;
|
||||
SET lock_timeout = 0;
|
||||
SET idle_in_transaction_session_timeout = 0;
|
||||
SET transaction_timeout = 0;
|
||||
SET client_encoding = 'UTF8';
|
||||
SET standard_conforming_strings = on;
|
||||
SELECT pg_catalog.set_config('search_path', '', false);
|
||||
|
@ -959,6 +960,37 @@ CREATE TABLE public.image_taggings (
|
|||
);
|
||||
|
||||
|
||||
--
|
||||
-- Name: image_vectors; Type: TABLE; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
CREATE TABLE public.image_vectors (
|
||||
id bigint NOT NULL,
|
||||
image_id bigint NOT NULL,
|
||||
type character varying(255) NOT NULL,
|
||||
features double precision[] NOT NULL
|
||||
);
|
||||
|
||||
|
||||
--
|
||||
-- Name: image_vectors_id_seq; Type: SEQUENCE; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
CREATE SEQUENCE public.image_vectors_id_seq
|
||||
START WITH 1
|
||||
INCREMENT BY 1
|
||||
NO MINVALUE
|
||||
NO MAXVALUE
|
||||
CACHE 1;
|
||||
|
||||
|
||||
--
|
||||
-- Name: image_vectors_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
ALTER SEQUENCE public.image_vectors_id_seq OWNED BY public.image_vectors.id;
|
||||
|
||||
|
||||
--
|
||||
-- Name: image_votes; Type: TABLE; Schema: public; Owner: -
|
||||
--
|
||||
|
@ -2365,6 +2397,13 @@ ALTER TABLE ONLY public.image_features ALTER COLUMN id SET DEFAULT nextval('publ
|
|||
ALTER TABLE ONLY public.image_intensities ALTER COLUMN id SET DEFAULT nextval('public.image_intensities_id_seq'::regclass);
|
||||
|
||||
|
||||
--
|
||||
-- Name: image_vectors id; Type: DEFAULT; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
ALTER TABLE ONLY public.image_vectors ALTER COLUMN id SET DEFAULT nextval('public.image_vectors_id_seq'::regclass);
|
||||
|
||||
|
||||
--
|
||||
-- Name: images id; Type: DEFAULT; Schema: public; Owner: -
|
||||
--
|
||||
|
@ -2727,6 +2766,14 @@ ALTER TABLE ONLY public.image_intensities
|
|||
ADD CONSTRAINT image_intensities_pkey PRIMARY KEY (id);
|
||||
|
||||
|
||||
--
|
||||
-- Name: image_vectors image_vectors_pkey; Type: CONSTRAINT; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
ALTER TABLE ONLY public.image_vectors
|
||||
ADD CONSTRAINT image_vectors_pkey PRIMARY KEY (id);
|
||||
|
||||
|
||||
--
|
||||
-- Name: images images_pkey; Type: CONSTRAINT; Schema: public; Owner: -
|
||||
--
|
||||
|
@ -3192,6 +3239,13 @@ CREATE UNIQUE INDEX image_tag_locks_image_id_tag_id_index ON public.image_tag_lo
|
|||
CREATE INDEX image_tag_locks_tag_id_index ON public.image_tag_locks USING btree (tag_id);
|
||||
|
||||
|
||||
--
|
||||
-- Name: image_vectors_image_id_type_index; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
CREATE UNIQUE INDEX image_vectors_image_id_type_index ON public.image_vectors USING btree (image_id, type);
|
||||
|
||||
|
||||
--
|
||||
-- Name: images_hidden_from_users_approved_index; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
|
@ -5381,6 +5435,14 @@ ALTER TABLE ONLY public.image_tag_locks
|
|||
ADD CONSTRAINT image_tag_locks_tag_id_fkey FOREIGN KEY (tag_id) REFERENCES public.tags(id) ON DELETE CASCADE;
|
||||
|
||||
|
||||
--
|
||||
-- Name: image_vectors image_vectors_image_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
ALTER TABLE ONLY public.image_vectors
|
||||
ADD CONSTRAINT image_vectors_image_id_fkey FOREIGN KEY (image_id) REFERENCES public.images(id) ON DELETE CASCADE;
|
||||
|
||||
|
||||
--
|
||||
-- Name: moderation_logs moderation_logs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: -
|
||||
--
|
||||
|
@ -5448,3 +5510,4 @@ INSERT INTO public."schema_migrations" (version) VALUES (20220321173359);
|
|||
INSERT INTO public."schema_migrations" (version) VALUES (20240723122759);
|
||||
INSERT INTO public."schema_migrations" (version) VALUES (20240728191353);
|
||||
INSERT INTO public."schema_migrations" (version) VALUES (20241216165826);
|
||||
INSERT INTO public."schema_migrations" (version) VALUES (20250109155442);
|
||||
|
|
Loading…
Reference in a new issue