diff --git a/lib/philomena/image_vectors.ex b/lib/philomena/image_vectors.ex new file mode 100644 index 00000000..85268440 --- /dev/null +++ b/lib/philomena/image_vectors.ex @@ -0,0 +1,91 @@ +defmodule Philomena.ImageVectors do + @moduledoc """ + The ImageVectors context. + """ + + import Ecto.Query, warn: false + alias Philomena.Repo + + alias Philomena.ImageVectors.ImageVector + + @doc """ + Gets a single image_vector. + + Raises `Ecto.NoResultsError` if the Image vector does not exist. + + ## Examples + + iex> get_image_vector!(123) + %ImageVector{} + + iex> get_image_vector!(456) + ** (Ecto.NoResultsError) + + """ + def get_image_vector!(id), do: Repo.get!(ImageVector, id) + + @doc """ + Creates an image_vector of type "full" for the given image (the features struct carries no type, and the changeset requires one). + + ## Examples + + iex> create_image_vector(image, %PhilomenaMedia.Features{features: [0.1, 0.2]}) + {:ok, %ImageVector{}} + + iex> create_image_vector(image, %PhilomenaMedia.Features{}) + {:error, %Ecto.Changeset{}} + + """ + def create_image_vector(image, attrs \\ %PhilomenaMedia.Features{}) do + %ImageVector{image_id: image.id, type: "full"} + |> ImageVector.changeset(Map.from_struct(attrs)) + |> Repo.insert() + end + + @doc """ + Updates an image_vector. + + ## Examples + + iex> update_image_vector(image_vector, %{field: new_value}) + {:ok, %ImageVector{}} + + iex> update_image_vector(image_vector, %{field: bad_value}) + {:error, %Ecto.Changeset{}} + + """ + def update_image_vector(%ImageVector{} = image_vector, attrs) do + image_vector + |> ImageVector.changeset(attrs) + |> Repo.update() + end + + @doc """ + Deletes an image_vector. + + ## Examples + + iex> delete_image_vector(image_vector) + {:ok, %ImageVector{}} + + iex> delete_image_vector(image_vector) + {:error, %Ecto.Changeset{}} + + """ + def delete_image_vector(%ImageVector{} = image_vector) do + Repo.delete(image_vector) + end + + @doc """ + Returns an `%Ecto.Changeset{}` for tracking image_vector changes. 
+ + ## Examples + + iex> change_image_vector(image_vector) + %Ecto.Changeset{data: %ImageVector{}} + + """ + def change_image_vector(%ImageVector{} = image_vector, attrs \\ %{}) do + ImageVector.changeset(image_vector, attrs) + end +end diff --git a/lib/philomena/image_vectors/batch_processor.ex b/lib/philomena/image_vectors/batch_processor.ex new file mode 100644 index 00000000..3fefc318 --- /dev/null +++ b/lib/philomena/image_vectors/batch_processor.ex @@ -0,0 +1,88 @@ +defmodule Philomena.ImageVectors.BatchProcessor do + @moduledoc """ + Batch processing interface for Philomena. See the module documentation + in `m:Philomena.ImageVectors.Importer` for more information about how to + use the functions in this module during maintenance. + """ + + alias Philomena.Images + alias Philomena.Images.Image + alias Philomena.Images.Thumbnailer + alias Philomena.ImageVectors.ImageVector + alias Philomena.Maintenance + alias Philomena.Repo + + alias PhilomenaMedia.Analyzers + alias PhilomenaMedia.Processors + alias PhilomenaQuery.Batch + alias PhilomenaQuery.Search + + alias Philomena.Repo + import Ecto.Query + + @doc """ + Computes and stores vectors of the given `type` for every image that has no vector of that type. + """ + @spec all_missing(String.t(), Keyword.t()) :: :ok + def all_missing(type \\ "full", opts \\ []) do + Image + |> from(as: :image) + |> where( + not exists( + where(ImageVector, [iv], iv.image_id == parent_as(:image).id and iv.type == ^type) + ) + ) + |> by_image_query(type, opts) + end + + @spec by_image_query(Ecto.Query.t(), String.t(), Keyword.t()) :: :ok + defp by_image_query(query, type, opts) do + max_concurrency = Keyword.get(opts, :max_concurrency, 4) + + # An empty result set has no id range to report progress over; return early + # instead of crashing on `nil.id`. + case Repo.one(limit(order_by(query, asc: :id), 1)) do + nil -> + :ok + + %{id: min} -> + %{id: max} = Repo.one(limit(order_by(query, desc: :id), 1)) + + query + |> Batch.query_batches(opts) + |> Task.async_stream( + fn query -> process_query(query, type, opts) end, + timeout: :infinity, + max_concurrency: max_concurrency + ) + |> Maintenance.log_progress("BatchProcessor/#{type}", min, max) + end + end + + @spec process_query(Ecto.Query.t(), String.t(), Keyword.t()) :: 
integer() + defp process_query(query, type, batch_opts) do + images = Repo.all(query) + last_id = Enum.max_by(images, & &1.id).id + + values = + Enum.flat_map(images, fn image -> + try do + [process_image(image, type)] + rescue + ex -> + IO.puts("While processing #{image.id}: #{inspect(ex)}") + IO.puts(Exception.format_stacktrace(__STACKTRACE__)) + [] + end + end) + + {_count, nil} = Repo.insert_all(ImageVector, values, on_conflict: :nothing) + + :ok = + query + |> preload(^Images.indexing_preloads()) + |> Search.reindex(Image, batch_opts) + + last_id + end + + @spec process_image(%Image{}, String.t()) :: map() + defp process_image(image = %Image{}, type) do + file = Thumbnailer.download_image_file(image) + + {:ok, analysis} = Analyzers.analyze_path(file) + features = Processors.features(analysis, file) + + %{ + image_id: image.id, + type: type, + features: features.features + } + end +end diff --git a/lib/philomena/image_vectors/image_vector.ex b/lib/philomena/image_vectors/image_vector.ex new file mode 100644 index 00000000..123f7015 --- /dev/null +++ b/lib/philomena/image_vectors/image_vector.ex @@ -0,0 +1,19 @@ +defmodule Philomena.ImageVectors.ImageVector do + use Ecto.Schema + import Ecto.Changeset + + alias Philomena.Images.Image + + schema "image_vectors" do + belongs_to :image, Image + field :type, :string + field :features, {:array, :float} + end + + @doc false + def changeset(image_vector, attrs) do + image_vector + |> cast(attrs, [:type, :features]) + |> validate_required([:type, :features]) + end +end diff --git a/lib/philomena/image_vectors/importer.ex b/lib/philomena/image_vectors/importer.ex new file mode 100644 index 00000000..3715feb6 --- /dev/null +++ b/lib/philomena/image_vectors/importer.ex @@ -0,0 +1,86 @@ +defmodule Philomena.ImageVectors.Importer do + @moduledoc """ + Import logic for binary files produced by the export function of + https://github.com/philomena-dev/philomena-ris-inference-toolkit. 
+ + Run the following commands in a long-running terminal, like screen or tmux. + The workflow for using the importer is as follows: + + 1. Use the batch inference toolkit to get the `features.bin`. + 2. Run `philomena eval 'Philomena.ImageVectors.Importer.import_from("/path/to/features.bin")'`. + 3. Backfill the remaining images: + `philomena eval 'Philomena.ImageVectors.BatchProcessor.all_missing("full", batch_size: 32)'` + 4. Downtime, delete and recreate the images index: + `philomena eval 'Philomena.SearchIndexer.recreate_reindex_schema_destructive!(Philomena.Images.Image)'`. + """ + + alias Philomena.ImageVectors.ImageVector + alias Philomena.Maintenance + alias Philomena.Repo + + # 4 bytes unsigned id + 768 floats per feature vector * 4 bytes per float + @row_size 4 + 768 * 4 + + @typedoc "A single feature row." + @type row :: %{ + image_id: integer(), + type: String.t(), + features: [float()] + } + + @spec import_from(Path.t(), String.t(), pos_integer()) :: :ok + def import_from(batch_inference_file, type \\ "full", max_concurrency \\ 4) do + {min, max} = get_min_and_max_id(batch_inference_file, type) + + batch_inference_file + |> File.stream!(@row_size) + |> Stream.chunk_every(1024) + |> Task.async_stream( + &process_chunk(&1, type), + timeout: :infinity, + max_concurrency: max_concurrency + ) + |> Maintenance.log_progress("Importer/#{type}", min, max) + end + + @spec process_chunk([binary()], String.t()) :: integer() + defp process_chunk(chunk, type) do + data = Enum.map(chunk, &unpack(&1, type)) + last_id = Enum.max_by(data, & &1.image_id).image_id + + {_count, nil} = Repo.insert_all(ImageVector, data, on_conflict: :nothing) + + last_id + end + + @spec unpack(binary(), String.t()) :: row() + defp unpack(row, type) do + # NOTE(review): both binary patterns were reconstructed from a garbled source; a + # little-endian u32 id followed by 768 little-endian f32 values is assumed, confirm + # against the exporter's output format. + <<image_id::little-unsigned-integer-size(32), packed::binary-size(3072)>> = row + features = for <<v::little-float-size(32) <- packed>>, do: v + + %{ + image_id: image_id, + type: type, + features: features + } + end + + @spec get_min_and_max_id(Path.t(), String.t()) :: {integer(), integer()} + defp get_min_and_max_id(path, type) do + stat = File.stat!(path) + last_row = 
stat.size - @row_size + + %{image_id: min} = get_single_row(path, 0, type) + %{image_id: max} = get_single_row(path, last_row, type) + + {min, max} + end + + @spec get_single_row(Path.t(), integer(), String.t()) :: row() + defp get_single_row(path, offset, type) do + path + |> File.stream!(@row_size, read_offset: offset) + |> Enum.at(0) + |> unpack(type) + end +end diff --git a/lib/philomena/images.ex b/lib/philomena/images.ex index af0ef79f..9c615bde 100644 --- a/lib/philomena/images.ex +++ b/lib/philomena/images.ex @@ -858,6 +858,7 @@ defmodule Philomena.Images do [ :gallery_interactions, + :vectors, sources: sources_query, user: user_query, favers: user_query, diff --git a/lib/philomena/images/image.ex b/lib/philomena/images/image.ex index e02356dd..bb7bda37 100644 --- a/lib/philomena/images/image.ex +++ b/lib/philomena/images/image.ex @@ -7,6 +7,7 @@ defmodule Philomena.Images.Image do alias Philomena.ImageVotes.ImageVote alias Philomena.ImageFaves.ImageFave alias Philomena.ImageHides.ImageHide + alias Philomena.ImageVectors.ImageVector alias Philomena.Images.Source alias Philomena.Images.Subscription alias Philomena.Users.User @@ -35,6 +36,7 @@ defmodule Philomena.Images.Image do has_many :subscriptions, Subscription has_many :source_changes, SourceChange, on_replace: :delete has_many :tag_changes, TagChange + has_many :vectors, ImageVector has_many :upvoters, through: [:upvotes, :user] has_many :downvoters, through: [:downvotes, :user] has_many :favers, through: [:faves, :user] diff --git a/lib/philomena/images/search_index.ex b/lib/philomena/images/search_index.ex index 35241ccd..55f06a12 100644 --- a/lib/philomena/images/search_index.ex +++ b/lib/philomena/images/search_index.ex @@ -11,6 +11,7 @@ defmodule Philomena.Images.SearchIndex do %{ settings: %{ index: %{ + knn: true, number_of_shards: 5, max_result_window: 10_000_000 } @@ -89,6 +90,26 @@ defmodule Philomena.Images.SearchIndex do namespace: %{type: "keyword"} } }, + vectors: %{ + type: "nested", + 
properties: %{ + f: %{ + type: "knn_vector", + dimension: 768, + data_type: "float", + mode: "on_disk", + method: %{ + name: "hnsw", + engine: "faiss", + space_type: "l2", + parameters: %{ + ef_construction: 128, + m: 16 + } + } + } + } + }, approved: %{type: "boolean"}, error_tag_count: %{type: "integer"}, rating_tag_count: %{type: "integer"}, @@ -160,6 +181,7 @@ defmodule Philomena.Images.SearchIndex do }, gallery_id: Enum.map(image.gallery_interactions, & &1.gallery_id), gallery_position: Map.new(image.gallery_interactions, &{&1.gallery_id, &1.position}), + vectors: image.vectors |> Enum.map(&%{f: &1.features}), favourited_by_users: image.favers |> Enum.map(&String.downcase(&1.name)), hidden_by_users: image.hiders |> Enum.map(&String.downcase(&1.name)), upvoters: image.upvoters |> Enum.map(&String.downcase(&1.name)), diff --git a/lib/philomena/images/thumbnailer.ex b/lib/philomena/images/thumbnailer.ex index b8be742b..e463688d 100644 --- a/lib/philomena/images/thumbnailer.ex +++ b/lib/philomena/images/thumbnailer.ex @@ -12,6 +12,7 @@ defmodule Philomena.Images.Thumbnailer do alias Philomena.DuplicateReports alias Philomena.ImageIntensities alias Philomena.ImagePurgeWorker + alias Philomena.ImageVectors alias Philomena.Images.Image alias Philomena.Repo @@ -105,6 +106,9 @@ defmodule Philomena.Images.Thumbnailer do defp apply_change(image, {:intensities, intensities}), do: ImageIntensities.create_image_intensity(image, intensities) + defp apply_change(image, {:features, features}), + do: ImageVectors.create_image_vector(image, features) + defp apply_change(image, {:replace_original, new_file}) do full = "full.#{image.image_format}" upload_file(image, new_file, full) @@ -139,7 +143,7 @@ defmodule Philomena.Images.Thumbnailer do |> Repo.update!() end - defp download_image_file(image) do + def download_image_file(image) do tempfile = Briefly.create!(extname: ".#{image.image_format}") path = Path.join(image_thumb_prefix(image), "full.#{image.image_format}") diff --git 
a/lib/philomena_media/features.ex b/lib/philomena_media/features.ex new file mode 100644 index 00000000..bb75472c --- /dev/null +++ b/lib/philomena_media/features.ex @@ -0,0 +1,51 @@ +defmodule PhilomenaMedia.Features do + @moduledoc """ + Features are a set of 768 weighted classification outputs produced from a + vision transformer (ViT). The individual classifications are arbitrary and + not meaningful to analyze, but the vectors can be used to compare similarity + between images using the cosine similarity measurement. + + Since cosine similarity is not a metric, the feature extractor substitutes L2 + distance over normalized vectors in its place; every vector that the extractor returns is normalized, + and traversing the k nearest neighbors in a vector space index will iterate + vectors in the same order as their cosine similarity. + """ + + alias PhilomenaMedia.Remote + + @type t :: %__MODULE__{ + features: [float()] + } + + defstruct [:features] + + @doc """ + Gets the features of the given image file. + + The image file must be in the PNG or JPEG format. + + > #### Info {: .info} + > + > Clients should prefer to use `PhilomenaMedia.Processors.features/2`, as it handles + > media files of any type supported by this library, not just PNG or JPEG. 
+ + ## Examples + + iex> Features.file("image.png") + {:ok, %Features{features: [0.03156396001577377, -0.04559657722711563, ...]}} + + iex> Features.file("nonexistent.jpg") + :error + + """ + @spec file(Path.t()) :: {:ok, t()} | :error + def file(input) do + case Remote.get_features(input) do + {:ok, features} -> + {:ok, %__MODULE__{features: features}} + + _error -> + :error + end + end +end diff --git a/lib/philomena_media/processors.ex b/lib/philomena_media/processors.ex index b23ba005..492400be 100644 --- a/lib/philomena_media/processors.ex +++ b/lib/philomena_media/processors.ex @@ -58,6 +58,7 @@ defmodule PhilomenaMedia.Processors do """ alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Processors.{Gif, Jpeg, Png, Svg, Webm} alias PhilomenaMedia.Mime @@ -185,6 +186,25 @@ defmodule PhilomenaMedia.Processors do processor(analysis.mime_type).post_process(analysis, file) end + @doc """ + Takes an analyzer result and file path and runs the appropriate processor's `features/2`, + returning the feature vector. + + This allows for generating feature vectors for file types that are not directly supported by + `m:PhilomenaMedia.Features`, and should be the preferred function to call when feature vectors + are needed. + + ## Example + + iex> PhilomenaMedia.Processors.features(%Result{...}, "video.webm") + %Features{features: [0.03156396001577377, -0.04559657722711563, ...]} + + """ + @spec features(Result.t(), Path.t()) :: Features.t() + def features(analysis, file) do + processor(analysis.mime_type).features(analysis, file) + end + @doc """ Takes an analyzer result and file path and runs the appropriate processor's `intensities/2`, returning the corner intensities. 
@@ -195,8 +215,8 @@ defmodule PhilomenaMedia.Processors do ## Example - iex> PhilomenaMedia.Processors.intensities(%Result{...}, "video.webm") - %Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064} + iex> PhilomenaMedia.Processors.intensities(%Result{...}, "video.webm") + %Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064} """ @spec intensities(Result.t(), Path.t()) :: Intensities.t() diff --git a/lib/philomena_media/processors/gif.ex b/lib/philomena_media/processors/gif.ex index 49756719..11391aec 100644 --- a/lib/philomena_media/processors/gif.ex +++ b/lib/philomena_media/processors/gif.ex @@ -1,6 +1,7 @@ defmodule PhilomenaMedia.Processors.Gif do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result alias PhilomenaMedia.Remote @@ -23,12 +24,14 @@ defmodule PhilomenaMedia.Processors.Gif do palette = palette(file) {:ok, intensities} = Intensities.file(preview) + {:ok, features} = Features.file(preview) scaled = Enum.flat_map(versions, &scale(palette, file, &1)) videos = generate_videos(file) [ intensities: intensities, + features: features, thumbnails: scaled ++ videos ++ [{:copy, preview, "rendered.png"}] ] end @@ -38,6 +41,12 @@ defmodule PhilomenaMedia.Processors.Gif do [replace_original: optimize(file)] end + @spec features(Result.t(), Path.t()) :: Features.t() + def features(analysis, file) do + {:ok, features} = Features.file(preview(analysis.duration, file)) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(analysis, file) do {:ok, intensities} = Intensities.file(preview(analysis.duration, file)) diff --git a/lib/philomena_media/processors/jpeg.ex b/lib/philomena_media/processors/jpeg.ex index 7cbec03d..776924c0 100644 --- a/lib/philomena_media/processors/jpeg.ex +++ b/lib/philomena_media/processors/jpeg.ex @@ -1,6 +1,7 @@ defmodule PhilomenaMedia.Processors.Jpeg do @moduledoc false + alias 
PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result alias PhilomenaMedia.Remote @@ -22,12 +23,14 @@ defmodule PhilomenaMedia.Processors.Jpeg do stripped = optimize(strip(file)) {:ok, intensities} = Intensities.file(stripped) + {:ok, features} = Features.file(stripped) scaled = Enum.flat_map(versions, &scale(stripped, &1)) [ replace_original: stripped, intensities: intensities, + features: features, thumbnails: scaled ] end @@ -35,6 +38,12 @@ defmodule PhilomenaMedia.Processors.Jpeg do @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() def post_process(_analysis, _file), do: [] + @spec features(Result.t(), Path.t()) :: Features.t() + def features(_analysis, file) do + {:ok, features} = Features.file(file) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(file) diff --git a/lib/philomena_media/processors/png.ex b/lib/philomena_media/processors/png.ex index 27c71c14..79f50aa0 100644 --- a/lib/philomena_media/processors/png.ex +++ b/lib/philomena_media/processors/png.ex @@ -1,6 +1,7 @@ defmodule PhilomenaMedia.Processors.Png do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result alias PhilomenaMedia.Remote @@ -19,11 +20,13 @@ defmodule PhilomenaMedia.Processors.Png do animated? = analysis.animated? 
{:ok, intensities} = Intensities.file(file) + {:ok, features} = Features.file(file) scaled = Enum.flat_map(versions, &scale(file, animated?, &1)) [ intensities: intensities, + features: features, thumbnails: scaled ] end @@ -38,6 +41,12 @@ defmodule PhilomenaMedia.Processors.Png do end end + @spec features(Result.t(), Path.t()) :: Features.t() + def features(_analysis, file) do + {:ok, features} = Features.file(file) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(file) diff --git a/lib/philomena_media/processors/processor.ex b/lib/philomena_media/processors/processor.ex index 8b9f568f..368d2d32 100644 --- a/lib/philomena_media/processors/processor.ex +++ b/lib/philomena_media/processors/processor.ex @@ -2,6 +2,7 @@ defmodule PhilomenaMedia.Processors.Processor do @moduledoc false alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Features alias PhilomenaMedia.Processors alias PhilomenaMedia.Intensities @@ -22,6 +23,11 @@ defmodule PhilomenaMedia.Processors.Processor do """ @callback post_process(Result.t(), Path.t()) :: Processors.edit_script() + @doc """ + Generate a feature vector for the given path. + """ + @callback features(Result.t(), Path.t()) :: Features.t() + @doc """ Generate corner intensities for the given path. 
""" diff --git a/lib/philomena_media/processors/svg.ex b/lib/philomena_media/processors/svg.ex index 0f9b6e6c..8a6140d9 100644 --- a/lib/philomena_media/processors/svg.ex +++ b/lib/philomena_media/processors/svg.ex @@ -1,6 +1,7 @@ defmodule PhilomenaMedia.Processors.Svg do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result alias PhilomenaMedia.Remote @@ -21,12 +22,14 @@ defmodule PhilomenaMedia.Processors.Svg do preview = preview(file) {:ok, intensities} = Intensities.file(preview) + {:ok, features} = Features.file(preview) scaled = Enum.flat_map(versions, &scale(preview, &1)) full = [{:copy, preview, "full.png"}] [ intensities: intensities, + features: features, thumbnails: scaled ++ full ++ [{:copy, preview, "rendered.png"}] ] end @@ -34,6 +37,12 @@ defmodule PhilomenaMedia.Processors.Svg do @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() def post_process(_analysis, _file), do: [] + @spec features(Result.t(), Path.t()) :: Features.t() + def features(_analysis, file) do + {:ok, features} = Features.file(preview(file)) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(preview(file)) diff --git a/lib/philomena_media/processors/webm.ex b/lib/philomena_media/processors/webm.ex index 852d5492..863a7dd6 100644 --- a/lib/philomena_media/processors/webm.ex +++ b/lib/philomena_media/processors/webm.ex @@ -1,6 +1,7 @@ defmodule PhilomenaMedia.Processors.Webm do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result alias PhilomenaMedia.Remote @@ -34,6 +35,7 @@ defmodule PhilomenaMedia.Processors.Webm do mp4 = scale_mp4_only(decoder, stripped, dimensions, dimensions) {:ok, intensities} = Intensities.file(preview) + {:ok, features} = Features.file(preview) scaled = Enum.flat_map(versions, &scale(decoder, stripped, duration, 
dimensions, &1)) mp4 = [{:copy, mp4, "full.mp4"}] @@ -41,6 +43,7 @@ defmodule PhilomenaMedia.Processors.Webm do [ replace_original: stripped, intensities: intensities, + features: features, thumbnails: scaled ++ mp4 ++ [{:copy, preview, "rendered.png"}] ] end @@ -48,6 +51,12 @@ defmodule PhilomenaMedia.Processors.Webm do @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() def post_process(_analysis, _file), do: [] + @spec features(Result.t(), Path.t()) :: Features.t() + def features(analysis, file) do + {:ok, features} = Features.file(preview(analysis.duration, file)) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(analysis, file) do {:ok, intensities} = Intensities.file(preview(analysis.duration, file)) diff --git a/priv/repo/migrations/20250109155442_create_image_vectors.exs b/priv/repo/migrations/20250109155442_create_image_vectors.exs new file mode 100644 index 00000000..251b86b3 --- /dev/null +++ b/priv/repo/migrations/20250109155442_create_image_vectors.exs @@ -0,0 +1,14 @@ +defmodule Philomena.Repo.Migrations.CreateImageVectors do + use Ecto.Migration + + def change do + # NB: this is normalized, the float array is not divisible + create table(:image_vectors) do + add :image_id, references(:images, on_delete: :delete_all), null: false + add :type, :string, null: false + add :features, {:array, :float}, null: false + end + + create unique_index(:image_vectors, [:image_id, :type]) + end +end diff --git a/priv/repo/structure.sql b/priv/repo/structure.sql index e449fbf4..9d3df616 100644 --- a/priv/repo/structure.sql +++ b/priv/repo/structure.sql @@ -2,12 +2,13 @@ -- PostgreSQL database dump -- --- Dumped from database version 16.4 --- Dumped by pg_dump version 16.6 +-- Dumped from database version 17.2 +-- Dumped by pg_dump version 17.2 SET statement_timeout = 0; SET lock_timeout = 0; SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; SET client_encoding = 'UTF8'; SET 
standard_conforming_strings = on; SELECT pg_catalog.set_config('search_path', '', false); @@ -959,6 +960,37 @@ CREATE TABLE public.image_taggings ( ); +-- +-- Name: image_vectors; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.image_vectors ( + id bigint NOT NULL, + image_id bigint NOT NULL, + type character varying(255) NOT NULL, + features double precision[] NOT NULL +); + + +-- +-- Name: image_vectors_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.image_vectors_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: image_vectors_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.image_vectors_id_seq OWNED BY public.image_vectors.id; + + -- -- Name: image_votes; Type: TABLE; Schema: public; Owner: - -- @@ -2365,6 +2397,13 @@ ALTER TABLE ONLY public.image_features ALTER COLUMN id SET DEFAULT nextval('publ ALTER TABLE ONLY public.image_intensities ALTER COLUMN id SET DEFAULT nextval('public.image_intensities_id_seq'::regclass); +-- +-- Name: image_vectors id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.image_vectors ALTER COLUMN id SET DEFAULT nextval('public.image_vectors_id_seq'::regclass); + + -- -- Name: images id; Type: DEFAULT; Schema: public; Owner: - -- @@ -2727,6 +2766,14 @@ ALTER TABLE ONLY public.image_intensities ADD CONSTRAINT image_intensities_pkey PRIMARY KEY (id); +-- +-- Name: image_vectors image_vectors_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.image_vectors + ADD CONSTRAINT image_vectors_pkey PRIMARY KEY (id); + + -- -- Name: images images_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -3192,6 +3239,13 @@ CREATE UNIQUE INDEX image_tag_locks_image_id_tag_id_index ON public.image_tag_lo CREATE INDEX image_tag_locks_tag_id_index ON public.image_tag_locks USING btree (tag_id); +-- +-- Name: image_vectors_image_id_type_index; Type: INDEX; Schema: public; 
Owner: - +-- + +CREATE UNIQUE INDEX image_vectors_image_id_type_index ON public.image_vectors USING btree (image_id, type); + + -- -- Name: images_hidden_from_users_approved_index; Type: INDEX; Schema: public; Owner: - -- @@ -5381,6 +5435,14 @@ ALTER TABLE ONLY public.image_tag_locks ADD CONSTRAINT image_tag_locks_tag_id_fkey FOREIGN KEY (tag_id) REFERENCES public.tags(id) ON DELETE CASCADE; +-- +-- Name: image_vectors image_vectors_image_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.image_vectors + ADD CONSTRAINT image_vectors_image_id_fkey FOREIGN KEY (image_id) REFERENCES public.images(id) ON DELETE CASCADE; + + -- -- Name: moderation_logs moderation_logs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - -- @@ -5448,3 +5510,4 @@ INSERT INTO public."schema_migrations" (version) VALUES (20220321173359); INSERT INTO public."schema_migrations" (version) VALUES (20240723122759); INSERT INTO public."schema_migrations" (version) VALUES (20240728191353); INSERT INTO public."schema_migrations" (version) VALUES (20241216165826); +INSERT INTO public."schema_migrations" (version) VALUES (20250109155442);