From 0ff502e6382aae716bb22fed51ef3a5708a6d8f7 Mon Sep 17 00:00:00 2001 From: Liam Date: Thu, 9 Jan 2025 17:56:28 -0500 Subject: [PATCH] Add feature-based reverse search interface --- lib/philomena/duplicate_reports.ex | 87 ++++++++++++++++++- .../api/json/search/reverse_controller.ex | 9 +- .../controllers/search/reverse_controller.ex | 6 +- lib/philomena_web/image_loader.ex | 21 +++-- .../templates/search/reverse/index.html.slime | 17 +--- 5 files changed, 113 insertions(+), 27 deletions(-) diff --git a/lib/philomena/duplicate_reports.ex b/lib/philomena/duplicate_reports.ex index a9cad67b..05aa54c3 100644 --- a/lib/philomena/duplicate_reports.ex +++ b/lib/philomena/duplicate_reports.ex @@ -9,6 +9,8 @@ defmodule Philomena.DuplicateReports do alias Ecto.Multi alias Philomena.Repo + alias PhilomenaMedia.Features + alias PhilomenaQuery.Search alias Philomena.DuplicateReports.DuplicateReport alias Philomena.DuplicateReports.SearchQuery alias Philomena.DuplicateReports.Uploader @@ -20,7 +22,7 @@ defmodule Philomena.DuplicateReports do source = Repo.preload(source, :intensity) {source.intensity, source.image_aspect_ratio} - |> find_duplicates(dist: 0.2) + |> find_duplicates_by_intensities(dist: 0.2) |> where([i, _it], i.id != ^source.id) |> Repo.all() |> Enum.map(fn target -> @@ -30,7 +32,77 @@ defmodule Philomena.DuplicateReports do end) end - def find_duplicates({intensities, aspect_ratio}, opts \\ []) do + def find_duplicates_by_features(features = %Features{}, filter, opts \\ []) do + min_score = Keyword.get(opts, :min_score, 0) + limit = Keyword.get(opts, :limit, 25) + + # TODO: many issues with efficient filtering using k-NN plugin, + # use post_filter to work around for the time being + # + # https://github.com/opensearch-project/k-NN/issues/2222 + # https://github.com/opensearch-project/k-NN/issues/2339 + # https://github.com/opensearch-project/k-NN/issues/2347 + + query = %{ + query: %{ + nested: %{ + path: "vectors", + query: %{ + knn: %{ + "vectors.f": %{ + vector: features.features, + k: 100 + } + } + } + } + }, + post_filter: filter, + min_score: min_score + } + + images = + Image + |> Search.search_definition(query, %{page_size: limit}) + |> Search.search_records(preload(Image, [:user, :sources, tags: :aliases])) + + images + |> Map.put(:total_entries, min(images.total_entries, limit)) + |> Map.put(:total_pages, min(images.total_pages, 1)) + end + + @doc """ + Executes the reverse image search query from parameters. + + ## Examples + + iex> execute_search_query_by_features(%{"image" => ...}) + {:ok, [%Image{...}, ....]} + + iex> execute_search_query_by_features(%{"image" => ...}) + {:error, %Ecto.Changeset{}} + + """ + def execute_search_query_by_features(filter, attrs \\ %{}) do + %SearchQuery{} + |> SearchQuery.changeset(attrs) + |> Uploader.analyze_upload(attrs) + |> Ecto.Changeset.apply_action(:create) + |> case do + {:ok, search_query} -> + images = + search_query + |> generate_features() + |> find_duplicates_by_features(filter, limit: search_query.limit) + + {:ok, images} + + error -> + error + end + end + + def find_duplicates_by_intensities({intensities, aspect_ratio}, opts \\ []) do aspect_dist = Keyword.get(opts, :aspect_dist, 0.05) limit = Keyword.get(opts, :limit, 10) dist = Keyword.get(opts, :dist, 0.25) @@ -71,7 +143,7 @@ defmodule Philomena.DuplicateReports do {:error, %Ecto.Changeset{}} """ - def execute_search_query(attrs \\ %{}) do + def execute_search_query_by_intensities(attrs \\ %{}) do %SearchQuery{} |> SearchQuery.changeset(attrs) |> Uploader.analyze_upload(attrs) @@ -85,7 +157,7 @@ defmodule Philomena.DuplicateReports do images = {intensities, aspect} - |> find_duplicates(dist: dist, aspect_dist: dist, limit: limit) + |> find_duplicates_by_intensities(dist: dist, aspect_dist: dist, limit: limit) |> preload([:user, :intensity, [:sources, tags: :aliases]]) |> Repo.paginate(page_size: 50) @@ -103,6 +175,13 @@ defmodule Philomena.DuplicateReports do PhilomenaMedia.Processors.intensities(analysis, file) end + defp generate_features(search_query) do + analysis = SearchQuery.to_analysis(search_query) + file = search_query.uploaded_image + + PhilomenaMedia.Processors.features(analysis, file) + end + @doc """ Returns an `%Ecto.Changeset{}` for tracking search query changes. diff --git a/lib/philomena_web/controllers/api/json/search/reverse_controller.ex b/lib/philomena_web/controllers/api/json/search/reverse_controller.ex index 4abe7560..1345d9f1 100644 --- a/lib/philomena_web/controllers/api/json/search/reverse_controller.ex +++ b/lib/philomena_web/controllers/api/json/search/reverse_controller.ex @@ -1,6 +1,7 @@ defmodule PhilomenaWeb.Api.Json.Search.ReverseController do use PhilomenaWeb, :controller + alias PhilomenaWeb.ImageLoader alias Philomena.DuplicateReports alias Philomena.Interactions @@ -9,12 +10,12 @@ defmodule PhilomenaWeb.Api.Json.Search.ReverseController do def create(conn, %{"image" => image_params}) do user = conn.assigns.current_user + image_params = Map.put(image_params, "limit", conn.params["limit"]) {images, total} = - image_params - |> Map.put("distance", conn.params["distance"]) - |> Map.put("limit", conn.params["limit"]) - |> DuplicateReports.execute_search_query() + conn + |> ImageLoader.reverse_filter() + |> DuplicateReports.execute_search_query_by_features(image_params) |> case do {:ok, images} -> {images, images.total_entries} diff --git a/lib/philomena_web/controllers/search/reverse_controller.ex b/lib/philomena_web/controllers/search/reverse_controller.ex index 0938642a..a3e803ab 100644 --- a/lib/philomena_web/controllers/search/reverse_controller.ex +++ b/lib/philomena_web/controllers/search/reverse_controller.ex @@ -1,6 +1,7 @@ defmodule PhilomenaWeb.Search.ReverseController do use PhilomenaWeb, :controller + alias PhilomenaWeb.ImageLoader alias Philomena.DuplicateReports.SearchQuery alias Philomena.DuplicateReports alias Philomena.Interactions @@ -14,7 +15,10 @@ defmodule PhilomenaWeb.Search.ReverseController do def create(conn, %{"image" => image_params}) when is_map(image_params) and image_params != %{} do - case DuplicateReports.execute_search_query(image_params) do + conn + |> ImageLoader.reverse_filter() + |> DuplicateReports.execute_search_query_by_features(image_params) + |> case do {:ok, images} -> changeset = DuplicateReports.change_search_query(%SearchQuery{}) interactions = Interactions.user_interactions(images, conn.assigns.current_user) diff --git a/lib/philomena_web/image_loader.ex b/lib/philomena_web/image_loader.ex index 81271e05..074a13be 100644 --- a/lib/philomena_web/image_loader.ex +++ b/lib/philomena_web/image_loader.ex @@ -29,10 +29,6 @@ defmodule PhilomenaWeb.ImageLoader do |> load_tags() |> render_bodies(conn) - user = conn.assigns.current_user - filter = conn.assigns.compiled_filter - filters = create_filters(conn, user, filter) - %{query: query, sorts: sort} = sorts.(body) definition = @@ -42,7 +38,7 @@ defmodule PhilomenaWeb.ImageLoader do query: %{ bool: %{ must: query, - must_not: filters + must_not: filters(conn) } }, sort: sort @@ -53,6 +49,21 @@ defmodule PhilomenaWeb.ImageLoader do {definition, tags} end + def reverse_filter(conn) do + %{ + bool: %{ + must_not: filters(conn) + } + } + end + + defp filters(conn) do + user = conn.assigns.current_user + filter = conn.assigns.compiled_filter + + create_filters(conn, user, filter) + end + defp create_filters(conn, user, filter) do show_hidden? = Canada.Can.can?(user, :hide, %Image{}) del = conn.params["del"] diff --git a/lib/philomena_web/templates/search/reverse/index.html.slime b/lib/philomena_web/templates/search/reverse/index.html.slime index 7e714600..6390fdce 100644 --- a/lib/philomena_web/templates/search/reverse/index.html.slime +++ b/lib/philomena_web/templates/search/reverse/index.html.slime @@ -3,11 +3,9 @@ h1 Reverse Search = form_for @changeset, ~p"/search/reverse", [multipart: true, as: :image], fn f -> .walloftext p - ' Basic image similarity search. Finds uploaded images similar to the one - ' provided based on simple intensities and uses the median frame of - ' animations; very low contrast images (such as sketches) will produce - ' poor results and, regardless of contrast, results may include seemingly - ' random images that look very different. + ' Advanced image similarity search. Finds uploaded images similar to the one + ' provided based on perceptual features and uses the median frame of + ' animations. .image-other #js-image-upload-previews @@ -26,14 +24,7 @@ h1 Reverse Search .field-error-js.hidden.js-scraper - h4 Optional settings - - .field - = label f, :distance, "Match distance (suggested values: between 0.2 and 0.5)" - br - = number_input f, :distance, min: 0, max: 1, step: 0.01, class: "input" - = error_tag f, :distance - + = hidden_input f, :limit, value: @conn.assigns.image_pagination.page_size = error_tag f, :limit .field