Add feature-based reverse search interface

This commit is contained in:
Liam 2025-01-09 17:56:28 -05:00
parent 12d3809d37
commit 0ff502e638
5 changed files with 113 additions and 27 deletions

View file

@ -9,6 +9,8 @@ defmodule Philomena.DuplicateReports do
alias Ecto.Multi
alias Philomena.Repo
alias PhilomenaMedia.Features
alias PhilomenaQuery.Search
alias Philomena.DuplicateReports.DuplicateReport
alias Philomena.DuplicateReports.SearchQuery
alias Philomena.DuplicateReports.Uploader
@ -20,7 +22,7 @@ defmodule Philomena.DuplicateReports do
source = Repo.preload(source, :intensity)
{source.intensity, source.image_aspect_ratio}
|> find_duplicates(dist: 0.2)
|> find_duplicates_by_intensities(dist: 0.2)
|> where([i, _it], i.id != ^source.id)
|> Repo.all()
|> Enum.map(fn target ->
@ -30,7 +32,77 @@ defmodule Philomena.DuplicateReports do
end)
end
def find_duplicates({intensities, aspect_ratio}, opts \\ []) do
@doc """
Finds images whose indexed feature vectors are nearest to `features`.

Builds a nested k-NN query against the `vectors.f` field using
`features.features` as the query vector, applies `filter` as a
`post_filter`, and loads the matching `Image` records with their
`:user`, `:sources` and `tags: :aliases` associations preloaded.

## Options

  * `:min_score` - value sent as the query's `min_score` (default `0`)
  * `:limit` - page size for the search (default `25`); an explicit
    `nil` is treated the same as an omitted option

Returns the result of `Search.search_records/2` with `total_entries`
capped at the limit and `total_pages` capped at 1, since only a single
page of matches is ever produced.
"""
def find_duplicates_by_features(%Features{} = features, filter, opts \\ []) do
  min_score = Keyword.get(opts, :min_score, 0)

  # Callers pass `limit: search_query.limit`, so the key may be present
  # with a nil value; `Keyword.get/3` would not apply its default then.
  limit = Keyword.get(opts, :limit) || 25

  # TODO: many issues with efficient filtering using k-NN plugin,
  # use post_filter to work around for the time being
  #
  # https://github.com/opensearch-project/k-NN/issues/2222
  # https://github.com/opensearch-project/k-NN/issues/2339
  # https://github.com/opensearch-project/k-NN/issues/2347
  query = %{
    query: %{
      nested: %{
        path: "vectors",
        query: %{
          knn: %{
            "vectors.f": %{
              vector: features.features,
              k: 100
            }
          }
        }
      }
    },
    post_filter: filter,
    min_score: min_score
  }

  images =
    Image
    |> Search.search_definition(query, %{page_size: limit})
    |> Search.search_records(preload(Image, [:user, :sources, tags: :aliases]))

  # Only one page of results is exposed, so clamp the pagination metadata.
  %{
    images
    | total_entries: min(images.total_entries, limit),
      total_pages: min(images.total_pages, 1)
  }
end
@doc """
Executes the feature-based reverse image search query from parameters.

`filter` is the search filter handed to `find_duplicates_by_features/3`
(applied there as a post-filter); `attrs` are the search parameters used
to build and validate the `%SearchQuery{}`, including the uploaded image.

On success, returns `{:ok, images}` where `images` is the value returned
by `find_duplicates_by_features/3`. If the changeset is invalid, the
error tuple from `Ecto.Changeset.apply_action/2` is returned unchanged.

## Examples

    iex> execute_search_query_by_features(filter, %{"image" => ...})
    {:ok, images}

    iex> execute_search_query_by_features(filter, %{"image" => ...})
    {:error, %Ecto.Changeset{}}

"""
def execute_search_query_by_features(filter, attrs \\ %{}) do
  validated =
    %SearchQuery{}
    |> SearchQuery.changeset(attrs)
    |> Uploader.analyze_upload(attrs)
    |> Ecto.Changeset.apply_action(:create)

  # Any non-{:ok, _} result falls through `with` and is returned as-is.
  with {:ok, search_query} <- validated do
    images =
      search_query
      |> generate_features()
      |> find_duplicates_by_features(filter, limit: search_query.limit)

    {:ok, images}
  end
end
def find_duplicates_by_intensities({intensities, aspect_ratio}, opts \\ []) do
aspect_dist = Keyword.get(opts, :aspect_dist, 0.05)
limit = Keyword.get(opts, :limit, 10)
dist = Keyword.get(opts, :dist, 0.25)
@ -71,7 +143,7 @@ defmodule Philomena.DuplicateReports do
{:error, %Ecto.Changeset{}}
"""
def execute_search_query(attrs \\ %{}) do
def execute_search_query_by_intensities(attrs \\ %{}) do
%SearchQuery{}
|> SearchQuery.changeset(attrs)
|> Uploader.analyze_upload(attrs)
@ -85,7 +157,7 @@ defmodule Philomena.DuplicateReports do
images =
{intensities, aspect}
|> find_duplicates(dist: dist, aspect_dist: dist, limit: limit)
|> find_duplicates_by_intensities(dist: dist, aspect_dist: dist, limit: limit)
|> preload([:user, :intensity, [:sources, tags: :aliases]])
|> Repo.paginate(page_size: 50)
@ -103,6 +175,13 @@ defmodule Philomena.DuplicateReports do
PhilomenaMedia.Processors.intensities(analysis, file)
end
# Extracts features for the search query's uploaded image by handing the
# query's analysis and uploaded file to the media processors.
defp generate_features(search_query) do
  search_query
  |> SearchQuery.to_analysis()
  |> PhilomenaMedia.Processors.features(search_query.uploaded_image)
end
@doc """
Returns an `%Ecto.Changeset{}` for tracking search query changes.

View file

@ -1,6 +1,7 @@
defmodule PhilomenaWeb.Api.Json.Search.ReverseController do
use PhilomenaWeb, :controller
alias PhilomenaWeb.ImageLoader
alias Philomena.DuplicateReports
alias Philomena.Interactions
@ -9,12 +10,12 @@ defmodule PhilomenaWeb.Api.Json.Search.ReverseController do
def create(conn, %{"image" => image_params}) do
user = conn.assigns.current_user
image_params = Map.put(image_params, "limit", conn.params["limit"])
{images, total} =
image_params
|> Map.put("distance", conn.params["distance"])
|> Map.put("limit", conn.params["limit"])
|> DuplicateReports.execute_search_query()
conn
|> ImageLoader.reverse_filter()
|> DuplicateReports.execute_search_query_by_features(image_params)
|> case do
{:ok, images} ->
{images, images.total_entries}

View file

@ -1,6 +1,7 @@
defmodule PhilomenaWeb.Search.ReverseController do
use PhilomenaWeb, :controller
alias PhilomenaWeb.ImageLoader
alias Philomena.DuplicateReports.SearchQuery
alias Philomena.DuplicateReports
alias Philomena.Interactions
@ -14,7 +15,10 @@ defmodule PhilomenaWeb.Search.ReverseController do
def create(conn, %{"image" => image_params})
when is_map(image_params) and image_params != %{} do
case DuplicateReports.execute_search_query(image_params) do
conn
|> ImageLoader.reverse_filter()
|> DuplicateReports.execute_search_query_by_features(image_params)
|> case do
{:ok, images} ->
changeset = DuplicateReports.change_search_query(%SearchQuery{})
interactions = Interactions.user_interactions(images, conn.assigns.current_user)

View file

@ -29,10 +29,6 @@ defmodule PhilomenaWeb.ImageLoader do
|> load_tags()
|> render_bodies(conn)
user = conn.assigns.current_user
filter = conn.assigns.compiled_filter
filters = create_filters(conn, user, filter)
%{query: query, sorts: sort} = sorts.(body)
definition =
@ -42,7 +38,7 @@ defmodule PhilomenaWeb.ImageLoader do
query: %{
bool: %{
must: query,
must_not: filters
must_not: filters(conn)
}
},
sort: sort
@ -53,6 +49,21 @@ defmodule PhilomenaWeb.ImageLoader do
{definition, tags}
end
@doc """
Builds the filter clause for reverse search from the connection.

Returns a `bool` query map whose `must_not` holds the filters derived
from `conn`.
"""
def reverse_filter(conn) do
  excluded = filters(conn)

  %{bool: %{must_not: excluded}}
end
# Builds the filter list for the connection from its assigned current
# user and compiled filter.
defp filters(conn) do
  assigns = conn.assigns

  create_filters(conn, assigns.current_user, assigns.compiled_filter)
end
defp create_filters(conn, user, filter) do
show_hidden? = Canada.Can.can?(user, :hide, %Image{})
del = conn.params["del"]

View file

@ -3,11 +3,9 @@ h1 Reverse Search
= form_for @changeset, ~p"/search/reverse", [multipart: true, as: :image], fn f ->
.walloftext
p
' Basic image similarity search. Finds uploaded images similar to the one
' provided based on simple intensities and uses the median frame of
' animations; very low contrast images (such as sketches) will produce
' poor results and, regardless of contrast, results may include seemingly
' random images that look very different.
' Advanced image similarity search. Finds uploaded images similar to the one
' provided based on perceptual features and uses the median frame of
' animations.
.image-other
#js-image-upload-previews
@ -26,14 +24,7 @@ h1 Reverse Search
.field-error-js.hidden.js-scraper
h4 Optional settings
.field
= label f, :distance, "Match distance (suggested values: between 0.2 and 0.5)"
br
= number_input f, :distance, min: 0, max: 1, step: 0.01, class: "input"
= error_tag f, :distance
= hidden_input f, :limit, value: @conn.assigns.image_pagination.page_size
= error_tag f, :limit
.field