philomena/lib/philomena/duplicate_reports.ex

362 lines
10 KiB
Elixir
Raw Normal View History

2019-08-28 12:57:06 -04:00
defmodule Philomena.DuplicateReports do
@moduledoc """
The DuplicateReports context.
"""
import Philomena.DuplicateReports.Power
2019-08-28 12:57:06 -04:00
import Ecto.Query, warn: false
alias Ecto.Multi
2019-08-28 12:57:06 -04:00
alias Philomena.Repo
alias Philomena.DuplicateReports.DuplicateReport
alias Philomena.DuplicateReports.SearchQuery
alias Philomena.DuplicateReports.Uploader
2019-11-26 20:45:57 -05:00
alias Philomena.ImageIntensities.ImageIntensity
alias Philomena.Images.Image
2019-12-08 23:41:35 -05:00
alias Philomena.Images
2019-11-26 20:45:57 -05:00
2025-02-05 14:18:56 -05:00
@doc """
Creates automated duplicate reports for `source` against perceptually similar images.

The `:intensity` association of `source` is preloaded, then `find_duplicates/2`
is queried with `dist: 0.2` using the source's intensities and aspect ratio.
The source image itself is excluded from the candidates, and one report is
created per remaining candidate with an empty attribution map.

Returns the list of `create_duplicate_report/4` results, one per candidate.

NOTE(review): `source.intensity` is passed on unchecked; if the association is
`nil`, building the query will raise. Confirm callers guarantee it is present.

## Examples

    iex> generate_reports(source_image)
    [{:ok, %DuplicateReport{}}, ...]

"""
def generate_reports(source) do
  source = Repo.preload(source, :intensity)
  source_id = source.id

  candidates =
    {source.intensity, source.image_aspect_ratio}
    |> find_duplicates(dist: 0.2)
    # Never report an image as a duplicate of itself.
    |> where([image, _intensity], image.id != ^source_id)
    |> Repo.all()

  for target <- candidates do
    create_duplicate_report(source, target, %{}, %{
      "reason" => "Automated Perceptual dedupe match"
    })
  end
end
2019-08-28 12:57:06 -04:00
2025-02-05 14:18:56 -05:00
@doc """
Builds a query for images whose intensities and aspect ratio are close to the given ones.

Takes a `{intensities, aspect_ratio}` tuple, where `intensities` exposes the
`nw`, `ne`, `sw` and `se` fields, plus an optional keyword list:

  - `:aspect_dist` - maximum aspect ratio difference (default: `0.05`)
  - `:limit` - maximum number of results (default: `10`)
  - `:dist` - maximum intensity difference per channel (default: `0.25`);
    it is multiplied by 3 before being applied to each quadrant

Results are ordered by the sum of squared differences over the four quadrants
and the aspect ratio, closest first. The query is returned unexecuted.

## Examples

    iex> find_duplicates({%{nw: 0.5, ne: 0.5, sw: 0.5, se: 0.5}, 1.0})
    #Ecto.Query<...>

    iex> find_duplicates({intensities, ratio}, dist: 0.3, limit: 20)
    #Ecto.Query<...>

"""
def find_duplicates({intensities, aspect_ratio}, opts \\ []) do
  aspect_dist = Keyword.get(opts, :aspect_dist, 0.05)
  max_results = Keyword.get(opts, :limit, 10)
  # The per-channel distance is applied once for each color channel.
  dist = Keyword.get(opts, :dist, 0.25) * 3

  nw = intensities.nw
  ne = intensities.ne
  sw = intensities.sw
  se = intensities.se

  Image
  |> join(:inner, [i], it in ImageIntensity, on: it.image_id == i.id)
  |> where([_i, it], it.nw >= ^(nw - dist) and it.nw <= ^(nw + dist))
  |> where([_i, it], it.ne >= ^(ne - dist) and it.ne <= ^(ne + dist))
  |> where([_i, it], it.sw >= ^(sw - dist) and it.sw <= ^(sw + dist))
  |> where([_i, it], it.se >= ^(se - dist) and it.se <= ^(se + dist))
  |> where(
    [i, _it],
    i.image_aspect_ratio >= ^(aspect_ratio - aspect_dist) and
      i.image_aspect_ratio <= ^(aspect_ratio + aspect_dist)
  )
  |> order_by(
    [i, it],
    asc:
      power(it.nw - ^nw, 2) +
        power(it.ne - ^ne, 2) +
        power(it.sw - ^sw, 2) +
        power(it.se - ^se, 2) +
        power(i.image_aspect_ratio - ^aspect_ratio, 2)
  )
  |> limit(^max_results)
end
@doc """
Runs a reverse image search from request parameters.

The parameters are cast into a `SearchQuery` changeset, passed through
`Uploader.analyze_upload/2` and applied with the `:create` action. On success
the intensities are computed and `find_duplicates/2` is queried, using the
query's `distance` for both `:dist` and `:aspect_dist` and its `limit` for
`:limit`. The result is fetched through `Repo.paginate/2` with a page size of 50.

Returns `{:ok, page}` with the paginated images, or passes through whatever
`Ecto.Changeset.apply_action/2` returned when the parameters are invalid.

## Examples

    iex> execute_search_query(%{"image" => ..., "distance" => "0.25"})
    {:ok, images_page}

    iex> execute_search_query(%{"image" => ..., "distance" => "asdf"})
    {:error, %Ecto.Changeset{}}

"""
def execute_search_query(attrs \\ %{}) do
  changeset =
    %SearchQuery{}
    |> SearchQuery.changeset(attrs)
    |> Uploader.analyze_upload(attrs)

  with {:ok, search_query} <- Ecto.Changeset.apply_action(changeset, :create) do
    images =
      {generate_intensities(search_query), search_query.image_aspect_ratio}
      |> find_duplicates(
        dist: search_query.distance,
        aspect_dist: search_query.distance,
        limit: search_query.limit
      )
      |> preload([:user, :intensity, [:sources, tags: :aliases]])
      |> Repo.paginate(page_size: 50)

    {:ok, images}
  end
end
# Computes the intensities of the uploaded image attached to a search query by
# converting the query to an analysis and handing both to the media processors.
defp generate_intensities(search_query) do
  search_query
  |> SearchQuery.to_analysis()
  |> PhilomenaMedia.Processors.intensities(search_query.uploaded_image)
end
@doc """
Builds an `%Ecto.Changeset{}` for the given search query without applying any changes.

## Examples

    iex> change_search_query(search_query)
    %Ecto.Changeset{source: %SearchQuery{}}

"""
def change_search_query(%SearchQuery{} = search_query),
  do: SearchQuery.changeset(search_query)
2019-08-28 12:57:06 -04:00
@doc """
Fetches a single duplicate report by its primary key.

Raises `Ecto.NoResultsError` when no duplicate report has the given id.

## Examples

    iex> get_duplicate_report!(123)
    %DuplicateReport{}

    iex> get_duplicate_report!(456)
    ** (Ecto.NoResultsError)

"""
def get_duplicate_report!(id) do
  Repo.get!(DuplicateReport, id)
end
@doc """
Creates a duplicate report stating that `source` is a duplicate of `target`.

The report is seeded with the ids of both images, run through
`DuplicateReport.creation_changeset/3` with `attrs` and `attribution`, and
inserted. `attrs` defaults to an empty map.

## Examples

    iex> create_duplicate_report(source, target, attribution, %{"reason" => "same image"})
    {:ok, %DuplicateReport{}}

    iex> create_duplicate_report(source, target, attribution, %{"reason" => bad_value})
    {:error, %Ecto.Changeset{}}

"""
def create_duplicate_report(source, target, attribution, attrs \\ %{}) do
  report = %DuplicateReport{image_id: source.id, duplicate_of_image_id: target.id}
  changeset = DuplicateReport.creation_changeset(report, attrs, attribution)

  Repo.insert(changeset)
end
2025-02-05 14:18:56 -05:00
@doc """
Accepts a duplicate report and merges the reported image into its target.

Takes an optional `Ecto.Multi` (a new one is started when `nil`), the report to
accept and the accepting user. The multi is extended with:

  - `:duplicate_report` - the report updated via `DuplicateReport.accept_changeset/2`
  - `:other_reports` - every other report between the same two images, in
    either direction, set to the `"rejected"` state

and is then handed to `Images.merge_image/4` together with the report's image,
the image it duplicates, and the user. The return value is whatever
`Images.merge_image/4` returns.

## Examples

    iex> accept_duplicate_report(nil, duplicate_report, user)
    {:ok, %{duplicate_report: %DuplicateReport{}, ...}}

"""
def accept_duplicate_report(multi \\ nil, %DuplicateReport{} = duplicate_report, user) do
  duplicate_report = Repo.preload(duplicate_report, [:image, :duplicate_of_image])

  report_id = duplicate_report.id
  image_id = duplicate_report.image_id
  target_id = duplicate_report.duplicate_of_image_id

  # Every other report linking the same pair of images, regardless of direction.
  sibling_reports =
    from dr in DuplicateReport,
      where:
        (dr.image_id == ^image_id and dr.duplicate_of_image_id == ^target_id) or
          (dr.image_id == ^target_id and dr.duplicate_of_image_id == ^image_id),
      where: dr.id != ^report_id,
      update: [set: [state: "rejected"]]

  changeset = DuplicateReport.accept_changeset(duplicate_report, user)

  (multi || Multi.new())
  |> Multi.update(:duplicate_report, changeset)
  |> Multi.update_all(:other_reports, sibling_reports, [])
  |> Images.merge_image(duplicate_report.image, duplicate_report.duplicate_of_image, user)
end
2019-08-28 12:57:06 -04:00
2025-02-05 14:18:56 -05:00
@doc """
Accepts a duplicate report in the opposite direction, so the target becomes the duplicate.

Looks up an existing report with the two images swapped. When none exists, a
new one is inserted for `user`, with the original reason followed by
`"(Reverse accepted)"` on a new line. The original report is then rejected
inside a new `Ecto.Multi`, which is passed to `accept_duplicate_report/3`
along with the reversed report.

NOTE(review): the reversed report is inserted with `Repo.insert!/1` before the
multi is built, so it is not part of that multi. Confirm this is intended.

## Examples

    iex> accept_reverse_duplicate_report(duplicate_report, user)
    {:ok, %{duplicate_report: %DuplicateReport{}, ...}}

"""
def accept_reverse_duplicate_report(%DuplicateReport{} = duplicate_report, user) do
  existing_reverse =
    DuplicateReport
    |> where(
      duplicate_of_image_id: ^duplicate_report.image_id,
      image_id: ^duplicate_report.duplicate_of_image_id
    )
    |> limit(1)
    |> Repo.one()

  reversed_report =
    case existing_reverse do
      nil ->
        Repo.insert!(
          DuplicateReport.changeset(
            %DuplicateReport{
              image_id: duplicate_report.duplicate_of_image_id,
              duplicate_of_image_id: duplicate_report.image_id,
              reason: "#{duplicate_report.reason}\n(Reverse accepted)",
              user_id: user.id
            },
            %{}
          )
        )

      found ->
        found
    end

  reject_original = fn _repo, _changes ->
    reject_duplicate_report(duplicate_report, user)
  end

  Multi.new()
  |> Multi.run(:reject_duplicate_report, reject_original)
  |> accept_duplicate_report(reversed_report, user)
end
2025-02-05 14:18:56 -05:00
@doc """
Claims a duplicate report for `user`.

Builds the change with `DuplicateReport.claim_changeset/2` and persists it.

## Examples

    iex> claim_duplicate_report(duplicate_report, user)
    {:ok, %DuplicateReport{}}

"""
def claim_duplicate_report(%DuplicateReport{} = duplicate_report, user) do
  changeset = DuplicateReport.claim_changeset(duplicate_report, user)

  Repo.update(changeset)
end
2025-02-05 14:18:56 -05:00
@doc """
Releases the claim on a duplicate report.

Builds the change with `DuplicateReport.unclaim_changeset/1` and persists it.

## Examples

    iex> unclaim_duplicate_report(duplicate_report)
    {:ok, %DuplicateReport{}}

"""
def unclaim_duplicate_report(%DuplicateReport{} = duplicate_report) do
  changeset = DuplicateReport.unclaim_changeset(duplicate_report)

  Repo.update(changeset)
end
2025-02-05 14:18:56 -05:00
@doc """
Rejects a duplicate report on behalf of `user`.

Builds the change with `DuplicateReport.reject_changeset/2` and persists it.
Presumably that changeset marks the report as rejected and records the user;
see the schema module for the exact fields.

## Examples

    iex> reject_duplicate_report(duplicate_report, user)
    {:ok, %DuplicateReport{}}

"""
def reject_duplicate_report(%DuplicateReport{} = duplicate_report, user) do
  changeset = DuplicateReport.reject_changeset(duplicate_report, user)

  Repo.update(changeset)
end
@doc """
Deletes the given duplicate report.

## Examples

    iex> delete_duplicate_report(duplicate_report)
    {:ok, %DuplicateReport{}}

    iex> delete_duplicate_report(stale_duplicate_report)
    {:error, %Ecto.Changeset{}}

"""
def delete_duplicate_report(%DuplicateReport{} = duplicate_report),
  do: Repo.delete(duplicate_report)
@doc """
Builds an `%Ecto.Changeset{}` for the given duplicate report with no changes applied.

## Examples

    iex> change_duplicate_report(duplicate_report)
    %Ecto.Changeset{source: %DuplicateReport{}}

"""
def change_duplicate_report(%DuplicateReport{} = duplicate_report),
  do: DuplicateReport.changeset(duplicate_report, %{})
2019-12-04 18:15:54 -05:00
2025-02-05 14:18:56 -05:00
@doc """
Counts the duplicate reports in the `"open"` state.

The count is only computed when `user` is allowed to `:index` duplicate
reports; otherwise `nil` is returned.

## Examples

    iex> count_duplicate_reports(admin)
    42

    iex> count_duplicate_reports(user)
    nil

"""
def count_duplicate_reports(user) do
  # An `if` without `else` evaluates to nil when the check fails.
  if Canada.Can.can?(user, :index, DuplicateReport) do
    open_reports = where(DuplicateReport, state: "open")

    Repo.aggregate(open_reports, :count, :id)
  end
end
2019-08-28 12:57:06 -04:00
end