mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-27 13:47:58 +01:00
Local autocomplete (#151)
This commit is contained in:
parent
715506352c
commit
dadc2f1585
10 changed files with 374 additions and 0 deletions
|
@ -2,6 +2,9 @@
|
||||||
* Autocomplete.
|
* Autocomplete.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import { LocalAutocompleter } from 'utils/local-autocompleter';
|
||||||
|
import { handleError } from 'utils/requests';
|
||||||
|
|
||||||
const cache = {};
|
const cache = {};
|
||||||
let inputField, originalTerm;
|
let inputField, originalTerm;
|
||||||
|
|
||||||
|
@ -122,8 +125,23 @@ function getSuggestions(term) {
|
||||||
function listenAutocomplete() {
|
function listenAutocomplete() {
|
||||||
let timeout;
|
let timeout;
|
||||||
|
|
||||||
|
/** @type {LocalAutocompleter} */
|
||||||
|
let localAc = null;
|
||||||
|
let localFetched = false;
|
||||||
|
|
||||||
|
document.addEventListener('focusin', fetchLocalAutocomplete);
|
||||||
|
|
||||||
document.addEventListener('input', event => {
|
document.addEventListener('input', event => {
|
||||||
removeParent();
|
removeParent();
|
||||||
|
fetchLocalAutocomplete(event);
|
||||||
|
|
||||||
|
if (localAc !== null && 'ac' in event.target.dataset) {
|
||||||
|
inputField = event.target;
|
||||||
|
originalTerm = inputField.value;
|
||||||
|
|
||||||
|
const suggestions = localAc.topK(inputField.value, 5).map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name }));
|
||||||
|
return showAutocomplete(suggestions, originalTerm, event.target);
|
||||||
|
}
|
||||||
|
|
||||||
window.clearTimeout(timeout);
|
window.clearTimeout(timeout);
|
||||||
// Use a timeout to delay requests until the user has stopped typing
|
// Use a timeout to delay requests until the user has stopped typing
|
||||||
|
@ -150,6 +168,16 @@ function listenAutocomplete() {
|
||||||
document.addEventListener('click', event => {
|
document.addEventListener('click', event => {
|
||||||
if (event.target && event.target !== inputField) removeParent();
|
if (event.target && event.target !== inputField) removeParent();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Kick off the one-time download of the compiled autocomplete file.
 *
 * Does nothing once a download has been started, or when the event target
 * has no `ac` entry in its dataset. On success the response body is wrapped
 * in a LocalAutocompleter and stored in `localAc`.
 *
 * @param {Event} event
 */
function fetchLocalAutocomplete(event) {
  if (localFetched) return;

  const { dataset } = event.target;
  if (!dataset || !('ac' in dataset)) return;

  // Flag is set before the request resolves so later events do not start another one
  localFetched = true;

  const request = fetch('/autocomplete/compiled', { credentials: 'omit', cache: 'force-cache' });

  request
    .then(handleError)
    .then(response => response.arrayBuffer())
    .then(buffer => {
      localAc = new LocalAutocompleter(buffer);
    });
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export { listenAutocomplete };
|
export { listenAutocomplete };
|
||||||
|
|
134
assets/js/utils/local-autocompleter.js
Normal file
134
assets/js/utils/local-autocompleter.js
Normal file
|
@ -0,0 +1,134 @@
|
||||||
|
//@ts-check
|
||||||
|
/*
|
||||||
|
* Client-side tag completion.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @typedef {object} Result
|
||||||
|
* @property {string} name
|
||||||
|
* @property {number} imageCount
|
||||||
|
* @property {number[]} associations
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* See lib/philomena/autocomplete.ex for binary structure details.
|
||||||
|
*
|
||||||
|
* A binary blob is used to avoid the creation of large amounts of garbage on
|
||||||
|
* the JS heap and speed up the execution of the search.
|
||||||
|
*/
|
||||||
|
export class LocalAutocompleter {
  /**
   * Build a new local autocompleter.
   *
   * The final 12 bytes of the buffer are read as three little-endian uint32
   * values, in order: format version, byte offset of the reference table,
   * and number of tags.
   *
   * @param {ArrayBuffer} backingStore
   * @throws {Error} If the format version stored in the footer is not 1.
   */
  constructor(backingStore) {
    /** @type {Uint8Array} */
    this.data = new Uint8Array(backingStore);
    /** @type {DataView} */
    this.view = new DataView(backingStore);
    /** @type {TextDecoder} */
    this.decoder = new TextDecoder();
    /** @type {number} */
    this.numTags = this.view.getUint32(backingStore.byteLength - 4, true);
    /** @type {number} */
    this.referenceStart = this.view.getUint32(backingStore.byteLength - 8, true);
    /** @type {number} */
    this.formatVersion = this.view.getUint32(backingStore.byteLength - 12, true);

    if (this.formatVersion !== 1) {
      throw new Error('Incompatible autocomplete format version');
    }
  }

  /**
   * Get a tag's name and its associations given a byte location inside the file.
   *
   * Record layout at `location`:
   *   uint8 name length, name bytes, uint8 association count,
   *   then that many little-endian uint32 association ids.
   *
   * @param {number} location
   * @returns {[string, number[]]}
   */
  getTagFromLocation(location) {
    const nameLength = this.view.getUint8(location);
    const nameStart = location + 1;
    const assnLength = this.view.getUint8(nameStart + nameLength);
    // Association ids begin one byte AFTER the association count byte
    const assnStart = nameStart + nameLength + 1;

    // subarray creates a view over the same buffer, so no bytes are copied
    const name = this.decoder.decode(this.data.subarray(nameStart, nameStart + nameLength));

    /** @type {number[]} */
    const associations = [];

    for (let i = 0; i < assnLength; i++) {
      associations.push(this.view.getUint32(assnStart + i * 4, true));
    }

    return [ name, associations ];
  }

  /**
   * Get a Result object as the ith tag inside the file.
   *
   * Each reference table entry is 8 bytes: a uint32 tag record location
   * followed by a uint32 image count, both little-endian.
   *
   * @param {number} i
   * @returns {Result}
   */
  getResultAt(i) {
    const entry = this.referenceStart + i * 8;
    const nameLocation = this.view.getUint32(entry, true);
    const imageCount = this.view.getUint32(entry + 4, true);
    const [ name, associations ] = this.getTagFromLocation(nameLocation);

    return { name, imageCount, associations };
  }

  /**
   * Find the top k results by image count which match the given string prefix.
   *
   * Tags having any association id present in window.booru.hiddenTagList
   * are left out. An empty prefix yields no results.
   *
   * @param {string} prefix
   * @param {number} k
   * @returns {Result[]}
   */
  topK(prefix, k) {
    /** @type {Result[]} */
    const results = [];

    /** @type {number[]} */
    //@ts-expect-error No type for window.booru yet
    const hiddenTags = window.booru.hiddenTagList;

    if (prefix === '') {
      return results;
    }

    // Binary search to find the last tag that sorts before the prefix.
    // Invariant: every index <= l is smaller than the prefix, every index >= r
    // is not. l starts at -1 (nothing known to be smaller) so that the tag at
    // index 0 remains a candidate.
    let l = -1;
    let r = this.numTags;

    while (l < r - 1) {
      const m = l + ((r - l) >> 1);
      const { name } = this.getResultAt(m);

      if (name.slice(0, prefix.length) >= prefix) {
        // too large, go left
        r = m;
      }
      else {
        // too small, go right
        l = m;
      }
    }

    // Scan forward until no more matches occur
    while (l < this.numTags - 1) {
      const result = this.getResultAt(++l);
      if (!result.name.startsWith(prefix)) {
        break;
      }

      // Add if no associations are filtered
      if (!hiddenTags.some(ht => result.associations.includes(ht))) {
        results.push(result);
      }
    }

    // Sort results by image count
    results.sort((a, b) => b.imageCount - a.imageCount);

    return results.slice(0, k);
  }
}
|
|
@ -10,6 +10,7 @@ background() {
|
||||||
mix run -e 'Philomena.Release.verify_artist_links()'
|
mix run -e 'Philomena.Release.verify_artist_links()'
|
||||||
mix run -e 'Philomena.Release.update_stats()'
|
mix run -e 'Philomena.Release.update_stats()'
|
||||||
mix run -e 'Philomena.Release.clean_moderation_logs()'
|
mix run -e 'Philomena.Release.clean_moderation_logs()'
|
||||||
|
mix run -e 'Philomena.Release.generate_autocomplete()'
|
||||||
|
|
||||||
sleep 300
|
sleep 300
|
||||||
done
|
done
|
||||||
|
|
144
lib/philomena/autocomplete.ex
Normal file
144
lib/philomena/autocomplete.ex
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
defmodule Philomena.Autocomplete do
  @moduledoc """
  Pregenerated autocomplete files.
  """

  import Ecto.Query, warn: false
  alias Philomena.Repo

  alias Philomena.Tags.Tag
  alias Philomena.Images.Tagging
  alias Philomena.Autocomplete.Autocomplete

  @type tags_list() :: [{String.t(), number(), number()}]
  @type assoc_map() :: %{String.t() => [number()]}

  @doc """
  Returns the most recently created autocomplete record, or `nil` if none exists.
  """
  @spec get_autocomplete() :: Autocomplete.t() | nil
  def get_autocomplete do
    Autocomplete
    |> order_by(desc: :created_at)
    |> limit(1)
    |> Repo.one()
  end

  @doc """
  Builds the binary autocomplete file from the current tags, inserts it as a
  new autocomplete record, and deletes every record created before it.

  Returns the result of the final `Repo.delete_all/1` call.
  """
  def generate_autocomplete! do
    tags = get_tags()
    associations = get_associations(tags)

    # Tags are already sorted, so just add them to the file directly
    #
    #     struct tag {
    #       uint8_t key_length;
    #       uint8_t key[];
    #       uint8_t association_length;
    #       uint32_t associations[];
    #     };
    #
    #     struct tag_reference {
    #       uint32_t tag_location;
    #       uint32_t num_uses;
    #     };
    #

    {ac_file, references} =
      Enum.reduce(tags, {<<>>, <<>>}, fn {name, images_count, _}, {file, references} ->
        # Byte offset of this tag record, stored in its reference entry
        pos = byte_size(file)
        assn = Map.get(associations, name, [])
        assn_bin = for id <- assn, into: <<>>, do: <<id::32-little>>

        {
          <<file::binary, byte_size(name)::8, name::binary, length(assn)::8, assn_bin::binary>>,
          <<references::binary, pos::32-little, images_count::32-little>>
        }
      end)

    ac_file = int32_align(ac_file)
    reference_start = byte_size(ac_file)

    # Finally add the reference start and number of tags in the footer
    #
    #     struct autocomplete_file {
    #       struct tag tags[];
    #       struct tag_reference references[];
    #       uint32_t format_version;
    #       uint32_t reference_start;
    #       uint32_t num_tags;
    #     };
    #

    ac_file =
      <<ac_file::binary, references::binary, 1::32-little, reference_start::32-little,
        length(tags)::32-little>>

    # Insert the autocomplete binary
    new_ac =
      %Autocomplete{}
      |> Autocomplete.changeset(%{content: ac_file})
      |> Repo.insert!()

    # Remove anything older
    Autocomplete
    |> where([ac], ac.created_at < ^new_ac.created_at)
    |> Repo.delete_all()
  end

  #
  # Get the name, number of uses and id of each tag as a list of
  # {name, images_count, id} tuples, taking at most the 65,535 most used tags
  # and dropping names of 255 bytes or more (the name length is stored in a
  # single byte).
  # Sort is done in the application to avoid collation.
  #
  @spec get_tags() :: tags_list()
  defp get_tags do
    Tag
    |> select([t], {t.name, t.images_count, t.id})
    |> where([t], t.images_count > 0)
    |> order_by(desc: :images_count)
    |> limit(65_535)
    |> Repo.all()
    |> Enum.filter(fn {name, _, _} -> byte_size(name) < 255 end)
    |> Enum.sort()
  end

  #
  # Get up to eight associated tag ids for each returned tag,
  # as a map of tag name to list of ids.
  #
  @spec get_associations(tags_list()) :: assoc_map()
  defp get_associations(tags) do
    tags
    |> Enum.map(fn {name, images_count, id} ->
      # Randomly sample 100 images with this tag
      image_sample =
        Tagging
        |> where(tag_id: ^id)
        |> select([it], it.image_id)
        |> order_by(asc: fragment("random()"))
        |> limit(100)

      # Select the tags from those images which have more uses than
      # the current one being considered, and overlap more than 50%
      assoc_ids =
        Tagging
        |> join(:inner, [it], _ in assoc(it, :tag))
        |> where([_, t], t.images_count > ^images_count)
        |> where([it, _], it.image_id in subquery(image_sample))
        |> group_by([_, t], t.id)
        |> order_by(desc: fragment("count(*)"))
        |> having([_, t], fragment("(100 * count(*)::float / LEAST(?, 100)) > 50", ^images_count))
        |> select([_, t], t.id)
        |> limit(8)
        |> Repo.all()

      {name, assoc_ids}
    end)
    |> Map.new()
  end

  #
  # Right-pad a binary with zero bytes to be a multiple of 4 bytes.
  # A binary that is already aligned is returned without extra padding.
  #
  @spec int32_align(binary()) :: binary()
  defp int32_align(bin) do
    # The outer rem/2 maps a full 4-byte pad (already aligned input) to 0
    pad_bits = 8 * rem(4 - rem(byte_size(bin), 4), 4)

    <<bin::binary, 0::size(pad_bits)>>
  end
end
|
17
lib/philomena/autocomplete/autocomplete.ex
Normal file
17
lib/philomena/autocomplete/autocomplete.ex
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
defmodule Philomena.Autocomplete.Autocomplete do
  @moduledoc """
  Schema for a row of the `autocomplete` table: a binary `content` blob plus
  a `created_at` timestamp. The table has no primary key and no update
  timestamp.
  """

  use Ecto.Schema
  import Ecto.Changeset

  @primary_key false
  schema "autocomplete" do
    field :content, :binary

    timestamps(inserted_at: :created_at, updated_at: false, type: :utc_datetime)
  end

  @doc false
  def changeset(autocomplete, attrs) do
    # Only :content may be set from attrs, and it must be present
    casted = cast(autocomplete, attrs, [:content])

    validate_required(casted, [:content])
  end
end
|
|
@ -34,6 +34,11 @@ defmodule Philomena.Release do
|
||||||
Philomena.ModerationLogs.cleanup!()
|
Philomena.ModerationLogs.cleanup!()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Release task: start the application, then regenerate the stored
# autocomplete file via Philomena.Autocomplete.generate_autocomplete!/0.
def generate_autocomplete do
  start_app()

  Philomena.Autocomplete.generate_autocomplete!()
end
|
||||||
|
|
||||||
defp repos do
|
defp repos do
|
||||||
Application.fetch_env!(@app, :ecto_repos)
|
Application.fetch_env!(@app, :ecto_repos)
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
defmodule PhilomenaWeb.Autocomplete.CompiledController do
  use PhilomenaWeb, :controller

  alias Philomena.Autocomplete

  # Serve the latest compiled autocomplete binary.
  #
  # When a record exists, its content is set as a 200 response with a
  # "public, max-age=86400" cache-control header. When none exists, an empty
  # 404 text response is sent. The session is dropped in both cases.
  def show(conn, _params) do
    case Autocomplete.get_autocomplete() do
      %{content: content} ->
        conn
        |> put_resp_header("cache-control", "public, max-age=86400")
        |> configure_session(drop: true)
        |> resp(200, content)

      nil ->
        conn
        |> put_status(:not_found)
        |> configure_session(drop: true)
        |> text("")
    end
  end
end
|
|
@ -461,6 +461,7 @@ defmodule PhilomenaWeb.Router do
|
||||||
|
|
||||||
scope "/autocomplete", Autocomplete, as: :autocomplete do
|
scope "/autocomplete", Autocomplete, as: :autocomplete do
|
||||||
resources "/tags", TagController, only: [:show], singleton: true
|
resources "/tags", TagController, only: [:show], singleton: true
|
||||||
|
resources "/compiled", CompiledController, only: [:show], singleton: true
|
||||||
end
|
end
|
||||||
|
|
||||||
scope "/fetch", Fetch, as: :fetch do
|
scope "/fetch", Fetch, as: :fetch do
|
||||||
|
|
10
priv/repo/migrations/20211219194836_create_autocomplete.exs
Normal file
10
priv/repo/migrations/20211219194836_create_autocomplete.exs
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
defmodule Philomena.Repo.Migrations.CreateAutocomplete do
  use Ecto.Migration

  # Creates the `autocomplete` table without a primary key. It holds a
  # non-null binary `content` column and a `created_at` timestamp; no
  # update timestamp column is created.
  def change do
    create table(:autocomplete, primary_key: false) do
      add(:content, :binary, null: false)

      timestamps(inserted_at: :created_at, updated_at: false, type: :utc_datetime)
    end
  end
end
|
|
@ -116,6 +116,16 @@ CREATE SEQUENCE public.artist_links_id_seq
|
||||||
ALTER SEQUENCE public.artist_links_id_seq OWNED BY public.artist_links.id;
|
ALTER SEQUENCE public.artist_links_id_seq OWNED BY public.artist_links.id;
|
||||||
|
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Name: autocomplete; Type: TABLE; Schema: public; Owner: -
|
||||||
|
--
|
||||||
|
|
||||||
|
CREATE TABLE public.autocomplete (
|
||||||
|
content bytea NOT NULL,
|
||||||
|
created_at timestamp(0) without time zone NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Name: badge_awards; Type: TABLE; Schema: public; Owner: -
|
-- Name: badge_awards; Type: TABLE; Schema: public; Owner: -
|
||||||
--
|
--
|
||||||
|
@ -4959,3 +4969,4 @@ INSERT INTO public."schema_migrations" (version) VALUES (20210917190346);
|
||||||
INSERT INTO public."schema_migrations" (version) VALUES (20210921025336);
|
INSERT INTO public."schema_migrations" (version) VALUES (20210921025336);
|
||||||
INSERT INTO public."schema_migrations" (version) VALUES (20210929181319);
|
INSERT INTO public."schema_migrations" (version) VALUES (20210929181319);
|
||||||
INSERT INTO public."schema_migrations" (version) VALUES (20211107130226);
|
INSERT INTO public."schema_migrations" (version) VALUES (20211107130226);
|
||||||
|
INSERT INTO public."schema_migrations" (version) VALUES (20211219194836);
|
||||||
|
|
Loading…
Reference in a new issue