mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-27 05:37:59 +01:00
Local autocomplete (#151)
This commit is contained in:
parent
715506352c
commit
dadc2f1585
10 changed files with 374 additions and 0 deletions
|
@ -2,6 +2,9 @@
|
|||
* Autocomplete.
|
||||
*/
|
||||
|
||||
import { LocalAutocompleter } from 'utils/local-autocompleter';
|
||||
import { handleError } from 'utils/requests';
|
||||
|
||||
const cache = {};
|
||||
let inputField, originalTerm;
|
||||
|
||||
|
@ -122,8 +125,23 @@ function getSuggestions(term) {
|
|||
function listenAutocomplete() {
|
||||
let timeout;
|
||||
|
||||
/** @type {LocalAutocompleter} */
|
||||
let localAc = null;
|
||||
let localFetched = false;
|
||||
|
||||
document.addEventListener('focusin', fetchLocalAutocomplete);
|
||||
|
||||
document.addEventListener('input', event => {
|
||||
removeParent();
|
||||
fetchLocalAutocomplete(event);
|
||||
|
||||
if (localAc !== null && 'ac' in event.target.dataset) {
|
||||
inputField = event.target;
|
||||
originalTerm = inputField.value;
|
||||
|
||||
const suggestions = localAc.topK(inputField.value, 5).map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name }));
|
||||
return showAutocomplete(suggestions, originalTerm, event.target);
|
||||
}
|
||||
|
||||
window.clearTimeout(timeout);
|
||||
// Use a timeout to delay requests until the user has stopped typing
|
||||
|
@ -150,6 +168,16 @@ function listenAutocomplete() {
|
|||
document.addEventListener('click', event => {
|
||||
if (event.target && event.target !== inputField) removeParent();
|
||||
});
|
||||
|
||||
/**
 * Lazily download the compiled autocomplete file the first time an
 * autocomplete-enabled field (one carrying a `data-ac` attribute) is involved
 * in an event. The download is attempted at most once per page.
 *
 * @param {Event} event
 */
function fetchLocalAutocomplete(event) {
  if (localFetched) return;

  const { dataset } = event.target;
  if (!dataset || !('ac' in dataset)) return;

  // Set the flag before the request resolves so concurrent events do not
  // trigger duplicate downloads.
  localFetched = true;

  fetch('/autocomplete/compiled', { credentials: 'omit', cache: 'force-cache' })
    .then(handleError)
    .then(resp => resp.arrayBuffer())
    .then(buf => {
      localAc = new LocalAutocompleter(buf);
    });
}
|
||||
}
|
||||
|
||||
export { listenAutocomplete };
|
||||
|
|
134
assets/js/utils/local-autocompleter.js
Normal file
134
assets/js/utils/local-autocompleter.js
Normal file
|
@ -0,0 +1,134 @@
|
|||
//@ts-check
|
||||
/*
|
||||
* Client-side tag completion.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {object} Result
|
||||
* @property {string} name
|
||||
* @property {number} imageCount
|
||||
* @property {number[]} associations
|
||||
*/
|
||||
|
||||
/**
|
||||
* See lib/philomena/autocomplete.ex for binary structure details.
|
||||
*
|
||||
* A binary blob is used to avoid the creation of large amounts of garbage on
|
||||
* the JS heap and speed up the execution of the search.
|
||||
*/
|
||||
export class LocalAutocompleter {
  /**
   * Build a new local autocompleter.
   *
   * The last 12 bytes of the file form a footer of three little-endian
   * uint32 values: format version, reference table offset, and tag count.
   *
   * @param {ArrayBuffer} backingStore
   * @throws {Error} if the file's format version is not 1
   */
  constructor(backingStore) {
    /** @type {Uint8Array} */
    this.data = new Uint8Array(backingStore);
    /** @type {DataView} */
    this.view = new DataView(backingStore);
    /** @type {TextDecoder} */
    this.decoder = new TextDecoder();
    /** @type {number} */
    this.numTags = this.view.getUint32(backingStore.byteLength - 4, true);
    /** @type {number} */
    this.referenceStart = this.view.getUint32(backingStore.byteLength - 8, true);
    /** @type {number} */
    this.formatVersion = this.view.getUint32(backingStore.byteLength - 12, true);

    if (this.formatVersion !== 1) {
      throw new Error('Incompatible autocomplete format version');
    }
  }

  /**
   * Get a tag's name and its associations given a byte location inside the file.
   *
   * Layout at `location`:
   *   uint8 nameLength, nameLength bytes of UTF-8 name,
   *   uint8 assnLength, assnLength little-endian uint32 association ids.
   *
   * @param {number} location
   * @returns {[string, number[]]}
   */
  getTagFromLocation(location) {
    const nameStart = location + 1;
    const nameLength = this.view.getUint8(location);
    const assnLengthLocation = nameStart + nameLength;
    const assnLength = this.view.getUint8(assnLengthLocation);

    // The association ids begin immediately AFTER the one-byte length field.
    const assnStart = assnLengthLocation + 1;

    /** @type {number[]} */
    const associations = [];
    // subarray creates a view rather than copying the name bytes
    const name = this.decoder.decode(this.data.subarray(nameStart, nameStart + nameLength));

    for (let i = 0; i < assnLength; i++) {
      associations.push(this.view.getUint32(assnStart + i * 4, true));
    }

    return [ name, associations ];
  }

  /**
   * Get a Result object as the ith tag inside the file.
   *
   * Each reference table entry is 8 bytes: a uint32 tag location followed by
   * a uint32 image count, both little-endian.
   *
   * @param {number} i
   * @returns {Result}
   */
  getResultAt(i) {
    const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true);
    const imageCount = this.view.getUint32(this.referenceStart + i * 8 + 4, true);
    const [ name, associations ] = this.getTagFromLocation(nameLocation);

    return { name, imageCount, associations };
  }

  /**
   * Find the top k results by image count which match the given string prefix.
   *
   * Tags having any association present in `window.booru.hiddenTagList` are
   * excluded. An empty prefix yields no results.
   *
   * @param {string} prefix
   * @param {number} k
   * @returns {Result[]}
   */
  topK(prefix, k) {
    /** @type {Result[]} */
    const results = [];

    /** @type {number[]} */
    //@ts-expect-error No type for window.booru yet
    const hiddenTags = window.booru.hiddenTagList;

    if (prefix === '') {
      return results;
    }

    // Binary search to find the last tag sorting before the prefix.
    // Invariant: every index <= l sorts before the prefix and every index >= r
    // does not. l starts at -1 (not 0) so that the first tag in the file is
    // still reachable by the forward scan below.
    let l = -1;
    let r = this.numTags;

    while (l < r - 1) {
      const m = (l + (r - l) / 2) | 0;
      const { name } = this.getResultAt(m);

      if (name.slice(0, prefix.length) >= prefix) {
        // too large, go left
        r = m;
      }
      else {
        // too small, go right
        l = m;
      }
    }

    // Scan forward until no more matches occur
    while (l < this.numTags - 1) {
      const result = this.getResultAt(++l);
      if (!result.name.startsWith(prefix)) {
        break;
      }

      // Add if no associations are filtered
      if (!hiddenTags.some(ht => result.associations.includes(ht))) {
        results.push(result);
      }
    }

    // Sort results by image count
    results.sort((a, b) => b.imageCount - a.imageCount);

    return results.slice(0, k);
  }
}
|
|
@ -10,6 +10,7 @@ background() {
|
|||
mix run -e 'Philomena.Release.verify_artist_links()'
|
||||
mix run -e 'Philomena.Release.update_stats()'
|
||||
mix run -e 'Philomena.Release.clean_moderation_logs()'
|
||||
mix run -e 'Philomena.Release.generate_autocomplete()'
|
||||
|
||||
sleep 300
|
||||
done
|
||||
|
|
144
lib/philomena/autocomplete.ex
Normal file
144
lib/philomena/autocomplete.ex
Normal file
|
@ -0,0 +1,144 @@
|
|||
defmodule Philomena.Autocomplete do
  @moduledoc """
  Pregenerated autocomplete files.

  The generated binary is stored in the `autocomplete` table and consumed by
  the client-side completer in `assets/js/utils/local-autocompleter.js`.
  """

  import Ecto.Query, warn: false
  alias Philomena.Repo

  alias Philomena.Tags.Tag
  alias Philomena.Images.Tagging
  alias Philomena.Autocomplete.Autocomplete

  @type tags_list() :: [{String.t(), number(), number()}]
  @type assoc_map() :: %{String.t() => [number()]}

  @doc """
  Returns the most recently generated autocomplete row, or `nil` if none has
  been generated yet.
  """
  @spec get_autocomplete() :: Autocomplete.t() | nil
  def get_autocomplete do
    Autocomplete
    |> order_by(desc: :created_at)
    |> limit(1)
    |> Repo.one()
  end

  @doc """
  Builds a new autocomplete binary from the current tags, inserts it, and
  deletes every row created before it.

  Raises if the insert fails. Returns the result of the final
  `Repo.delete_all/1` call.
  """
  @spec generate_autocomplete!() :: {non_neg_integer(), nil | [term()]}
  def generate_autocomplete! do
    tags = get_tags()
    associations = get_associations(tags)

    # Tags are already sorted, so just add them to the file directly
    #
    #     struct tag {
    #       uint8_t key_length;
    #       uint8_t key[];
    #       uint8_t association_length;
    #       uint32_t associations[];
    #     };
    #
    #     struct tag_reference {
    #       uint32_t tag_location;
    #       uint32_t num_uses;
    #     };
    #

    {ac_file, references} =
      Enum.reduce(tags, {<<>>, <<>>}, fn {name, images_count, _}, {file, references} ->
        # Byte offset of this tag record; stored in the reference table
        pos = byte_size(file)
        assn = Map.get(associations, name, [])
        assn_bin = for id <- assn, into: <<>>, do: <<id::32-little>>

        {
          <<file::binary, byte_size(name)::8, name::binary, length(assn)::8, assn_bin::binary>>,
          <<references::binary, pos::32-little, images_count::32-little>>
        }
      end)

    ac_file = int32_align(ac_file)
    reference_start = byte_size(ac_file)

    # Finally add the reference start and number of tags in the footer
    #
    #     struct autocomplete_file {
    #       struct tag tags[];
    #       struct tag_reference references[];
    #       uint32_t format_version;
    #       uint32_t reference_start;
    #       uint32_t num_tags;
    #     };
    #

    ac_file =
      <<ac_file::binary, references::binary, 1::32-little, reference_start::32-little,
        length(tags)::32-little>>

    # Insert the autocomplete binary
    new_ac =
      %Autocomplete{}
      |> Autocomplete.changeset(%{content: ac_file})
      |> Repo.insert!()

    # Remove anything older
    Autocomplete
    |> where([ac], ac.created_at < ^new_ac.created_at)
    |> Repo.delete_all()
  end

  #
  # Get the name, number of uses and id of the most-used tags as a list of
  # `{name, images_count, id}` tuples, sorted by name.
  # Sort is done in the application to avoid collation.
  # Names must fit the one-byte length prefix of the file format.
  #
  @spec get_tags() :: tags_list()
  defp get_tags do
    Tag
    |> select([t], {t.name, t.images_count, t.id})
    |> where([t], t.images_count > 0)
    |> order_by(desc: :images_count)
    |> limit(65_535)
    |> Repo.all()
    |> Enum.filter(fn {name, _, _} -> byte_size(name) < 255 end)
    |> Enum.sort()
  end

  #
  # Get up to eight associated tag ids for each returned tag.
  # Issues one query per tag.
  #
  @spec get_associations(tags_list()) :: assoc_map()
  defp get_associations(tags) do
    Map.new(tags, fn {name, images_count, id} ->
      # Randomly sample 100 images with this tag
      image_sample =
        Tagging
        |> where(tag_id: ^id)
        |> select([it], it.image_id)
        |> order_by(asc: fragment("random()"))
        |> limit(100)

      # Select the tags from those images which have more uses than
      # the current one being considered, and overlap more than 50%
      assoc_ids =
        Tagging
        |> join(:inner, [it], _ in assoc(it, :tag))
        |> where([_, t], t.images_count > ^images_count)
        |> where([it, _], it.image_id in subquery(image_sample))
        |> group_by([_, t], t.id)
        |> order_by(desc: fragment("count(*)"))
        |> having([_, t], fragment("(100 * count(*)::float / LEAST(?, 100)) > 50", ^images_count))
        |> select([_, t], t.id)
        |> limit(8)
        |> Repo.all()

      {name, assoc_ids}
    end)
  end

  #
  # Right-pad a binary with zero bytes to be a multiple of 4 bytes.
  # A binary which is already aligned is returned without extra padding.
  #
  @spec int32_align(binary()) :: binary()
  defp int32_align(bin) do
    # The outer rem/2 maps the "already aligned" case (4 - 0 = 4) back to 0
    pad_bits = 8 * rem(4 - rem(byte_size(bin), 4), 4)

    <<bin::binary, 0::size(pad_bits)>>
  end
end
|
17
lib/philomena/autocomplete/autocomplete.ex
Normal file
17
lib/philomena/autocomplete/autocomplete.ex
Normal file
|
@ -0,0 +1,17 @@
|
|||
defmodule Philomena.Autocomplete.Autocomplete do
  @moduledoc """
  Schema for a stored, pregenerated autocomplete binary.

  The table has no primary key; rows carry only the binary `content` and a
  `created_at` timestamp.
  """

  use Ecto.Schema
  import Ecto.Changeset

  # Referenced by the spec of `Philomena.Autocomplete.get_autocomplete/0`
  @type t :: %__MODULE__{}

  @primary_key false
  schema "autocomplete" do
    field :content, :binary
    timestamps(inserted_at: :created_at, updated_at: false, type: :utc_datetime)
  end

  @doc false
  def changeset(autocomplete, attrs) do
    autocomplete
    |> cast(attrs, [:content])
    |> validate_required([:content])
  end
end
|
|
@ -34,6 +34,11 @@ defmodule Philomena.Release do
|
|||
Philomena.ModerationLogs.cleanup!()
|
||||
end
|
||||
|
||||
# Release task: start the application, then regenerate the stored
# autocomplete binary.
def generate_autocomplete() do
  start_app()

  Philomena.Autocomplete.generate_autocomplete!()
end
|
||||
|
||||
# Ecto repos configured for the application under the :ecto_repos key.
defp repos, do: Application.fetch_env!(@app, :ecto_repos)
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
defmodule PhilomenaWeb.Autocomplete.CompiledController do
  use PhilomenaWeb, :controller

  alias Philomena.Autocomplete

  # Serves the latest compiled autocomplete binary, or an empty 404 when
  # none has been generated yet.
  def show(conn, _params) do
    send_compiled(conn, Autocomplete.get_autocomplete())
  end

  # Nothing generated yet: empty 404 body.
  defp send_compiled(conn, nil) do
    conn
    |> put_status(:not_found)
    |> configure_session(drop: true)
    |> text("")
  end

  # Publicly cacheable for one day; the session is dropped in both clauses.
  defp send_compiled(conn, %{content: content}) do
    conn
    |> put_resp_header("cache-control", "public, max-age=86400")
    |> configure_session(drop: true)
    |> resp(200, content)
  end
end
|
|
@ -461,6 +461,7 @@ defmodule PhilomenaWeb.Router do
|
|||
|
||||
scope "/autocomplete", Autocomplete, as: :autocomplete do
|
||||
resources "/tags", TagController, only: [:show], singleton: true
|
||||
resources "/compiled", CompiledController, only: [:show], singleton: true
|
||||
end
|
||||
|
||||
scope "/fetch", Fetch, as: :fetch do
|
||||
|
|
10
priv/repo/migrations/20211219194836_create_autocomplete.exs
Normal file
10
priv/repo/migrations/20211219194836_create_autocomplete.exs
Normal file
|
@ -0,0 +1,10 @@
|
|||
defmodule Philomena.Repo.Migrations.CreateAutocomplete do
  use Ecto.Migration

  # Creates the keyless `autocomplete` table holding the generated binary
  # and its creation time.
  def change do
    create(table(:autocomplete, primary_key: false)) do
      add(:content, :binary, null: false)
      timestamps(inserted_at: :created_at, updated_at: false, type: :utc_datetime)
    end
  end
end
|
|
@ -116,6 +116,16 @@ CREATE SEQUENCE public.artist_links_id_seq
|
|||
ALTER SEQUENCE public.artist_links_id_seq OWNED BY public.artist_links.id;
|
||||
|
||||
|
||||
--
|
||||
-- Name: autocomplete; Type: TABLE; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
CREATE TABLE public.autocomplete (
|
||||
content bytea NOT NULL,
|
||||
created_at timestamp(0) without time zone NOT NULL
|
||||
);
|
||||
|
||||
|
||||
--
|
||||
-- Name: badge_awards; Type: TABLE; Schema: public; Owner: -
|
||||
--
|
||||
|
@ -4959,3 +4969,4 @@ INSERT INTO public."schema_migrations" (version) VALUES (20210917190346);
|
|||
INSERT INTO public."schema_migrations" (version) VALUES (20210921025336);
|
||||
INSERT INTO public."schema_migrations" (version) VALUES (20210929181319);
|
||||
INSERT INTO public."schema_migrations" (version) VALUES (20211107130226);
|
||||
INSERT INTO public."schema_migrations" (version) VALUES (20211219194836);
|
||||
|
|
Loading…
Reference in a new issue