Handle aliases, names in namespace

This commit is contained in:
byte[] 2021-12-27 18:19:08 -05:00
parent 3074a291c6
commit 25e9739383
2 changed files with 164 additions and 21 deletions

View file

@ -35,8 +35,10 @@ export class LocalAutocompleter {
this.referenceStart = this.view.getUint32(backingStore.byteLength - 8, true);
/** @type {number} */
this.formatVersion = this.view.getUint32(backingStore.byteLength - 12, true);
/** @type {number} */
this.numSecondary = (backingStore.byteLength - this.referenceStart - this.numTags * 8 - 12) / 4;
if (this.formatVersion !== 1) {
if (this.formatVersion !== 2) {
throw new Error('Incompatible autocomplete format version');
}
}
@ -71,11 +73,41 @@ export class LocalAutocompleter {
getResultAt(i) {
const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true);
const imageCount = this.view.getUint32(this.referenceStart + i * 8 + 4, true);
if (imageCount >>> 31 & 1) {
// This is actually an alias, so follow it
return this.getResultAt(imageCount & ~(1 << 31));
}
const [ name, associations ] = this.getTagFromLocation(nameLocation);
return { name, imageCount, associations };
}
/**
* Get a Result object as the ith tag inside the file, secondary ordering.
*
* @param {number} i
* @returns {Result}
*/
getSecondaryResultAt(i) {
const referenceIndex = this.view.getUint32(this.referenceStart + this.numTags * 8 + i * 4);
return this.getResultAt(referenceIndex);
}
/**
* Returns the name of a tag without any namespace component.
*
* @param {string} s
* @returns {string}
*/
nameInNamespace(s) {
const v = s.split(':', 2);
if (v.length === 2) return v[1];
return v[0];
}
/**
* Find the top k results by image count which match the given string prefix.
*
@ -126,6 +158,37 @@ export class LocalAutocompleter {
}
}
// Binary search again to find in secondary list
l = 0;
r = this.numSecondary;
while (l < r - 1) {
const m = (l + (r - l) / 2) | 0;
const { name } = this.getSecondaryResultAt(m);
if (this.nameInNamespace(name).slice(0, prefix.length) >= prefix) {
// too large, go left
r = m;
}
else {
// too small, go right
l = m;
}
}
// Scan forward until no more matches occur
while (l < this.numSecondary - 1) {
const result = this.getSecondaryResultAt(++l);
if (!this.nameInNamespace(result.name).startsWith(prefix)) {
break;
}
// Add if no associations are filtered
if (hiddenTags.findIndex(ht => result.associations.includes(ht)) === -1) {
results.push(result);
}
}
// Sort results by image count
results.sort((a, b) => b.imageCount - a.imageCount);

View file

@ -10,7 +10,7 @@ defmodule Philomena.Autocomplete do
alias Philomena.Images.Tagging
alias Philomena.Autocomplete.Autocomplete
@type tags_list() :: [{String.t(), number(), number()}]
@type tags_list() :: [{String.t(), number(), number(), String.t() | nil}]
@type assoc_map() :: %{String.t() => [number()]}
@spec get_autocomplete() :: Autocomplete.t() | nil
@ -34,32 +34,78 @@ defmodule Philomena.Autocomplete do
# uint32_t associations[];
# };
#
# struct tag_reference {
# uint32_t tag_location;
# uint32_t num_uses;
# };
#
{ac_file, references} =
Enum.reduce(tags, {<<>>, <<>>}, fn {name, images_count, _}, {file, references} ->
{ac_file, name_locations} =
Enum.reduce(tags, {<<>>, %{}}, fn {name, _, _, _}, {file, name_locations} ->
pos = byte_size(file)
assn = Map.get(associations, name, [])
assn_bin = for id <- assn, into: <<>>, do: <<id::32-little>>
{
<<file::binary, byte_size(name)::8, name::binary, length(assn)::8, assn_bin::binary>>,
<<references::binary, pos::32-little, images_count::32-little>>
Map.put(name_locations, name, pos)
}
end)
# Link reference list; self-referential, so must be preprocessed to deal with aliases
#
# struct tag_reference {
# uint32_t tag_location;
# uint8_t is_aliased : 1;
# union {
# uint32_t num_uses : 31;
# uint32_t alias_index : 31;
# };
# };
#
ac_file = int32_align(ac_file)
reference_start = byte_size(ac_file)
size_of_reference = 8
reference_locations =
tags
|> Enum.with_index()
|> Enum.map(fn {name, index} -> {name, index} end)
|> Map.new()
references =
Enum.reduce(tags, <<>>, fn {name, images_count, _, alias_target}, references ->
pos = Map.fetch!(name_locations, name)
if not is_nil(alias_target) do
target = Map.fetch!(reference_locations, alias_target)
<<references::binary, pos::32-little, 1::1, target::31-little>>
else
<<references::binary, pos::32-little, 0::1, images_count::31-little>>
end
end)
# Reorder tags by name in their namespace to provide a secondary ordering
#
# struct secondary_reference {
# uint32_t primary_location;
# };
#
secondary_references =
tags
|> Enum.map(&{name_in_namespace(elem(&1, 0)), &1})
|> Enum.uniq_by(fn {k, _v} -> k end)
|> Enum.sort()
|> Enum.reduce(<<>>, fn {_k, v}, secondary_references ->
target = Map.fetch!(reference_locations, v)
<<secondary_references::binary, target::32-little>>
end)
# Finally add the reference start and number of tags in the footer
#
# struct autocomplete_file {
# struct tag tags[];
# struct tag_reference references[];
# struct tag_reference primary_references[];
# struct secondary_reference secondary_references[];
# uint32_t format_version;
# uint32_t reference_start;
# uint32_t num_tags;
@ -67,8 +113,14 @@ defmodule Philomena.Autocomplete do
#
ac_file =
<<ac_file::binary, references::binary, 1::32-little, reference_start::32-little,
length(tags)::32-little>>
<<
ac_file::binary,
references::binary,
secondary_references::binary,
2::32-little,
reference_start::32-little,
length(tags)::32-little
>>
# Insert the autocomplete binary
new_ac =
@ -88,13 +140,23 @@ defmodule Philomena.Autocomplete do
#
@spec get_tags() :: tags_list()
defp get_tags do
  # Names are kept only when their byte size is below 255.
  name_fits? = fn {name, _, _, _} -> byte_size(name) < 255 end

  # The most-used tags, as {name, images_count, id, nil}; the trailing nil
  # means "not an alias". Filtering happens before the alias lookup so that
  # every alias fetched below points at a tag that is actually kept.
  top_tags =
    Tag
    |> select([t], {t.name, t.images_count, t.id, nil})
    |> where([t], t.images_count > 0)
    |> order_by(desc: :images_count)
    |> limit(50_000)
    |> Repo.all()
    |> Enum.filter(name_fits?)

  top_tag_ids = Enum.map(top_tags, fn {_, _, id, _} -> id end)

  # Tags aliased to one of the kept tags, as {alias_name, 0, 0, target_name}.
  aliases_of_top_tags =
    Tag
    |> where([t], t.aliased_tag_id in ^top_tag_ids)
    |> join(:inner, [t], _ in assoc(t, :aliased_tag))
    |> select([t, a], {t.name, 0, 0, a.name})
    |> Repo.all()
    |> Enum.filter(name_fits?)

  # Tuples compare element by element, so this orders the list by tag name.
  Enum.sort(aliases_of_top_tags ++ top_tags)
end
@ -104,7 +166,7 @@ defmodule Philomena.Autocomplete do
@spec get_associations(tags_list()) :: assoc_map()
defp get_associations(tags) do
tags
|> Enum.map(fn {name, images_count, id} ->
|> Enum.map(fn {name, images_count, id, _} ->
# Randomly sample 100 images with this tag
image_sample =
Tagging
@ -141,4 +203,22 @@ defmodule Philomena.Autocomplete do
<<bin::binary, 0::size(pad_bits)>>
end
#
# Strip a leading namespace such as artist: or oc: from a tag name.
# Only the first colon is treated as the separator. A name that
# yields no non-empty parts is returned unchanged.
#
@spec name_in_namespace(String.t()) :: String.t()
defp name_in_namespace(s) do
  s
  |> String.split(":", parts: 2, trim: true)
  |> case do
    # Nothing left after trimming empty parts; fall back to the input
    [] -> s
    # One part (no namespace) or two parts (namespace, name): take the last
    parts -> List.last(parts)
  end
end
end