mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-23 20:18:00 +01:00
Handle aliases, names in namespace
This commit is contained in:
parent
3074a291c6
commit
25e9739383
2 changed files with 164 additions and 21 deletions
|
@ -35,8 +35,10 @@ export class LocalAutocompleter {
|
|||
this.referenceStart = this.view.getUint32(backingStore.byteLength - 8, true);
|
||||
/** @type {number} */
|
||||
this.formatVersion = this.view.getUint32(backingStore.byteLength - 12, true);
|
||||
/** @type {number} */
|
||||
this.numSecondary = (backingStore.byteLength - this.referenceStart - this.numTags * 8 - 12) / 4;
|
||||
|
||||
if (this.formatVersion !== 1) {
|
||||
if (this.formatVersion !== 2) {
|
||||
throw new Error('Incompatible autocomplete format version');
|
||||
}
|
||||
}
|
||||
|
@ -71,11 +73,41 @@ export class LocalAutocompleter {
|
|||
getResultAt(i) {
|
||||
const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true);
|
||||
const imageCount = this.view.getUint32(this.referenceStart + i * 8 + 4, true);
|
||||
|
||||
if (imageCount >>> 31 & 1) {
|
||||
// This is actually an alias, so follow it
|
||||
return this.getResultAt(imageCount & ~(1 << 31));
|
||||
}
|
||||
|
||||
const [ name, associations ] = this.getTagFromLocation(nameLocation);
|
||||
|
||||
return { name, imageCount, associations };
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a Result object as the ith tag inside the file, secondary ordering.
|
||||
*
|
||||
* @param {number} i
|
||||
* @returns {Result}
|
||||
*/
|
||||
getSecondaryResultAt(i) {
|
||||
const referenceIndex = this.view.getUint32(this.referenceStart + this.numTags * 8 + i * 4);
|
||||
return this.getResultAt(referenceIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the name of a tag without any namespace component.
|
||||
*
|
||||
* @param {string} s
|
||||
* @returns {string}
|
||||
*/
|
||||
nameInNamespace(s) {
|
||||
const v = s.split(':', 2);
|
||||
|
||||
if (v.length === 2) return v[1];
|
||||
return v[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the top k results by image count which match the given string prefix.
|
||||
*
|
||||
|
@ -126,6 +158,37 @@ export class LocalAutocompleter {
|
|||
}
|
||||
}
|
||||
|
||||
// Binary search again to find in secondary list
|
||||
l = 0;
|
||||
r = this.numSecondary;
|
||||
|
||||
while (l < r - 1) {
|
||||
const m = (l + (r - l) / 2) | 0;
|
||||
const { name } = this.getSecondaryResultAt(m);
|
||||
|
||||
if (this.nameInNamespace(name).slice(0, prefix.length) >= prefix) {
|
||||
// too large, go left
|
||||
r = m;
|
||||
}
|
||||
else {
|
||||
// too small, go right
|
||||
l = m;
|
||||
}
|
||||
}
|
||||
|
||||
// Scan forward until no more matches occur
|
||||
while (l < this.numSecondary - 1) {
|
||||
const result = this.getSecondaryResultAt(++l);
|
||||
if (!this.nameInNamespace(result.name).startsWith(prefix)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Add if no associations are filtered
|
||||
if (hiddenTags.findIndex(ht => result.associations.includes(ht)) === -1) {
|
||||
results.push(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort results by image count
|
||||
results.sort((a, b) => b.imageCount - a.imageCount);
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ defmodule Philomena.Autocomplete do
|
|||
alias Philomena.Images.Tagging
|
||||
alias Philomena.Autocomplete.Autocomplete
|
||||
|
||||
@type tags_list() :: [{String.t(), number(), number()}]
|
||||
@type tags_list() :: [{String.t(), number(), number(), String.t() | nil}]
|
||||
@type assoc_map() :: %{String.t() => [number()]}
|
||||
|
||||
@spec get_autocomplete() :: Autocomplete.t() | nil
|
||||
|
@ -34,32 +34,78 @@ defmodule Philomena.Autocomplete do
|
|||
# uint32_t associations[];
|
||||
# };
|
||||
#
|
||||
# struct tag_reference {
|
||||
# uint32_t tag_location;
|
||||
# uint32_t num_uses;
|
||||
# };
|
||||
#
|
||||
|
||||
{ac_file, references} =
|
||||
Enum.reduce(tags, {<<>>, <<>>}, fn {name, images_count, _}, {file, references} ->
|
||||
{ac_file, name_locations} =
|
||||
Enum.reduce(tags, {<<>>, %{}}, fn {name, _, _, _}, {file, name_locations} ->
|
||||
pos = byte_size(file)
|
||||
assn = Map.get(associations, name, [])
|
||||
assn_bin = for id <- assn, into: <<>>, do: <<id::32-little>>
|
||||
|
||||
{
|
||||
<<file::binary, byte_size(name)::8, name::binary, length(assn)::8, assn_bin::binary>>,
|
||||
<<references::binary, pos::32-little, images_count::32-little>>
|
||||
Map.put(name_locations, name, pos)
|
||||
}
|
||||
end)
|
||||
|
||||
# Link reference list; self-referential, so must be preprocessed to deal with aliases
|
||||
#
|
||||
# struct tag_reference {
|
||||
# uint32_t tag_location;
|
||||
# uint8_t is_aliased : 1;
|
||||
# union {
|
||||
# uint32_t num_uses : 31;
|
||||
# uint32_t alias_index : 31;
|
||||
# };
|
||||
# };
|
||||
#
|
||||
|
||||
ac_file = int32_align(ac_file)
|
||||
reference_start = byte_size(ac_file)
|
||||
size_of_reference = 8
|
||||
|
||||
reference_locations =
|
||||
tags
|
||||
|> Enum.with_index()
|
||||
|> Enum.map(fn {name, index} -> {name, index} end)
|
||||
|> Map.new()
|
||||
|
||||
references =
|
||||
Enum.reduce(tags, <<>>, fn {name, images_count, _, alias_target}, references ->
|
||||
pos = Map.fetch!(name_locations, name)
|
||||
|
||||
if not is_nil(alias_target) do
|
||||
target = Map.fetch!(reference_locations, alias_target)
|
||||
|
||||
<<references::binary, pos::32-little, 1::1, target::31-little>>
|
||||
else
|
||||
<<references::binary, pos::32-little, 0::1, images_count::31-little>>
|
||||
end
|
||||
end)
|
||||
|
||||
# Reorder tags by name in their namespace to provide a secondary ordering
|
||||
#
|
||||
# struct secondary_reference {
|
||||
# uint32_t primary_location;
|
||||
# };
|
||||
#
|
||||
|
||||
secondary_references =
|
||||
tags
|
||||
|> Enum.map(&{name_in_namespace(elem(&1, 0)), &1})
|
||||
|> Enum.uniq_by(fn {k, _v} -> k end)
|
||||
|> Enum.sort()
|
||||
|> Enum.reduce(<<>>, fn {_k, v}, secondary_references ->
|
||||
target = Map.fetch!(reference_locations, v)
|
||||
|
||||
<<secondary_references::binary, target::32-little>>
|
||||
end)
|
||||
|
||||
# Finally add the reference start and number of tags in the footer
|
||||
#
|
||||
# struct autocomplete_file {
|
||||
# struct tag tags[];
|
||||
# struct tag_reference references[];
|
||||
# struct tag_reference primary_references[];
|
||||
# struct secondary_reference secondary_references[];
|
||||
# uint32_t format_version;
|
||||
# uint32_t reference_start;
|
||||
# uint32_t num_tags;
|
||||
|
@ -67,8 +113,14 @@ defmodule Philomena.Autocomplete do
|
|||
#
|
||||
|
||||
ac_file =
|
||||
<<ac_file::binary, references::binary, 1::32-little, reference_start::32-little,
|
||||
length(tags)::32-little>>
|
||||
<<
|
||||
ac_file::binary,
|
||||
references::binary,
|
||||
secondary_references::binary,
|
||||
2::32-little,
|
||||
reference_start::32-little,
|
||||
length(tags)::32-little
|
||||
>>
|
||||
|
||||
# Insert the autocomplete binary
|
||||
new_ac =
|
||||
|
@ -88,13 +140,23 @@ defmodule Philomena.Autocomplete do
|
|||
#
|
||||
@spec get_tags() :: tags_list()
|
||||
defp get_tags do
|
||||
Tag
|
||||
|> select([t], {t.name, t.images_count, t.id})
|
||||
|> where([t], t.images_count > 0)
|
||||
|> order_by(desc: :images_count)
|
||||
|> limit(65_535)
|
||||
|> Repo.all()
|
||||
|> Enum.filter(fn {name, _, _} -> byte_size(name) < 255 end)
|
||||
top_tags =
|
||||
Tag
|
||||
|> select([t], {t.name, t.images_count, t.id, nil})
|
||||
|> where([t], t.images_count > 0)
|
||||
|> order_by(desc: :images_count)
|
||||
|> limit(50_000)
|
||||
|> Repo.all()
|
||||
|
||||
aliases_of_top_tags =
|
||||
Tag
|
||||
|> where([t], t.aliased_tag_id in ^Enum.map(top_tags, fn {_, _, id, _} -> id end))
|
||||
|> join(:inner, [t], _ in assoc(t, :aliased_tag))
|
||||
|> select([t, a], {t.name, 0, 0, a.name})
|
||||
|> Repo.all()
|
||||
|
||||
(aliases_of_top_tags ++ top_tags)
|
||||
|> Enum.filter(fn {name, _, _, _} -> byte_size(name) < 255 end)
|
||||
|> Enum.sort()
|
||||
end
|
||||
|
||||
|
@ -104,7 +166,7 @@ defmodule Philomena.Autocomplete do
|
|||
@spec get_associations(tags_list()) :: assoc_map()
|
||||
defp get_associations(tags) do
|
||||
tags
|
||||
|> Enum.map(fn {name, images_count, id} ->
|
||||
|> Enum.map(fn {name, images_count, id, _} ->
|
||||
# Randomly sample 100 images with this tag
|
||||
image_sample =
|
||||
Tagging
|
||||
|
@ -141,4 +203,22 @@ defmodule Philomena.Autocomplete do
|
|||
|
||||
<<bin::binary, 0::size(pad_bits)>>
|
||||
end
|
||||
|
||||
#
# Strip the namespace prefix (artist:, oc:, etc.) from a tag name.
# Only the first colon acts as the separator; a name with no colon
# is returned unchanged.
#
@spec name_in_namespace(String.t()) :: String.t()
defp name_in_namespace(s) do
  # At most two parts come back and the last one is the bare name.
  # If trimming leaves nothing, fall back to the input itself.
  parts = String.split(s, ":", parts: 2, trim: true)

  List.last(parts) || s
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue