mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-24 04:27:59 +01:00
Handle aliases, names in namespace
This commit is contained in:
parent
3074a291c6
commit
25e9739383
2 changed files with 164 additions and 21 deletions
|
@ -35,8 +35,10 @@ export class LocalAutocompleter {
|
||||||
this.referenceStart = this.view.getUint32(backingStore.byteLength - 8, true);
|
this.referenceStart = this.view.getUint32(backingStore.byteLength - 8, true);
|
||||||
/** @type {number} */
|
/** @type {number} */
|
||||||
this.formatVersion = this.view.getUint32(backingStore.byteLength - 12, true);
|
this.formatVersion = this.view.getUint32(backingStore.byteLength - 12, true);
|
||||||
|
/** @type {number} */
|
||||||
|
this.numSecondary = (backingStore.byteLength - this.referenceStart - this.numTags * 8 - 12) / 4;
|
||||||
|
|
||||||
if (this.formatVersion !== 1) {
|
if (this.formatVersion !== 2) {
|
||||||
throw new Error('Incompatible autocomplete format version');
|
throw new Error('Incompatible autocomplete format version');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,11 +73,41 @@ export class LocalAutocompleter {
|
||||||
getResultAt(i) {
|
getResultAt(i) {
|
||||||
const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true);
|
const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true);
|
||||||
const imageCount = this.view.getUint32(this.referenceStart + i * 8 + 4, true);
|
const imageCount = this.view.getUint32(this.referenceStart + i * 8 + 4, true);
|
||||||
|
|
||||||
|
if (imageCount >>> 31 & 1) {
|
||||||
|
// This is actually an alias, so follow it
|
||||||
|
return this.getResultAt(imageCount & ~(1 << 31));
|
||||||
|
}
|
||||||
|
|
||||||
const [ name, associations ] = this.getTagFromLocation(nameLocation);
|
const [ name, associations ] = this.getTagFromLocation(nameLocation);
|
||||||
|
|
||||||
return { name, imageCount, associations };
|
return { name, imageCount, associations };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a Result object as the ith tag inside the file, secondary ordering.
|
||||||
|
*
|
||||||
|
* @param {number} i
|
||||||
|
* @returns {Result}
|
||||||
|
*/
|
||||||
|
getSecondaryResultAt(i) {
|
||||||
|
const referenceIndex = this.view.getUint32(this.referenceStart + this.numTags * 8 + i * 4);
|
||||||
|
return this.getResultAt(referenceIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the name of a tag without any namespace component.
|
||||||
|
*
|
||||||
|
* @param {string} s
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
nameInNamespace(s) {
|
||||||
|
const v = s.split(':', 2);
|
||||||
|
|
||||||
|
if (v.length === 2) return v[1];
|
||||||
|
return v[0];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the top k results by image count which match the given string prefix.
|
* Find the top k results by image count which match the given string prefix.
|
||||||
*
|
*
|
||||||
|
@ -126,6 +158,37 @@ export class LocalAutocompleter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Binary search again to find in secondary list
|
||||||
|
l = 0;
|
||||||
|
r = this.numSecondary;
|
||||||
|
|
||||||
|
while (l < r - 1) {
|
||||||
|
const m = (l + (r - l) / 2) | 0;
|
||||||
|
const { name } = this.getSecondaryResultAt(m);
|
||||||
|
|
||||||
|
if (this.nameInNamespace(name).slice(0, prefix.length) >= prefix) {
|
||||||
|
// too large, go left
|
||||||
|
r = m;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// too small, go right
|
||||||
|
l = m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan forward until no more matches occur
|
||||||
|
while (l < this.numSecondary - 1) {
|
||||||
|
const result = this.getSecondaryResultAt(++l);
|
||||||
|
if (!this.nameInNamespace(result.name).startsWith(prefix)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add if no associations are filtered
|
||||||
|
if (hiddenTags.findIndex(ht => result.associations.includes(ht)) === -1) {
|
||||||
|
results.push(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sort results by image count
|
// Sort results by image count
|
||||||
results.sort((a, b) => b.imageCount - a.imageCount);
|
results.sort((a, b) => b.imageCount - a.imageCount);
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ defmodule Philomena.Autocomplete do
|
||||||
alias Philomena.Images.Tagging
|
alias Philomena.Images.Tagging
|
||||||
alias Philomena.Autocomplete.Autocomplete
|
alias Philomena.Autocomplete.Autocomplete
|
||||||
|
|
||||||
@type tags_list() :: [{String.t(), number(), number()}]
|
@type tags_list() :: [{String.t(), number(), number(), String.t() | nil}]
|
||||||
@type assoc_map() :: %{String.t() => [number()]}
|
@type assoc_map() :: %{String.t() => [number()]}
|
||||||
|
|
||||||
@spec get_autocomplete() :: Autocomplete.t() | nil
|
@spec get_autocomplete() :: Autocomplete.t() | nil
|
||||||
|
@ -34,32 +34,78 @@ defmodule Philomena.Autocomplete do
|
||||||
# uint32_t associations[];
|
# uint32_t associations[];
|
||||||
# };
|
# };
|
||||||
#
|
#
|
||||||
# struct tag_reference {
|
|
||||||
# uint32_t tag_location;
|
|
||||||
# uint32_t num_uses;
|
|
||||||
# };
|
|
||||||
#
|
|
||||||
|
|
||||||
{ac_file, references} =
|
{ac_file, name_locations} =
|
||||||
Enum.reduce(tags, {<<>>, <<>>}, fn {name, images_count, _}, {file, references} ->
|
Enum.reduce(tags, {<<>>, %{}}, fn {name, _, _, _}, {file, name_locations} ->
|
||||||
pos = byte_size(file)
|
pos = byte_size(file)
|
||||||
assn = Map.get(associations, name, [])
|
assn = Map.get(associations, name, [])
|
||||||
assn_bin = for id <- assn, into: <<>>, do: <<id::32-little>>
|
assn_bin = for id <- assn, into: <<>>, do: <<id::32-little>>
|
||||||
|
|
||||||
{
|
{
|
||||||
<<file::binary, byte_size(name)::8, name::binary, length(assn)::8, assn_bin::binary>>,
|
<<file::binary, byte_size(name)::8, name::binary, length(assn)::8, assn_bin::binary>>,
|
||||||
<<references::binary, pos::32-little, images_count::32-little>>
|
Map.put(name_locations, name, pos)
|
||||||
}
|
}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
# Link reference list; self-referential, so must be preprocessed to deal with aliases
|
||||||
|
#
|
||||||
|
# struct tag_reference {
|
||||||
|
# uint32_t tag_location;
|
||||||
|
# uint8_t is_aliased : 1;
|
||||||
|
# union {
|
||||||
|
# uint32_t num_uses : 31;
|
||||||
|
# uint32_t alias_index : 31;
|
||||||
|
# };
|
||||||
|
# };
|
||||||
|
#
|
||||||
|
|
||||||
ac_file = int32_align(ac_file)
|
ac_file = int32_align(ac_file)
|
||||||
reference_start = byte_size(ac_file)
|
reference_start = byte_size(ac_file)
|
||||||
|
size_of_reference = 8
|
||||||
|
|
||||||
|
reference_locations =
|
||||||
|
tags
|
||||||
|
|> Enum.with_index()
|
||||||
|
|> Enum.map(fn {name, index} -> {name, index} end)
|
||||||
|
|> Map.new()
|
||||||
|
|
||||||
|
references =
|
||||||
|
Enum.reduce(tags, <<>>, fn {name, images_count, _, alias_target}, references ->
|
||||||
|
pos = Map.fetch!(name_locations, name)
|
||||||
|
|
||||||
|
if not is_nil(alias_target) do
|
||||||
|
target = Map.fetch!(reference_locations, alias_target)
|
||||||
|
|
||||||
|
<<references::binary, pos::32-little, 1::1, target::31-little>>
|
||||||
|
else
|
||||||
|
<<references::binary, pos::32-little, 0::1, images_count::31-little>>
|
||||||
|
end
|
||||||
|
end)
|
||||||
|
|
||||||
|
# Reorder tags by name in their namespace to provide a secondary ordering
|
||||||
|
#
|
||||||
|
# struct secondary_reference {
|
||||||
|
# uint32_t primary_location;
|
||||||
|
# };
|
||||||
|
#
|
||||||
|
|
||||||
|
secondary_references =
|
||||||
|
tags
|
||||||
|
|> Enum.map(&{name_in_namespace(elem(&1, 0)), &1})
|
||||||
|
|> Enum.uniq_by(fn {k, _v} -> k end)
|
||||||
|
|> Enum.sort()
|
||||||
|
|> Enum.reduce(<<>>, fn {_k, v}, secondary_references ->
|
||||||
|
target = Map.fetch!(reference_locations, v)
|
||||||
|
|
||||||
|
<<secondary_references::binary, target::32-little>>
|
||||||
|
end)
|
||||||
|
|
||||||
# Finally add the reference start and number of tags in the footer
|
# Finally add the reference start and number of tags in the footer
|
||||||
#
|
#
|
||||||
# struct autocomplete_file {
|
# struct autocomplete_file {
|
||||||
# struct tag tags[];
|
# struct tag tags[];
|
||||||
# struct tag_reference references[];
|
# struct tag_reference primary_references[];
|
||||||
|
# struct secondary_reference secondary_references[];
|
||||||
# uint32_t format_version;
|
# uint32_t format_version;
|
||||||
# uint32_t reference_start;
|
# uint32_t reference_start;
|
||||||
# uint32_t num_tags;
|
# uint32_t num_tags;
|
||||||
|
@ -67,8 +113,14 @@ defmodule Philomena.Autocomplete do
|
||||||
#
|
#
|
||||||
|
|
||||||
ac_file =
|
ac_file =
|
||||||
<<ac_file::binary, references::binary, 1::32-little, reference_start::32-little,
|
<<
|
||||||
length(tags)::32-little>>
|
ac_file::binary,
|
||||||
|
references::binary,
|
||||||
|
secondary_references::binary,
|
||||||
|
2::32-little,
|
||||||
|
reference_start::32-little,
|
||||||
|
length(tags)::32-little
|
||||||
|
>>
|
||||||
|
|
||||||
# Insert the autocomplete binary
|
# Insert the autocomplete binary
|
||||||
new_ac =
|
new_ac =
|
||||||
|
@ -88,13 +140,23 @@ defmodule Philomena.Autocomplete do
|
||||||
#
|
#
|
||||||
@spec get_tags() :: tags_list()
|
@spec get_tags() :: tags_list()
|
||||||
defp get_tags do
|
defp get_tags do
|
||||||
|
top_tags =
|
||||||
Tag
|
Tag
|
||||||
|> select([t], {t.name, t.images_count, t.id})
|
|> select([t], {t.name, t.images_count, t.id, nil})
|
||||||
|> where([t], t.images_count > 0)
|
|> where([t], t.images_count > 0)
|
||||||
|> order_by(desc: :images_count)
|
|> order_by(desc: :images_count)
|
||||||
|> limit(65_535)
|
|> limit(50_000)
|
||||||
|> Repo.all()
|
|> Repo.all()
|
||||||
|> Enum.filter(fn {name, _, _} -> byte_size(name) < 255 end)
|
|
||||||
|
aliases_of_top_tags =
|
||||||
|
Tag
|
||||||
|
|> where([t], t.aliased_tag_id in ^Enum.map(top_tags, fn {_, _, id, _} -> id end))
|
||||||
|
|> join(:inner, [t], _ in assoc(t, :aliased_tag))
|
||||||
|
|> select([t, a], {t.name, 0, 0, a.name})
|
||||||
|
|> Repo.all()
|
||||||
|
|
||||||
|
(aliases_of_top_tags ++ top_tags)
|
||||||
|
|> Enum.filter(fn {name, _, _, _} -> byte_size(name) < 255 end)
|
||||||
|> Enum.sort()
|
|> Enum.sort()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -104,7 +166,7 @@ defmodule Philomena.Autocomplete do
|
||||||
@spec get_associations(tags_list()) :: assoc_map()
|
@spec get_associations(tags_list()) :: assoc_map()
|
||||||
defp get_associations(tags) do
|
defp get_associations(tags) do
|
||||||
tags
|
tags
|
||||||
|> Enum.map(fn {name, images_count, id} ->
|
|> Enum.map(fn {name, images_count, id, _} ->
|
||||||
# Randomly sample 100 images with this tag
|
# Randomly sample 100 images with this tag
|
||||||
image_sample =
|
image_sample =
|
||||||
Tagging
|
Tagging
|
||||||
|
@ -141,4 +203,22 @@ defmodule Philomena.Autocomplete do
|
||||||
|
|
||||||
<<bin::binary, 0::size(pad_bits)>>
|
<<bin::binary, 0::size(pad_bits)>>
|
||||||
end
|
end
|
||||||
|
|
||||||
|
#
# Strip a leading namespace (artist:, oc:, etc.) from a tag name.
# Names that carry no namespace are returned as they are.
#
@spec name_in_namespace(String.t()) :: String.t()
defp name_in_namespace(s) do
  # Splitting yields at most two non-empty pieces: [namespace, name] or
  # [name]. The last piece is the bare name in both cases.
  pieces = String.split(s, ":", parts: 2, trim: true)

  # When the split produces no pieces at all, fall back to the input itself.
  List.last(pieces) || s
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue