diff --git a/assets/js/utils/local-autocompleter.js b/assets/js/utils/local-autocompleter.js
index cbf290fb..2bf5321b 100644
--- a/assets/js/utils/local-autocompleter.js
+++ b/assets/js/utils/local-autocompleter.js
@@ -35,8 +35,10 @@ export class LocalAutocompleter {
     this.referenceStart = this.view.getUint32(backingStore.byteLength - 8, true);
     /** @type {number} */
     this.formatVersion = this.view.getUint32(backingStore.byteLength - 12, true);
+    /** @type {number} */
+    this.numSecondary = (backingStore.byteLength - this.referenceStart - this.numTags * 8 - 12) / 4;
 
-    if (this.formatVersion !== 1) {
+    if (this.formatVersion !== 2) {
       throw new Error('Incompatible autocomplete format version');
     }
   }
@@ -71,11 +73,41 @@ export class LocalAutocompleter {
   getResultAt(i) {
     const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true);
     const imageCount = this.view.getUint32(this.referenceStart + i * 8 + 4, true);
+
+    if (imageCount >>> 31 & 1) {
+      // High bit set: this entry is an alias and the low 31 bits index the target reference
+      return this.getResultAt(imageCount & ~(1 << 31));
+    }
+
     const [ name, associations ] = this.getTagFromLocation(nameLocation);
 
     return { name, imageCount, associations };
   }
 
+  /**
+   * Get a Result object as the ith tag inside the file, secondary ordering.
+   * Each secondary entry is a little-endian uint32 index into the primary reference list.
+   * @param {number} i
+   * @returns {Result}
+   */
+  getSecondaryResultAt(i) {
+    const referenceIndex = this.view.getUint32(this.referenceStart + this.numTags * 8 + i * 4, true);
+    return this.getResultAt(referenceIndex);
+  }
+
+  /**
+   * Returns the name of a tag without any namespace component.
+   * Only the first ':' separates the namespace; any later colons stay part of the name.
+   * @param {string} s
+   * @returns {string}
+   */
+  nameInNamespace(s) {
+    const separator = s.indexOf(':');
+
+    if (separator !== -1) return s.slice(separator + 1);
+    return s;
+  }
+
   /**
    * Find the top k results by image count which match the given string prefix.
* @@ -126,6 +158,37 @@ export class LocalAutocompleter { } } + // Binary search again to find in secondary list + l = 0; + r = this.numSecondary; + + while (l < r - 1) { + const m = (l + (r - l) / 2) | 0; + const { name } = this.getSecondaryResultAt(m); + + if (this.nameInNamespace(name).slice(0, prefix.length) >= prefix) { + // too large, go left + r = m; + } + else { + // too small, go right + l = m; + } + } + + // Scan forward until no more matches occur + while (l < this.numSecondary - 1) { + const result = this.getSecondaryResultAt(++l); + if (!this.nameInNamespace(result.name).startsWith(prefix)) { + break; + } + + // Add if no associations are filtered + if (hiddenTags.findIndex(ht => result.associations.includes(ht)) === -1) { + results.push(result); + } + } + // Sort results by image count results.sort((a, b) => b.imageCount - a.imageCount); diff --git a/lib/philomena/autocomplete.ex b/lib/philomena/autocomplete.ex index 4cbf9b7d..cbb1a48b 100644 --- a/lib/philomena/autocomplete.ex +++ b/lib/philomena/autocomplete.ex @@ -10,7 +10,7 @@ defmodule Philomena.Autocomplete do alias Philomena.Images.Tagging alias Philomena.Autocomplete.Autocomplete - @type tags_list() :: [{String.t(), number(), number()}] + @type tags_list() :: [{String.t(), number(), number(), String.t() | nil}] @type assoc_map() :: %{String.t() => [number()]} @spec get_autocomplete() :: Autocomplete.t() | nil @@ -34,32 +34,78 @@ defmodule Philomena.Autocomplete do # uint32_t associations[]; # }; # - # struct tag_reference { - # uint32_t tag_location; - # uint32_t num_uses; - # }; - # - {ac_file, references} = - Enum.reduce(tags, {<<>>, <<>>}, fn {name, images_count, _}, {file, references} -> + {ac_file, name_locations} = + Enum.reduce(tags, {<<>>, %{}}, fn {name, _, _, _}, {file, name_locations} -> pos = byte_size(file) assn = Map.get(associations, name, []) assn_bin = for id <- assn, into: <<>>, do: <> { <>, - <> + Map.put(name_locations, name, pos) } end) + # Link reference list; 
self-referential, so must be preprocessed to deal with aliases
+    # Alias targets are looked up by tag name, so reference_locations is keyed by name.
+    # struct tag_reference {
+    #   uint32_t tag_location;
+    #   uint8_t is_aliased : 1;
+    #   union {
+    #     uint32_t num_uses : 31;
+    #     uint32_t alias_index : 31;
+    #   };
+    # };
+    #
+
     ac_file = int32_align(ac_file)
     reference_start = byte_size(ac_file)
+    size_of_reference = 8
+
+    reference_locations =
+      tags
+      |> Enum.with_index()
+      |> Enum.map(fn {{name, _, _, _}, index} -> {name, index} end)
+      |> Map.new()
+
+    references =
+      Enum.reduce(tags, <<>>, fn {name, images_count, _, alias_target}, references ->
+        pos = Map.fetch!(name_locations, name)
+
+        if not is_nil(alias_target) do
+          target = Map.fetch!(reference_locations, alias_target)
+
+          <>
+        else
+          <>
+        end
+      end)
+
+    # Reorder tags by name in their namespace to provide a secondary ordering
+    #
+    # struct secondary_reference {
+    #   uint32_t primary_location;
+    # };
+    #
+
+    secondary_references =
+      tags
+      |> Enum.map(&{name_in_namespace(elem(&1, 0)), elem(&1, 0)})
+      |> Enum.uniq_by(fn {k, _v} -> k end)
+      |> Enum.sort()
+      |> Enum.reduce(<<>>, fn {_k, v}, secondary_references ->
+        target = Map.fetch!(reference_locations, v)
+
+        <>
+      end)
 
     # Finally add the reference start and number of tags in the footer
     #
     # struct autocomplete_file {
     #   struct tag tags[];
-    #   struct tag_reference references[];
+    #   struct tag_reference primary_references[];
+    #   struct secondary_reference secondary_references[];
     #   uint32_t format_version;
     #   uint32_t reference_start;
     #   uint32_t num_tags;
@@ -67,8 +113,14 @@ defmodule Philomena.Autocomplete do
     #
 
     ac_file =
-      <>
+      <<
+        ac_file::binary,
+        references::binary,
+        secondary_references::binary,
+        2::32-little,
+        reference_start::32-little,
+        length(tags)::32-little
+      >>
 
     # Insert the autocomplete binary
     new_ac =
@@ -88,13 +140,23 @@ defmodule Philomena.Autocomplete do
   #
   @spec get_tags() :: tags_list()
   defp get_tags do
-    Tag
-    |> select([t], {t.name, t.images_count, t.id})
-    |> where([t], t.images_count > 0)
-    |> order_by(desc: :images_count)
-    |> limit(65_535)
-
|> Repo.all() - |> Enum.filter(fn {name, _, _} -> byte_size(name) < 255 end) + top_tags = + Tag + |> select([t], {t.name, t.images_count, t.id, nil}) + |> where([t], t.images_count > 0) + |> order_by(desc: :images_count) + |> limit(50_000) + |> Repo.all() + + aliases_of_top_tags = + Tag + |> where([t], t.aliased_tag_id in ^Enum.map(top_tags, fn {_, _, id, _} -> id end)) + |> join(:inner, [t], _ in assoc(t, :aliased_tag)) + |> select([t, a], {t.name, 0, 0, a.name}) + |> Repo.all() + + (aliases_of_top_tags ++ top_tags) + |> Enum.filter(fn {name, _, _, _} -> byte_size(name) < 255 end) |> Enum.sort() end @@ -104,7 +166,7 @@ defmodule Philomena.Autocomplete do @spec get_associations(tags_list()) :: assoc_map() defp get_associations(tags) do tags - |> Enum.map(fn {name, images_count, id} -> + |> Enum.map(fn {name, images_count, id, _} -> # Randomly sample 100 images with this tag image_sample = Tagging @@ -141,4 +203,22 @@ defmodule Philomena.Autocomplete do <> end + + # + # Remove the artist:, oc: etc. prefix from a tag name, + # if one is present. + # + @spec name_in_namespace(String.t()) :: String.t() + defp name_in_namespace(s) do + case String.split(s, ":", parts: 2, trim: true) do + [_namespace, name] -> + name + + [name] -> + name + + _unknown -> + s + end + end end