From 6cd01b5164995df5fa766649bdc66a3f3234d344 Mon Sep 17 00:00:00 2001 From: MareStare Date: Mon, 17 Mar 2025 19:10:31 +0000 Subject: [PATCH 1/5] Add more docs to the local autocomplete binary layout --- lib/philomena/autocomplete/generator.ex | 31 ++++++++++++++++++------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/lib/philomena/autocomplete/generator.ex b/lib/philomena/autocomplete/generator.ex index 6493027d..7f21742d 100644 --- a/lib/philomena/autocomplete/generator.ex +++ b/lib/philomena/autocomplete/generator.ex @@ -6,31 +6,46 @@ defmodule Philomena.Autocomplete.Generator do The file follows the following binary format: struct tag { - uint8_t key_length; - uint8_t key[]; - uint8_t association_length; + // Full name of the tag in UTF8. + uint8_t name_length; + uint8_t name[]; + + // List of IDs of other tags that appear on the same images in >50% of cases + uint8_t associations_length; uint32_t associations[]; }; struct tag_reference { + // Index of the tag in the `tags` array uint32_t tag_location; - union { - int32_t raw; - uint32_t num_uses; ///< when positive - uint32_t alias_index; ///< when negative, -alias_index - 1 - }; + + // If >=0 then this tag is canonical and the `meta` is the number of images with this tag + // + // If <0 then this is the index of the canonical tag aliased by this one + // in the `primary_references` array + int32_t meta; }; struct secondary_reference { + // Index of the tag in the `tags` array uint32_t primary_location; }; struct autocomplete_file { struct tag tags[]; + + // List of canonical/alias tags references with their respective metadata struct tag_reference primary_references[]; + + // List of references to tags sorted by tag name discarding the namespace struct secondary_reference secondary_references[]; + uint32_t format_version; + + // Byte index of the `tag_reference` array start uint32_t reference_start; + + // Length of the `tags` array. 
uint32_t num_tags; }; From ec482db85235cb0424effa2ee27e4df0d491f8e0 Mon Sep 17 00:00:00 2001 From: MareStare Date: Mon, 17 Mar 2025 19:44:29 +0000 Subject: [PATCH 2/5] Apply Liam's corrections to the docs --- lib/philomena/autocomplete/generator.ex | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/philomena/autocomplete/generator.ex b/lib/philomena/autocomplete/generator.ex index 7f21742d..af93c9d6 100644 --- a/lib/philomena/autocomplete/generator.ex +++ b/lib/philomena/autocomplete/generator.ex @@ -5,6 +5,9 @@ defmodule Philomena.Autocomplete.Generator do See assets/js/utils/local-autocompleter.ts for how this should be used. The file follows the following binary format: + // Note that all pointer types (`struct T*`) are relative to the binary payload start. + + // Variable-length struct. struct tag { // Full name of the tag in UTF8. uint8_t name_length; @@ -15,23 +18,28 @@ defmodule Philomena.Autocomplete.Generator do uint32_t associations[]; }; + // Fixed-length struct. struct tag_reference { - // Index of the tag in the `tags` array - uint32_t tag_location; + // 32 bit pointer to a tag in the `tags` array + struct tag* tag; // If >=0 then this tag is canonical and the `meta` is the number of images with this tag // - // If <0 then this is the index of the canonical tag aliased by this one - // in the `primary_references` array + // If <0 then do the following calculation: `-meta - 1` and you'll get a 32 bit pointer + // `struct tag_reference*` to the canonical tag aliased by this one in the `primary_references` array int32_t meta; }; + // Fixed-length struct. struct secondary_reference { - // Index of the tag in the `tags` array - uint32_t primary_location; + // Index of the `tag_reference` in the `primary_references` array + uint32_t primary_reference_index; }; + // Variable-length struct. 
struct autocomplete_file { + // Array of variable-length structs, so any references to the items in + // this array must be direct pointers instead of item indexes. struct tag tags[]; // List of canonical/alias tags references with their respective metadata @@ -42,8 +50,8 @@ defmodule Philomena.Autocomplete.Generator do uint32_t format_version; - // Byte index of the `tag_reference` array start - uint32_t reference_start; + // 32 bit pointer to the `primary_references` array start + struct tag_reference* primary_references_start; // Length of the `tags` array. uint32_t num_tags; From fbdafd723c94d98a97551c9aee13754c25e74c1f Mon Sep 17 00:00:00 2001 From: MareStare Date: Mon, 17 Mar 2025 19:48:03 +0000 Subject: [PATCH 3/5] Fix another logic error --- lib/philomena/autocomplete/generator.ex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/philomena/autocomplete/generator.ex b/lib/philomena/autocomplete/generator.ex index af93c9d6..fcb2e7e3 100644 --- a/lib/philomena/autocomplete/generator.ex +++ b/lib/philomena/autocomplete/generator.ex @@ -25,8 +25,9 @@ defmodule Philomena.Autocomplete.Generator do // If >=0 then this tag is canonical and the `meta` is the number of images with this tag // - // If <0 then do the following calculation: `-meta - 1` and you'll get a 32 bit pointer - // `struct tag_reference*` to the canonical tag aliased by this one in the `primary_references` array + // If <0 then do the following calculation: `-meta - 1` and you'll get an index + // of a `tag_reference` for the canonical tag aliased by this one + // in the `primary_references` array int32_t meta; }; From b2897187ab18c937a17991b2656c81d18a4e4fe3 Mon Sep 17 00:00:00 2001 From: MareStare Date: Mon, 17 Mar 2025 20:03:47 +0000 Subject: [PATCH 4/5] Replace 2nd person with 3rd --- lib/philomena/autocomplete/generator.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/philomena/autocomplete/generator.ex b/lib/philomena/autocomplete/generator.ex 
index fcb2e7e3..7f7ffbb5 100644 --- a/lib/philomena/autocomplete/generator.ex +++ b/lib/philomena/autocomplete/generator.ex @@ -25,7 +25,7 @@ defmodule Philomena.Autocomplete.Generator do // If >=0 then this tag is canonical and the `meta` is the number of images with this tag // - // If <0 then do the following calculation: `-meta - 1` and you'll get an index + // If <0 then the following calculation: `-meta - 1` and will give the index // of a `tag_reference` for the canonical tag aliased by this one // in the `primary_references` array int32_t meta; From db54e1be3a52d823ddda33991d1aa9b8d897b941 Mon Sep 17 00:00:00 2001 From: MareStare Date: Tue, 18 Mar 2025 00:07:45 +0200 Subject: [PATCH 5/5] Add more clarity for why the `-1` decision was made Co-authored-by: liamwhite --- lib/philomena/autocomplete/generator.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/philomena/autocomplete/generator.ex b/lib/philomena/autocomplete/generator.ex index 7f7ffbb5..b559f873 100644 --- a/lib/philomena/autocomplete/generator.ex +++ b/lib/philomena/autocomplete/generator.ex @@ -25,7 +25,7 @@ defmodule Philomena.Autocomplete.Generator do // If >=0 then this tag is canonical and the `meta` is the number of images with this tag // - // If <0 then the following calculation: `-meta - 1` and will give the index + // If <0 then `-meta - 1` (to allow 0 as a possibility for the target) will give the index // of a `tag_reference` for the canonical tag aliased by this one // in the `primary_references` array int32_t meta;