Refactor local autocompleter

This commit is contained in:
MareStare 2025-03-21 03:53:32 +00:00
parent 8af67ab63a
commit 3877be15c5
5 changed files with 97 additions and 62 deletions

View file

@ -75,8 +75,8 @@ it('records search history', async () => {
"(history) foo1", "(history) foo1",
"-----------", "-----------",
"forest 3", "forest 3",
"fog 1",
"force field 1", "force field 1",
"fog 1",
"flower 1", "flower 1",
], ],
} }
@ -93,8 +93,8 @@ it('records search history', async () => {
"(history) foo1", "(history) foo1",
"-----------", "-----------",
"forest 3", "forest 3",
"fog 1",
"force field 1", "force field 1",
"fog 1",
"flower 1", "flower 1",
], ],
} }

View file

@ -12,8 +12,8 @@ it('supports navigation via keyboard', async () => {
"input": "forest<>", "input": "forest<>",
"suggestions": [ "suggestions": [
"👉 forest 3", "👉 forest 3",
"fog 1",
"force field 1", "force field 1",
"fog 1",
"flower 1", "flower 1",
], ],
} }
@ -23,11 +23,11 @@ it('supports navigation via keyboard', async () => {
ctx.expectUi().toMatchInlineSnapshot(` ctx.expectUi().toMatchInlineSnapshot(`
{ {
"input": "fog<>", "input": "force field<>",
"suggestions": [ "suggestions": [
"forest 3", "forest 3",
"👉 fog 1", "👉 force field 1",
"force field 1", "fog 1",
"flower 1", "flower 1",
], ],
} }
@ -40,8 +40,8 @@ it('supports navigation via keyboard', async () => {
"input": "flower<>", "input": "flower<>",
"suggestions": [ "suggestions": [
"forest 3", "forest 3",
"fog 1",
"force field 1", "force field 1",
"fog 1",
"👉 flower 1", "👉 flower 1",
], ],
} }
@ -54,8 +54,8 @@ it('supports navigation via keyboard', async () => {
"input": "forest<>", "input": "forest<>",
"suggestions": [ "suggestions": [
"👉 forest 3", "👉 forest 3",
"fog 1",
"force field 1", "force field 1",
"fog 1",
"flower 1", "flower 1",
], ],
} }

View file

@ -54,6 +54,16 @@ describe('LocalAutocompleter', () => {
const joinMatchParts = (parts: MatchPart[]) => const joinMatchParts = (parts: MatchPart[]) =>
parts.map(part => (typeof part === 'string' ? part : `{${part.matched}}`)).join(''); parts.map(part => (typeof part === 'string' ? part : `{${part.matched}}`)).join('');
// Make sure results are ordered by (images, name)
expect(results).toEqual(
[...results].sort((a, b) => {
return (
b.images - a.images ||
joinMatchParts(b.alias ?? b.canonical).localeCompare(joinMatchParts(a.alias ?? a.canonical))
);
}),
);
const actual = results.map(result => { const actual = results.map(result => {
if (result.alias) { if (result.alias) {
return `${joinMatchParts(result.alias)} -> ${result.canonical} (${result.images})`; return `${joinMatchParts(result.alias)} -> ${result.canonical} (${result.images})`;
@ -97,8 +107,8 @@ describe('LocalAutocompleter', () => {
expectLocalAutocomplete(termStem).toMatchInlineSnapshot(` expectLocalAutocomplete(termStem).toMatchInlineSnapshot(`
[ [
"{fo}rest (3)", "{fo}rest (3)",
"{fo}g (1)",
"{fo}rce field (1)", "{fo}rce field (1)",
"{fo}g (1)",
] ]
`); `);
}); });

View file

@ -46,6 +46,10 @@ function nameInNamespace(s: Uint8Array): Uint8Array {
return s; return s;
} }
function identity<T>(value: T) {
return value;
}
/** /**
* See lib/philomena/autocomplete.ex for binary structure details. * See lib/philomena/autocomplete.ex for binary structure details.
* *
@ -55,37 +59,43 @@ function nameInNamespace(s: Uint8Array): Uint8Array {
export class LocalAutocompleter { export class LocalAutocompleter {
private encoder: TextEncoder; private encoder: TextEncoder;
private decoder: TextDecoder; private decoder: TextDecoder;
private data: Uint8Array;
private view: DataView; private view: DataView;
private numTags: number; private numTags: number;
private referenceStart: number; private referenceStart: number;
private secondaryStart: number; private secondaryStart: number;
private formatVersion: number; private hiddenTags: Set<number>;
private tagReferenceHeapStorage: Uint32Array;
/** /**
* Build a new local autocompleter. * Build a new local autocompleter from the compiled autocomplete index.
*/ */
constructor(backingStore: ArrayBuffer) { constructor(buffer: ArrayBuffer) {
this.encoder = new TextEncoder(); this.view = new DataView(buffer);
this.decoder = new TextDecoder();
this.data = new Uint8Array(backingStore);
this.view = new DataView(backingStore);
this.numTags = this.view.getUint32(backingStore.byteLength - 4, true);
this.referenceStart = this.view.getUint32(backingStore.byteLength - 8, true);
this.secondaryStart = this.referenceStart + 8 * this.numTags;
this.formatVersion = this.view.getUint32(backingStore.byteLength - 12, true);
if (this.formatVersion !== 2) { const formatVersion = this.view.getUint32(buffer.byteLength - 12, true);
if (formatVersion !== 2) {
throw new Error('Incompatible autocomplete format version'); throw new Error('Incompatible autocomplete format version');
} }
this.encoder = new TextEncoder();
this.decoder = new TextDecoder();
this.numTags = this.view.getUint32(buffer.byteLength - 4, true);
this.referenceStart = this.view.getUint32(buffer.byteLength - 8, true);
this.secondaryStart = this.referenceStart + 8 * this.numTags;
this.tagReferenceHeapStorage = new Uint32Array(this.numTags);
this.hiddenTags = new Set(window.booru.hiddenTagList);
} }
/** /**
* Return the pointer to tag data for the given reference index. * Return the pointer to tag data for the given reference index.
*/ */
private resolveTagReference(i: TagReferenceIndex, resolveAlias: boolean = true): TagPointer { private resolveTagReference(i: TagReferenceIndex, resolveAlias = true): TagPointer {
const tagPointer = this.view.getUint32(this.referenceStart + i * 8, true); const refPointer = this.referenceStart + i * 8;
const imageCount = this.view.getInt32(this.referenceStart + i * 8 + 4, true); const tagPointer = this.view.getUint32(refPointer, true);
const imageCount = this.view.getInt32(refPointer + 4, true);
if (resolveAlias && imageCount < 0) { if (resolveAlias && imageCount < 0) {
// This is actually an alias, so follow it // This is actually an alias, so follow it
@ -119,16 +129,17 @@ export class LocalAutocompleter {
/** /**
* Return the name buffer of the pointed-to result. * Return the name buffer of the pointed-to result.
*/ */
private referenceToName(i: TagReferenceIndex, resolveAlias: boolean = true): Uint8Array { private referenceToName(i: TagReferenceIndex, resolveAlias = true): Uint8Array {
const pointer = this.resolveTagReference(i, resolveAlias); const pointer = this.resolveTagReference(i, resolveAlias);
const nameLength = this.view.getUint8(pointer); const nameLength = this.view.getUint8(pointer);
return this.data.slice(pointer + 1, pointer + nameLength + 1);
return new Uint8Array(this.view.buffer, pointer + 1, nameLength);
} }
/** /**
* Return whether any associations in the pointed-to result are in comparisonValues. * Return `true` if any associated tags are hidden for this tag.
*/ */
private isFilteredByReference(comparisonValues: Set<number>, i: TagReferenceIndex): boolean { private isHiddenTag(i: TagReferenceIndex): boolean {
const pointer = this.resolveTagReference(i); const pointer = this.resolveTagReference(i);
const nameLength = this.view.getUint8(pointer); const nameLength = this.view.getUint8(pointer);
const assnLength = this.view.getUint8(pointer + 1 + nameLength); const assnLength = this.view.getUint8(pointer + 1 + nameLength);
@ -136,7 +147,7 @@ export class LocalAutocompleter {
for (let j = 0; j < assnLength; j++) { for (let j = 0; j < assnLength; j++) {
const assnValue = this.view.getUint32(pointer + 1 + nameLength + 1 + j * 4, true); const assnValue = this.view.getUint32(pointer + 1 + nameLength + 1 + j * 4, true);
if (comparisonValues.has(assnValue)) { if (this.hiddenTags.has(assnValue)) {
return true; return true;
} }
} }
@ -145,7 +156,10 @@ export class LocalAutocompleter {
} }
/** /**
* Return whether Result a is considered less than Result b. * Return a number with the result of the comparison.
* `=0` - means both tags are equal
* `>0` - means `a` is greater than `b`
* `<0` - means `b` is greater than `a`
*/ */
private compareReferenceToReference(a: TagReferenceIndex, b: TagReferenceIndex): number { private compareReferenceToReference(a: TagReferenceIndex, b: TagReferenceIndex): number {
const imagesA = this.getImageCount(a); const imagesA = this.getImageCount(a);
@ -155,16 +169,17 @@ export class LocalAutocompleter {
return imagesA - imagesB; return imagesA - imagesB;
} }
const nameA = this.referenceToName(a); const nameA = this.referenceToName(a, false);
const nameB = this.referenceToName(a); const nameB = this.referenceToName(b, false);
return strcmp(nameA, nameB); return strcmp(nameA, nameB);
} }
/** /**
* Get a Result object as the ith tag inside the file, secondary ordering. * Get a tag reference from the secondary index that is ordered by tag names
* stripped from their namespace.
*/ */
private getSecondaryResultAt(i: number): TagReferenceIndex { private getSecondaryReferenceAt(i: number): TagReferenceIndex {
return this.view.getUint32(this.secondaryStart + i * 4, true); return this.view.getUint32(this.secondaryStart + i * 4, true);
} }
@ -172,21 +187,29 @@ export class LocalAutocompleter {
* Perform a binary search with a subsequent forward scan to fetch all results * Perform a binary search with a subsequent forward scan to fetch all results
* matching a `compare` condition. * matching a `compare` condition.
*/ */
private scanResults( private queryIndex({
getResult: (i: number) => TagReferenceIndex, prefix,
compare: (result: TagReferenceIndex) => number, mapName,
hasFilteredAssociation: (result: TagReferenceIndex) => boolean, mapIndex,
isAlias: (result: TagReferenceIndex) => boolean, results,
results: UniqueHeap<TagReferenceIndex>, }: {
) { prefix: Uint8Array;
mapName(name: Uint8Array): Uint8Array;
mapIndex(index: number): TagReferenceIndex;
results: UniqueHeap<TagReferenceIndex>;
}) {
const filter = !store.get('unfilter_tag_suggestions'); const filter = !store.get('unfilter_tag_suggestions');
let min = 0; let min = 0;
let max = this.numTags; let max = this.numTags;
const compare = (index: TagReferenceIndex) => {
return strcmp(mapName(this.referenceToName(index, false)).slice(0, prefix.length), prefix);
};
while (min < max - 1) { while (min < max - 1) {
const med = min + (((max - min) / 2) | 0); const med = min + (((max - min) / 2) | 0);
const referenceIndex = getResult(med); const referenceIndex = mapIndex(med);
if (compare(referenceIndex) >= 0) { if (compare(referenceIndex) >= 0) {
// too large, go left // too large, go left
@ -199,19 +222,19 @@ export class LocalAutocompleter {
// Scan forward until no more matches occur // Scan forward until no more matches occur
while (min < this.numTags - 1) { while (min < this.numTags - 1) {
const referenceIndex = getResult(++min); const referenceIndex = mapIndex(++min);
if (compare(referenceIndex) !== 0) { if (compare(referenceIndex) !== 0) {
break; break;
} }
// Check if any associations are filtered // Check if any associations are filtered
if (filter && hasFilteredAssociation(referenceIndex)) { if (filter && this.isHiddenTag(referenceIndex)) {
continue; continue;
} }
// Nothing was filtered, so add // Nothing was filtered, so add
results.append(referenceIndex, !isAlias(referenceIndex)); results.append(referenceIndex, !this.tagReferenceIsAlias(referenceIndex));
} }
} }
@ -228,25 +251,27 @@ export class LocalAutocompleter {
const results = new UniqueHeap<TagReferenceIndex>( const results = new UniqueHeap<TagReferenceIndex>(
this.compareReferenceToReference.bind(this), this.compareReferenceToReference.bind(this),
this.resolveTagReference.bind(this), this.resolveTagReference.bind(this),
new Uint32Array(this.numTags),
// We don't need to clear the buffer after previous usages. The `UniqueHeap`
// tracks the length of the used area internally.
this.tagReferenceHeapStorage,
); );
// Set up filter context // Find tags ordered by their full name
const hiddenTags = new Set(window.booru.hiddenTagList); this.queryIndex({
const hasFilteredAssociation = this.isFilteredByReference.bind(this, hiddenTags); mapIndex: identity,
const isAlias = this.tagReferenceIsAlias.bind(this); mapName: identity,
prefix,
// Find tags ordered by full name results,
const prefixMatch = (i: TagReferenceIndex) => });
strcmp(this.referenceToName(i, false).slice(0, prefix.length), prefix);
const referenceToNameIndex = (i: number) => i;
this.scanResults(referenceToNameIndex, prefixMatch, hasFilteredAssociation, isAlias, results);
// Find tags ordered by name in namespace // Find tags ordered by name in namespace
const namespaceMatch = (i: TagReferenceIndex) => this.queryIndex({
strcmp(nameInNamespace(this.referenceToName(i, false)).slice(0, prefix.length), prefix); mapIndex: this.getSecondaryReferenceAt.bind(this),
const referenceToAliasIndex = this.getSecondaryResultAt.bind(this); mapName: nameInNamespace,
this.scanResults(referenceToAliasIndex, namespaceMatch, hasFilteredAssociation, isAlias, results); prefix,
results,
});
// Convert top K from heap into result array // Convert top K from heap into result array
return results.topK(k).map((i: TagReferenceIndex) => { return results.topK(k).map((i: TagReferenceIndex) => {

View file

@ -20,7 +20,7 @@ export class UniqueHeap<T> {
this.unique = unique; this.unique = unique;
} }
append(value: T, forceReplace: boolean = false) { append(value: T, forceReplace = false) {
const key = this.unique(value); const key = this.unique(value);
const prevIndex = this.keys.get(key); const prevIndex = this.keys.get(key);