Increase memory efficiency of local autocomplete

This commit is contained in:
Liam 2024-08-13 17:16:47 -04:00
parent 70145f3926
commit 6e64e4b6f0
6 changed files with 239 additions and 52 deletions

View file

@ -125,7 +125,7 @@ export default tsEslint.config(
'no-irregular-whitespace': 2, 'no-irregular-whitespace': 2,
'no-iterator': 2, 'no-iterator': 2,
'no-label-var': 2, 'no-label-var': 2,
'no-labels': 2, 'no-labels': [2, { allowSwitch: true, allowLoop: true }],
'no-lone-blocks': 2, 'no-lone-blocks': 2,
'no-lonely-if': 0, 'no-lonely-if': 0,
'no-loop-func': 2, 'no-loop-func': 2,

View file

@ -237,7 +237,8 @@ function listenAutocomplete() {
} }
const suggestions = localAc const suggestions = localAc
.topK(originalTerm, suggestionsCount) .matchPrefix(originalTerm)
.topK(suggestionsCount)
.map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name })); .map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name }));
if (suggestions.length) { if (suggestions.length) {

View file

@ -58,42 +58,44 @@ describe('Local Autocompleter', () => {
}); });
it('should return suggestions for exact tag name match', () => { it('should return suggestions for exact tag name match', () => {
const result = localAc.topK('safe', defaultK); const result = localAc.matchPrefix('safe').topK(defaultK);
expect(result).toEqual([expect.objectContaining({ name: 'safe', imageCount: 6 })]); expect(result).toEqual([expect.objectContaining({ aliasName: 'safe', name: 'safe', imageCount: 6 })]);
}); });
it('should return suggestion for original tag when passed an alias', () => { it('should return suggestion for original tag when passed an alias', () => {
const result = localAc.topK('flowers', defaultK); const result = localAc.matchPrefix('flowers').topK(defaultK);
expect(result).toEqual([expect.objectContaining({ name: 'flower', imageCount: 1 })]); expect(result).toEqual([expect.objectContaining({ aliasName: 'flowers', name: 'flower', imageCount: 1 })]);
}); });
it('should return suggestions sorted by image count', () => { it('should return suggestions sorted by image count', () => {
const result = localAc.topK(termStem, defaultK); const result = localAc.matchPrefix(termStem).topK(defaultK);
expect(result).toEqual([ expect(result).toEqual([
expect.objectContaining({ name: 'forest', imageCount: 3 }), expect.objectContaining({ aliasName: 'forest', name: 'forest', imageCount: 3 }),
expect.objectContaining({ name: 'fog', imageCount: 1 }), expect.objectContaining({ aliasName: 'fog', name: 'fog', imageCount: 1 }),
expect.objectContaining({ name: 'force field', imageCount: 1 }), expect.objectContaining({ aliasName: 'force field', name: 'force field', imageCount: 1 }),
]); ]);
}); });
it('should return namespaced suggestions without including namespace', () => { it('should return namespaced suggestions without including namespace', () => {
const result = localAc.topK('test', defaultK); const result = localAc.matchPrefix('test').topK(defaultK);
expect(result).toEqual([expect.objectContaining({ name: 'artist:test', imageCount: 1 })]); expect(result).toEqual([
expect.objectContaining({ aliasName: 'artist:test', name: 'artist:test', imageCount: 1 }),
]);
}); });
it('should return only the required number of suggestions', () => { it('should return only the required number of suggestions', () => {
const result = localAc.topK(termStem, 1); const result = localAc.matchPrefix(termStem).topK(1);
expect(result).toEqual([expect.objectContaining({ name: 'forest', imageCount: 3 })]); expect(result).toEqual([expect.objectContaining({ aliasName: 'forest', name: 'forest', imageCount: 3 })]);
}); });
it('should NOT return suggestions associated with hidden tags', () => { it('should NOT return suggestions associated with hidden tags', () => {
window.booru.hiddenTagList = [1]; window.booru.hiddenTagList = [1];
const result = localAc.topK(termStem, defaultK); const result = localAc.matchPrefix(termStem).topK(defaultK);
expect(result).toEqual([]); expect(result).toEqual([]);
}); });
it('should return empty array for empty prefix', () => { it('should return empty array for empty prefix', () => {
const result = localAc.topK('', defaultK); const result = localAc.matchPrefix('').topK(defaultK);
expect(result).toEqual([]); expect(result).toEqual([]);
}); });
}); });

View file

@ -0,0 +1,70 @@
import { UniqueHeap } from '../unique-heap';
describe('Unique Heap', () => {
  /** Minimal record stored in the heap under test; `name` is also the uniqueness key. */
  interface Result {
    name: string;
  }

  /** Ordering predicate: `a` ranks below `b` when its name sorts earlier. */
  const compare = (a: Result, b: Result): boolean => a.name < b.name;

  /** Creates a fresh, empty heap keyed on `name` for each test. */
  const makeHeap = () => new UniqueHeap<Result>(compare, 'name');

  test('it should return no results when empty', () => {
    expect(makeHeap().topK(5)).toEqual([]);
  });

  test("doesn't insert duplicate results", () => {
    const heap = makeHeap();

    // Two distinct objects sharing the same key: only the first may be kept.
    for (let attempt = 0; attempt < 2; attempt++) {
      heap.append({ name: 'name' });
    }

    expect(heap.topK(2)).toEqual([expect.objectContaining({ name: 'name' })]);
  });

  test('it should return results in reverse sorted order', () => {
    const heap = makeHeap();

    // Deliberately not in alphabetical order, so the heap has real work to do.
    const greekLetters = [
      'alpha',
      'beta',
      'gamma',
      'delta',
      'epsilon',
      'zeta',
      'eta',
      'theta',
      'iota',
      'kappa',
      'lambda',
      'mu',
      'nu',
      'xi',
      'omicron',
      'pi',
      'rho',
      'sigma',
      'tau',
      'upsilon',
      'phi',
      'chi',
      'psi',
      'omega',
    ];
    greekLetters.forEach(name => heap.append({ name }));

    // The five alphabetically-last names, largest first.
    const expectedNames = ['zeta', 'xi', 'upsilon', 'theta', 'tau'];

    expect(heap.topK(5)).toEqual(expectedNames.map(name => expect.objectContaining({ name })));
  });
});

View file

@ -1,12 +1,21 @@
// Client-side tag completion. // Client-side tag completion.
import { UniqueHeap } from './unique-heap';
import store from './store'; import store from './store';
interface Result { export interface Result {
aliasName: string;
name: string; name: string;
imageCount: number; imageCount: number;
associations: number[]; associations: number[];
} }
/**
* Returns whether Result a is considered less than Result b.
*/
function compareResult(a: Result, b: Result): boolean {
return a.imageCount === b.imageCount ? a.name > b.name : a.imageCount < b.imageCount;
}
/** /**
* Compare two strings, C-style. * Compare two strings, C-style.
*/ */
@ -18,10 +27,13 @@ function strcmp(a: string, b: string): number {
* Returns the name of a tag without any namespace component. * Returns the name of a tag without any namespace component.
*/ */
function nameInNamespace(s: string): string { function nameInNamespace(s: string): string {
const v = s.split(':', 2); const first = s.indexOf(':');
if (v.length === 2) return v[1]; if (first !== -1) {
return v[0]; return s.slice(first + 1);
}
return s;
} }
/** /**
@ -59,7 +71,7 @@ export class LocalAutocompleter {
/** /**
* Get a tag's name and its associations given a byte location inside the file. * Get a tag's name and its associations given a byte location inside the file.
*/ */
getTagFromLocation(location: number): [string, number[]] { private getTagFromLocation(location: number, imageCount: number, aliasName?: string): Result {
const nameLength = this.view.getUint8(location); const nameLength = this.view.getUint8(location);
const assnLength = this.view.getUint8(location + 1 + nameLength); const assnLength = this.view.getUint8(location + 1 + nameLength);
@ -70,29 +82,29 @@ export class LocalAutocompleter {
associations.push(this.view.getUint32(location + 1 + nameLength + 1 + i * 4, true)); associations.push(this.view.getUint32(location + 1 + nameLength + 1 + i * 4, true));
} }
return [name, associations]; return { aliasName: aliasName || name, name, imageCount, associations };
} }
/** /**
* Get a Result object as the ith tag inside the file. * Get a Result object as the ith tag inside the file.
*/ */
getResultAt(i: number): [string, Result] { private getResultAt(i: number, aliasName?: string): Result {
const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true); const tagLocation = this.view.getUint32(this.referenceStart + i * 8, true);
const imageCount = this.view.getInt32(this.referenceStart + i * 8 + 4, true); const imageCount = this.view.getInt32(this.referenceStart + i * 8 + 4, true);
const [name, associations] = this.getTagFromLocation(nameLocation); const result = this.getTagFromLocation(tagLocation, imageCount, aliasName);
if (imageCount < 0) { if (imageCount < 0) {
// This is actually an alias, so follow it // This is actually an alias, so follow it
return [name, this.getResultAt(-imageCount - 1)[1]]; return this.getResultAt(-imageCount - 1, aliasName || result.name);
} }
return [name, { name, imageCount, associations }]; return result;
} }
/** /**
* Get a Result object as the ith tag inside the file, secondary ordering. * Get a Result object as the ith tag inside the file, secondary ordering.
*/ */
getSecondaryResultAt(i: number): [string, Result] { private getSecondaryResultAt(i: number): Result {
const referenceIndex = this.view.getUint32(this.secondaryStart + i * 4, true); const referenceIndex = this.view.getUint32(this.secondaryStart + i * 4, true);
return this.getResultAt(referenceIndex); return this.getResultAt(referenceIndex);
} }
@ -100,23 +112,22 @@ export class LocalAutocompleter {
/** /**
* Perform a binary search to fetch all results matching a condition. * Perform a binary search to fetch all results matching a condition.
*/ */
scanResults( private scanResults(
getResult: (i: number) => [string, Result], getResult: (i: number) => Result,
compare: (name: string) => number, compare: (name: string) => number,
results: Record<string, Result>, results: UniqueHeap<Result>,
hiddenTags: Set<number>,
) { ) {
const unfilter = store.get('unfilter_tag_suggestions'); const filter = !store.get('unfilter_tag_suggestions');
let min = 0; let min = 0;
let max = this.numTags; let max = this.numTags;
const hiddenTags = window.booru.hiddenTagList;
while (min < max - 1) { while (min < max - 1) {
const med = (min + (max - min) / 2) | 0; const med = min + (((max - min) / 2) | 0);
const sortKey = getResult(med)[0]; const result = getResult(med);
if (compare(sortKey) >= 0) { if (compare(result.aliasName) >= 0) {
// too large, go left // too large, go left
max = med; max = med;
} else { } else {
@ -126,40 +137,47 @@ export class LocalAutocompleter {
} }
// Scan forward until no more matches occur // Scan forward until no more matches occur
while (min < this.numTags - 1) { outer: while (min < this.numTags - 1) {
const [sortKey, result] = getResult(++min); const result = getResult(++min);
if (compare(sortKey) !== 0) {
if (compare(result.aliasName) !== 0) {
break; break;
} }
// Add if not filtering or no associations are filtered // Check if any associations are filtered
if (unfilter || hiddenTags.findIndex(ht => result.associations.includes(ht)) === -1) { if (filter) {
results[result.name] = result; for (const association of result.associations) {
if (hiddenTags.has(association)) {
continue outer;
} }
} }
} }
// Nothing was filtered, so add
results.append(result);
}
}
/** /**
* Find the top k results by image count which match the given string prefix. * Find the top k results by image count which match the given string prefix.
*/ */
topK(prefix: string, k: number): Result[] { matchPrefix(prefix: string): UniqueHeap<Result> {
const results: Record<string, Result> = {}; const results = new UniqueHeap<Result>(compareResult, 'name');
if (prefix === '') { if (prefix === '') {
return []; return results;
} }
const hiddenTags = new Set(window.booru.hiddenTagList);
// Find normally, in full name-sorted order // Find normally, in full name-sorted order
const prefixMatch = (name: string) => strcmp(name.slice(0, prefix.length), prefix); const prefixMatch = (name: string) => strcmp(name.slice(0, prefix.length), prefix);
this.scanResults(this.getResultAt.bind(this), prefixMatch, results); this.scanResults(this.getResultAt.bind(this), prefixMatch, results, hiddenTags);
// Find in secondary order // Find in secondary order
const namespaceMatch = (name: string) => strcmp(nameInNamespace(name).slice(0, prefix.length), prefix); const namespaceMatch = (name: string) => strcmp(nameInNamespace(name).slice(0, prefix.length), prefix);
this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results); this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results, hiddenTags);
// Sort results by image count return results;
const sorted = Object.values(results).sort((a, b) => b.imageCount - a.imageCount);
return sorted.slice(0, k);
} }
} }

View file

@ -0,0 +1,96 @@
/**
 * Ordering predicate used by `UniqueHeap`.
 *
 * Must return `true` when `a` ranks strictly below `b`, i.e. when `b` should be
 * produced before `a`.
 */
export type Compare<T> = (a: T, b: T) => boolean;

/**
 * A collection of objects, unique by one property, that can be read back in
 * descending order (largest first) according to a caller-supplied comparison.
 *
 * Values are only pushed onto a plain array when appended; the binary max-heap
 * is built lazily each time `results()` is iterated, so that a caller asking
 * for a handful of top results does not pay for sorting every value.
 */
export class UniqueHeap<T extends object> {
  /** Keys of every value accepted so far, used to reject duplicates. */
  private keys: Set<unknown>;
  /** Backing storage; reordered in place while results are being extracted. */
  private values: T[];
  /** Name of the property whose value identifies a duplicate. */
  private keyName: keyof T;
  /** Ordering predicate, see `Compare`. */
  private compare: Compare<T>;

  /**
   * @param compare - Returns `true` when its first argument ranks below its second.
   * @param keyName - Property used to decide whether two values are duplicates.
   */
  constructor(compare: Compare<T>, keyName: keyof T) {
    this.keys = new Set();
    this.values = [];
    this.keyName = keyName;
    this.compare = compare;
  }

  /**
   * Adds `value` unless a value with the same key was appended before.
   * The first value seen for a key wins; later ones are silently dropped.
   */
  append(value: T) {
    const key = value[this.keyName];

    if (!this.keys.has(key)) {
      this.keys.add(key);
      this.values.push(value);
    }
  }

  /**
   * Returns up to `k` of the largest values, largest first.
   *
   * May be called repeatedly; every call sees all appended values.
   */
  topK(k: number): T[] {
    const output: T[] = [];

    // Nothing requested: skip building the heap altogether.
    if (k <= 0) {
      return output;
    }

    for (const result of this.results()) {
      output.push(result);

      // Stop as soon as we have enough, so no extra extraction is performed.
      if (output.length >= k) {
        break;
      }
    }

    return output;
  }

  /**
   * Lazily yields every value from largest to smallest.
   *
   * The backing array is reordered in place while iterating, but always
   * remains a permutation of the appended values, so the generator can be
   * abandoned early and a fresh traversal started at any time. Do not append
   * values or run two traversals at once while one is in progress.
   */
  *results(): Generator<T, void, void> {
    const { values } = this;
    const length = values.length;

    // Build the heap bottom-up, starting from the last node that has a child.
    for (let i = (length >> 1) - 1; i >= 0; i--) {
      this.heapify(length, i);
    }

    // Extract values one by one, shrinking the heap from the back.
    for (let end = length - 1; end >= 0; end--) {
      // Top value is the largest of the remaining heap.
      const top = values[0];
      yield top;

      // Genuinely swap it with the last heap element. Merely overwriting the
      // root would drop `top` from the array and duplicate the tail element,
      // corrupting every later traversal.
      values[0] = values[end];
      values[end] = top;

      // Restore the heap property over the remaining `end` elements.
      this.heapify(end, 0);
    }
  }

  /**
   * Sifts the element at `initialIndex` down until the subtree rooted there
   * satisfies the max-heap property, considering only the first `length`
   * elements of the backing array.
   */
  private heapify(length: number, initialIndex: number) {
    const { compare, values } = this;
    let i = initialIndex;

    while (true) {
      const left = 2 * i + 1;
      const right = 2 * i + 2;
      let largest = i;

      if (left < length && compare(values[largest], values[left])) {
        // Left child is in-bounds and larger than parent.
        largest = left;
      }

      if (right < length && compare(values[largest], values[right])) {
        // Right child is in-bounds and larger than parent or left child.
        largest = right;
      }

      if (largest === i) {
        // Largest value was already the parent. Done.
        return;
      }

      // Swap parent with the larger child.
      const temp = values[i];
      values[i] = values[largest];
      values[largest] = temp;

      // Repair the subtree that previously contained the largest element.
      i = largest;
    }
  }
}