Increase memory efficiency of local autocomplete

This commit is contained in:
Liam 2024-08-13 17:16:47 -04:00
parent 70145f3926
commit 6e64e4b6f0
6 changed files with 239 additions and 52 deletions

View file

@ -125,7 +125,7 @@ export default tsEslint.config(
'no-irregular-whitespace': 2,
'no-iterator': 2,
'no-label-var': 2,
'no-labels': 2,
'no-labels': [2, { allowSwitch: true, allowLoop: true }],
'no-lone-blocks': 2,
'no-lonely-if': 0,
'no-loop-func': 2,

View file

@ -237,7 +237,8 @@ function listenAutocomplete() {
}
const suggestions = localAc
.topK(originalTerm, suggestionsCount)
.matchPrefix(originalTerm)
.topK(suggestionsCount)
.map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name }));
if (suggestions.length) {

View file

@ -58,42 +58,44 @@ describe('Local Autocompleter', () => {
});
it('should return suggestions for exact tag name match', () => {
const result = localAc.topK('safe', defaultK);
expect(result).toEqual([expect.objectContaining({ name: 'safe', imageCount: 6 })]);
const result = localAc.matchPrefix('safe').topK(defaultK);
expect(result).toEqual([expect.objectContaining({ aliasName: 'safe', name: 'safe', imageCount: 6 })]);
});
it('should return suggestion for original tag when passed an alias', () => {
const result = localAc.topK('flowers', defaultK);
expect(result).toEqual([expect.objectContaining({ name: 'flower', imageCount: 1 })]);
const result = localAc.matchPrefix('flowers').topK(defaultK);
expect(result).toEqual([expect.objectContaining({ aliasName: 'flowers', name: 'flower', imageCount: 1 })]);
});
it('should return suggestions sorted by image count', () => {
const result = localAc.topK(termStem, defaultK);
const result = localAc.matchPrefix(termStem).topK(defaultK);
expect(result).toEqual([
expect.objectContaining({ name: 'forest', imageCount: 3 }),
expect.objectContaining({ name: 'fog', imageCount: 1 }),
expect.objectContaining({ name: 'force field', imageCount: 1 }),
expect.objectContaining({ aliasName: 'forest', name: 'forest', imageCount: 3 }),
expect.objectContaining({ aliasName: 'fog', name: 'fog', imageCount: 1 }),
expect.objectContaining({ aliasName: 'force field', name: 'force field', imageCount: 1 }),
]);
});
it('should return namespaced suggestions without including namespace', () => {
const result = localAc.topK('test', defaultK);
expect(result).toEqual([expect.objectContaining({ name: 'artist:test', imageCount: 1 })]);
const result = localAc.matchPrefix('test').topK(defaultK);
expect(result).toEqual([
expect.objectContaining({ aliasName: 'artist:test', name: 'artist:test', imageCount: 1 }),
]);
});
it('should return only the required number of suggestions', () => {
const result = localAc.topK(termStem, 1);
expect(result).toEqual([expect.objectContaining({ name: 'forest', imageCount: 3 })]);
const result = localAc.matchPrefix(termStem).topK(1);
expect(result).toEqual([expect.objectContaining({ aliasName: 'forest', name: 'forest', imageCount: 3 })]);
});
it('should NOT return suggestions associated with hidden tags', () => {
window.booru.hiddenTagList = [1];
const result = localAc.topK(termStem, defaultK);
const result = localAc.matchPrefix(termStem).topK(defaultK);
expect(result).toEqual([]);
});
it('should return empty array for empty prefix', () => {
const result = localAc.topK('', defaultK);
const result = localAc.matchPrefix('').topK(defaultK);
expect(result).toEqual([]);
});
});

View file

@ -0,0 +1,70 @@
import { UniqueHeap } from '../unique-heap';
describe('Unique Heap', () => {
  interface Result {
    name: string;
  }

  // Ordering predicate handed to the heap: `a` ranks below `b` when its name sorts first.
  const compare = (a: Result, b: Result): boolean => a.name < b.name;

  // Fresh heap keyed on `name` for every test case.
  const createHeap = () => new UniqueHeap<Result>(compare, 'name');

  // Matcher for a result carrying the given name.
  const named = (name: string) => expect.objectContaining({ name });

  test('it should return no results when empty', () => {
    expect(createHeap().topK(5)).toEqual([]);
  });

  test("doesn't insert duplicate results", () => {
    const heap = createHeap();

    // Append the same key twice; only one entry should come back.
    for (let attempt = 0; attempt < 2; attempt++) {
      heap.append({ name: 'name' });
    }

    expect(heap.topK(2)).toEqual([named('name')]);
  });

  test('it should return results in reverse sorted order', () => {
    const heap = createHeap();

    // Greek letter names in alphabet order, i.e. not sorted lexicographically.
    const names = [
      'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
      'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
      'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
      'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
    ];
    names.forEach(name => heap.append({ name }));

    // The five lexicographically greatest names, greatest first.
    const expectedNames = ['zeta', 'xi', 'upsilon', 'theta', 'tau'];

    expect(heap.topK(5)).toEqual(expectedNames.map(named));
  });
});

View file

@ -1,12 +1,21 @@
// Client-side tag completion.
import { UniqueHeap } from './unique-heap';
import store from './store';
interface Result {
export interface Result {
aliasName: string;
name: string;
imageCount: number;
associations: number[];
}
/**
 * Ordering predicate for results: reports whether `a` ranks below `b`.
 *
 * A lower image count ranks lower. When the counts are equal, the result whose
 * name sorts later ranks lower, so ties surface in ascending name order.
 */
function compareResult(a: Result, b: Result): boolean {
  if (a.imageCount !== b.imageCount) {
    return a.imageCount < b.imageCount;
  }

  return a.name > b.name;
}
/**
* Compare two strings, C-style.
*/
@ -18,10 +27,13 @@ function strcmp(a: string, b: string): number {
* Returns the name of a tag without any namespace component.
*/
function nameInNamespace(s: string): string {
const v = s.split(':', 2);
const first = s.indexOf(':');
if (v.length === 2) return v[1];
return v[0];
if (first !== -1) {
return s.slice(first + 1);
}
return s;
}
/**
@ -59,7 +71,7 @@ export class LocalAutocompleter {
/**
* Get a tag's name and its associations given a byte location inside the file.
*/
getTagFromLocation(location: number): [string, number[]] {
private getTagFromLocation(location: number, imageCount: number, aliasName?: string): Result {
const nameLength = this.view.getUint8(location);
const assnLength = this.view.getUint8(location + 1 + nameLength);
@ -70,29 +82,29 @@ export class LocalAutocompleter {
associations.push(this.view.getUint32(location + 1 + nameLength + 1 + i * 4, true));
}
return [name, associations];
return { aliasName: aliasName || name, name, imageCount, associations };
}
/**
* Get a Result object as the ith tag inside the file.
*/
getResultAt(i: number): [string, Result] {
const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true);
private getResultAt(i: number, aliasName?: string): Result {
const tagLocation = this.view.getUint32(this.referenceStart + i * 8, true);
const imageCount = this.view.getInt32(this.referenceStart + i * 8 + 4, true);
const [name, associations] = this.getTagFromLocation(nameLocation);
const result = this.getTagFromLocation(tagLocation, imageCount, aliasName);
if (imageCount < 0) {
// This is actually an alias, so follow it
return [name, this.getResultAt(-imageCount - 1)[1]];
return this.getResultAt(-imageCount - 1, aliasName || result.name);
}
return [name, { name, imageCount, associations }];
return result;
}
/**
* Get a Result object as the ith tag inside the file, secondary ordering.
*/
getSecondaryResultAt(i: number): [string, Result] {
private getSecondaryResultAt(i: number): Result {
const referenceIndex = this.view.getUint32(this.secondaryStart + i * 4, true);
return this.getResultAt(referenceIndex);
}
@ -100,23 +112,22 @@ export class LocalAutocompleter {
/**
* Perform a binary search to fetch all results matching a condition.
*/
scanResults(
getResult: (i: number) => [string, Result],
private scanResults(
getResult: (i: number) => Result,
compare: (name: string) => number,
results: Record<string, Result>,
results: UniqueHeap<Result>,
hiddenTags: Set<number>,
) {
const unfilter = store.get('unfilter_tag_suggestions');
const filter = !store.get('unfilter_tag_suggestions');
let min = 0;
let max = this.numTags;
const hiddenTags = window.booru.hiddenTagList;
while (min < max - 1) {
const med = (min + (max - min) / 2) | 0;
const sortKey = getResult(med)[0];
const med = min + (((max - min) / 2) | 0);
const result = getResult(med);
if (compare(sortKey) >= 0) {
if (compare(result.aliasName) >= 0) {
// too large, go left
max = med;
} else {
@ -126,40 +137,47 @@ export class LocalAutocompleter {
}
// Scan forward until no more matches occur
while (min < this.numTags - 1) {
const [sortKey, result] = getResult(++min);
if (compare(sortKey) !== 0) {
outer: while (min < this.numTags - 1) {
const result = getResult(++min);
if (compare(result.aliasName) !== 0) {
break;
}
// Add if not filtering or no associations are filtered
if (unfilter || hiddenTags.findIndex(ht => result.associations.includes(ht)) === -1) {
results[result.name] = result;
// Check if any associations are filtered
if (filter) {
for (const association of result.associations) {
if (hiddenTags.has(association)) {
continue outer;
}
}
}
// Nothing was filtered, so add
results.append(result);
}
}
/**
* Find every result matching the given string prefix, deduplicated by tag name; call topK(k) on the returned heap to get the k results with the highest image counts.
*/
topK(prefix: string, k: number): Result[] {
const results: Record<string, Result> = {};
matchPrefix(prefix: string): UniqueHeap<Result> {
const results = new UniqueHeap<Result>(compareResult, 'name');
if (prefix === '') {
return [];
return results;
}
const hiddenTags = new Set(window.booru.hiddenTagList);
// Find normally, in full name-sorted order
const prefixMatch = (name: string) => strcmp(name.slice(0, prefix.length), prefix);
this.scanResults(this.getResultAt.bind(this), prefixMatch, results);
this.scanResults(this.getResultAt.bind(this), prefixMatch, results, hiddenTags);
// Find in secondary order
const namespaceMatch = (name: string) => strcmp(nameInNamespace(name).slice(0, prefix.length), prefix);
this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results);
this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results, hiddenTags);
// Sort results by image count
const sorted = Object.values(results).sort((a, b) => b.imageCount - a.imageCount);
return sorted.slice(0, k);
return results;
}
}

View file

@ -0,0 +1,96 @@
/**
 * Ordering predicate: returns true when `a` ranks below `b`.
 */
export type Compare<T> = (a: T, b: T) => boolean;

/**
 * Collection of objects that are unique by one property and can be read back
 * from largest to smallest according to a caller-supplied ordering.
 *
 * Values are stored in a flat array. The binary max-heap is only built when
 * results are requested, and extraction keeps every value in the array, so
 * the same heap can be queried any number of times.
 */
export class UniqueHeap<T extends object> {
  /** Keys (values of `keyName`) that have already been appended. */
  private readonly keys: Set<unknown>;
  /** Backing storage; reordered in place while results are produced. */
  private readonly values: T[];
  /** Property whose value identifies duplicates. */
  private readonly keyName: keyof T;
  /** Ordering predicate; `compare(a, b)` is true when `a` ranks below `b`. */
  private readonly compare: Compare<T>;

  /**
   * @param compare - Returns true when its first argument ranks below its second.
   * @param keyName - Property used to detect duplicates.
   */
  constructor(compare: Compare<T>, keyName: keyof T) {
    this.keys = new Set();
    this.values = [];
    this.keyName = keyName;
    this.compare = compare;
  }

  /**
   * Adds a value unless another value with the same key was appended before.
   * The first value seen for a key wins; later ones are ignored.
   */
  append(value: T): void {
    const key = value[this.keyName];

    if (!this.keys.has(key)) {
      this.keys.add(key);
      this.values.push(value);
    }
  }

  /**
   * Returns up to `k` values, largest first.
   *
   * @param k - Maximum number of values to return; zero or less yields an empty array.
   */
  topK(k: number): T[] {
    const output: T[] = [];

    if (k <= 0) {
      return output;
    }

    for (const result of this.results()) {
      output.push(result);

      // Stop as soon as enough values are collected so the generator does not
      // restore the heap for an element that would be discarded anyway.
      if (output.length >= k) {
        break;
      }
    }

    return output;
  }

  /**
   * Lazily yields the values present when iteration starts, largest first.
   *
   * The backing array is reordered in place but never loses an element, so
   * abandoning the iteration early leaves the heap fully usable.
   */
  *results(): Generator<T, void, void> {
    const { values } = this;
    const length = values.length;

    // Build the heap bottom-up, starting from the last node that has a child.
    for (let i = (length >> 1) - 1; i >= 0; i--) {
      this.heapify(length, i);
    }

    // Extract values; `end` is the last index still inside the live heap.
    for (let end = length - 1; end >= 0; end--) {
      // Top value is the largest.
      const top = values[0];
      yield top;

      // Swap the extracted value with the last live element. It must be a real
      // swap: merely overwriting index 0 would drop the extracted value from
      // the array and corrupt every later query.
      values[0] = values[end];
      values[end] = top;

      // Restore the heap property on the shrunken live region.
      this.heapify(end, 0);
    }
  }

  /**
   * Sifts the element at `initialIndex` down until neither child ranks above
   * it, considering only the first `length` elements of the array.
   */
  private heapify(length: number, initialIndex: number): void {
    const { compare, values } = this;
    let i = initialIndex;

    while (true) {
      const left = 2 * i + 1;
      const right = 2 * i + 2;
      let largest = i;

      if (left < length && compare(values[largest], values[left])) {
        // Left child is in bounds and larger than the parent.
        largest = left;
      }

      if (right < length && compare(values[largest], values[right])) {
        // Right child is in bounds and larger than the parent or left child.
        largest = right;
      }

      if (largest === i) {
        // The parent already holds the largest value. Done.
        return;
      }

      // Swap the parent with its larger child.
      const temp = values[i];
      values[i] = values[largest];
      values[largest] = temp;

      // Continue repairing the subtree that received the smaller value.
      i = largest;
    }
  }
}