mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-01-19 14:17:59 +01:00
Increase memory efficiency of local autocomplete
This commit is contained in:
parent
70145f3926
commit
6e64e4b6f0
6 changed files with 239 additions and 52 deletions
|
@ -125,7 +125,7 @@ export default tsEslint.config(
|
|||
'no-irregular-whitespace': 2,
|
||||
'no-iterator': 2,
|
||||
'no-label-var': 2,
|
||||
'no-labels': 2,
|
||||
'no-labels': [2, { allowSwitch: true, allowLoop: true }],
|
||||
'no-lone-blocks': 2,
|
||||
'no-lonely-if': 0,
|
||||
'no-loop-func': 2,
|
||||
|
|
|
@ -237,7 +237,8 @@ function listenAutocomplete() {
|
|||
}
|
||||
|
||||
const suggestions = localAc
|
||||
.topK(originalTerm, suggestionsCount)
|
||||
.matchPrefix(originalTerm)
|
||||
.topK(suggestionsCount)
|
||||
.map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name }));
|
||||
|
||||
if (suggestions.length) {
|
||||
|
|
|
@ -58,42 +58,44 @@ describe('Local Autocompleter', () => {
|
|||
});
|
||||
|
||||
it('should return suggestions for exact tag name match', () => {
|
||||
const result = localAc.topK('safe', defaultK);
|
||||
expect(result).toEqual([expect.objectContaining({ name: 'safe', imageCount: 6 })]);
|
||||
const result = localAc.matchPrefix('safe').topK(defaultK);
|
||||
expect(result).toEqual([expect.objectContaining({ aliasName: 'safe', name: 'safe', imageCount: 6 })]);
|
||||
});
|
||||
|
||||
it('should return suggestion for original tag when passed an alias', () => {
|
||||
const result = localAc.topK('flowers', defaultK);
|
||||
expect(result).toEqual([expect.objectContaining({ name: 'flower', imageCount: 1 })]);
|
||||
const result = localAc.matchPrefix('flowers').topK(defaultK);
|
||||
expect(result).toEqual([expect.objectContaining({ aliasName: 'flowers', name: 'flower', imageCount: 1 })]);
|
||||
});
|
||||
|
||||
it('should return suggestions sorted by image count', () => {
|
||||
const result = localAc.topK(termStem, defaultK);
|
||||
const result = localAc.matchPrefix(termStem).topK(defaultK);
|
||||
expect(result).toEqual([
|
||||
expect.objectContaining({ name: 'forest', imageCount: 3 }),
|
||||
expect.objectContaining({ name: 'fog', imageCount: 1 }),
|
||||
expect.objectContaining({ name: 'force field', imageCount: 1 }),
|
||||
expect.objectContaining({ aliasName: 'forest', name: 'forest', imageCount: 3 }),
|
||||
expect.objectContaining({ aliasName: 'fog', name: 'fog', imageCount: 1 }),
|
||||
expect.objectContaining({ aliasName: 'force field', name: 'force field', imageCount: 1 }),
|
||||
]);
|
||||
});
|
||||
|
||||
it('should return namespaced suggestions without including namespace', () => {
|
||||
const result = localAc.topK('test', defaultK);
|
||||
expect(result).toEqual([expect.objectContaining({ name: 'artist:test', imageCount: 1 })]);
|
||||
const result = localAc.matchPrefix('test').topK(defaultK);
|
||||
expect(result).toEqual([
|
||||
expect.objectContaining({ aliasName: 'artist:test', name: 'artist:test', imageCount: 1 }),
|
||||
]);
|
||||
});
|
||||
|
||||
it('should return only the required number of suggestions', () => {
|
||||
const result = localAc.topK(termStem, 1);
|
||||
expect(result).toEqual([expect.objectContaining({ name: 'forest', imageCount: 3 })]);
|
||||
const result = localAc.matchPrefix(termStem).topK(1);
|
||||
expect(result).toEqual([expect.objectContaining({ aliasName: 'forest', name: 'forest', imageCount: 3 })]);
|
||||
});
|
||||
|
||||
it('should NOT return suggestions associated with hidden tags', () => {
|
||||
window.booru.hiddenTagList = [1];
|
||||
const result = localAc.topK(termStem, defaultK);
|
||||
const result = localAc.matchPrefix(termStem).topK(defaultK);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should return empty array for empty prefix', () => {
|
||||
const result = localAc.topK('', defaultK);
|
||||
const result = localAc.matchPrefix('').topK(defaultK);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
|
70
assets/js/utils/__tests__/unique-heap.spec.ts
Normal file
70
assets/js/utils/__tests__/unique-heap.spec.ts
Normal file
|
@ -0,0 +1,70 @@
|
|||
import { UniqueHeap } from '../unique-heap';
|
||||
|
||||
describe('Unique Heap', () => {
  interface Result {
    name: string;
  }

  // Ordering used by every heap in this suite: `a` ranks below `b` when its
  // name sorts earlier, so extraction order is reverse-alphabetical.
  const compare = (a: Result, b: Result): boolean => a.name < b.name;

  // Each test starts from a fresh heap keyed on `name`.
  const makeHeap = () => new UniqueHeap<Result>(compare, 'name');

  test('it should return no results when empty', () => {
    const emptyHeap = makeHeap();

    expect(emptyHeap.topK(5)).toEqual([]);
  });

  test("doesn't insert duplicate results", () => {
    const heap = makeHeap();

    // Two distinct objects sharing the same key; only the first may be kept.
    for (let attempt = 0; attempt < 2; attempt++) {
      heap.append({ name: 'name' });
    }

    const top = heap.topK(2);

    expect(top).toEqual([expect.objectContaining({ name: 'name' })]);
  });

  test('it should return results in reverse sorted order', () => {
    const heap = makeHeap();

    // Greek letter names, deliberately not in alphabetical order.
    const greekLetters = [
      'alpha',
      'beta',
      'gamma',
      'delta',
      'epsilon',
      'zeta',
      'eta',
      'theta',
      'iota',
      'kappa',
      'lambda',
      'mu',
      'nu',
      'xi',
      'omicron',
      'pi',
      'rho',
      'sigma',
      'tau',
      'upsilon',
      'phi',
      'chi',
      'psi',
      'omega',
    ];

    greekLetters.forEach(name => {
      heap.append({ name });
    });

    // The five alphabetically-last names, greatest first.
    const expected = ['zeta', 'xi', 'upsilon', 'theta', 'tau'].map(name => expect.objectContaining({ name }));

    expect(heap.topK(5)).toEqual(expected);
  });
});
|
|
@ -1,12 +1,21 @@
|
|||
// Client-side tag completion.
|
||||
import { UniqueHeap } from './unique-heap';
|
||||
import store from './store';
|
||||
|
||||
interface Result {
|
||||
export interface Result {
|
||||
aliasName: string;
|
||||
name: string;
|
||||
imageCount: number;
|
||||
associations: number[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether Result a is considered less than Result b.
|
||||
*/
|
||||
function compareResult(a: Result, b: Result): boolean {
|
||||
return a.imageCount === b.imageCount ? a.name > b.name : a.imageCount < b.imageCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare two strings, C-style.
|
||||
*/
|
||||
|
@ -18,10 +27,13 @@ function strcmp(a: string, b: string): number {
|
|||
* Returns the name of a tag without any namespace component.
|
||||
*/
|
||||
function nameInNamespace(s: string): string {
|
||||
const v = s.split(':', 2);
|
||||
const first = s.indexOf(':');
|
||||
|
||||
if (v.length === 2) return v[1];
|
||||
return v[0];
|
||||
if (first !== -1) {
|
||||
return s.slice(first + 1);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -59,7 +71,7 @@ export class LocalAutocompleter {
|
|||
/**
|
||||
* Get a tag's name and its associations given a byte location inside the file.
|
||||
*/
|
||||
getTagFromLocation(location: number): [string, number[]] {
|
||||
private getTagFromLocation(location: number, imageCount: number, aliasName?: string): Result {
|
||||
const nameLength = this.view.getUint8(location);
|
||||
const assnLength = this.view.getUint8(location + 1 + nameLength);
|
||||
|
||||
|
@ -70,29 +82,29 @@ export class LocalAutocompleter {
|
|||
associations.push(this.view.getUint32(location + 1 + nameLength + 1 + i * 4, true));
|
||||
}
|
||||
|
||||
return [name, associations];
|
||||
return { aliasName: aliasName || name, name, imageCount, associations };
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a Result object as the ith tag inside the file.
|
||||
*/
|
||||
getResultAt(i: number): [string, Result] {
|
||||
const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true);
|
||||
private getResultAt(i: number, aliasName?: string): Result {
|
||||
const tagLocation = this.view.getUint32(this.referenceStart + i * 8, true);
|
||||
const imageCount = this.view.getInt32(this.referenceStart + i * 8 + 4, true);
|
||||
const [name, associations] = this.getTagFromLocation(nameLocation);
|
||||
const result = this.getTagFromLocation(tagLocation, imageCount, aliasName);
|
||||
|
||||
if (imageCount < 0) {
|
||||
// This is actually an alias, so follow it
|
||||
return [name, this.getResultAt(-imageCount - 1)[1]];
|
||||
return this.getResultAt(-imageCount - 1, aliasName || result.name);
|
||||
}
|
||||
|
||||
return [name, { name, imageCount, associations }];
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a Result object as the ith tag inside the file, secondary ordering.
|
||||
*/
|
||||
getSecondaryResultAt(i: number): [string, Result] {
|
||||
private getSecondaryResultAt(i: number): Result {
|
||||
const referenceIndex = this.view.getUint32(this.secondaryStart + i * 4, true);
|
||||
return this.getResultAt(referenceIndex);
|
||||
}
|
||||
|
@ -100,23 +112,22 @@ export class LocalAutocompleter {
|
|||
/**
|
||||
* Perform a binary search to fetch all results matching a condition.
|
||||
*/
|
||||
scanResults(
|
||||
getResult: (i: number) => [string, Result],
|
||||
private scanResults(
|
||||
getResult: (i: number) => Result,
|
||||
compare: (name: string) => number,
|
||||
results: Record<string, Result>,
|
||||
results: UniqueHeap<Result>,
|
||||
hiddenTags: Set<number>,
|
||||
) {
|
||||
const unfilter = store.get('unfilter_tag_suggestions');
|
||||
const filter = !store.get('unfilter_tag_suggestions');
|
||||
|
||||
let min = 0;
|
||||
let max = this.numTags;
|
||||
|
||||
const hiddenTags = window.booru.hiddenTagList;
|
||||
|
||||
while (min < max - 1) {
|
||||
const med = (min + (max - min) / 2) | 0;
|
||||
const sortKey = getResult(med)[0];
|
||||
const med = min + (((max - min) / 2) | 0);
|
||||
const result = getResult(med);
|
||||
|
||||
if (compare(sortKey) >= 0) {
|
||||
if (compare(result.aliasName) >= 0) {
|
||||
// too large, go left
|
||||
max = med;
|
||||
} else {
|
||||
|
@ -126,40 +137,47 @@ export class LocalAutocompleter {
|
|||
}
|
||||
|
||||
// Scan forward until no more matches occur
|
||||
while (min < this.numTags - 1) {
|
||||
const [sortKey, result] = getResult(++min);
|
||||
if (compare(sortKey) !== 0) {
|
||||
outer: while (min < this.numTags - 1) {
|
||||
const result = getResult(++min);
|
||||
|
||||
if (compare(result.aliasName) !== 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Add if not filtering or no associations are filtered
|
||||
if (unfilter || hiddenTags.findIndex(ht => result.associations.includes(ht)) === -1) {
|
||||
results[result.name] = result;
|
||||
// Check if any associations are filtered
|
||||
if (filter) {
|
||||
for (const association of result.associations) {
|
||||
if (hiddenTags.has(association)) {
|
||||
continue outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing was filtered, so add
|
||||
results.append(result);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the top k results by image count which match the given string prefix.
|
||||
*/
|
||||
topK(prefix: string, k: number): Result[] {
|
||||
const results: Record<string, Result> = {};
|
||||
matchPrefix(prefix: string): UniqueHeap<Result> {
|
||||
const results = new UniqueHeap<Result>(compareResult, 'name');
|
||||
|
||||
if (prefix === '') {
|
||||
return [];
|
||||
return results;
|
||||
}
|
||||
|
||||
const hiddenTags = new Set(window.booru.hiddenTagList);
|
||||
|
||||
// Find normally, in full name-sorted order
|
||||
const prefixMatch = (name: string) => strcmp(name.slice(0, prefix.length), prefix);
|
||||
this.scanResults(this.getResultAt.bind(this), prefixMatch, results);
|
||||
this.scanResults(this.getResultAt.bind(this), prefixMatch, results, hiddenTags);
|
||||
|
||||
// Find in secondary order
|
||||
const namespaceMatch = (name: string) => strcmp(nameInNamespace(name).slice(0, prefix.length), prefix);
|
||||
this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results);
|
||||
this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results, hiddenTags);
|
||||
|
||||
// Sort results by image count
|
||||
const sorted = Object.values(results).sort((a, b) => b.imageCount - a.imageCount);
|
||||
|
||||
return sorted.slice(0, k);
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
|
96
assets/js/utils/unique-heap.ts
Normal file
96
assets/js/utils/unique-heap.ts
Normal file
|
@ -0,0 +1,96 @@
|
|||
/**
 * Ordering predicate: returns true when `a` is considered less than `b`.
 */
export type Compare<T> = (a: T, b: T) => boolean;

/**
 * Collection of objects, unique by the value of one property, which can be
 * read back from greatest to least according to a caller-supplied comparison.
 *
 * Values are stored in a flat array as they are appended. The array is only
 * arranged into a binary max-heap when results are requested, and extraction
 * reorders the array in place without ever dropping an element, so results
 * may be requested any number of times.
 */
export class UniqueHeap<T extends object> {
  /** Key values already seen, used to reject duplicates. */
  private keys: Set<unknown>;
  /** Backing storage; always contains every accepted value exactly once. */
  private values: T[];
  /** Property of `T` whose value identifies duplicates. */
  private keyName: keyof T;
  /** Ordering predicate (`true` when the first argument ranks lower). */
  private compare: Compare<T>;

  /**
   * @param compare - Returns true when its first argument is less than its second.
   * @param keyName - Property whose value must be unique among stored values.
   */
  constructor(compare: Compare<T>, keyName: keyof T) {
    this.keys = new Set();
    this.values = [];
    this.keyName = keyName;
    this.compare = compare;
  }

  /**
   * Add a value unless another value with the same key was already added.
   * The first value appended for a given key wins; later ones are ignored.
   */
  append(value: T) {
    const key = value[this.keyName];

    if (!this.keys.has(key)) {
      this.keys.add(key);
      this.values.push(value);
    }
  }

  /**
   * Return up to `k` of the greatest values, greatest first.
   *
   * @param k - Maximum number of values to return.
   * @returns A new array with at most `k` values in descending order.
   */
  topK(k: number): T[] {
    // Create the output array.
    const output: T[] = [];

    for (const result of this.results()) {
      if (output.length >= k) {
        break;
      }

      output.push(result);
    }

    return output;
  }

  /**
   * Lazily yield every stored value from greatest to least.
   *
   * The backing array is reordered in place while iterating, but every value
   * stays in it, so the generator can be abandoned early or restarted later
   * without losing data. Values appended after iteration has begun are not
   * part of that iteration.
   */
  *results(): Generator<T, void, void> {
    const { values } = this;
    const length = values.length;

    // Build the heap.
    for (let i = (length >> 1) - 1; i >= 0; i--) {
      this.heapify(length, i);
    }

    // Begin extracting values.
    for (let i = 0; i < length; i++) {
      // Top value is the largest.
      const top = values[0];
      yield top;

      // Swap with the element at the end of the live heap region. This must
      // be a real swap: merely overwriting the root would drop `top` from
      // the array and duplicate the tail element, corrupting later calls.
      const lastIndex = length - i - 1;
      values[0] = values[lastIndex];
      values[lastIndex] = top;

      // Restore top value being the largest within the shrunken heap.
      this.heapify(lastIndex, 0);
    }
  }

  /**
   * Sift the element at `initialIndex` down until neither child is greater
   * than it, considering only the first `length` elements of the array.
   */
  private heapify(length: number, initialIndex: number) {
    const { compare, values } = this;
    let i = initialIndex;

    while (true) {
      const left = 2 * i + 1;
      const right = 2 * i + 2;
      let largest = i;

      if (left < length && compare(values[largest], values[left])) {
        // Left child is in-bounds and larger than parent. Swap with left.
        largest = left;
      }

      if (right < length && compare(values[largest], values[right])) {
        // Right child is in-bounds and larger than parent or left. Swap with right.
        largest = right;
      }

      if (largest === i) {
        // Largest value was already the parent. Done.
        return;
      }

      // Swap.
      const temp = values[i];
      values[i] = values[largest];
      values[largest] = temp;

      // Repair the subtree previously containing the largest element.
      i = largest;
    }
  }
}
|
Loading…
Reference in a new issue