// Source: philomena/assets/js/utils/local-autocompleter.ts (164 lines, 4.8 KiB, TypeScript)

// Client-side tag completion.
import store from './store';
/**
 * A single autocomplete suggestion.
 */
interface Result {
  /** Name of the tag. */
  name: string;
  /** Image count stored for the tag; used to rank suggestions. */
  imageCount: number;
  /** Numeric associations stored with the tag (presumably tag IDs, checked against the hidden tag list — confirm against the generator). */
  associations: number[];
}
/**
 * Compare two strings, C-style.
 *
 * Uses the built-in `<` / `>` string operators, so ordering is by UTF-16 code unit.
 *
 * @param a - Left-hand string.
 * @param b - Right-hand string.
 * @returns -1 if `a` sorts before `b`, 1 if it sorts after, 0 if they are equal.
 */
function strcmp(a: string, b: string): number {
  if (a < b) return -1;
  if (a > b) return 1;
  return 0;
}
/**
 * Returns the name of a tag without any namespace component.
 *
 * The namespace is everything up to and including the first `:`. Any further
 * colons belong to the name itself, so `'oc:foo:bar'` yields `'foo:bar'`.
 *
 * @param s - Full tag name, with or without a namespace prefix.
 * @returns The part after the first `:`, or `s` unchanged when it has no `:`.
 */
function nameInNamespace(s: string): string {
  const separator = s.indexOf(':');

  // `split(':', 2)` would silently drop everything after a second colon, so slice instead.
  return separator === -1 ? s : s.slice(separator + 1);
}
/**
 * See lib/philomena/autocomplete.ex for binary structure details.
 *
 * A binary blob is used to avoid the creation of large amounts of garbage on
 * the JS heap and speed up the execution of the search.
 *
 * Layout as read by this class (all integers little-endian):
 * - tag records: u8 name length, name bytes, u8 association count, then that
 *   many u32 associations
 * - reference table at `referenceStart`: one 8-byte entry per tag
 *   (u32 record location, i32 image count); a negative image count marks an
 *   alias of the entry at index `-imageCount - 1`
 * - secondary table directly after the reference table: one u32 reference
 *   index per tag
 * - 12-byte footer: u32 format version, u32 reference table start, u32 tag count
 */
export class LocalAutocompleter {
  private readonly data: Uint8Array;
  private readonly view: DataView;
  private readonly decoder: TextDecoder;
  private readonly numTags: number;
  private readonly referenceStart: number;
  private readonly secondaryStart: number;
  private readonly formatVersion: number;

  /**
   * Build a new local autocompleter.
   *
   * @param backingStore - The complete autocomplete binary blob.
   * @throws Error if the format version in the footer is not 2.
   * @throws RangeError (from `DataView`) if the blob is too short to hold the footer.
   */
  constructor(backingStore: ArrayBuffer) {
    const end = backingStore.byteLength;

    this.data = new Uint8Array(backingStore);
    this.view = new DataView(backingStore);
    this.decoder = new TextDecoder();
    this.numTags = this.view.getUint32(end - 4, true);
    this.referenceStart = this.view.getUint32(end - 8, true);
    // The secondary index follows the reference table (8 bytes per tag).
    this.secondaryStart = this.referenceStart + 8 * this.numTags;
    this.formatVersion = this.view.getUint32(end - 12, true);

    if (this.formatVersion !== 2) {
      throw new Error('Incompatible autocomplete format version');
    }
  }

  /**
   * Get a tag's name and its associations given a byte location inside the file.
   *
   * @param location - Byte offset of the tag record.
   * @returns A `[name, associations]` pair.
   */
  getTagFromLocation(location: number): [string, number[]] {
    const nameLength = this.view.getUint8(location);
    const nameStart = location + 1;
    const assnLength = this.view.getUint8(nameStart + nameLength);
    const assnStart = nameStart + nameLength + 1;

    // subarray() creates a view rather than a copy, so no name bytes are duplicated.
    const name = this.decoder.decode(this.data.subarray(nameStart, nameStart + nameLength));

    const associations: number[] = [];
    for (let i = 0; i < assnLength; i++) {
      associations.push(this.view.getUint32(assnStart + i * 4, true));
    }

    return [name, associations];
  }

  /**
   * Get a Result object as the ith tag inside the file.
   *
   * @param i - Index into the reference table.
   * @returns A `[sortKey, result]` pair. The sort key is always the name stored
   *   at index `i`; for an alias, the result is that of the aliased tag.
   */
  getResultAt(i: number): [string, Result] {
    const entry = this.referenceStart + i * 8;
    const nameLocation = this.view.getUint32(entry, true);
    const imageCount = this.view.getInt32(entry + 4, true);
    const [name, associations] = this.getTagFromLocation(nameLocation);

    if (imageCount < 0) {
      // This is actually an alias, so follow it
      return [name, this.getResultAt(-imageCount - 1)[1]];
    }

    return [name, { name, imageCount, associations }];
  }

  /**
   * Get a Result object as the ith tag inside the file, secondary ordering.
   *
   * @param i - Index into the secondary table.
   * @returns The same pair `getResultAt` returns for the referenced entry.
   */
  getSecondaryResultAt(i: number): [string, Result] {
    const referenceIndex = this.view.getUint32(this.secondaryStart + i * 4, true);

    return this.getResultAt(referenceIndex);
  }

  /**
   * Perform a binary search to fetch all results matching a condition.
   *
   * @param getResult - Accessor returning the `[sortKey, result]` pair at an
   *   index; its ordering must make `compare` non-decreasing over the sort keys.
   * @param compare - Returns a negative number for keys sorting before the
   *   wanted range, 0 for matching keys, and a positive number for keys after it.
   * @param results - Output map keyed by result name; matches are added in place.
   */
  scanResults(getResult: (i: number) => [string, Result], compare: (name: string) => number, results: Record<string, Result>): void {
    const unfilter = store.get('unfilter_tag_suggestions');
    const hiddenTags = window.booru.hiddenTagList;

    // Lower bound: find the first index whose key does not sort before the target.
    let low = 0;
    let high = this.numTags;

    while (low < high) {
      const mid = low + ((high - low) >>> 1);

      if (compare(getResult(mid)[0]) < 0) {
        // too small, go right
        low = mid + 1;
      }
      else {
        // matching or too large, go left
        high = mid;
      }
    }

    // Scan forward from the bound itself (so a match at index 0 is included)
    // until no more matches occur.
    for (let i = low; i < this.numTags; i++) {
      const [sortKey, result] = getResult(i);

      if (compare(sortKey) !== 0) {
        break;
      }

      // Add if not filtering or no associations are filtered
      if (unfilter || !hiddenTags.some(ht => result.associations.includes(ht))) {
        results[result.name] = result;
      }
    }
  }

  /**
   * Find the top k results by image count which match the given string prefix.
   *
   * Both the full-name ordering and the secondary (namespace-stripped)
   * ordering are searched; results are keyed by name, so a tag found by both
   * searches appears only once.
   *
   * @param prefix - Prefix to match; an empty prefix yields no results.
   * @param k - Maximum number of results to return.
   * @returns Up to `k` results, highest image count first.
   */
  topK(prefix: string, k: number): Result[] {
    const results: Record<string, Result> = {};

    if (prefix === '') {
      return [];
    }

    // Find normally, in full name-sorted order
    const prefixMatch = (name: string) => strcmp(name.slice(0, prefix.length), prefix);
    this.scanResults(this.getResultAt.bind(this), prefixMatch, results);

    // Find in secondary order
    const namespaceMatch = (name: string) => strcmp(nameInNamespace(name).slice(0, prefix.length), prefix);
    this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results);

    // Sort results by image count
    const sorted = Object.values(results).sort((a, b) => b.imageCount - a.imageCount);

    return sorted.slice(0, k);
  }
}