From c0ddf55b48a1348bb88337d70684c81d7732b94e Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 28 May 2024 22:54:45 -0400 Subject: [PATCH] Add lex stage with intermediate result generation --- assets/js/booru.js | 2 +- assets/js/match_query.ts | 8 +-- assets/js/query/__tests__/lex.spec.ts | 4 +- assets/js/query/lex.ts | 74 +++++++++++++++++++-------- assets/js/query/parse.ts | 8 +-- assets/js/utils/__tests__/tag.spec.ts | 2 +- 6 files changed, 68 insertions(+), 30 deletions(-) diff --git a/assets/js/booru.js b/assets/js/booru.js index 03d0dff8..1f963dbe 100644 --- a/assets/js/booru.js +++ b/assets/js/booru.js @@ -1,5 +1,5 @@ import { $ } from './utils/dom'; -import parseSearch from './match_query'; +import { parseSearch } from './match_query'; import store from './utils/store'; /** diff --git a/assets/js/match_query.ts b/assets/js/match_query.ts index 5ddafea3..d6142b94 100644 --- a/assets/js/match_query.ts +++ b/assets/js/match_query.ts @@ -1,5 +1,5 @@ import { defaultMatcher } from './query/matcher'; -import { generateLexArray } from './query/lex'; +import { generateLexArray, generateLexResult } from './query/lex'; import { parseTokens } from './query/parse'; import { getAstMatcherForTerm } from './query/term'; @@ -7,9 +7,11 @@ function parseWithDefaultMatcher(term: string, fuzz: number) { return getAstMatcherForTerm(term, fuzz, defaultMatcher); } -function parseSearch(query: string) { +export function parseSearch(query: string) { const tokens = generateLexArray(query, parseWithDefaultMatcher); return parseTokens(tokens); } -export default parseSearch; +export function getTermContexts(query: string) { + return generateLexResult(query, parseWithDefaultMatcher).termContexts; +} diff --git a/assets/js/query/__tests__/lex.spec.ts b/assets/js/query/__tests__/lex.spec.ts index 427f4dfb..19516a4a 100644 --- a/assets/js/query/__tests__/lex.spec.ts +++ b/assets/js/query/__tests__/lex.spec.ts @@ -170,8 +170,8 @@ describe('Lexical analysis', () => { expect(array).toEqual([noMatch, noMatch, 'or_op', noMatch, 'or_op', noMatch, 'or_op']); }); - it('should throw exception on mismatched parentheses', () => { + it('should mark error on mismatched parentheses', () => { expect(() => generateLexArray('(safe OR solo AND fluttershy', parseTerm)).toThrow('Mismatched parentheses.'); - // expect(() => generateLexArray(')bad', parseTerm)).toThrow('Mismatched parentheses.'); + // expect(() => generateLexArray(')bad', parseTerm).error).toThrow('Mismatched parentheses.'); }); }); diff --git a/assets/js/query/lex.ts b/assets/js/query/lex.ts index d234b1c8..2c950bd1 100644 --- a/assets/js/query/lex.ts +++ b/assets/js/query/lex.ts @@ -22,10 +22,18 @@ const tokenList: Token[] = [ export type ParseTerm = (term: string, fuzz: number, boost: number) => AstMatcher; -export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList { +export type Range = [number, number]; +export type TermContext = [Range, string]; + +export interface LexResult { + tokenList: TokenList, + termContexts: TermContext[], + error: ParseError | null +} + +export function generateLexResult(searchStr: string, parseTerm: ParseTerm): LexResult { const opQueue: string[] = [], - groupNegate: boolean[] = [], - tokenStack: TokenList = []; + groupNegate: boolean[] = []; let searchTerm: string | null = null; let boostFuzzStr = ''; @@ -35,10 +43,25 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token let fuzz = 0; let lparenCtr = 0; - const pushTerm = () => { + let termIndex = 0; + let index = 0; + + const ret: LexResult = { + tokenList: [], + termContexts: [], + error: null + }; + + const beginTerm = (token: string) => { + searchTerm = token; + termIndex = index; + }; + + const endTerm = () => { if (searchTerm !== null) { // Push to stack. - tokenStack.push(parseTerm(searchTerm, fuzz, boost)); + ret.tokenList.push(parseTerm(searchTerm, fuzz, boost)); + ret.termContexts.push([[termIndex, termIndex + searchTerm.length], searchTerm]); // Reset term and options data. boost = 1; fuzz = 0; @@ -48,7 +71,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token } if (negate) { - tokenStack.push('not_op'); + ret.tokenList.push('not_op'); negate = false; } }; @@ -64,19 +87,19 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token const token = match[0]; if (searchTerm !== null && (['and_op', 'or_op'].indexOf(tokenName) !== -1 || tokenName === 'rparen' && lparenCtr === 0)) { - pushTerm(); + endTerm(); } switch (tokenName) { case 'and_op': while (opQueue[0] === 'and_op') { - tokenStack.push(assertNotUndefined(opQueue.shift())); + ret.tokenList.push(assertNotUndefined(opQueue.shift())); } opQueue.unshift('and_op'); break; case 'or_op': while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') { - tokenStack.push(assertNotUndefined(opQueue.shift())); + ret.tokenList.push(assertNotUndefined(opQueue.shift())); } opQueue.unshift('or_op'); break; @@ -113,10 +136,10 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token if (op === 'lparen') { break; } - tokenStack.push(op); + ret.tokenList.push(op); } if (groupNegate.length > 0 && groupNegate.pop()) { - tokenStack.push('not_op'); + ret.tokenList.push('not_op'); } } break; @@ -128,7 +151,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token boostFuzzStr += token; } else { - searchTerm = token; + beginTerm(token); } break; case 'boost': @@ -137,7 +160,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token boostFuzzStr += token; } else { - searchTerm = token; + beginTerm(token); } break; case 'quoted_lit': @@ -145,7 +168,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token searchTerm += token; } else { - searchTerm = token; + beginTerm(token); } break; case 'word': @@ -159,7 +182,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token searchTerm += token; } else { - searchTerm = token; + beginTerm(token); } break; default: @@ -171,6 +194,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token // Truncate string and restart the token tests. localSearchStr = localSearchStr.substring(token.length); + index += token.length; // Break since we have found a match. break; @@ -178,14 +202,24 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token } // Append final tokens to the stack. - pushTerm(); + endTerm(); if (opQueue.indexOf('rparen') !== -1 || opQueue.indexOf('lparen') !== -1) { - throw new ParseError('Mismatched parentheses.'); + ret.error = new ParseError('Mismatched parentheses.'); } - // Concatenatte remaining operators to the token stack. - tokenStack.push(...opQueue); + // Concatenate remaining operators to the token stack. + ret.tokenList.push(...opQueue); - return tokenStack; + return ret; +} + +export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList { + const ret = generateLexResult(searchStr, parseTerm); + + if (ret.error) { + throw ret.error; + } + + return ret.tokenList; } diff --git a/assets/js/query/parse.ts b/assets/js/query/parse.ts index f5a09fcc..fea7659b 100644 --- a/assets/js/query/parse.ts +++ b/assets/js/query/parse.ts @@ -4,9 +4,11 @@ import { AstMatcher, ParseError, TokenList } from './types'; export function parseTokens(lexicalArray: TokenList): AstMatcher { const operandStack: AstMatcher[] = []; - lexicalArray.forEach((token, i) => { + for (let i = 0; i < lexicalArray.length; i += 1) { + const token = lexicalArray[i]; + if (token === 'not_op') { - return; + continue; } let intermediate: AstMatcher; @@ -36,7 +38,7 @@ export function parseTokens(lexicalArray: TokenList): AstMatcher { else { operandStack.push(intermediate); } - }); + } if (operandStack.length > 1) { throw new ParseError('Missing operator.'); diff --git a/assets/js/utils/__tests__/tag.spec.ts b/assets/js/utils/__tests__/tag.spec.ts index 44bc565f..61a196b8 100644 --- a/assets/js/utils/__tests__/tag.spec.ts +++ b/assets/js/utils/__tests__/tag.spec.ts @@ -1,7 +1,7 @@ import { displayTags, getHiddenTags, getSpoileredTags, imageHitsComplex, imageHitsTags, TagData } from '../tag'; import { mockStorage } from '../../../test/mock-storage'; import { getRandomArrayItem } from '../../../test/randomness'; -import parseSearch from '../../match_query'; +import { parseSearch } from '../../match_query'; import { SpoilerType } from '../../../types/booru-object'; describe('Tag utilities', () => {