Add lex stage with intermediate result generation

Liam 2024-05-28 22:54:45 -04:00
parent 194b2686f6
commit c0ddf55b48
6 changed files with 68 additions and 30 deletions

View file

@@ -1,5 +1,5 @@
 import { $ } from './utils/dom';
-import parseSearch from './match_query';
+import { parseSearch } from './match_query';
 import store from './utils/store';
 
 /**

View file

@@ -1,5 +1,5 @@
 import { defaultMatcher } from './query/matcher';
-import { generateLexArray } from './query/lex';
+import { generateLexArray, generateLexResult } from './query/lex';
 import { parseTokens } from './query/parse';
 import { getAstMatcherForTerm } from './query/term';
@@ -7,9 +7,11 @@ function parseWithDefaultMatcher(term: string, fuzz: number) {
   return getAstMatcherForTerm(term, fuzz, defaultMatcher);
 }
 
-function parseSearch(query: string) {
+export function parseSearch(query: string) {
   const tokens = generateLexArray(query, parseWithDefaultMatcher);
   return parseTokens(tokens);
 }
 
-export default parseSearch;
+export function getTermContexts(query: string) {
+  return generateLexResult(query, parseWithDefaultMatcher).termContexts;
+}
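With both functions now named exports, a consumer might use them like this (a minimal sketch; the query string is illustrative, not from this commit):

import { parseSearch, getTermContexts } from './match_query';

// Build a matcher AST for evaluating objects against the query.
const matcher = parseSearch('safe AND solo');

// Get the source range of each term as [[start, end], term] tuples,
// e.g. for highlighting terms in a search input.
const contexts = getTermContexts('safe AND solo');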

View file

@@ -170,8 +170,8 @@ describe('Lexical analysis', () => {
     expect(array).toEqual([noMatch, noMatch, 'or_op', noMatch, 'or_op', noMatch, 'or_op']);
   });
 
-  it('should throw exception on mismatched parentheses', () => {
+  it('should mark error on mismatched parentheses', () => {
     expect(() => generateLexArray('(safe OR solo AND fluttershy', parseTerm)).toThrow('Mismatched parentheses.');
-    // expect(() => generateLexArray(')bad', parseTerm)).toThrow('Mismatched parentheses.');
+    // expect(() => generateLexArray(')bad', parseTerm).error).toThrow('Mismatched parentheses.');
   });
 });
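The commented-out case gestures at the new non-throwing path; a test against generateLexResult might look like this (a hypothetical sketch, not part of this commit):

it('should record the error in the lex result', () => {
  const result = generateLexResult('(safe OR solo AND fluttershy', parseTerm);
  // The failure is stored on the result instead of thrown.
  expect(result.error?.message).toBe('Mismatched parentheses.');
});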

View file

@@ -22,10 +22,18 @@ const tokenList: Token[] = [
 
 export type ParseTerm = (term: string, fuzz: number, boost: number) => AstMatcher;
 
-export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
+export type Range = [number, number];
+export type TermContext = [Range, string];
+
+export interface LexResult {
+  tokenList: TokenList,
+  termContexts: TermContext[],
+  error: ParseError | null
+}
+
+export function generateLexResult(searchStr: string, parseTerm: ParseTerm): LexResult {
   const opQueue: string[] = [],
-        groupNegate: boolean[] = [],
-        tokenStack: TokenList = [];
+        groupNegate: boolean[] = [];
 
   let searchTerm: string | null = null;
   let boostFuzzStr = '';
@@ -35,10 +43,25 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
   let fuzz = 0;
   let lparenCtr = 0;
 
+  let termIndex = 0;
+  let index = 0;
+
+  const ret: LexResult = {
+    tokenList: [],
+    termContexts: [],
+    error: null
+  };
+
+  const beginTerm = (token: string) => {
+    searchTerm = token;
+    termIndex = index;
+  };
+
-  const pushTerm = () => {
+  const endTerm = () => {
     if (searchTerm !== null) {
       // Push to stack.
-      tokenStack.push(parseTerm(searchTerm, fuzz, boost));
+      ret.tokenList.push(parseTerm(searchTerm, fuzz, boost));
+      ret.termContexts.push([[termIndex, termIndex + searchTerm.length], searchTerm]);
       // Reset term and options data.
       boost = 1;
       fuzz = 0;
@@ -48,7 +71,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
     }
 
     if (negate) {
-      tokenStack.push('not_op');
+      ret.tokenList.push('not_op');
       negate = false;
     }
   };
@@ -64,19 +87,19 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
      const token = match[0];
 
      if (searchTerm !== null && (['and_op', 'or_op'].indexOf(tokenName) !== -1 || tokenName === 'rparen' && lparenCtr === 0)) {
-        pushTerm();
+        endTerm();
      }
 
      switch (tokenName) {
        case 'and_op':
          while (opQueue[0] === 'and_op') {
-            tokenStack.push(assertNotUndefined(opQueue.shift()));
+            ret.tokenList.push(assertNotUndefined(opQueue.shift()));
          }
          opQueue.unshift('and_op');
          break;
        case 'or_op':
          while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') {
-            tokenStack.push(assertNotUndefined(opQueue.shift()));
+            ret.tokenList.push(assertNotUndefined(opQueue.shift()));
          }
          opQueue.unshift('or_op');
          break;
@@ -113,10 +136,10 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
            if (op === 'lparen') {
              break;
            }
-            tokenStack.push(op);
+            ret.tokenList.push(op);
          }
          if (groupNegate.length > 0 && groupNegate.pop()) {
-            tokenStack.push('not_op');
+            ret.tokenList.push('not_op');
          }
        }
        break;
@@ -128,7 +151,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
          boostFuzzStr += token;
        }
        else {
-          searchTerm = token;
+          beginTerm(token);
        }
        break;
      case 'boost':
@@ -137,7 +160,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
          boostFuzzStr += token;
        }
        else {
-          searchTerm = token;
+          beginTerm(token);
        }
        break;
      case 'quoted_lit':
@@ -145,7 +168,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
          searchTerm += token;
        }
        else {
-          searchTerm = token;
+          beginTerm(token);
        }
        break;
      case 'word':
@@ -159,7 +182,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
          searchTerm += token;
        }
        else {
-          searchTerm = token;
+          beginTerm(token);
        }
        break;
      default:
@@ -171,6 +194,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
 
      // Truncate string and restart the token tests.
      localSearchStr = localSearchStr.substring(token.length);
+      index += token.length;
 
      // Break since we have found a match.
      break;
@@ -178,14 +202,24 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
  }
 
  // Append final tokens to the stack.
-  pushTerm();
+  endTerm();
 
  if (opQueue.indexOf('rparen') !== -1 || opQueue.indexOf('lparen') !== -1) {
-    throw new ParseError('Mismatched parentheses.');
+    ret.error = new ParseError('Mismatched parentheses.');
  }
 
-  // Concatenatte remaining operators to the token stack.
-  tokenStack.push(...opQueue);
+  // Concatenate remaining operators to the token stack.
+  ret.tokenList.push(...opQueue);
 
-  return tokenStack;
+  return ret;
+}
+
+export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
+  const ret = generateLexResult(searchStr, parseTerm);
+
+  if (ret.error) {
+    throw ret.error;
+  }
+
+  return ret.tokenList;
 }
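The net effect: generateLexResult reports mismatched parentheses through the error field and still returns the tokens and term contexts produced so far, while generateLexArray preserves the old throwing contract for existing callers. A rough consumer sketch (input and logging are illustrative):

const result = generateLexResult('(safe OR solo', parseWithDefaultMatcher);

if (result.error) {
  // 'Mismatched parentheses.' is reported without discarding the partial lex.
  console.warn(result.error.message);
}

// Each term context is [[start, end], term], indexed into the input string.
for (const [[start, end], term] of result.termContexts) {
  console.log(`'${term}' spans ${start}..${end}`);
}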

View file

@@ -4,9 +4,11 @@ import { AstMatcher, ParseError, TokenList } from './types';
 export function parseTokens(lexicalArray: TokenList): AstMatcher {
   const operandStack: AstMatcher[] = [];
 
-  lexicalArray.forEach((token, i) => {
+  for (let i = 0; i < lexicalArray.length; i += 1) {
+    const token = lexicalArray[i];
+
     if (token === 'not_op') {
-      return;
+      continue;
     }
 
     let intermediate: AstMatcher;
@@ -36,7 +38,7 @@ export function parseTokens(lexicalArray: TokenList): AstMatcher {
     else {
       operandStack.push(intermediate);
     }
-  });
+  }
 
   if (operandStack.length > 1) {
     throw new ParseError('Missing operator.');
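Swapping forEach for an index loop keeps behaviour identical: a callback's return exits only that one invocation, so continue is its direct replacement, and the explicit index stays available for ordinary control flow. A minimal illustration (process is a stand-in, not from this codebase):

declare function process(token: string): void;

const tokens = ['not_op', 'safe'];

// Callback form: 'return' skips a single element.
tokens.forEach(token => {
  if (token === 'not_op') {
    return;
  }
  process(token);
});

// Loop form: 'continue' does the same, with i in scope for later use.
for (let i = 0; i < tokens.length; i += 1) {
  if (tokens[i] === 'not_op') {
    continue;
  }
  process(tokens[i]);
}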

View file

@@ -1,7 +1,7 @@
 import { displayTags, getHiddenTags, getSpoileredTags, imageHitsComplex, imageHitsTags, TagData } from '../tag';
 import { mockStorage } from '../../../test/mock-storage';
 import { getRandomArrayItem } from '../../../test/randomness';
-import parseSearch from '../../match_query';
+import { parseSearch } from '../../match_query';
 import { SpoilerType } from '../../../types/booru-object';
 
 describe('Tag utilities', () => {