Mirror of https://github.com/philomena-dev/philomena.git
Add lex stage with intermediate result generation
Parent: 194b2686f6
Commit: c0ddf55b48
6 changed files with 68 additions and 30 deletions

@@ -1,5 +1,5 @@
 import { $ } from './utils/dom';
-import parseSearch from './match_query';
+import { parseSearch } from './match_query';
 import store from './utils/store';
 
 /**

@@ -1,5 +1,5 @@
 import { defaultMatcher } from './query/matcher';
-import { generateLexArray } from './query/lex';
+import { generateLexArray, generateLexResult } from './query/lex';
 import { parseTokens } from './query/parse';
 import { getAstMatcherForTerm } from './query/term';
 
@@ -7,9 +7,11 @@ function parseWithDefaultMatcher(term: string, fuzz: number) {
   return getAstMatcherForTerm(term, fuzz, defaultMatcher);
 }
 
-function parseSearch(query: string) {
+export function parseSearch(query: string) {
   const tokens = generateLexArray(query, parseWithDefaultMatcher);
   return parseTokens(tokens);
 }
 
-export default parseSearch;
+export function getTermContexts(query: string) {
+  return generateLexResult(query, parseWithDefaultMatcher).termContexts;
+}
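
For context, a minimal sketch of how the reworked exports might be consumed (the query string is illustrative; the import path is as in the diff):

import { parseSearch, getTermContexts } from './match_query';

// Build an AST matcher for a whole query; throws ParseError on bad syntax.
const matcher = parseSearch('safe, solo');

// New in this commit: the source position of each term, as [Range, string]
// pairs ([[start, end], term]), e.g. for highlighting terms in a search box.
const contexts = getTermContexts('safe, solo');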

@@ -170,8 +170,8 @@ describe('Lexical analysis', () => {
     expect(array).toEqual([noMatch, noMatch, 'or_op', noMatch, 'or_op', noMatch, 'or_op']);
   });
 
-  it('should throw exception on mismatched parentheses', () => {
+  it('should mark error on mismatched parentheses', () => {
     expect(() => generateLexArray('(safe OR solo AND fluttershy', parseTerm)).toThrow('Mismatched parentheses.');
-    // expect(() => generateLexArray(')bad', parseTerm)).toThrow('Mismatched parentheses.');
+    // expect(() => generateLexArray(')bad', parseTerm).error).toThrow('Mismatched parentheses.');
   });
 });
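
The commented-out line gestures at the new non-throwing path; a hypothetical follow-up assertion (not part of this commit) could check the error field of generateLexResult instead of expecting a throw:

// Hypothetical: the lexer reports the failure via LexResult.error.
const result = generateLexResult('(safe OR solo AND fluttershy', parseTerm);
expect(result.error?.message).toBe('Mismatched parentheses.');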

@@ -22,10 +22,18 @@ const tokenList: Token[] = [
 
 export type ParseTerm = (term: string, fuzz: number, boost: number) => AstMatcher;
 
-export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
+export type Range = [number, number];
+export type TermContext = [Range, string];
+
+export interface LexResult {
+  tokenList: TokenList,
+  termContexts: TermContext[],
+  error: ParseError | null
+}
+
+export function generateLexResult(searchStr: string, parseTerm: ParseTerm): LexResult {
   const opQueue: string[] = [],
-        groupNegate: boolean[] = [],
-        tokenStack: TokenList = [];
+        groupNegate: boolean[] = [];
 
   let searchTerm: string | null = null;
   let boostFuzzStr = '';
@@ -35,10 +43,25 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
   let fuzz = 0;
   let lparenCtr = 0;
 
-  const pushTerm = () => {
+  let termIndex = 0;
+  let index = 0;
+
+  const ret: LexResult = {
+    tokenList: [],
+    termContexts: [],
+    error: null
+  };
+
+  const beginTerm = (token: string) => {
+    searchTerm = token;
+    termIndex = index;
+  };
+
+  const endTerm = () => {
     if (searchTerm !== null) {
       // Push to stack.
-      tokenStack.push(parseTerm(searchTerm, fuzz, boost));
+      ret.tokenList.push(parseTerm(searchTerm, fuzz, boost));
+      ret.termContexts.push([[termIndex, termIndex + searchTerm.length], searchTerm]);
       // Reset term and options data.
       boost = 1;
       fuzz = 0;
@@ -48,7 +71,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
     }
 
     if (negate) {
-      tokenStack.push('not_op');
+      ret.tokenList.push('not_op');
       negate = false;
     }
   };
@@ -64,19 +87,19 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
       const token = match[0];
 
       if (searchTerm !== null && (['and_op', 'or_op'].indexOf(tokenName) !== -1 || tokenName === 'rparen' && lparenCtr === 0)) {
-        pushTerm();
+        endTerm();
       }
 
       switch (tokenName) {
         case 'and_op':
           while (opQueue[0] === 'and_op') {
-            tokenStack.push(assertNotUndefined(opQueue.shift()));
+            ret.tokenList.push(assertNotUndefined(opQueue.shift()));
           }
           opQueue.unshift('and_op');
           break;
         case 'or_op':
           while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') {
-            tokenStack.push(assertNotUndefined(opQueue.shift()));
+            ret.tokenList.push(assertNotUndefined(opQueue.shift()));
           }
           opQueue.unshift('or_op');
           break;
@@ -113,10 +136,10 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
               if (op === 'lparen') {
                 break;
               }
-              tokenStack.push(op);
+              ret.tokenList.push(op);
             }
             if (groupNegate.length > 0 && groupNegate.pop()) {
-              tokenStack.push('not_op');
+              ret.tokenList.push('not_op');
             }
           }
           break;
@@ -128,7 +151,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
            boostFuzzStr += token;
          }
          else {
-            searchTerm = token;
+            beginTerm(token);
          }
          break;
        case 'boost':
@@ -137,7 +160,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
            boostFuzzStr += token;
          }
          else {
-            searchTerm = token;
+            beginTerm(token);
          }
          break;
        case 'quoted_lit':
@@ -145,7 +168,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
            searchTerm += token;
          }
          else {
-            searchTerm = token;
+            beginTerm(token);
          }
          break;
        case 'word':
@@ -159,7 +182,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
            searchTerm += token;
          }
          else {
-            searchTerm = token;
+            beginTerm(token);
          }
          break;
        default:
@@ -171,6 +194,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
 
       // Truncate string and restart the token tests.
       localSearchStr = localSearchStr.substring(token.length);
+      index += token.length;
 
       // Break since we have found a match.
       break;
@@ -178,14 +202,24 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
   }
 
   // Append final tokens to the stack.
-  pushTerm();
+  endTerm();
 
   if (opQueue.indexOf('rparen') !== -1 || opQueue.indexOf('lparen') !== -1) {
-    throw new ParseError('Mismatched parentheses.');
+    ret.error = new ParseError('Mismatched parentheses.');
   }
 
-  // Concatenatte remaining operators to the token stack.
-  tokenStack.push(...opQueue);
+  // Concatenate remaining operators to the token stack.
+  ret.tokenList.push(...opQueue);
 
-  return tokenStack;
+  return ret;
 }
+
+export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
+  const ret = generateLexResult(searchStr, parseTerm);
+
+  if (ret.error) {
+    throw ret.error;
+  }
+
+  return ret.tokenList;
+}
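
Taken together, the lexer now exposes two entry points: generateLexResult collects tokens, term source ranges, and any parse failure without throwing, while generateLexArray wraps it to preserve the old throwing contract. A minimal sketch, assuming some ParseTerm callback (here called parseTerm, as in the tests):

import { generateLexArray, generateLexResult } from './query/lex';

// Non-throwing path: callers still get the partial tokenList and
// termContexts for a malformed query, plus the error to report.
const result = generateLexResult('(safe OR solo', parseTerm);
if (result.error) {
  console.warn(result.error.message); // 'Mismatched parentheses.'
}

// Throwing path: unchanged behavior for existing callers.
const tokens = generateLexArray('safe, solo', parseTerm);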

@@ -4,9 +4,11 @@ import { AstMatcher, ParseError, TokenList } from './types';
 export function parseTokens(lexicalArray: TokenList): AstMatcher {
   const operandStack: AstMatcher[] = [];
 
-  lexicalArray.forEach((token, i) => {
+  for (let i = 0; i < lexicalArray.length; i += 1) {
+    const token = lexicalArray[i];
+
     if (token === 'not_op') {
-      return;
+      continue;
     }
 
     let intermediate: AstMatcher;
@@ -36,7 +38,7 @@ export function parseTokens(lexicalArray: TokenList): AstMatcher {
     else {
       operandStack.push(intermediate);
     }
-  });
+  }
 
   if (operandStack.length > 1) {
     throw new ParseError('Missing operator.');
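
The loop rewrite is behavior-preserving: inside a forEach callback, return only ends that callback invocation, which is what continue does in the indexed loop. A small illustration (the tokens are placeholders):

['not_op', 'x'].forEach(token => {
  if (token === 'not_op') return; // skips to the next element
});

for (const token of ['not_op', 'x']) {
  if (token === 'not_op') continue; // same effect; break also becomes available
}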

@@ -1,7 +1,7 @@
 import { displayTags, getHiddenTags, getSpoileredTags, imageHitsComplex, imageHitsTags, TagData } from '../tag';
 import { mockStorage } from '../../../test/mock-storage';
 import { getRandomArrayItem } from '../../../test/randomness';
-import parseSearch from '../../match_query';
+import { parseSearch } from '../../match_query';
 import { SpoilerType } from '../../../types/booru-object';
 
 describe('Tag utilities', () => {