mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-30 14:57:59 +01:00
Add lex stage with intermediate result generation
This commit is contained in:
parent
194b2686f6
commit
c0ddf55b48
6 changed files with 68 additions and 30 deletions
|
@ -1,5 +1,5 @@
|
||||||
import { $ } from './utils/dom';
|
import { $ } from './utils/dom';
|
||||||
import parseSearch from './match_query';
|
import { parseSearch } from './match_query';
|
||||||
import store from './utils/store';
|
import store from './utils/store';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import { defaultMatcher } from './query/matcher';
|
import { defaultMatcher } from './query/matcher';
|
||||||
import { generateLexArray } from './query/lex';
|
import { generateLexArray, generateLexResult } from './query/lex';
|
||||||
import { parseTokens } from './query/parse';
|
import { parseTokens } from './query/parse';
|
||||||
import { getAstMatcherForTerm } from './query/term';
|
import { getAstMatcherForTerm } from './query/term';
|
||||||
|
|
||||||
|
@ -7,9 +7,11 @@ function parseWithDefaultMatcher(term: string, fuzz: number) {
|
||||||
return getAstMatcherForTerm(term, fuzz, defaultMatcher);
|
return getAstMatcherForTerm(term, fuzz, defaultMatcher);
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseSearch(query: string) {
|
export function parseSearch(query: string) {
|
||||||
const tokens = generateLexArray(query, parseWithDefaultMatcher);
|
const tokens = generateLexArray(query, parseWithDefaultMatcher);
|
||||||
return parseTokens(tokens);
|
return parseTokens(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
export default parseSearch;
|
export function getTermContexts(query: string) {
|
||||||
|
return generateLexResult(query, parseWithDefaultMatcher).termContexts;
|
||||||
|
}
|
||||||
|
|
|
@ -170,8 +170,8 @@ describe('Lexical analysis', () => {
|
||||||
expect(array).toEqual([noMatch, noMatch, 'or_op', noMatch, 'or_op', noMatch, 'or_op']);
|
expect(array).toEqual([noMatch, noMatch, 'or_op', noMatch, 'or_op', noMatch, 'or_op']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should throw exception on mismatched parentheses', () => {
|
it('should mark error on mismatched parentheses', () => {
|
||||||
expect(() => generateLexArray('(safe OR solo AND fluttershy', parseTerm)).toThrow('Mismatched parentheses.');
|
expect(() => generateLexArray('(safe OR solo AND fluttershy', parseTerm)).toThrow('Mismatched parentheses.');
|
||||||
// expect(() => generateLexArray(')bad', parseTerm)).toThrow('Mismatched parentheses.');
|
// expect(() => generateLexArray(')bad', parseTerm).error).toThrow('Mismatched parentheses.');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -22,10 +22,18 @@ const tokenList: Token[] = [
|
||||||
|
|
||||||
export type ParseTerm = (term: string, fuzz: number, boost: number) => AstMatcher;
|
export type ParseTerm = (term: string, fuzz: number, boost: number) => AstMatcher;
|
||||||
|
|
||||||
export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
|
export type Range = [number, number];
|
||||||
|
export type TermContext = [Range, string];
|
||||||
|
|
||||||
|
export interface LexResult {
|
||||||
|
tokenList: TokenList,
|
||||||
|
termContexts: TermContext[],
|
||||||
|
error: ParseError | null
|
||||||
|
}
|
||||||
|
|
||||||
|
export function generateLexResult(searchStr: string, parseTerm: ParseTerm): LexResult {
|
||||||
const opQueue: string[] = [],
|
const opQueue: string[] = [],
|
||||||
groupNegate: boolean[] = [],
|
groupNegate: boolean[] = [];
|
||||||
tokenStack: TokenList = [];
|
|
||||||
|
|
||||||
let searchTerm: string | null = null;
|
let searchTerm: string | null = null;
|
||||||
let boostFuzzStr = '';
|
let boostFuzzStr = '';
|
||||||
|
@ -35,10 +43,25 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
let fuzz = 0;
|
let fuzz = 0;
|
||||||
let lparenCtr = 0;
|
let lparenCtr = 0;
|
||||||
|
|
||||||
const pushTerm = () => {
|
let termIndex = 0;
|
||||||
|
let index = 0;
|
||||||
|
|
||||||
|
const ret: LexResult = {
|
||||||
|
tokenList: [],
|
||||||
|
termContexts: [],
|
||||||
|
error: null
|
||||||
|
};
|
||||||
|
|
||||||
|
const beginTerm = (token: string) => {
|
||||||
|
searchTerm = token;
|
||||||
|
termIndex = index;
|
||||||
|
};
|
||||||
|
|
||||||
|
const endTerm = () => {
|
||||||
if (searchTerm !== null) {
|
if (searchTerm !== null) {
|
||||||
// Push to stack.
|
// Push to stack.
|
||||||
tokenStack.push(parseTerm(searchTerm, fuzz, boost));
|
ret.tokenList.push(parseTerm(searchTerm, fuzz, boost));
|
||||||
|
ret.termContexts.push([[termIndex, termIndex + searchTerm.length], searchTerm]);
|
||||||
// Reset term and options data.
|
// Reset term and options data.
|
||||||
boost = 1;
|
boost = 1;
|
||||||
fuzz = 0;
|
fuzz = 0;
|
||||||
|
@ -48,7 +71,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
}
|
}
|
||||||
|
|
||||||
if (negate) {
|
if (negate) {
|
||||||
tokenStack.push('not_op');
|
ret.tokenList.push('not_op');
|
||||||
negate = false;
|
negate = false;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -64,19 +87,19 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
const token = match[0];
|
const token = match[0];
|
||||||
|
|
||||||
if (searchTerm !== null && (['and_op', 'or_op'].indexOf(tokenName) !== -1 || tokenName === 'rparen' && lparenCtr === 0)) {
|
if (searchTerm !== null && (['and_op', 'or_op'].indexOf(tokenName) !== -1 || tokenName === 'rparen' && lparenCtr === 0)) {
|
||||||
pushTerm();
|
endTerm();
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (tokenName) {
|
switch (tokenName) {
|
||||||
case 'and_op':
|
case 'and_op':
|
||||||
while (opQueue[0] === 'and_op') {
|
while (opQueue[0] === 'and_op') {
|
||||||
tokenStack.push(assertNotUndefined(opQueue.shift()));
|
ret.tokenList.push(assertNotUndefined(opQueue.shift()));
|
||||||
}
|
}
|
||||||
opQueue.unshift('and_op');
|
opQueue.unshift('and_op');
|
||||||
break;
|
break;
|
||||||
case 'or_op':
|
case 'or_op':
|
||||||
while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') {
|
while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') {
|
||||||
tokenStack.push(assertNotUndefined(opQueue.shift()));
|
ret.tokenList.push(assertNotUndefined(opQueue.shift()));
|
||||||
}
|
}
|
||||||
opQueue.unshift('or_op');
|
opQueue.unshift('or_op');
|
||||||
break;
|
break;
|
||||||
|
@ -113,10 +136,10 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
if (op === 'lparen') {
|
if (op === 'lparen') {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
tokenStack.push(op);
|
ret.tokenList.push(op);
|
||||||
}
|
}
|
||||||
if (groupNegate.length > 0 && groupNegate.pop()) {
|
if (groupNegate.length > 0 && groupNegate.pop()) {
|
||||||
tokenStack.push('not_op');
|
ret.tokenList.push('not_op');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -128,7 +151,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
boostFuzzStr += token;
|
boostFuzzStr += token;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
searchTerm = token;
|
beginTerm(token);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'boost':
|
case 'boost':
|
||||||
|
@ -137,7 +160,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
boostFuzzStr += token;
|
boostFuzzStr += token;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
searchTerm = token;
|
beginTerm(token);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'quoted_lit':
|
case 'quoted_lit':
|
||||||
|
@ -145,7 +168,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
searchTerm += token;
|
searchTerm += token;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
searchTerm = token;
|
beginTerm(token);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'word':
|
case 'word':
|
||||||
|
@ -159,7 +182,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
searchTerm += token;
|
searchTerm += token;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
searchTerm = token;
|
beginTerm(token);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -171,6 +194,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
|
|
||||||
// Truncate string and restart the token tests.
|
// Truncate string and restart the token tests.
|
||||||
localSearchStr = localSearchStr.substring(token.length);
|
localSearchStr = localSearchStr.substring(token.length);
|
||||||
|
index += token.length;
|
||||||
|
|
||||||
// Break since we have found a match.
|
// Break since we have found a match.
|
||||||
break;
|
break;
|
||||||
|
@ -178,14 +202,24 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
|
||||||
}
|
}
|
||||||
|
|
||||||
// Append final tokens to the stack.
|
// Append final tokens to the stack.
|
||||||
pushTerm();
|
endTerm();
|
||||||
|
|
||||||
if (opQueue.indexOf('rparen') !== -1 || opQueue.indexOf('lparen') !== -1) {
|
if (opQueue.indexOf('rparen') !== -1 || opQueue.indexOf('lparen') !== -1) {
|
||||||
throw new ParseError('Mismatched parentheses.');
|
ret.error = new ParseError('Mismatched parentheses.');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Concatenatte remaining operators to the token stack.
|
// Concatenate remaining operators to the token stack.
|
||||||
tokenStack.push(...opQueue);
|
ret.tokenList.push(...opQueue);
|
||||||
|
|
||||||
return tokenStack;
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
|
||||||
|
const ret = generateLexResult(searchStr, parseTerm);
|
||||||
|
|
||||||
|
if (ret.error) {
|
||||||
|
throw ret.error;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret.tokenList;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,9 +4,11 @@ import { AstMatcher, ParseError, TokenList } from './types';
|
||||||
export function parseTokens(lexicalArray: TokenList): AstMatcher {
|
export function parseTokens(lexicalArray: TokenList): AstMatcher {
|
||||||
const operandStack: AstMatcher[] = [];
|
const operandStack: AstMatcher[] = [];
|
||||||
|
|
||||||
lexicalArray.forEach((token, i) => {
|
for (let i = 0; i < lexicalArray.length; i += 1) {
|
||||||
|
const token = lexicalArray[i];
|
||||||
|
|
||||||
if (token === 'not_op') {
|
if (token === 'not_op') {
|
||||||
return;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let intermediate: AstMatcher;
|
let intermediate: AstMatcher;
|
||||||
|
@ -36,7 +38,7 @@ export function parseTokens(lexicalArray: TokenList): AstMatcher {
|
||||||
else {
|
else {
|
||||||
operandStack.push(intermediate);
|
operandStack.push(intermediate);
|
||||||
}
|
}
|
||||||
});
|
}
|
||||||
|
|
||||||
if (operandStack.length > 1) {
|
if (operandStack.length > 1) {
|
||||||
throw new ParseError('Missing operator.');
|
throw new ParseError('Missing operator.');
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import { displayTags, getHiddenTags, getSpoileredTags, imageHitsComplex, imageHitsTags, TagData } from '../tag';
|
import { displayTags, getHiddenTags, getSpoileredTags, imageHitsComplex, imageHitsTags, TagData } from '../tag';
|
||||||
import { mockStorage } from '../../../test/mock-storage';
|
import { mockStorage } from '../../../test/mock-storage';
|
||||||
import { getRandomArrayItem } from '../../../test/randomness';
|
import { getRandomArrayItem } from '../../../test/randomness';
|
||||||
import parseSearch from '../../match_query';
|
import { parseSearch } from '../../match_query';
|
||||||
import { SpoilerType } from '../../../types/booru-object';
|
import { SpoilerType } from '../../../types/booru-object';
|
||||||
|
|
||||||
describe('Tag utilities', () => {
|
describe('Tag utilities', () => {
|
||||||
|
|
Loading…
Reference in a new issue