// Source: philomena/assets/js/match_query.js
/**
* booru.match_query: A port and modification of the search_parser library for
* performing client-side filtering.
*/
/**
 * Lexer token table: [token name, anchored pattern] pairs. The lexer tries
 * the patterns in this order against the head of the remaining query string
 * and takes the first one that matches, so order is significant.
 */
const tokenList = [
  ['fuzz', /^~(?:\d+(\.\d+)?|\.\d+)/],
  ['boost', /^\^[-+]?\d+(\.\d+)?/],
  ['quoted_lit', /^\s*"(?:[^"]|\\")+"/],
  ['lparen', /^\s*\(\s*/],
  ['rparen', /^\s*\)\s*/],
  ['and_op', /^\s*(?:&&|AND)\s+/],
  ['and_op', /^\s*,\s*/],
  ['or_op', /^\s*(?:\|\||OR)\s+/],
  ['not_op', /^\s*NOT(?:\s+|(?=\())/],
  ['not_op', /^\s*[!-]\s*/],
  ['space', /^\s+/],
  // Preferred word form: stops at "^" and "~" so boost/fuzz suffixes can lex.
  ['word', /^(?:\\[\s,()^~]|[^\s,()^~])+/],
  // Fallback word form: also consumes "^" and "~" that did not lex as
  // boost/fuzz tokens.
  ['word', /^(?:\\[\s,()]|[^\s,()])+/]
];

/** Fields whose values are compared numerically (optionally with .lt/.gte/... qualifiers). */
const numberFields = ['id', 'width', 'height', 'aspect_ratio',
  'comment_count', 'score', 'upvotes', 'downvotes',
  'faves', 'tag_count'];

/** Fields whose values are compared as dates. */
const dateFields = ['created_at'];

/** Fields matched as literal / wildcard / fuzzy strings. */
const literalFields = ['tags', 'orig_sha512_hash', 'sha512_hash',
  'score', 'uploader', 'source_url', 'description'];

/** Maps a search field name to the DOM data attribute holding its value. */
const termSpaceToImageField = {
  tags: 'data-image-tag-aliases',
  score: 'data-score',
  upvotes: 'data-upvotes',
  downvotes: 'data-downvotes',
  uploader: 'data-uploader',
  // Yeah, I don't think this is reasonably supportable.
  // faved_by: 'data-faved-by',
  id: 'data-image-id',
  width: 'data-width',
  height: 'data-height',
  /* eslint-disable camelcase */
  aspect_ratio: 'data-aspect-ratio',
  comment_count: 'data-comment-count',
  tag_count: 'data-tag-count',
  source_url: 'data-source-url',
  faves: 'data-faves',
  sha512_hash: 'data-sha512',
  orig_sha512_hash: 'data-orig-sha512',
  created_at: 'data-created-at'
  /* eslint-enable camelcase */
};
/**
 * A single search term (leaf operand) of a query.
 *
 * @param {string} termStr - Raw term text; surrounding whitespace is trimmed.
 */
function SearchTerm(termStr) {
  this.term = termStr.trim();
  // Stays false until the term has been parsed; reset whenever text is appended.
  this.parsed = false;
}
/**
 * Extends the raw term text and flags the term as needing a (re-)parse.
 *
 * @param {string} substr - Text to add to the end of the term.
 */
SearchTerm.prototype.append = function(substr) {
  this.parsed = false;
  this.term = this.term + substr;
};
/**
 * Classifies a field name as a numeric or date range field.
 *
 * Accepts either a bare field name (comparison defaults to 'eq') or a
 * qualified one such as "score.gte" (qualifiers: lt, lte, gt, gte, eq).
 *
 * @param {string} field - Text before the first ":" of a term.
 * @returns {?Array} [fieldName, qualifier, 'number' | 'date'], or null when
 *   the field is not a known number/date field.
 */
SearchTerm.prototype.parseRangeField = function(field) {
  if (numberFields.indexOf(field) !== -1) {
    return [field, 'eq', 'number'];
  }

  if (dateFields.indexOf(field) !== -1) {
    return [field, 'eq', 'date'];
  }

  const qual = /^(\w+)\.([lg]te?|eq)$/.exec(field);

  if (qual) {
    if (numberFields.indexOf(qual[1]) !== -1) {
      return [qual[1], qual[2], 'number'];
    }

    if (dateFields.indexOf(qual[1]) !== -1) {
      return [qual[1], qual[2], 'date'];
    }
  }

  return null;
};
/**
 * Parses a relative date such as "3 days ago" into a comparison against now.
 *
 * @param {string} dateVal - Text containing "<n> <unit>[s] ago".
 * @param {string} qual - Range qualifier ('lt', 'lte', 'gt', 'gte'; anything
 *   else means equality).
 * @returns {Array} [Date, 'lt' | 'gte'] for one-sided comparisons, or
 *   [[bottomDate, topDate], 'eq'] for equality, where the two dates are one
 *   unit apart.
 * @throws {Error} When dateVal does not contain a relative date.
 */
SearchTerm.prototype.parseRelativeDate = function(dateVal, qual) {
  const match = /(\d+) (second|minute|hour|day|week|month|year)s? ago/.exec(dateVal);
  // Unit lengths in milliseconds; month is 30 days and year is 365 days.
  const bounds = {
    second: 1000,
    minute: 60000,
    hour: 3600000,
    day: 86400000,
    week: 604800000,
    month: 2592000000,
    year: 31536000000
  };

  if (!match) {
    throw new Error(`Cannot parse date string: ${dateVal}`);
  }

  const amount = parseInt(match[1], 10);
  const scale = bounds[match[2]];
  const now = new Date().getTime();
  const bottomDate = new Date(now - (amount * scale));
  const topDate = new Date(now - ((amount - 1) * scale));

  switch (qual) {
    case 'lte':
      return [bottomDate, 'lt'];
    case 'gte':
      return [bottomDate, 'gte'];
    case 'lt':
      return [bottomDate, 'lt'];
    case 'gt':
      return [bottomDate, 'gte'];
    default:
      return [[bottomDate, topDate], 'eq'];
  }
};
/**
 * Parses an absolute, possibly partial, ISO-like date
 * ("YYYY[-MM[-DD[(T| )hh[:mm[:ss]]]]]" with an optional "Z" or "+hh:mm" /
 * "-hh:mm" suffix) into UTC millisecond timestamps.
 *
 * A partial date denotes the whole period it names: "2015-02" with equality
 * yields the half-open range [2015-02-01, 2015-03-01).
 *
 * @param {string} dateVal - Date text to parse.
 * @param {string} qual - Range qualifier ('lt', 'lte', 'gt', 'gte'; anything
 *   else means equality).
 * @returns {Array} [timestamp, 'lt' | 'gte'] for one-sided comparisons, or
 *   [[bottom, top], 'eq'] for equality.
 * @throws {Error} When dateVal is empty or is not in the supported format.
 */
SearchTerm.prototype.parseAbsoluteDate = function(dateVal, qual) {
  // One pattern per date component, in order: year, month, day, hour,
  // minute, second.
  const parseRes = [
    /^(\d{4})/,
    /^-(\d{2})/,
    /^-(\d{2})/,
    /^(?:\s+|T|t)(\d{2})/,
    /^:(\d{2})/,
    /^:(\d{2})/
  ];
  const timeZoneOffset = [0, 0];
  // Defaults for components that are not given (day-of-month is 1-based).
  const timeData = [0, 0, 1, 0, 0, 0];
  const origDateVal = dateVal;
  let localDateVal = origDateVal;
  let i;

  const zoneMatch = /([+-])(\d{2}):(\d{2})$/.exec(localDateVal);

  if (zoneMatch) {
    timeZoneOffset[0] = parseInt(zoneMatch[2], 10);
    timeZoneOffset[1] = parseInt(zoneMatch[3], 10);
    if (zoneMatch[1] === '-') {
      timeZoneOffset[0] *= -1;
      timeZoneOffset[1] *= -1;
    }
    // Drop the six-character "+hh:mm" suffix.
    localDateVal = localDateVal.slice(0, localDateVal.length - 6);
  }
  else {
    localDateVal = localDateVal.replace(/[Zz]$/, '');
  }

  // After this loop, i is the number of components that were present.
  for (i = 0; i < parseRes.length; i += 1) {
    if (localDateVal.length === 0) {
      break;
    }

    const match = parseRes[i].exec(localDateVal);

    if (!match) {
      throw new Error(`Cannot parse date string: ${origDateVal}`);
    }

    // Date.UTC takes a zero-based month.
    timeData[i] = i === 1 ? parseInt(match[1], 10) - 1 : parseInt(match[1], 10);
    localDateVal = localDateVal.slice(match[0].length);
  }

  if (localDateVal.length > 0) {
    throw new Error(`Cannot parse date string: ${origDateVal}`);
  }

  // Without this guard an empty string would write timeData[-1] below and
  // yield a meaningless date built from the defaults.
  if (i === 0) {
    throw new Error(`Cannot parse date string: ${origDateVal}`);
  }

  // Apply the user-specified time zone offset. The JS Date constructor
  // is very flexible here.
  timeData[3] -= timeZoneOffset[0];
  timeData[4] -= timeZoneOffset[1];

  // timeData[i - 1] is the least significant component the user supplied;
  // adding one to it gives the exclusive end of the named period.
  switch (qual) {
    case 'lte':
      timeData[i - 1] += 1;
      return [Date.UTC.apply(Date, timeData), 'lt'];
    case 'gte':
      return [Date.UTC.apply(Date, timeData), 'gte'];
    case 'lt':
      return [Date.UTC.apply(Date, timeData), 'lt'];
    case 'gt':
      timeData[i - 1] += 1;
      return [Date.UTC.apply(Date, timeData), 'gte'];
    default: {
      const bottomDate = Date.UTC.apply(Date, timeData);
      timeData[i - 1] += 1;
      const topDate = Date.UTC.apply(Date, timeData);
      return [[bottomDate, topDate], 'eq'];
    }
  }
};
/**
 * Parses a date value, trying the absolute format first and falling back to
 * the relative ("N units ago") format when the absolute parser throws.
 *
 * @param {string} dateVal - Date text from the term.
 * @param {string} qual - Range qualifier passed through to the parsers.
 * @returns {Array} Whatever the successful parser returns.
 */
SearchTerm.prototype.parseDate = function(dateVal, qual) {
  let parsed;

  try {
    parsed = this.parseAbsoluteDate(dateVal, qual);
  }
  catch (_) {
    parsed = this.parseRelativeDate(dateVal, qual);
  }

  return parsed;
};
/**
 * Interprets the raw term text and populates the matching properties:
 * termSpace (field), termType ('literal', 'number', 'date' or 'my'),
 * term (string, number, timestamp(s) or RegExp), compare (range qualifier)
 * and wildcardable. Sets parsed to true when done.
 *
 * @throws {Error} When a date field carries an unparseable date value.
 */
SearchTerm.prototype.parse = function() {
  // Quoted and fuzzy terms are taken verbatim; everything else may contain
  // "*" and "?" wildcards.
  this.wildcardable = !this.fuzz && !/^"([^"]|\\")+"$/.test(this.term);

  if (!this.wildcardable && !this.fuzz) {
    // Strip the surrounding quotes.
    this.term = this.term.slice(1, this.term.length - 1);
  }

  this.term = this._normalizeTerm();

  // N.B.: For the purposes of this parser, boosting effects are ignored.

  // Default.
  this.termSpace = 'tags';
  this.termType = 'literal';

  const matchArr = this.term.split(':');

  if (matchArr.length > 1) {
    const candidateTermSpace = matchArr[0];
    // Only the first ":" separates field from value.
    const termCandidate = matchArr.slice(1).join(':');
    const rangeParsing = this.parseRangeField(candidateTermSpace);

    if (rangeParsing) {
      this.termSpace = rangeParsing[0];
      this.termType = rangeParsing[2];

      if (this.termType === 'date') {
        const dateParsing = this.parseDate(termCandidate, rangeParsing[1]);
        this.term = dateParsing[0];
        this.compare = dateParsing[1];
      }
      else {
        this.term = parseFloat(termCandidate);
        this.compare = rangeParsing[1];
      }

      this.wildcardable = false;
    }
    else if (literalFields.indexOf(candidateTermSpace) !== -1) {
      this.termType = 'literal';
      this.term = termCandidate;
      this.termSpace = candidateTermSpace;
    }
    else if (candidateTermSpace === 'my') {
      this.termType = 'my';
      this.termSpace = termCandidate;
    }
  }

  if (this.wildcardable) {
    // Transforms wildcard match into regular expression.
    // A custom NFA with caching may be more sophisticated but not
    // likely to be faster.
    const pattern = this.term
      // Escape regular expression metacharacters ("*" and "?" excluded).
      .replace(/([.+^$[\]\\(){}|-])/g, '\\$1')
      // Unescaped "*" becomes ".*", unescaped "?" becomes ".?".
      .replace(/([^\\]|[^\\](?:\\\\)+)\*/g, '$1.*')
      .replace(/^(?:\\\\)*\*/g, '.*')
      .replace(/([^\\]|[^\\](?:\\\\)+)\?/g, '$1.?')
      .replace(/^(?:\\\\)*\?/g, '.?');

    this.term = new RegExp(`^${pattern}$`, 'i');
  }

  // Update parse status flag to indicate the new properties are ready.
  this.parsed = true;
};
/**
 * Removes escaping backslashes from the raw term text.
 *
 * Non-wildcardable terms: every escaped quote (\") becomes a plain quote.
 * Wildcardable terms: every backslash escape is dropped except \* and \?,
 * which are kept so they can still be told apart from wildcards later.
 *
 * @returns {string} The unescaped term text.
 */
SearchTerm.prototype._normalizeTerm = function() {
  if (!this.wildcardable) {
    // The previous replace('"', '"') was a no-op: the backslash had been
    // lost from the pattern, and a string pattern only replaces once.
    return this.term.replace(/\\"/g, '"');
  }

  return this.term.replace(/\\([^*?])/g, '$1');
};
/**
 * Case-insensitive fuzzy comparison of the term against a string, based on
 * the optimal string alignment distance (insertions, deletions,
 * substitutions and swaps of adjacent characters).
 *
 * this.fuzz below 1.0 is a similarity fraction: the permitted distance is
 * targetStr.length * (1 - fuzz). Otherwise fuzz is the permitted distance
 * itself.
 *
 * @param {string} targetStr - String to compare the term with.
 * @returns {boolean} True when the distance is within the permitted limit.
 */
SearchTerm.prototype.fuzzyMatch = function(targetStr) {
  const targetDistance = this.fuzz < 1.0 ?
    targetStr.length * (1.0 - this.fuzz) :
    this.fuzz;

  // Both sides are lower-cased, as exact and wildcard matching are also
  // case-insensitive.
  const termLower = this.term.toLowerCase();
  const targetLower = targetStr.toLowerCase();
  const termLen = termLower.length;
  const targetLen = targetLower.length;

  // Work vectors, representing the last three populated rows of the dynamic
  // programming matrix: row i - 1, row i and row i + 1, where row k holds
  // the distances from the first k term characters to each target prefix.
  let rowBeforePrev = [];
  let rowPrev = [];
  let rowCur = [];

  // Row 0: turning the empty string into the first j target characters.
  for (let j = 0; j <= targetLen; j += 1) {
    rowPrev.push(j);
  }

  for (let i = 0; i < termLen; i += 1) {
    // Turning the first i + 1 term characters into the empty string.
    rowCur[0] = i + 1;

    for (let j = 0; j < targetLen; j += 1) {
      const cost = termLower[i] === targetLower[j] ? 0 : 1;

      rowCur[j + 1] = Math.min(
        // Deletion.
        rowPrev[j + 1] + 1,
        // Insertion.
        rowCur[j] + 1,
        // Substitution or No Change.
        rowPrev[j] + cost
      );

      // Transposition of two adjacent characters.
      if (i > 0 && j > 0 &&
          termLower[i] === targetLower[j - 1] &&
          termLower[i - 1] === targetLower[j]) {
        rowCur[j + 1] = Math.min(rowCur[j + 1], rowBeforePrev[j - 1] + 1);
      }
    }

    // Rotate the rows, recycling the oldest array as the next scratch row.
    const recycled = rowBeforePrev;
    rowBeforePrev = rowPrev;
    rowPrev = rowCur;
    rowCur = recycled;
  }

  return rowPrev[targetLen] <= targetDistance;
};
/**
 * Case-insensitive equality test between the term and a string.
 *
 * @param {string} targetStr - String to compare the term with.
 * @returns {boolean} True when both are equal ignoring case.
 */
SearchTerm.prototype.exactMatch = function(targetStr) {
  const wanted = this.term.toLowerCase();
  const candidate = targetStr.toLowerCase();

  return wanted === candidate;
};
/**
 * Tests a string against the term, which must already be a regular
 * expression at this point.
 *
 * @param {string} targetStr - String to test.
 * @returns {boolean} Result of the regular expression test.
 */
SearchTerm.prototype.wildcardMatch = function(targetStr) {
  const pattern = this.term;

  return pattern.test(targetStr);
};
/**
 * Checks whether a list of interaction records contains a given interaction
 * with a given image.
 *
 * @param {string|number} imageID - Image identifier. Compared as a string so
 *   a DOM attribute value ("123") matches a numeric record id (123).
 * @param {string} type - Required interaction_type of the record.
 * @param {?string} interaction - Required value of the record, or null to
 *   accept any value.
 * @param {Array<Object>} interactions - Records with image_id,
 *   interaction_type and value properties.
 * @returns {boolean} True when at least one record matches.
 */
SearchTerm.prototype.interactionMatch = function(imageID, type, interaction, interactions) {
  const wantedID = String(imageID);

  return interactions.some(v =>
    String(v.image_id) === wantedID &&
    v.interaction_type === type &&
    (interaction === null || v.value === interaction)
  );
};
/**
 * Tests this term against an image element, parsing the term first if that
 * has not happened yet.
 *
 * @param {Object} target - Image element; only getAttribute(name) is used.
 * @returns {boolean} True when the image satisfies the term. An image that
 *   lacks the attribute a literal term needs never matches.
 */
SearchTerm.prototype.match = function(target) {
  if (!this.parsed) {
    this.parse();
  }

  if (this.termType === 'literal') {
    // Literal matching.
    let compFunc;

    if (this.fuzz) {
      compFunc = this.fuzzyMatch;
    }
    else if (this.wildcardable) {
      compFunc = this.wildcardMatch;
    }
    else {
      compFunc = this.exactMatch;
    }

    if (this.termSpace === 'tags') {
      const tagList = target.getAttribute('data-image-tag-aliases');

      if (typeof tagList !== 'string') {
        return false;
      }

      // The attribute holds a ", "-separated list; one hit is enough.
      return tagList.split(', ').some(str => Boolean(compFunc.call(this, str)));
    }

    // Some literal fields have no data attribute mapping.
    const attrName = termSpaceToImageField[this.termSpace];
    const value = attrName ? target.getAttribute(attrName) : null;

    if (typeof value !== 'string') {
      return false;
    }

    return compFunc.call(this, value);
  }

  if (this.termType === 'my') {
    const interactions = window.booru.interactions;

    if (interactions.length === 0) {
      return false;
    }

    const imageID = target.getAttribute('data-image-id');

    // Should work with most my:conditions except watched.
    switch (this.termSpace) {
      case 'faves':
        return this.interactionMatch(imageID, 'faved', null, interactions);
      case 'upvotes':
        return this.interactionMatch(imageID, 'voted', 'up', interactions);
      case 'downvotes':
        return this.interactionMatch(imageID, 'voted', 'down', interactions);
      default:
        // Other my: interactions aren't supported, return false to prevent
        // them from triggering spoiler.
        return false;
    }
  }

  if (this.termType === 'date') {
    // Date matching.
    const date = new Date(
      target.getAttribute(termSpaceToImageField[this.termSpace])
    ).getTime();

    switch (this.compare) {
      // The open-left, closed-right date range specified by the
      // date/time format limits the types of comparisons that are
      // done compared to numeric ranges.
      case 'lt':
        return this.term > date;
      case 'gte':
        return this.term <= date;
      default:
        return this.term[0] <= date && this.term[1] > date;
    }
  }

  // Range matching.
  const numbuh = parseFloat(
    target.getAttribute(termSpaceToImageField[this.termSpace])
  );

  if (Number.isNaN(this.term)) {
    return false;
  }

  if (this.fuzz) {
    // Within +/- fuzz of the term.
    return this.term <= numbuh + this.fuzz &&
           this.term + this.fuzz >= numbuh;
  }

  switch (this.compare) {
    case 'lt':
      return this.term > numbuh;
    case 'gt':
      return this.term < numbuh;
    case 'lte':
      return this.term >= numbuh;
    case 'gte':
      return this.term <= numbuh;
    default:
      return this.term === numbuh;
  }
};
/**
 * Lexes a search string into a postfix (operands first) token array.
 *
 * Operators are reordered so that AND binds tighter than OR and parenthesised
 * groups are emitted as a unit; a negation is emitted as 'not_op' directly
 * after the operand or group it applies to.
 *
 * @param {string} searchStr - The raw search query.
 * @returns {Array} SearchTerm objects and the strings 'and_op', 'or_op' and
 *   'not_op', in postfix order.
 * @throws {Error} When an opening parenthesis is never closed.
 */
function generateLexArray(searchStr) {
  // Pending operators (index 0 is the most recent one).
  const opQueue = [];
  // Negation flag saved for each currently open parenthesised group.
  const groupNegate = [];
  // Output, in postfix order.
  const tokenStack = [];
  // Term currently being assembled, plus its pending options.
  let searchTerm = null;
  let boost = null;
  let fuzz = null;
  // Count of unclosed "(" that were absorbed into the current term.
  let lparenCtr = 0;
  let negate = false;
  // Raw text of fuzz/boost tokens seen after the current term, kept in case
  // more term text follows and they turn out to be part of the term.
  let boostFuzzStr = '';
  let localSearchStr = searchStr;

  while (localSearchStr.length > 0) {
    for (const [tokenName, tokenRE] of tokenList) {
      const found = tokenRE.exec(localSearchStr);

      if (!found) {
        continue;
      }

      const match = found[0];

      // A binary operator, or a ")" that closes a real group, ends the
      // current term.
      if (Boolean(searchTerm) && (
        ['and_op', 'or_op'].indexOf(tokenName) !== -1 ||
        tokenName === 'rparen' && lparenCtr === 0)) {
        // Set options.
        searchTerm.boost = boost;
        searchTerm.fuzz = fuzz;
        // Push to stack.
        tokenStack.push(searchTerm);
        // Reset term and options data.
        searchTerm = fuzz = boost = null;
        boostFuzzStr = '';
        lparenCtr = 0;

        if (negate) {
          tokenStack.push('not_op');
          negate = false;
        }
      }

      switch (tokenName) {
        case 'and_op':
          while (opQueue[0] === 'and_op') {
            tokenStack.push(opQueue.shift());
          }
          opQueue.unshift('and_op');
          break;
        case 'or_op':
          while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') {
            tokenStack.push(opQueue.shift());
          }
          opQueue.unshift('or_op');
          break;
        case 'not_op':
          if (searchTerm) {
            // We're already inside a search term, so it does
            // not apply, obv.
            searchTerm.append(match);
          }
          else {
            negate = !negate;
          }
          break;
        case 'lparen':
          if (searchTerm) {
            // If we are inside the search term, do not error
            // out just yet; instead, consider it as part of
            // the search term, as a user convenience.
            searchTerm.append(match);
            lparenCtr += 1;
          }
          else {
            opQueue.unshift('lparen');
            groupNegate.push(negate);
            negate = false;
          }
          break;
        case 'rparen':
          if (lparenCtr > 0) {
            // Closes a "(" that was absorbed into the term.
            if (searchTerm) {
              searchTerm.append(match);
            }
            else {
              searchTerm = new SearchTerm(match);
            }
            lparenCtr -= 1;
          }
          else {
            // Flush the operators of the group being closed.
            while (opQueue.length) {
              const op = opQueue.shift();
              if (op === 'lparen') {
                break;
              }
              tokenStack.push(op);
            }
            if (groupNegate.length > 0 && groupNegate.pop()) {
              tokenStack.push('not_op');
            }
          }
          break;
        case 'fuzz':
          if (searchTerm) {
            // For this and boost operations, we store the
            // current match so far to a temporary string in
            // case this is actually inside the term.
            fuzz = parseFloat(match.substr(1));
            boostFuzzStr += match;
          }
          else {
            searchTerm = new SearchTerm(match);
          }
          break;
        case 'boost':
          if (searchTerm) {
            boost = match.substr(1);
            boostFuzzStr += match;
          }
          else {
            searchTerm = new SearchTerm(match);
          }
          break;
        case 'quoted_lit':
          if (searchTerm) {
            searchTerm.append(match);
          }
          else {
            searchTerm = new SearchTerm(match);
          }
          break;
        case 'word':
          if (searchTerm) {
            if (fuzz || boost) {
              // More term text follows, so the earlier fuzz/boost text
              // was part of the term after all.
              boost = fuzz = null;
              searchTerm.append(boostFuzzStr);
              boostFuzzStr = '';
            }
            searchTerm.append(match);
          }
          else {
            searchTerm = new SearchTerm(match);
          }
          break;
        default:
          // Append extra spaces within search terms.
          if (searchTerm) {
            searchTerm.append(match);
          }
      }

      // Truncate string and restart the token tests.
      localSearchStr = localSearchStr.slice(match.length);

      // Break since we have found a match.
      break;
    }
  }

  // Append final tokens to the stack, starting with the search term.
  if (searchTerm) {
    searchTerm.boost = boost;
    searchTerm.fuzz = fuzz;
    tokenStack.push(searchTerm);
  }
  if (negate) {
    tokenStack.push('not_op');
  }

  if (opQueue.indexOf('rparen') !== -1 ||
      opQueue.indexOf('lparen') !== -1) {
    throw new Error('Mismatched parentheses.');
  }

  // Memory-efficient concatenation of remaining operators queue to the
  // token stack.
  tokenStack.push.apply(tokenStack, opQueue);

  return tokenStack;
}
/**
 * Builds a syntax tree from a postfix token array.
 *
 * @param {Array} lexicalArray - Operands (non-string values) and the operator
 *   strings 'and_op', 'or_op' and 'not_op', in postfix order.
 * @returns {SearchAST} Root of the tree; an empty SearchAST for empty input.
 * @throws {Error} 'Missing operand.' when a binary operator lacks operands,
 *   'Missing operator.' when operands are left over.
 */
function parseTokens(lexicalArray) {
  const operandStack = [];

  for (let i = 0; i < lexicalArray.length; i += 1) {
    const token = lexicalArray[i];

    // A 'not_op' is consumed together with the token in front of it.
    if (token === 'not_op') {
      continue;
    }

    const negate = lexicalArray[i + 1] === 'not_op';

    if (typeof token === 'string') {
      // Binary operator: combine the two most recent operands.
      const op2 = operandStack.pop();
      const op1 = operandStack.pop();

      if (typeof op1 === 'undefined' || typeof op2 === 'undefined') {
        throw new Error('Missing operand.');
      }

      operandStack.push(new SearchAST(token, negate, op1, op2));
    }
    else if (negate) {
      // A negated operand is wrapped in an operator-less node.
      operandStack.push(new SearchAST(null, true, token));
    }
    else {
      operandStack.push(token);
    }
  }

  if (operandStack.length > 1) {
    throw new Error('Missing operator.');
  }

  const root = operandStack.pop();

  if (typeof root === 'undefined') {
    return new SearchAST();
  }

  // A lone term is wrapped so that callers always receive a SearchAST.
  if (isTerminal(root)) {
    return new SearchAST(null, false, root);
  }

  return root;
}
/**
 * Parses a search string into a syntax tree by lexing it and then building
 * the tree from the resulting tokens.
 *
 * @param {string} searchStr - The raw search query.
 * @returns {SearchAST} Root of the resulting syntax tree.
 */
function parseSearch(searchStr) {
  return parseTokens(generateLexArray(searchStr));
}
/**
 * Tells a terminal operand (something carrying a "term" property, i.e. a
 * SearchTerm) apart from an inner tree node.
 *
 * @param {Object} operand - Node to inspect.
 * @returns {boolean} True when operand.term is defined.
 */
function isTerminal(operand) {
  return !(typeof operand.term === 'undefined');
}
/**
 * A node of the search syntax tree.
 *
 * @param {?string} op - Operator name ('and_op' / 'or_op'); falsy values are
 *   stored as null.
 * @param {*} negate - Coerced to a boolean; whether the node's result is
 *   inverted.
 * @param {?Object} leftOperand - Left child; falsy values are stored as null.
 * @param {?Object} rightOperand - Right child; falsy values are stored as null.
 */
function SearchAST(op, negate, leftOperand, rightOperand) {
  this.negate = negate ? true : false;
  this.leftOperand = leftOperand ? leftOperand : null;
  this.op = op ? op : null;
  this.rightOperand = rightOperand ? rightOperand : null;
}
/**
 * Combines two evaluated operands according to their parent node.
 *
 * @param {boolean} ast1 - Evaluated left operand.
 * @param {boolean} ast2 - Evaluated right operand.
 * @param {Object} parentAST - Node supplying op ('and_op' means AND, anything
 *   else means OR) and negate (invert the combined result).
 * @returns {boolean} The combined, possibly negated, result.
 */
function combineOperands(ast1, ast2, parentAST) {
  const combined = parentAST.op === 'and_op' ? ast1 && ast2 : ast1 || ast2;

  return parentAST.negate ? !combined : combined;
}
/**
 * Evaluation of the AST in regard to a target image.
 *
 * The tree is walked iteratively with an explicit stack, and the right side
 * of an AND/OR node is skipped when the left side already decides the result.
 *
 * @param {Object} image - Image element handed to each term's match().
 * @returns {boolean} Whether the image satisfies the query; false for an
 *   empty tree.
 */
SearchAST.prototype.hitsImage = function(image) {
  const treeStack = [];
  // Left side node.
  // eslint-disable-next-line @typescript-eslint/no-this-alias,consistent-this
  let ast1 = this,
      // Right side node.
      ast2,
      // Parent node of the current subtree.
      parentAST;

  // Build the initial tree node traversal stack, of the "far left" side.
  // The general idea is to accumulate from the bottom and make stacks
  // of right-hand subtrees that themselves accumulate upward. The left
  // side node, ast1, will always be a Boolean representing the left-side
  // evaluated value, up to the current subtree (parentAST).
  while (!isTerminal(ast1)) {
    treeStack.push(ast1);
    ast1 = ast1.leftOperand;

    if (!ast1) {
      // Empty tree.
      return false;
    }
  }

  ast1 = ast1.match(image);
  // The Boolean itself goes through one loop iteration as a "parent": it has
  // no op, rightOperand or negate, so that iteration leaves ast1 unchanged.
  treeStack.push(ast1);

  while (treeStack.length > 0) {
    parentAST = treeStack.pop();

    if (parentAST === null) {
      // We are at the end of a virtual stack for a right node
      // subtree. We switch the result of this stack from left
      // (ast1) to right (ast2), pop the original left node,
      // and finally pop the parent subtree itself. See near the
      // end of this function to view how this is populated.
      ast2 = ast1;
      ast1 = treeStack.pop();
      parentAST = treeStack.pop();
    }
    else {
      // First, check to see if we can do a short-circuit
      // evaluation to skip evaluating the right side entirely.
      if (!ast1 && parentAST.op === 'and_op') {
        ast1 = parentAST.negate;
        continue;
      }

      if (ast1 && parentAST.op === 'or_op') {
        ast1 = !parentAST.negate;
        continue;
      }

      // If we are not at the end of a stack, grab the right
      // node. The left node (ast1) is currently a terminal Boolean.
      ast2 = parentAST.rightOperand;
    }

    if (typeof ast2 === 'boolean') {
      ast1 = combineOperands(ast1, ast2, parentAST);
    }
    else if (!ast2) {
      // A subtree with a single node. This is generally the case
      // for negated tokens.
      if (parentAST.negate) {
        ast1 = !ast1;
      }
    }
    else if (isTerminal(ast2)) {
      // We are finally at a leaf and can evaluate.
      ast2 = ast2.match(image);
      ast1 = combineOperands(ast1, ast2, parentAST);
    }
    else {
      // We are at a node whose right side is a new subtree.
      // We will build a new "virtual" stack, but instead of
      // building a new Array, we can insert a null object as a
      // marker.
      treeStack.push(parentAST, ast1, null);

      do {
        treeStack.push(ast2);
        ast2 = ast2.leftOperand;
      } while (!isTerminal(ast2));

      ast1 = ast2.match(image);
    }
  }

  return ast1;
};
/**
 * Dumps to string a simple diagram of the syntax tree structure
 * (starting with this object as the root) for debugging purposes.
 *
 * Each node is one line, indented with one tab per nesting level; a negated
 * node prints an extra "+ NOT_OP" line above itself.
 *
 * @returns {string} The diagram; an empty string for an empty tree.
 */
SearchAST.prototype.dumpTree = function() {
  const retStrArr = [];
  // Nodes still to print, as [indentation prefix, node] pairs; children are
  // put at the front so the output is depth-first, left before right.
  const treeQueue = [['', this]];

  while (treeQueue.length > 0) {
    const treeArr = treeQueue.shift();
    let prefix = treeArr[0];
    const tree = treeArr[1];

    // An empty tree (or a node without operands) has null children.
    if (!tree) {
      continue;
    }

    if (isTerminal(tree)) {
      retStrArr.push(`${prefix}-> ${tree.term}`);
    }
    else {
      if (tree.negate) {
        retStrArr.push(`${prefix}+ NOT_OP`);
        prefix += '\t';
      }
      if (tree.op) {
        retStrArr.push(`${prefix}+ ${tree.op.toUpperCase()}`);
        prefix += '\t';
        treeQueue.unshift([prefix, tree.rightOperand]);
        treeQueue.unshift([prefix, tree.leftOperand]);
      }
      else {
        treeQueue.unshift([prefix, tree.leftOperand]);
      }
    }
  }

  return retStrArr.join('\n');
};
export default parseSearch;