/**
 * booru.match_query: A port and modification of the search_parser library for
 * performing client-side filtering.
 *
 * Pipeline: generateLexArray() turns a query string into a postfix token
 * list, parseTokens() folds that list into a SearchAST, and
 * SearchAST#hitsImage() evaluates the tree against an object exposing
 * getAttribute(name) (an image element).
 */

// Lexer token table. Order matters: the first pattern that matches the head
// of the remaining input wins.
const tokenList = [
  ['fuzz', /^~(?:\d+(\.\d+)?|\.\d+)/],
  ['boost', /^\^[-+]?\d+(\.\d+)?/],
  ['quoted_lit', /^\s*"(?:[^"]|\\")+"/],
  ['lparen', /^\s*\(\s*/],
  ['rparen', /^\s*\)\s*/],
  ['and_op', /^\s*(?:&&|AND)\s+/],
  ['and_op', /^\s*,\s*/],
  ['or_op', /^\s*(?:\|\||OR)\s+/],
  ['not_op', /^\s*NOT(?:\s+|(?=\())/],
  ['not_op', /^\s*[!-]\s*/],
  ['space', /^\s+/],
  ['word', /^(?:[^\s,()^~]|\\[\s,()^~])+/],
  ['word', /^(?:[^\s,()]|\\[\s,()])+/]
];

// Fields compared numerically (optionally with a .lt/.lte/.gt/.gte/.eq qualifier).
const numberFields = [
  'id', 'width', 'height', 'aspect_ratio', 'comment_count', 'score',
  'upvotes', 'downvotes', 'faves', 'tag_count'
];

// Fields compared as dates.
const dateFields = ['created_at'];

// Fields compared as strings (exact, wildcard or fuzzy).
const literalFields = [
  'tags', 'orig_sha512_hash', 'sha512_hash', 'score', 'uploader',
  'source_url', 'description'
];

// Maps a search field to the attribute read from the image target.
const termSpaceToImageField = {
  tags: 'data-image-tag-aliases',
  score: 'data-score',
  upvotes: 'data-upvotes',
  downvotes: 'data-downvotes',
  uploader: 'data-uploader',
  // Yeah, I don't think this is reasonably supportable.
  // faved_by: 'data-faved-by',
  id: 'data-image-id',
  width: 'data-width',
  height: 'data-height',
  /* eslint-disable camelcase */
  aspect_ratio: 'data-aspect-ratio',
  comment_count: 'data-comment-count',
  tag_count: 'data-tag-count',
  source_url: 'data-source-url',
  faves: 'data-faves',
  sha512_hash: 'data-sha512',
  orig_sha512_hash: 'data-orig-sha512',
  created_at: 'data-created-at'
  /* eslint-enable camelcase */
};

/**
 * A single search term (leaf of the syntax tree).
 *
 * @param {string} termStr Raw term text as produced by the lexer.
 */
function SearchTerm(termStr) {
  this.term = termStr.trim();
  this.parsed = false;
}

/**
 * Appends raw text to the term and marks it as needing a re-parse.
 *
 * @param {string} substr Text to append.
 */
SearchTerm.prototype.append = function(substr) {
  this.term += substr;
  this.parsed = false;
};

/**
 * Interprets a field name as a numeric or date field, optionally qualified
 * (e.g. "score.gte").
 *
 * @param {string} field Candidate field name.
 * @returns {?Array<string>} [fieldName, comparison, 'number' | 'date'], or
 *   null when the field is not a range field.
 */
SearchTerm.prototype.parseRangeField = function(field) {
  if (numberFields.indexOf(field) !== -1) {
    return [field, 'eq', 'number'];
  }
  if (dateFields.indexOf(field) !== -1) {
    return [field, 'eq', 'date'];
  }

  const qual = /^(\w+)\.([lg]te?|eq)$/.exec(field);
  if (!qual) {
    return null;
  }

  const baseField = qual[1];
  const comparison = qual[2];
  if (numberFields.indexOf(baseField) !== -1) {
    return [baseField, comparison, 'number'];
  }
  if (dateFields.indexOf(baseField) !== -1) {
    return [baseField, comparison, 'date'];
  }
  return null;
};

/**
 * Parses a relative date such as "3 days ago".
 *
 * @param {string} dateVal Date expression.
 * @param {string} qual Comparison qualifier ('lt', 'lte', 'gt', 'gte', or
 *   anything else for equality).
 * @returns {Array} [Date, 'lt' | 'gte'] or [[Date, Date], 'eq'].
 * @throws {Error} When dateVal is not a relative date expression.
 */
SearchTerm.prototype.parseRelativeDate = function(dateVal, qual) {
  const match = /(\d+) (second|minute|hour|day|week|month|year)s? ago/.exec(dateVal);
  if (!match) {
    throw new Error(`Cannot parse date string: ${dateVal}`);
  }

  // Unit lengths in milliseconds (month = 30 days, year = 365 days).
  const bounds = {
    second: 1000,
    minute: 60000,
    hour: 3600000,
    day: 86400000,
    week: 604800000,
    month: 2592000000,
    year: 31536000000
  };

  const amount = parseInt(match[1], 10);
  const scale = bounds[match[2]];
  const now = new Date().getTime();
  const bottomDate = new Date(now - (amount * scale));
  const topDate = new Date(now - ((amount - 1) * scale));

  switch (qual) {
    case 'lte':
      return [bottomDate, 'lt'];
    case 'gte':
      return [bottomDate, 'gte'];
    case 'lt':
      return [bottomDate, 'lt'];
    case 'gt':
      return [bottomDate, 'gte'];
    default:
      return [[bottomDate, topDate], 'eq'];
  }
};

/**
 * Parses an absolute, possibly partial, ISO-like date
 * ("2015", "2015-06", "2015-06-01T12:30:00+02:00", ...). The precision of the
 * input determines the width of the matched range.
 *
 * @param {string} dateVal Date expression.
 * @param {string} qual Comparison qualifier ('lt', 'lte', 'gt', 'gte', or
 *   anything else for equality).
 * @returns {Array} [timestampMs, 'lt' | 'gte'] or
 *   [[bottomMs, topMs], 'eq'].
 * @throws {Error} When dateVal cannot be parsed.
 */
SearchTerm.prototype.parseAbsoluteDate = function(dateVal, qual) {
  // One pattern per component: year, month, day, hour, minute, second.
  const parseRes = [
    /^(\d{4})/,
    /^-(\d{2})/,
    /^-(\d{2})/,
    /^(?:\s+|T|t)(\d{2})/,
    /^:(\d{2})/,
    /^:(\d{2})/
  ];
  const timeZoneOffset = [0, 0];
  const timeData = [0, 0, 1, 0, 0, 0];
  const origDateVal = dateVal;
  let localDateVal = origDateVal;

  const zoneMatch = /([+-])(\d{2}):(\d{2})$/.exec(localDateVal);
  if (zoneMatch) {
    timeZoneOffset[0] = parseInt(zoneMatch[2], 10);
    timeZoneOffset[1] = parseInt(zoneMatch[3], 10);
    if (zoneMatch[1] === '-') {
      timeZoneOffset[0] *= -1;
      timeZoneOffset[1] *= -1;
    }
    // The zone suffix ("+hh:mm") is always six characters long.
    localDateVal = localDateVal.slice(0, localDateVal.length - 6);
  }
  else {
    localDateVal = localDateVal.replace(/[Zz]$/, '');
  }

  // After the loop, i is the number of components that were supplied.
  let i;
  for (i = 0; i < parseRes.length; i += 1) {
    if (localDateVal.length === 0) {
      break;
    }

    const match = parseRes[i].exec(localDateVal);
    if (!match) {
      throw new Error(`Cannot parse date string: ${origDateVal}`);
    }

    // Months are zero-based for Date.UTC.
    timeData[i] = i === 1 ? parseInt(match[1], 10) - 1 : parseInt(match[1], 10);
    localDateVal = localDateVal.slice(match[0].length);
  }

  if (localDateVal.length > 0) {
    throw new Error(`Cannot parse date string: ${origDateVal}`);
  }

  // Apply the user-specified time zone offset. The JS Date constructor
  // is very flexible here.
  timeData[3] -= timeZoneOffset[0];
  timeData[4] -= timeZoneOffset[1];

  // Incrementing the least significant supplied component yields the
  // exclusive upper bound of the period the user named.
  switch (qual) {
    case 'lte':
      timeData[i - 1] += 1;
      return [Date.UTC(...timeData), 'lt'];
    case 'gte':
      return [Date.UTC(...timeData), 'gte'];
    case 'lt':
      return [Date.UTC(...timeData), 'lt'];
    case 'gt':
      timeData[i - 1] += 1;
      return [Date.UTC(...timeData), 'gte'];
    default: {
      const bottomDate = Date.UTC(...timeData);
      timeData[i - 1] += 1;
      const topDate = Date.UTC(...timeData);
      return [[bottomDate, topDate], 'eq'];
    }
  }
};

/**
 * Parses a date value, trying the absolute format first and falling back to
 * the relative ("N units ago") format.
 *
 * @param {string} dateVal Date expression.
 * @param {string} qual Comparison qualifier.
 * @returns {Array} See parseAbsoluteDate / parseRelativeDate.
 * @throws {Error} When neither format matches.
 */
SearchTerm.prototype.parseDate = function(dateVal, qual) {
  try {
    return this.parseAbsoluteDate(dateVal, qual);
  }
  catch (_) {
    return this.parseRelativeDate(dateVal, qual);
  }
};

/**
 * Analyses the raw term and populates termSpace, termType, term, compare and
 * wildcardable, then sets the parsed flag.
 */
SearchTerm.prototype.parse = function() {
  let rangeParsing, candidateTermSpace, termCandidate;

  this.wildcardable = !this.fuzz && !/^"([^"]|\\")+"$/.test(this.term);

  if (!this.wildcardable && !this.fuzz) {
    // Strip the surrounding quotes of a quoted literal.
    this.term = this.term.slice(1, this.term.length - 1);
  }

  this.term = this._normalizeTerm();

  // N.B.: For the purposes of this parser, boosting effects are ignored.

  // Default.
  this.termSpace = 'tags';
  this.termType = 'literal';

  const matchArr = this.term.split(':');

  if (matchArr.length > 1) {
    candidateTermSpace = matchArr[0];
    termCandidate = matchArr.slice(1).join(':');
    rangeParsing = this.parseRangeField(candidateTermSpace);

    if (rangeParsing) {
      this.termSpace = rangeParsing[0];
      this.termType = rangeParsing[2];

      if (this.termType === 'date') {
        rangeParsing = this.parseDate(termCandidate, rangeParsing[1]);
        this.term = rangeParsing[0];
        this.compare = rangeParsing[1];
      }
      else {
        this.term = parseFloat(termCandidate);
        this.compare = rangeParsing[1];
      }

      this.wildcardable = false;
    }
    else if (literalFields.indexOf(candidateTermSpace) !== -1) {
      this.termType = 'literal';
      this.term = termCandidate;
      this.termSpace = candidateTermSpace;
    }
    else if (candidateTermSpace === 'my') {
      this.termType = 'my';
      this.termSpace = termCandidate;
    }
  }

  if (this.wildcardable) {
    // Transforms wildcard match into regular expression.
    // A custom NFA with caching may be more sophisticated but not
    // likely to be faster.
    this.term = new RegExp(
      `^${
        this.term.replace(/([.+^$[\]\\(){}|-])/g, '\\$1')
          .replace(/([^\\]|[^\\](?:\\\\)+)\*/g, '$1.*')
          .replace(/^(?:\\\\)*\*/g, '.*')
          .replace(/([^\\]|[^\\](?:\\\\)+)\?/g, '$1.?')
          .replace(/^(?:\\\\)*\?/g, '.?')
      }$`, 'i'
    );
  }

  // Update parse status flag to indicate the new properties are ready.
  this.parsed = true;
};

/**
 * Removes escaping from the raw term.
 *
 * Non-wildcardable (quoted or fuzzy) terms have every escaped quote (\")
 * replaced by a plain quote. Wildcardable terms have backslash escapes
 * removed, except those in front of * and ?, which the wildcard-to-regex
 * conversion in parse() still needs.
 *
 * @returns {string} The normalized term text.
 */
SearchTerm.prototype._normalizeTerm = function() {
  if (!this.wildcardable) {
    return this.term.replace(/\\"/g, '"');
  }
  return this.term.replace(/\\([^*?])/g, '$1');
};

/**
 * Case-insensitive fuzzy comparison using the optimal string alignment
 * distance (insertions, deletions, substitutions and adjacent
 * transpositions, each costing 1).
 *
 * A fuzz value below 1 is treated as a similarity ratio: the allowed
 * distance is targetStr.length * (1 - fuzz). Otherwise fuzz is the allowed
 * distance itself.
 *
 * @param {string} targetStr String to compare the term against.
 * @returns {boolean} Whether the distance is within the allowed distance.
 */
SearchTerm.prototype.fuzzyMatch = function(targetStr) {
  const targetDistance = this.fuzz < 1.0 ?
    targetStr.length * (1.0 - this.fuzz) :
    this.fuzz;

  const termLower = this.term.toLowerCase();
  const targetStrLower = targetStr.toLowerCase();

  // Work vectors, representing the last three populated rows of the
  // dynamic programming matrix: v0 is two rows back, v1 is the previous
  // row and v2 is the row being filled in.
  let v0 = [];
  let v1 = [];
  let v2 = [];

  // Row 0: distance from the empty prefix of the term.
  for (let j = 0; j <= targetStrLower.length; j += 1) {
    v1.push(j);
  }

  for (let i = 0; i < termLower.length; i += 1) {
    // Column 0: deleting all i + 1 term characters seen so far.
    v2[0] = i + 1;

    for (let j = 0; j < targetStrLower.length; j += 1) {
      const cost = termLower[i] === targetStrLower[j] ? 0 : 1;

      v2[j + 1] = Math.min(
        // Deletion.
        v1[j + 1] + 1,
        // Insertion.
        v2[j] + 1,
        // Substitution or No Change.
        v1[j] + cost
      );

      // Transposition of two adjacent characters.
      if (i > 0 && j > 0 &&
          termLower[i] === targetStrLower[j - 1] &&
          termLower[i - 1] === targetStrLower[j]) {
        v2[j + 1] = Math.min(v2[j + 1], v0[j - 1] + 1);
      }
    }

    // Rotate the row buffers; the oldest row is reused as scratch space.
    const temp = v0;
    v0 = v1;
    v1 = v2;
    v2 = temp;
  }

  return v1[targetStrLower.length] <= targetDistance;
};

/**
 * Case-insensitive equality comparison.
 *
 * @param {string} targetStr String to compare the term against.
 * @returns {boolean} Whether the strings are equal ignoring case.
 */
SearchTerm.prototype.exactMatch = function(targetStr) {
  return this.term.toLowerCase() === targetStr.toLowerCase();
};

/**
 * Tests the target against the term's wildcard regular expression
 * (this.term must already have been converted by parse()).
 *
 * @param {string} targetStr String to test.
 * @returns {boolean} Whether the expression matches.
 */
SearchTerm.prototype.wildcardMatch = function(targetStr) {
  return this.term.test(targetStr);
};

/**
 * Checks whether an interaction of the given kind exists for an image.
 *
 * @param {*} imageID Image id, compared strictly against image_id.
 * @param {string} type Required interaction_type.
 * @param {?string} interaction Required value, or null to accept any value.
 * @param {Array<Object>} interactions Interaction records to search.
 * @returns {boolean} Whether at least one record matches.
 */
SearchTerm.prototype.interactionMatch = function(imageID, type, interaction, interactions) {
  return interactions.some(v =>
    v.image_id === imageID &&
    v.interaction_type === type &&
    (interaction === null || v.value === interaction)
  );
};

/**
 * Evaluates this term against an image target, parsing the term first if
 * needed.
 *
 * @param {Object} target Object exposing getAttribute(name).
 * @returns {boolean} Whether the target satisfies the term.
 */
SearchTerm.prototype.match = function(target) {
  if (!this.parsed) {
    this.parse();
  }

  if (this.termType === 'literal') {
    // Literal matching.
    let compFunc;
    if (this.fuzz) {
      compFunc = this.fuzzyMatch;
    }
    else if (this.wildcardable) {
      compFunc = this.wildcardMatch;
    }
    else {
      compFunc = this.exactMatch;
    }

    // Some literal fields have no attribute mapping, and an attribute may
    // be absent from the target; either way there is nothing to compare
    // against, so the term does not match.
    const attrName = termSpaceToImageField[this.termSpace];
    const attrValue = attrName ? target.getAttribute(attrName) : null;
    if (attrValue === null || typeof attrValue === 'undefined') {
      return false;
    }

    if (this.termSpace === 'tags') {
      return attrValue.split(', ').some(str => compFunc.call(this, str));
    }
    return compFunc.call(this, attrValue);
  }

  if (this.termType === 'my') {
    const interactions = window.booru.interactions;
    if (interactions.length === 0) {
      return false;
    }

    const imageID = target.getAttribute('data-image-id');

    // Should work with most my:conditions except watched.
    switch (this.termSpace) {
      case 'faves':
        return this.interactionMatch(imageID, 'faved', null, interactions);
      case 'upvotes':
        return this.interactionMatch(imageID, 'voted', 'up', interactions);
      case 'downvotes':
        return this.interactionMatch(imageID, 'voted', 'down', interactions);
      default:
        // Other my: interactions aren't supported, return false to
        // prevent them from triggering spoiler.
        return false;
    }
  }

  if (this.termType === 'date') {
    // Date matching.
    const date = new Date(
      target.getAttribute(termSpaceToImageField[this.termSpace])
    ).getTime();

    switch (this.compare) {
      // The open-left, closed-right date range specified by the
      // date/time format limits the types of comparisons that are
      // done compared to numeric ranges.
      case 'lt':
        return this.term > date;
      case 'gte':
        return this.term <= date;
      default:
        return this.term[0] <= date && this.term[1] > date;
    }
  }

  // Range matching.
  const numbuh = parseFloat(
    target.getAttribute(termSpaceToImageField[this.termSpace])
  );

  if (Number.isNaN(this.term)) {
    return false;
  }
  if (this.fuzz) {
    return this.term <= numbuh + this.fuzz && this.term + this.fuzz >= numbuh;
  }

  switch (this.compare) {
    case 'lt':
      return this.term > numbuh;
    case 'gt':
      return this.term < numbuh;
    case 'lte':
      return this.term >= numbuh;
    case 'gte':
      return this.term <= numbuh;
    default:
      return this.term === numbuh;
  }
};

/**
 * Tokenizes a search string into a postfix (reverse Polish) list of
 * SearchTerm objects and operator names ('and_op', 'or_op', 'not_op').
 *
 * @param {string} searchStr Query string.
 * @returns {Array<SearchTerm|string>} Postfix token list.
 * @throws {Error} On mismatched parentheses.
 */
function generateLexArray(searchStr) {
  const opQueue = [];
  const groupNegate = [];
  const tokenStack = [];
  let searchTerm = null;
  let boost = null;
  let fuzz = null;
  let lparenCtr = 0;
  let negate = false;
  let boostFuzzStr = '';
  let localSearchStr = searchStr;

  while (localSearchStr.length > 0) {
    let consumed = false;

    for (const [tokenName, tokenRE] of tokenList) {
      const result = tokenRE.exec(localSearchStr);
      if (!result) {
        continue;
      }

      const match = result[0];

      if (Boolean(searchTerm) && (
        ['and_op', 'or_op'].indexOf(tokenName) !== -1 ||
        tokenName === 'rparen' && lparenCtr === 0)) {
        // Set options.
        searchTerm.boost = boost;
        searchTerm.fuzz = fuzz;
        // Push to stack.
        tokenStack.push(searchTerm);
        // Reset term and options data.
        searchTerm = fuzz = boost = null;
        boostFuzzStr = '';
        lparenCtr = 0;

        if (negate) {
          tokenStack.push('not_op');
          negate = false;
        }
      }

      switch (tokenName) {
        case 'and_op':
          while (opQueue[0] === 'and_op') {
            tokenStack.push(opQueue.shift());
          }
          opQueue.unshift('and_op');
          break;
        case 'or_op':
          while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') {
            tokenStack.push(opQueue.shift());
          }
          opQueue.unshift('or_op');
          break;
        case 'not_op':
          if (searchTerm) {
            // We're already inside a search term, so it does
            // not apply, obv.
            searchTerm.append(match);
          }
          else {
            negate = !negate;
          }
          break;
        case 'lparen':
          if (searchTerm) {
            // If we are inside the search term, do not error
            // out just yet; instead, consider it as part of
            // the search term, as a user convenience.
            searchTerm.append(match);
            lparenCtr += 1;
          }
          else {
            opQueue.unshift('lparen');
            groupNegate.push(negate);
            negate = false;
          }
          break;
        case 'rparen':
          if (lparenCtr > 0) {
            if (searchTerm) {
              searchTerm.append(match);
            }
            else {
              searchTerm = new SearchTerm(match);
            }
            lparenCtr -= 1;
          }
          else {
            while (opQueue.length) {
              const op = opQueue.shift();
              if (op === 'lparen') {
                break;
              }
              tokenStack.push(op);
            }
            if (groupNegate.length > 0 && groupNegate.pop()) {
              tokenStack.push('not_op');
            }
          }
          break;
        case 'fuzz':
          if (searchTerm) {
            // For this and boost operations, we store the
            // current match so far to a temporary string in
            // case this is actually inside the term.
            fuzz = parseFloat(match.slice(1));
            boostFuzzStr += match;
          }
          else {
            searchTerm = new SearchTerm(match);
          }
          break;
        case 'boost':
          if (searchTerm) {
            boost = match.slice(1);
            boostFuzzStr += match;
          }
          else {
            searchTerm = new SearchTerm(match);
          }
          break;
        case 'quoted_lit':
          if (searchTerm) {
            searchTerm.append(match);
          }
          else {
            searchTerm = new SearchTerm(match);
          }
          break;
        case 'word':
          if (searchTerm) {
            if (fuzz || boost) {
              // The fuzz/boost text turned out to be part of the term.
              boost = fuzz = null;
              searchTerm.append(boostFuzzStr);
              boostFuzzStr = '';
            }
            searchTerm.append(match);
          }
          else {
            searchTerm = new SearchTerm(match);
          }
          break;
        default:
          // Append extra spaces within search terms.
          if (searchTerm) {
            searchTerm.append(match);
          }
      }

      // Truncate string and restart the token tests.
      localSearchStr = localSearchStr.slice(match.length);
      consumed = true;
      break;
    }

    if (!consumed) {
      // Defensive: never spin forever on input no token pattern accepts.
      throw new Error(`Cannot tokenize search string near: ${localSearchStr}`);
    }
  }

  // Append final tokens to the stack, starting with the search term.
  if (searchTerm) {
    searchTerm.boost = boost;
    searchTerm.fuzz = fuzz;
    tokenStack.push(searchTerm);
  }
  if (negate) {
    tokenStack.push('not_op');
  }

  if (opQueue.indexOf('rparen') !== -1 ||
      opQueue.indexOf('lparen') !== -1) {
    throw new Error('Mismatched parentheses.');
  }

  // Concatenate the remaining operators queue onto the token stack.
  tokenStack.push(...opQueue);

  return tokenStack;
}

/**
 * Folds a postfix token list into a syntax tree.
 *
 * @param {Array<SearchTerm|string>} lexicalArray Output of generateLexArray.
 * @returns {SearchAST} Root of the syntax tree (an empty SearchAST for an
 *   empty token list).
 * @throws {Error} On a missing operand or operator.
 */
function parseTokens(lexicalArray) {
  const operandStack = [];
  let negate, op1, op2;

  lexicalArray.forEach((token, i) => {
    if (token === 'not_op') {
      // Negation is applied when the preceding token is handled.
      return;
    }

    negate = lexicalArray[i + 1] === 'not_op';

    if (typeof token === 'string') {
      op2 = operandStack.pop();
      op1 = operandStack.pop();

      if (typeof op1 === 'undefined' || typeof op2 === 'undefined') {
        throw new Error('Missing operand.');
      }

      operandStack.push(new SearchAST(token, negate, op1, op2));
    }
    else if (negate) {
      operandStack.push(new SearchAST(null, true, token));
    }
    else {
      operandStack.push(token);
    }
  });

  if (operandStack.length > 1) {
    throw new Error('Missing operator.');
  }

  op1 = operandStack.pop();

  if (typeof op1 === 'undefined') {
    return new SearchAST();
  }

  if (isTerminal(op1)) {
    return new SearchAST(null, false, op1);
  }

  return op1;
}

/**
 * Parses a search string into a syntax tree.
 *
 * @param {string} searchStr Query string.
 * @returns {SearchAST} Root of the syntax tree.
 * @throws {Error} On mismatched parentheses or a missing operand/operator.
 */
function parseSearch(searchStr) {
  return parseTokens(generateLexArray(searchStr));
}

/**
 * Whether operand is a terminal SearchTerm (it has a `term` property).
 *
 * @param {*} operand Tree node or evaluated value.
 * @returns {boolean}
 */
function isTerminal(operand) {
  return typeof operand.term !== 'undefined';
}

/**
 * Syntax tree node.
 *
 * @param {?string} op 'and_op', 'or_op', or a falsy value for a unary node.
 * @param {boolean} negate Whether the node's result is inverted.
 * @param {SearchAST|SearchTerm} leftOperand Left (or only) child.
 * @param {SearchAST|SearchTerm} rightOperand Right child, if any.
 */
function SearchAST(op, negate, leftOperand, rightOperand) {
  this.negate = Boolean(negate);
  this.leftOperand = leftOperand || null;
  this.op = op || null;
  this.rightOperand = rightOperand || null;
}

/**
 * Combines two evaluated operands according to the parent node's operator
 * ('and_op' means AND, anything else means OR) and negation flag.
 *
 * @param {boolean} ast1 Left value.
 * @param {boolean} ast2 Right value.
 * @param {SearchAST} parentAST Node supplying op and negate.
 * @returns {boolean} Combined value.
 */
function combineOperands(ast1, ast2, parentAST) {
  const combined = parentAST.op === 'and_op' ? ast1 && ast2 : ast1 || ast2;

  return parentAST.negate ? !combined : combined;
}

/**
 * Evaluation of the AST in regard to a target image.
 *
 * @param {Object} image Object exposing getAttribute(name).
 * @returns {boolean} Whether the image satisfies the query (false for an
 *   empty tree).
 */
SearchAST.prototype.hitsImage = function(image) {
  const treeStack = [];
  // Left side node.
  // eslint-disable-next-line @typescript-eslint/no-this-alias,consistent-this
  let ast1 = this;
  // Right side node.
  let ast2;
  // Parent node of the current subtree.
  let parentAST;

  // Build the initial tree node traversal stack, of the "far left" side.
  // The general idea is to accumulate from the bottom and make stacks
  // of right-hand subtrees that themselves accumulate upward. The left
  // side node, ast1, will always be a Boolean representing the left-side
  // evaluated value, up to the current subtree (parentAST).
  while (!isTerminal(ast1)) {
    treeStack.push(ast1);
    ast1 = ast1.leftOperand;

    if (!ast1) {
      // Empty tree.
      return false;
    }
  }

  ast1 = ast1.match(image);
  treeStack.push(ast1);

  while (treeStack.length > 0) {
    parentAST = treeStack.pop();

    if (parentAST === null) {
      // We are at the end of a virtual stack for a right node
      // subtree. We switch the result of this stack from left
      // (ast1) to right (ast2), pop the original left node,
      // and finally pop the parent subtree itself. See near the
      // end of this function to view how this is populated.
      ast2 = ast1;
      ast1 = treeStack.pop();
      parentAST = treeStack.pop();
    }
    else {
      // First, check to see if we can do a short-circuit
      // evaluation to skip evaluating the right side entirely.
      if (!ast1 && parentAST.op === 'and_op') {
        ast1 = parentAST.negate;
        continue;
      }

      if (ast1 && parentAST.op === 'or_op') {
        ast1 = !parentAST.negate;
        continue;
      }

      // If we are not at the end of a stack, grab the right
      // node. The left node (ast1) is currently a terminal Boolean.
      ast2 = parentAST.rightOperand;
    }

    if (typeof ast2 === 'boolean') {
      ast1 = combineOperands(ast1, ast2, parentAST);
    }
    else if (!ast2) {
      // A subtree with a single node. This is generally the case
      // for negated tokens.
      if (parentAST.negate) {
        ast1 = !ast1;
      }
    }
    else if (isTerminal(ast2)) {
      // We are finally at a leaf and can evaluate.
      ast2 = ast2.match(image);
      ast1 = combineOperands(ast1, ast2, parentAST);
    }
    else {
      // We are at a node whose right side is a new subtree.
      // We will build a new "virtual" stack, but instead of
      // building a new Array, we can insert a null object as a
      // marker.
      treeStack.push(parentAST, ast1, null);

      do {
        treeStack.push(ast2);
        ast2 = ast2.leftOperand;
      } while (!isTerminal(ast2));

      ast1 = ast2.match(image);
    }
  }

  return ast1;
};

/**
 * Dumps to string a simple diagram of the syntax tree structure
 * (starting with this object as the root) for debugging purposes.
 *
 * @returns {string} One line per node, indented with tabs.
 */
SearchAST.prototype.dumpTree = function() {
  const retStrArr = [];
  const treeQueue = [['', this]];

  while (treeQueue.length > 0) {
    const treeArr = treeQueue.shift();
    let prefix = treeArr[0];
    const tree = treeArr[1];

    if (isTerminal(tree)) {
      retStrArr.push(`${prefix}-> ${tree.term}`);
      continue;
    }

    if (tree.negate) {
      retStrArr.push(`${prefix}+ NOT_OP`);
      prefix += '\t';
    }

    if (tree.op) {
      retStrArr.push(`${prefix}+ ${tree.op.toUpperCase()}`);
      prefix += '\t';
      // Queue the children so the left one is printed first.
      treeQueue.unshift([prefix, tree.rightOperand]);
      treeQueue.unshift([prefix, tree.leftOperand]);
    }
    else {
      treeQueue.unshift([prefix, tree.leftOperand]);
    }
  }

  return retStrArr.join('\n');
};

// Force module handling for Jest, can be removed after TypeScript migration
export {};
export default parseSearch;