# Extracted from patch 1ab0db43c794 ("search lexer", byte[], 2019-08-19),
# which creates lib/philomena/search/lexer.ex.
defmodule Philomena.Search.Lexer do
  @moduledoc """
  Hand-written lexer for search query strings.

  The input is converted to a charlist and walked by a small state machine.
  Each state is a private function:

    * `lex_outer/1`    - between terms; whitespace is skipped
    * `lex_quoted/2`   - inside a double-quoted term
    * `lex_term/3`     - inside a bare term, tracking parenthesis depth
    * `lex_float/1`, `lex_whole/2`, `lex_fraction/2` - the number following a
      boost (`^`) or fuzz (`~`) operator

  The result is a list of `{type, value}` tuples that always ends with
  `{:eof, "$"}`. Token types are `:and`, `:or`, `:not`, `:lparen`, `:rparen`,
  `:boost`, `:fuzz`, `:term`, `:float` and `:eof`.
  """

  @typedoc "A lexed token: the token type and its text (or numeric value for `:float`)."
  @type token :: {atom(), String.t() | number()}

  defguardp is_digit(c) when c in ?0..?9
  defguardp is_number_start(c) when is_digit(c) or c in [?+, ?-]

  @doc """
  Splits `input` into a list of tokens.

  Raises `ArgumentError` on a trailing backslash, an unterminated quoted term,
  unbalanced parentheses inside a term, or a malformed number after `^` / `~`.

  ## Examples

      iex> Philomena.Search.Lexer.lex("(fluffy)")
      [lparen: "(", term: "fluffy", rparen: ")", eof: "$"]

      iex> Philomena.Search.Lexer.lex("cat^2")
      [term: "cat", boost: "^", float: 2, eof: "$"]

  """
  @spec lex(String.t() | charlist()) :: [token()]
  def lex(input) do
    input
    |> to_charlist()
    |> lex_outer()
  end

  #
  # Outer state (not inside a term, spaces irrelevant)
  #

  # NOTE(review): keyword operators are matched as plain prefixes with no
  # word-boundary check, so input such as "ORANGE" lexes as OR followed by the
  # term "ANGE". Confirm whether that is intended before relying on it.
  defp lex_outer([]), do: [eof: "$"]
  defp lex_outer(~c"AND" ++ rest), do: [{:and, "AND"} | lex_outer(rest)]
  defp lex_outer(~c"NOT" ++ rest), do: [{:not, "NOT"} | lex_outer(rest)]
  defp lex_outer(~c"OR" ++ rest), do: [{:or, "OR"} | lex_outer(rest)]
  defp lex_outer(~c"||" ++ rest), do: [{:or, "||"} | lex_outer(rest)]
  defp lex_outer(~c"&&" ++ rest), do: [{:and, "&&"} | lex_outer(rest)]
  defp lex_outer([?, | rest]), do: [{:and, ","} | lex_outer(rest)]
  defp lex_outer([?! | rest]), do: [{:not, "!"} | lex_outer(rest)]
  defp lex_outer([?- | rest]), do: [{:not, "-"} | lex_outer(rest)]
  defp lex_outer([?( | rest]), do: [{:lparen, "("} | lex_outer(rest)]
  defp lex_outer([?) | rest]), do: [{:rparen, ")"} | lex_outer(rest)]
  defp lex_outer([?^ | rest]), do: [{:boost, "^"} | lex_float(rest)]
  defp lex_outer([?~ | rest]), do: [{:fuzz, "~"} | lex_float(rest)]

  defp lex_outer([c | rest]) when c in [?\s, ?\t, ?\n, ?\r, ?\v, ?\f],
    do: lex_outer(rest)

  defp lex_outer([?" | rest]), do: lex_quoted(rest, "")
  defp lex_outer(input), do: lex_term(input, "", 0)

  #
  # Quoted term state
  #

  # A backslash makes the following character literal (including `"` and `\`).
  defp lex_quoted([?\\, c | rest], acc), do: lex_quoted(rest, acc <> <<c::utf8>>)
  defp lex_quoted([?\\], _acc), do: raise(ArgumentError, "Unpaired backslash.")
  defp lex_quoted([?" | rest], acc), do: [{:term, acc} | lex_outer(rest)]
  defp lex_quoted([c | rest], acc), do: lex_quoted(rest, acc <> <<c::utf8>>)
  defp lex_quoted([], _acc), do: raise(ArgumentError, "Imbalanced quotes.")

  #
  # Term state
  #
  # `depth` counts unescaped "(" seen inside the term. Operators only end the
  # term at depth 0; inside parentheses they are part of the term text.
  #

  defp lex_term([?\\, c | rest], acc, depth),
    do: lex_term(rest, acc <> <<c::utf8>>, depth)

  defp lex_term([?\\], _acc, _depth), do: raise(ArgumentError, "Unpaired backslash.")

  defp lex_term([?( | rest], acc, depth), do: lex_term(rest, acc <> "(", depth + 1)

  defp lex_term([?) | rest], acc, 0),
    do: [{:term, acc}, {:rparen, ")"} | lex_outer(rest)]

  defp lex_term([?) | rest], acc, depth), do: lex_term(rest, acc <> ")", depth - 1)

  defp lex_term(~c" AND" ++ rest, acc, 0),
    do: [{:term, acc}, {:and, "AND"} | lex_outer(rest)]

  defp lex_term(~c" OR" ++ rest, acc, 0),
    do: [{:term, acc}, {:or, "OR"} | lex_outer(rest)]

  defp lex_term(~c" &&" ++ rest, acc, 0),
    do: [{:term, acc}, {:and, "&&"} | lex_outer(rest)]

  defp lex_term(~c" ||" ++ rest, acc, 0),
    do: [{:term, acc}, {:or, "||"} | lex_outer(rest)]

  defp lex_term([?, | rest], acc, 0),
    do: [{:term, acc}, {:and, ","} | lex_outer(rest)]

  # "^" / "~" only act as operators when a sign or digit follows directly;
  # otherwise they fall through to the generic clause and stay in the term.
  defp lex_term([?^, c | rest], acc, 0) when is_number_start(c),
    do: [{:term, acc}, {:boost, "^"} | lex_float([c | rest])]

  defp lex_term([?~, c | rest], acc, 0) when is_number_start(c),
    do: [{:term, acc}, {:fuzz, "~"} | lex_float([c | rest])]

  defp lex_term([c | rest], acc, depth), do: lex_term(rest, acc <> <<c::utf8>>, depth)
  defp lex_term([], acc, 0), do: [term: acc, eof: "$"]
  defp lex_term([], _acc, _depth), do: raise(ArgumentError, "Imbalanced parentheses.")

  #
  # Number state (for boosting, fuzzing)
  #

  # Optional sign. A leading "+" is dropped, a leading "-" is kept.
  defp lex_float([?+ | rest]), do: lex_whole(rest, "")
  defp lex_float([?- | rest]), do: lex_whole(rest, "-")
  defp lex_float(input), do: lex_whole(input, "")

  # Digits before the decimal point.
  defp lex_whole([c | rest], acc) when is_digit(c), do: lex_whole(rest, acc <> <<c>>)
  defp lex_whole([?. | rest], acc), do: lex_fraction(rest, acc <> ".")
  defp lex_whole(input, acc), do: end_float(input, acc)

  # Digits after the decimal point.
  defp lex_fraction([c | rest], acc) when is_digit(c),
    do: lex_fraction(rest, acc <> <<c>>)

  defp lex_fraction(input, acc), do: end_float(input, acc)

  # Emits the accumulated number followed by whatever token terminated it.
  # Shared by the whole-part and fraction-part states; the operator tokens
  # mirror the ones emitted by the term state.
  defp end_float([?) | rest], acc),
    do: [{:float, to_number(acc)}, {:rparen, ")"} | lex_outer(rest)]

  defp end_float(~c" AND" ++ rest, acc),
    do: [{:float, to_number(acc)}, {:and, "AND"} | lex_outer(rest)]

  defp end_float(~c" OR" ++ rest, acc),
    do: [{:float, to_number(acc)}, {:or, "OR"} | lex_outer(rest)]

  defp end_float(~c" &&" ++ rest, acc),
    do: [{:float, to_number(acc)}, {:and, "&&"} | lex_outer(rest)]

  defp end_float(~c" ||" ++ rest, acc),
    do: [{:float, to_number(acc)}, {:or, "||"} | lex_outer(rest)]

  defp end_float([?, | rest], acc),
    do: [{:float, to_number(acc)}, {:and, ","} | lex_outer(rest)]

  defp end_float([?^ | rest], acc),
    do: [{:float, to_number(acc)}, {:boost, "^"} | lex_float(rest)]

  defp end_float([?~ | rest], acc),
    do: [{:float, to_number(acc)}, {:fuzz, "~"} | lex_float(rest)]

  defp end_float([], acc), do: [float: to_number(acc), eof: "$"]
  defp end_float(_input, _acc), do: raise(ArgumentError, "Expected a number.")

  # Converts the accumulated digits to a float when they contain a fractional
  # part, otherwise to an integer. Raises ArgumentError when neither parses
  # (for example an empty string or a lone "-").
  defp to_number(digits) do
    case :string.to_float(digits) do
      {float, _rest} when is_float(float) ->
        float

      _no_float ->
        case :string.to_integer(digits) do
          {int, _rest} when is_integer(int) -> int
          _no_integer -> raise(ArgumentError, "Expected a number.")
        end
    end
  end
end