From aef0e7f7d50fabd87c7114e2dff163801ea07c0d Mon Sep 17 00:00:00 2001 From: "byte[]" Date: Wed, 21 Aug 2019 20:43:34 -0400 Subject: [PATCH] rename stuff --- lib/philomena/search/lexer.ex | 192 +++++++++++----------- lib/philomena/search/lexer_two.ex | 103 ------------ lib/philomena/search/term_lexer.ex | 247 ----------------------------- 3 files changed, 89 insertions(+), 453 deletions(-) delete mode 100644 lib/philomena/search/lexer_two.ex delete mode 100644 lib/philomena/search/term_lexer.ex diff --git a/lib/philomena/search/lexer.ex b/lib/philomena/search/lexer.ex index 24407a64..35a76f76 100644 --- a/lib/philomena/search/lexer.ex +++ b/lib/philomena/search/lexer.ex @@ -1,105 +1,5 @@ defmodule Philomena.Search.Lexer do - def lex(input) do - {:ok, accept(input |> to_charlist, :outer)} - rescue - e in ArgumentError -> - {:error, e.message} - _ -> - {:error, "Parsing error."} - end - - # - # Outer state (not inside a term, spaces irrelevant) - # - - defp accept([], :outer), do: [eof: "$"] - defp accept('AND' ++ rest, :outer), do: [{:and, "AND"} | accept(rest, :outer)] - defp accept('NOT' ++ rest, :outer), do: [{:not, "NOT"} | accept(rest, :outer)] - defp accept('OR' ++ rest, :outer), do: [{:or, "OR"} | accept(rest, :outer)] - defp accept('||' ++ rest, :outer), do: [{:or, "||"} | accept(rest, :outer)] - defp accept('&&' ++ rest, :outer), do: [{:and, "&&"} | accept(rest, :outer)] - defp accept(',' ++ rest, :outer), do: [{:and, ","} | accept(rest, :outer)] - defp accept('!' ++ rest, :outer), do: [{:not, "!"} | accept(rest, :outer)] - defp accept('-' ++ rest, :outer), do: [{:not, "-"} | accept(rest, :outer)] - defp accept('(' ++ rest, :outer), do: [{:lparen, "("} | accept(rest, :outer)] - defp accept(')' ++ rest, :outer), do: [{:rparen, ")"} | accept(rest, :outer)] - defp accept('^' ++ rest, :outer), do: [{:boost, "^"} | accept(rest, :float)] - defp accept('~' ++ rest, :outer), do: [{:fuzz, "~"} | accept(rest, :float)] - defp accept(' ' ++ rest, :outer), do: accept(rest, :outer) - defp accept('\t' ++ rest, :outer), do: accept(rest, :outer) - defp accept('\n' ++ rest, :outer), do: accept(rest, :outer) - defp accept('\r' ++ rest, :outer), do: accept(rest, :outer) - defp accept('\v' ++ rest, :outer), do: accept(rest, :outer) - defp accept('\f' ++ rest, :outer), do: accept(rest, :outer) - defp accept('"' ++ rest, :outer), do: accept(rest, "", :quoted_term) - defp accept(input, :outer), do: accept(input, "", 0, :term) - - # - # Quoted term state - # - - defp accept('\\"' ++ rest, term, :quoted_term), do: accept(rest, term <> "\"", :quoted_term) - defp accept('\\', _term, :quoted_term), do: raise ArgumentError, "Unpaired backslash." - defp accept('"' ++ rest, term, :quoted_term), do: [{:term, term} | accept(rest, :outer)] - defp accept([c] ++ rest, term, :quoted_term), do: accept(rest, term <> <>, :quoted_term) - defp accept([], _term, :quoted_term), do: raise ArgumentError, "Imbalanced quotes." - - # - # Term state - # - - defp accept([?\\, c] ++ rest, term, depth, :term) when c in '()\\', do: accept(rest, term <> <>, depth, :term) - defp accept('\\', _term, _depth, :term), do: raise ArgumentError, "Unpaired backslash." - defp accept('(' ++ rest, term, depth, :term), do: accept(rest, term <> "(", depth + 1, :term) - defp accept(')' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:rparen, ")"} | accept(rest, :outer)] - defp accept(')' ++ rest, term, depth, :term), do: accept(rest, term <> ")", depth - 1, :term) - defp accept(' AND' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:and, "AND"} | accept(rest, :outer)] - defp accept(' OR' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:or, "OR"} | accept(rest, :outer)] - defp accept(' &&' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:and, "&&"} | accept(rest, :outer)] - defp accept(' ||' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:or, "||"} | accept(rest, :outer)] - defp accept(',' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:and, ","} | accept(rest, :outer)] - defp accept([?^, c] ++ rest, term, 0, :term) when c in '+-0123456789', do: [{:term, term}, {:boost, "^"} | accept([c | rest], :float)] - defp accept([?~, c] ++ rest, term, 0, :term) when c in '+-0123456789', do: [{:term, term}, {:fuzz, "~"} | accept([c | rest], :float)] - defp accept('^' ++ rest, term, depth, :term), do: accept(rest, term <> "^", depth, :term) - defp accept('~' ++ rest, term, depth, :term), do: accept(rest, term <> "~", depth, :term) - defp accept([c] ++ rest, term, depth, :term), do: accept(rest, term <> <>, depth, :term) - defp accept([], term, 0, :term), do: [term: String.trim(term), eof: "$"] - defp accept([], _term, _depth, :term), do: raise ArgumentError, "Imbalanced parentheses." - - # - # Number state (for boosting, fuzzing) - # - - defp accept('+' ++ rest, :float), do: accept(rest, "", :float_w) - defp accept('-' ++ rest, :float), do: accept(rest, "-", :float_w) - defp accept(input, :float), do: accept(input, "", :float_w) - - defp accept([c] ++ rest, term, :float_w) when c in ?0..?9, do: accept(rest, term <> <>, :float_w) - defp accept('.' ++ rest, term, :float_w), do: accept(rest, term <> ".", :float_f) - defp accept(')' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:rparen, ")"} | accept(rest, :outer)] - defp accept(' AND' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:rparen, ")"} | accept(rest, :outer)] - defp accept(' OR' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:and, "AND"} | accept(rest, :outer)] - defp accept(' &&' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:or, "||"} | accept(rest, :outer)] - defp accept(' ||' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:and, "&&"} | accept(rest, :outer)] - defp accept(',' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:and, ","} | accept(rest, :outer)] - defp accept('^' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:boost, "^"} | accept(rest, :float)] - defp accept('~' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:fuzz, "~"} | accept(rest, :float)] - defp accept(' ' ++ rest, term, :float_w), do: [{:float, to_number(term)} | accept(rest, :outer)] - defp accept([], term, :float_w), do: [float: to_number(term), eof: "$"] - defp accept(_input, _term, :float_w), do: raise ArgumentError, "Expected a number." - - defp accept([c] ++ rest, term, :float_f) when c in ?0..?9, do: accept(rest, term <> <>, :float_f) - defp accept(')' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:rparen, ")"} | accept(rest, :outer)] - defp accept(' AND' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:rparen, ")"} | accept(rest, :outer)] - defp accept(' OR' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:and, "AND"} | accept(rest, :outer)] - defp accept(' &&' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:or, "||"} | accept(rest, :outer)] - defp accept(' ||' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:and, "&&"} | accept(rest, :outer)] - defp accept(',' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:and, ","} | accept(rest, :outer)] - defp accept('^' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:boost, "^"} | accept(rest, :float)] - defp accept('~' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:fuzz, "~"} | accept(rest, :float)] - defp accept(' ' ++ rest, term, :float_f), do: [{:float, to_number(term)} | accept(rest, :outer)] - defp accept([], term, :float_f), do: [float: to_number(term), eof: "$"] - defp accept(_input, _term, :float_f), do: raise ArgumentError, "Expected a number." + import NimbleParsec defp to_number(term) do {float_val, _} = :string.to_float(term) @@ -110,8 +10,94 @@ defmodule Philomena.Search.Lexer do float_val is_integer(int_val) -> int_val - true -> - raise ArgumentError, "Expected a number." end end + + l_and = + choice([string("AND"), string("&&"), string(",")]) + |> unwrap_and_tag(:and) + + l_or = + choice([string("OR"), string("||")]) + |> unwrap_and_tag(:or) + + l_not = + choice([string("NOT"), string("!"), string("-")]) + |> unwrap_and_tag(:not) + + lparen = string("(") |> unwrap_and_tag(:lparen) + rparen = string(")") |> unwrap_and_tag(:rparen) + + number = + optional(ascii_char('-+')) + |> ascii_char([?0..?9]) + |> times(min: 1) + |> optional(ascii_char('.') |> ascii_char([?0..?9]) |> times(min: 1)) + |> reduce(:to_number) + + boost = ignore(string("^")) |> unwrap_and_tag(number, :boost) + fuzz = ignore(string("~")) |> unwrap_and_tag(number, :fuzz) + + space = + choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")]) + |> ignore() + + quot = string("\"") + + quoted_term = + ignore(quot) + |> choice([ + ignore(string("\\")) |> string("\""), + ignore(string("\\")) |> string("\\"), + string("\\") |> utf8_char([]), + utf8_char(not: ?") + ]) + |> times(min: 1) + |> ignore(quot) + |> reduce({List, :to_string, []}) + |> unwrap_and_tag(:term) + + stop_words = choice([ + string("\\") |> eos(), + string(","), + concat(space, l_and), + concat(space, l_or), + concat(space, l_not), + rparen, + fuzz, + boost + ]) + + defcombinatorp :simple_term, + lookahead_not(stop_words) + |> choice([ + string("\\") |> utf8_char([]), + string("(") |> parsec(:simple_term) |> string(")"), + utf8_char([]), + ]) + |> times(min: 1) + + unquoted_term = + parsec(:simple_term) + |> reduce({List, :to_string, []}) + |> unwrap_and_tag(:term) + + outer = choice([ + l_and, + l_or, + l_not, + lparen, + rparen, + boost, + fuzz, + space, + quoted_term, + unquoted_term + ]) + + search = + times(outer, min: 1) + |> eos() + + defparsec :search, search end \ No newline at end of file diff --git a/lib/philomena/search/lexer_two.ex b/lib/philomena/search/lexer_two.ex deleted file mode 100644 index 2041f999..00000000 --- a/lib/philomena/search/lexer_two.ex +++ /dev/null @@ -1,103 +0,0 @@ -defmodule Philomena.Search.LexerTwo do - import NimbleParsec - - defp to_number(term) do - {float_val, _} = :string.to_float(term) - {int_val, _} = :string.to_integer(term) - - cond do - is_float(float_val) -> - float_val - is_integer(int_val) -> - int_val - end - end - - l_and = - choice([string("AND"), string("&&"), string(",")]) - |> unwrap_and_tag(:and) - - l_or = - choice([string("OR"), string("||")]) - |> unwrap_and_tag(:or) - - l_not = - choice([string("NOT"), string("!"), string("-")]) - |> unwrap_and_tag(:not) - - lparen = string("(") |> unwrap_and_tag(:lparen) - rparen = string(")") |> unwrap_and_tag(:rparen) - - number = - optional(ascii_char('-+')) - |> ascii_char([?0..?9]) - |> times(min: 1) - |> optional(ascii_char('.') |> ascii_char([?0..?9]) |> times(min: 1)) - |> reduce(:to_number) - - boost = ignore(string("^")) |> unwrap_and_tag(number, :boost) - fuzz = ignore(string("~")) |> unwrap_and_tag(number, :fuzz) - - space = - choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")]) - |> ignore() - - quot = string("\"") - - quoted_term = - ignore(quot) - |> choice([ - ignore(string("\\")) |> string("\""), - ignore(string("\\")) |> string("\\"), - string("\\") |> utf8_char([]), - utf8_char(not: ?") - ]) - |> times(min: 1) - |> ignore(quot) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:term) - - stop_words = choice([ - string("\\") |> eos(), - string(","), - concat(space, l_and), - concat(space, l_or), - concat(space, l_not), - rparen, - fuzz, - boost - ]) - - defcombinatorp :simple_term, - lookahead_not(stop_words) - |> choice([ - string("\\") |> utf8_char([]), - string("(") |> parsec(:simple_term) |> string(")"), - utf8_char([]), - ]) - |> times(min: 1) - - unquoted_term = - parsec(:simple_term) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:term) - - outer = choice([ - l_and, - l_or, - l_not, - lparen, - rparen, - boost, - fuzz, - space, - quoted_term, - unquoted_term - ]) - - search = - times(outer, min: 1) - |> eos() - - defparsec :search, search -end \ No newline at end of file diff --git a/lib/philomena/search/term_lexer.ex b/lib/philomena/search/term_lexer.ex deleted file mode 100644 index f27532ae..00000000 --- a/lib/philomena/search/term_lexer.ex +++ /dev/null @@ -1,247 +0,0 @@ -""" -defmodule Philomena.Search.TermLexer do - def lex(opts, input) do - {:ok | accept(opts, input, :literal)} - #rescue - # e in ArgumentError -> - # {:error, e.message} - # _ -> - # {:error, "Parsing error."} - end - - # - # Literal fields - # - - defp accept([field | r_fields], opts, input, :literal_field) do - sz = field |> byte_size - - case input do - <<^field::binary-size(sz), ":", rest::binary>> -> - [{:literal_field, field} | accept(rest, :literal)] - <<^field::binary-size(sz), ".eq:", rest::binary>> -> - [{:literal_field, field} | accept(rest, :literal)] - _ -> - accept(r_fields, opts, input, :literal_field) - end - end - defp accept([], %{boolean_fields: fields} = opts, input, :literal_field), do: accept(fields, opts, input, :boolean_field) - - # - # Boolean fields - # - - defp accept([field | r_fields], opts, input, :boolean_field) do - sz = field |> byte_size - - case input do - <<^field::binary-size(sz), ":", rest::binary>> -> - [{:boolean_field, field} | accept(rest, :boolean)] - <<^field::binary-size(sz), ".eq:", rest::binary>> -> - [{:boolean_field, field} | accept(rest, :boolean)] - _ -> - accept(r_fields, opts, input, :boolean_field) - end - end - defp accept([], %{ngram_fields: fields} = opts, input, :boolean_field), do: accept(fields, opts, input, :ngram_field) - - # - # NLP-analyzed fields - # - - defp accept([field | r_fields], opts, input, :ngram_field) do - sz = field |> byte_size - - case input do - <<^field::binary-size(sz), ":", rest::binary>> -> - [{:ngram_field, field} | accept(rest, :literal)] - <<^field::binary-size(sz), ".eq:", rest::binary>> -> - [{:ngram_field, field} | accept(rest, :literal)] - _ -> - accept(r_fields, opts, input, :ngram_field) - end - end - defp accept([], %{ip_fields: fields} = opts, input, :ngram_field), do: accept(fields, opts, input, :ip_fieldngram - - # - # IP address and CIDR range fields - # - - defp accept([field | r_fields], opts, input, :ip_field) do - sz = field |> byte_size - - case input do - <<^field::binary-size(sz), ":", rest::binary>> -> - [{:ip_field, field} | accept(rest, :ip)] - <<^field::binary-size(sz), ".eq:", rest::binary>> -> - [{:ip_field, field} | accept(rest, :ip)] - _ -> - accept(r_fields, opts, input, :ip_field) - end - end - defp accept([], %{int_fields: fields} = opts, input, :ip_field), do: accept(fields, opts, input, :int_field) - - # - # Integer fields - # - - defp accept([field | r_fields], opts, input, :int_field) do - sz = field |> byte_size - - case input do - <<^field::binary-size(sz), ":", rest::binary>> -> - [{:int_field, field}, {:range, :eq} | accept(rest, :int)] - <<^field::binary-size(sz), ".eq:", rest::binary>> -> - [{:int_field, field}, {:range, :eq} | accept(rest, :int)] - <<^field::binary-size(sz), ".lt:", rest::binary>> -> - [{:int_field, field}, {:range, :lt} | accept(rest, :int)] - <<^field::binary-size(sz), ".lte:", rest::binary>> -> - [{:int_field, field}, {:range, :lte}. accept(rest, :int)] - <<^field::binary-size(sz), ".gt:", rest::binary>> -> - [{:int_field, field}, {:range, :gt} | accept(rest, :int)] - <<^field::binary-size(sz), ".gte:", rest::binary>> -> - [{:int_field, field}, {:range, :gte} | accept(rest, :int)] - _ -> - accept(r_fields, opts, input, :int_field) - end - end - defp accept([], %{float_fields: fields} = opts, input, :int_field), do: accept(fields, opts, input, :float_field) - - # - # Float fields - # - - defp accept([field | r_fields], opts, input, :float_field) do - sz = field |> byte_size - - case input do - <<^field::binary-size(sz), ":", rest::binary>> -> - [{:float_field, field}, {:range, :eq} | accept(rest, :float)] - <<^field::binary-size(sz), ".eq:", rest::binary>> -> - [{:float_field, field}, {:range, :eq} | accept(rest, :float)] - <<^field::binary-size(sz), ".lt:", rest::binary>> -> - [{:float_field, field}, {:range, :lt} | accept(rest, :float)] - <<^field::binary-size(sz), ".lte:", rest::binary>> -> - [{:float_field, field}, {:range, :lte} | accept(rest, :float)] - <<^field::binary-size(sz), ".gt:", rest::binary>> -> - [{:float_field, field}, {:range, :gt} | accept(rest, :float)] - <<^field::binary-size(sz), ".gte:", rest::binary>> -> - [{:float_field, field}, {:range, :gte} | accept(rest, :float)] - _ -> - accept(r_fields, opts, input, :float_field) - end - end - defp accept([], %{date_fields: fields} = opts, input, :float_field), do: accept(fields, opts, input, :date_field) - - # - # Date fields - # - - defp accept([field | r_fields], opts, input, :date_field) do - sz = field |> byte_size - - case input do - <<^field::binary-size(sz), ":", rest::binary>> -> - [{:date_field, field}, {:range, :eq} | accept(rest, :date)] - <<^field::binary-size(sz), ".eq:", rest::binary>> -> - [{:date_field, field}, {:range, :eq} | accept(rest, :date)] - <<^field::binary-size(sz), ".lt:", rest::binary>> -> - [{:date_field, field}, {:range, :lt} | accept(rest, :date)] - <<^field::binary-size(sz), ".lte:", rest::binary>> -> - [{:date_field, field}, {:range, :lte} | accept(rest, :date)] - <<^field::binary-size(sz), ".gt:", rest::binary>> -> - [{:date_field, field}, {:range, :gt} | accept(rest, :date)] - <<^field::binary-size(sz), ".gte:", rest::binary>> -> - [{:date_field, field}, {:range, :gte} | accept(rest, :date)] - _ -> - accept(r_fields, opts, input, :date_field) - end - end - - # - # Default field handling - # - - defp accept([], %{default_field: field} = opts, input, :date_field) do - [{:literal_field, field} | accept(input, :literal)] - end - - # - # Text and wildcarded text - # - - defp accept(input, :literal), do: accept(input, "", :literal) - - defp accept(<<"\\", c::utf8, rest::binary>>, term, :literal), do: accept(rest, term <> <>, :literal) - defp accept(<>, term, :literal) when c in '*?', do: accept(rest, term <> <>, :wildcard) - defp accept(<>, term, :literal), do: accept(rest, term <> <>, :literal) - defp accept(<<>>, term, :literal), do: [literal: term] - - defp accept(<<"\\", c::utf8, rest::binary>>, term, :wildcard) when c in '*?', do: accept(rest, term <> <<"\\", c::utf8>>, :wildcard) - defp accept(<<"\\", c::utf8, rest::binary>>, term, :wildcard), do: accept(rest, term <> <>, :wildcard) - defp accept(<>, term, :wildcard), do: accept(rest, term <> <>, :wildcard) - defp accept(<<>>, term, :wildcard), do: [wildcard: term] - - # - # Booleans - # - - defp accept("true", :boolean), do: [boolean: true] - defp accept("false", :boolean), do: [boolean: false] - defp accept(input, :boolean), do: raise ArgumentError, "Expected a boolean, got `\#{input}'." - - # - # Floats (integers are also considered valid) - # - - defp accept(<<"+", rest::binary>>, :float), do: accept(rest, "", :float_w) - defp accept(<<"-", rest::binary>>, :float), do: accept(rest, "-", :float_w) - defp accept(input, :float), do: accept(input, "", :float_w) - - defp accept(<>, term, :float_w) when c in ?0..?9, do: accept(rest, term <> <>, :float_w) - defp accept(<<".", rest::binary>>, term, :float_w), do: accept(rest, term <> ".", :float_f) - defp accept(<>, term, :float_w), do: raise ArgumentError, "Expected a float, got `\#{<>}'." - defp accept(<<>>, term, :float_w), do: [float: to_number(term)] - - defp accept(<>, term, :float_f) when c in ?0..?9, do: accept(rest, term <> <>, :float_f) - defp accept(<>, term, :float_f), do: raise ArgumentError, "Expected a float, got `\#{<>}'." - defp accept(<<>>, term, :float_f), do: [float: to_number(term)] - - # - # Integers - # - - defp accept(<<"+", rest::binary>>, :int), do: accept(rest, "", :int_w) - defp accept(<<"-", rest::binary>>, :int), do: accept(rest, "-", :int_w) - defp accept(input, :int), do: accept(input, "", :int_w) - - defp accept(<>, term, :int_w) when c in ?0..?9, do: accept(rest, term <> <>, :int_w) - defp accept(<>, term, :int_w), do: raise ArgumentError, "Expected an integer, got `\#{<>}'." - defp accept(<<>>, term, :int_w), do: [int: to_number(term)] - - # - # IP addresses - # - - defp accept(<>, :ip) when c1 in ?0..9 and c2 in ?0..?9 and c3 in ?0..?9, do: accept({}) - defp accept(<<"::ffff:", c1::utf8, c2::utf8, c3::utf8, ".", rest::binary>>, :ip) when c1 in ?0..9 and c2 in ?0..?9 and c3 in ?0..?9, do: accept({}) - defp accept(<>, :ip) when c1 in ?0..9 and c2 in ?0..?9, do: accept({}) - defp accept(<<"::ffff:",c1::utf8, c2::utf8, ".", rest::binary>>, :ip) when c1 in ?0..9 and c2 in ?0..?9, do: accept({}) - defp accept(<>, :ip) when c1 in ?0..9, do: accept({}) - defp accept(<<"::ffff:", c1::utf8, ".", rest::binary>>, :ip) when c1 in ?0..9, do: accept({}) - - defp to_number(term) do - {float_val, _} = :string.to_float(term) - {int_val, _} = :string.to_integer(term) - - cond do - is_float(float_val) -> - float_val - is_integer(int_val) -> - int_val - true -> - raise ArgumentError, "Expected a number." - end - end -end -""" \ No newline at end of file