rename stuff

This commit is contained in:
byte[] 2019-08-21 20:43:34 -04:00
parent 0188a27893
commit aef0e7f7d5
3 changed files with 89 additions and 453 deletions

View file

@ -1,105 +1,5 @@
defmodule Philomena.Search.Lexer do
@doc """
Tokenizes a search query.

The input is converted to a charlist and lexed starting in the `:outer`
state. Returns `{:ok, tokens}` on success. An `ArgumentError` raised while
lexing is reported as `{:error, message}`; any other exception is reported
as `{:error, "Parsing error."}`.
"""
def lex(input) do
  tokens =
    input
    |> to_charlist()
    |> accept(:outer)

  {:ok, tokens}
rescue
  error in [ArgumentError] ->
    {:error, error.message}

  _ ->
    {:error, "Parsing error."}
end
#
# Outer state: we are between terms, so whitespace carries no meaning.
# Operators and parentheses become `{type, text}` tokens, `^` and `~` switch
# to the number state, a double quote opens a quoted term and any other
# character starts an unquoted term.
#
defp accept([], :outer), do: [{:eof, "$"}]
defp accept(~c"AND" ++ tail, :outer), do: [{:and, "AND"} | accept(tail, :outer)]
defp accept(~c"NOT" ++ tail, :outer), do: [{:not, "NOT"} | accept(tail, :outer)]
defp accept(~c"OR" ++ tail, :outer), do: [{:or, "OR"} | accept(tail, :outer)]
defp accept(~c"||" ++ tail, :outer), do: [{:or, "||"} | accept(tail, :outer)]
defp accept(~c"&&" ++ tail, :outer), do: [{:and, "&&"} | accept(tail, :outer)]
defp accept(~c"," ++ tail, :outer), do: [{:and, ","} | accept(tail, :outer)]
defp accept(~c"!" ++ tail, :outer), do: [{:not, "!"} | accept(tail, :outer)]
defp accept(~c"-" ++ tail, :outer), do: [{:not, "-"} | accept(tail, :outer)]
defp accept(~c"(" ++ tail, :outer), do: [{:lparen, "("} | accept(tail, :outer)]
defp accept(~c")" ++ tail, :outer), do: [{:rparen, ")"} | accept(tail, :outer)]
defp accept(~c"^" ++ tail, :outer), do: [{:boost, "^"} | accept(tail, :float)]
defp accept(~c"~" ++ tail, :outer), do: [{:fuzz, "~"} | accept(tail, :float)]

# Space, tab, newline, carriage return, vertical tab and form feed are skipped.
defp accept([char | tail], :outer) when char in [?\s, ?\t, ?\n, ?\r, ?\v, ?\f],
  do: accept(tail, :outer)

defp accept([?" | tail], :outer), do: accept(tail, "", :quoted_term)
defp accept(chars, :outer), do: accept(chars, "", 0, :term)
#
# Quoted term state: gathers characters until the closing double quote.
# `\"` yields a literal quote; a lone backslash as the very last character
# of the input, or input that ends before the closing quote, raises.
#
defp accept([?\\, ?" | tail], acc, :quoted_term), do: accept(tail, acc <> "\"", :quoted_term)
defp accept([?\\], _acc, :quoted_term), do: raise(ArgumentError, "Unpaired backslash.")
defp accept([?" | tail], acc, :quoted_term), do: [{:term, acc} | accept(tail, :outer)]

defp accept([char | tail], acc, :quoted_term),
  do: accept(tail, acc <> <<char::utf8>>, :quoted_term)

defp accept([], _acc, :quoted_term), do: raise(ArgumentError, "Imbalanced quotes.")
#
# Term state
#
# Accumulates an unquoted term in `term`. `depth` counts the parentheses
# opened inside the term: `(` increments it, `)` decrements it, and a `)`
# seen at depth 0 ends the term and is emitted as its own token. Operators,
# `,`, boosts and fuzzes only end the term at depth 0. Every emitted term is
# trimmed with `String.trim/1`.
#
# A backslash escapes `(`, `)` and `\`; a lone backslash at the very end of
# the input raises.
defp accept([?\\, c] ++ rest, term, depth, :term) when c in '()\\', do: accept(rest, term <> <<c::utf8>>, depth, :term)
defp accept('\\', _term, _depth, :term), do: raise ArgumentError, "Unpaired backslash."
defp accept('(' ++ rest, term, depth, :term), do: accept(rest, term <> "(", depth + 1, :term)
defp accept(')' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:rparen, ")"} | accept(rest, :outer)]
defp accept(')' ++ rest, term, depth, :term), do: accept(rest, term <> ")", depth - 1, :term)
defp accept(' AND' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:and, "AND"} | accept(rest, :outer)]
defp accept(' OR' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:or, "OR"} | accept(rest, :outer)]
defp accept(' &&' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:and, "&&"} | accept(rest, :outer)]
defp accept(' ||' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:or, "||"} | accept(rest, :outer)]
defp accept(',' ++ rest, term, 0, :term), do: [{:term, String.trim(term)}, {:and, ","} | accept(rest, :outer)]
# `^` / `~` followed by a sign or digit starts a boost / fuzz number; the
# term is trimmed here as well so that "foo ^2" yields "foo", not "foo ".
defp accept([?^, c] ++ rest, term, 0, :term) when c in '+-0123456789', do: [{:term, String.trim(term)}, {:boost, "^"} | accept([c | rest], :float)]
defp accept([?~, c] ++ rest, term, 0, :term) when c in '+-0123456789', do: [{:term, String.trim(term)}, {:fuzz, "~"} | accept([c | rest], :float)]
# Otherwise `^` and `~` are ordinary term characters.
defp accept('^' ++ rest, term, depth, :term), do: accept(rest, term <> "^", depth, :term)
defp accept('~' ++ rest, term, depth, :term), do: accept(rest, term <> "~", depth, :term)
defp accept([c] ++ rest, term, depth, :term), do: accept(rest, term <> <<c::utf8>>, depth, :term)
defp accept([], term, 0, :term), do: [term: String.trim(term), eof: "$"]
# Input ended while a parenthesis inside the term was still open.
defp accept([], _term, _depth, :term), do: raise ArgumentError, "Imbalanced parentheses."
#
# Number state (for boosting, fuzzing)
#
# `:float` consumes an optional sign, `:float_w` the digits before the
# decimal point and `:float_f` the digits after it. A delimiter ends the
# number: the accumulated text is converted with `to_number/1` and the
# delimiter is emitted as the token it actually denotes (` AND`/` &&`/`,`
# are `:and`, ` OR`/` ||` are `:or`). Any other character raises.
#
defp accept('+' ++ rest, :float), do: accept(rest, "", :float_w)
defp accept('-' ++ rest, :float), do: accept(rest, "-", :float_w)
defp accept(input, :float), do: accept(input, "", :float_w)

# Whole-number part.
defp accept([c] ++ rest, term, :float_w) when c in ?0..?9, do: accept(rest, term <> <<c::utf8>>, :float_w)
defp accept('.' ++ rest, term, :float_w), do: accept(rest, term <> ".", :float_f)
defp accept(')' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:rparen, ")"} | accept(rest, :outer)]
defp accept(' AND' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:and, "AND"} | accept(rest, :outer)]
defp accept(' OR' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:or, "OR"} | accept(rest, :outer)]
defp accept(' &&' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:and, "&&"} | accept(rest, :outer)]
defp accept(' ||' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:or, "||"} | accept(rest, :outer)]
defp accept(',' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:and, ","} | accept(rest, :outer)]
defp accept('^' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:boost, "^"} | accept(rest, :float)]
defp accept('~' ++ rest, term, :float_w), do: [{:float, to_number(term)}, {:fuzz, "~"} | accept(rest, :float)]
defp accept(' ' ++ rest, term, :float_w), do: [{:float, to_number(term)} | accept(rest, :outer)]
defp accept([], term, :float_w), do: [float: to_number(term), eof: "$"]
defp accept(_input, _term, :float_w), do: raise ArgumentError, "Expected a number."

# Fractional part (a second `.` is not accepted here).
defp accept([c] ++ rest, term, :float_f) when c in ?0..?9, do: accept(rest, term <> <<c::utf8>>, :float_f)
defp accept(')' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:rparen, ")"} | accept(rest, :outer)]
defp accept(' AND' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:and, "AND"} | accept(rest, :outer)]
defp accept(' OR' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:or, "OR"} | accept(rest, :outer)]
defp accept(' &&' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:and, "&&"} | accept(rest, :outer)]
defp accept(' ||' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:or, "||"} | accept(rest, :outer)]
defp accept(',' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:and, ","} | accept(rest, :outer)]
defp accept('^' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:boost, "^"} | accept(rest, :float)]
defp accept('~' ++ rest, term, :float_f), do: [{:float, to_number(term)}, {:fuzz, "~"} | accept(rest, :float)]
defp accept(' ' ++ rest, term, :float_f), do: [{:float, to_number(term)} | accept(rest, :outer)]
defp accept([], term, :float_f), do: [float: to_number(term), eof: "$"]
defp accept(_input, _term, :float_f), do: raise ArgumentError, "Expected a number."
import NimbleParsec
defp to_number(term) do
{float_val, _} = :string.to_float(term)
@ -110,8 +10,94 @@ defmodule Philomena.Search.Lexer do
float_val
is_integer(int_val) ->
int_val
true ->
raise ArgumentError, "Expected a number."
end
end
# Boolean operators; each alternative is tagged with the operator it denotes.
l_and =
  choice([string("AND"), string("&&"), string(",")])
  |> unwrap_and_tag(:and)

l_or =
  choice([string("OR"), string("||")])
  |> unwrap_and_tag(:or)

l_not =
  choice([string("NOT"), string("!"), string("-")])
  |> unwrap_and_tag(:not)

lparen = string("(") |> unwrap_and_tag(:lparen)
rparen = string(")") |> unwrap_and_tag(:rparen)

# Optional sign, one or more digits, optionally "." and one or more digits.
# `times/2` used at the end of a pipe repeats the WHOLE chain before it, so
# the digit repetition is written with the explicit `times(to_repeat, opts)`
# form. Otherwise "1.55" would stop after "1.5" and "1-2" would be accepted
# as a single number.
number =
  optional(ascii_char('-+'))
  |> times(ascii_char([?0..?9]), min: 1)
  |> optional(ascii_char('.') |> times(ascii_char([?0..?9]), min: 1))
  |> reduce(:to_number)

boost = ignore(string("^")) |> unwrap_and_tag(number, :boost)
fuzz = ignore(string("~")) |> unwrap_and_tag(number, :fuzz)

# Whitespace is matched but produces no token.
space =
  choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
  |> ignore()

quot = string("\"")

# A double-quoted term. `\"` and `\\` are unescaped; a backslash before any
# other character is kept together with that character. Only the character
# choice is repeated. Repeating the opening quote as well (the piped
# `times(min: 1)` form) would only ever match one-character quoted terms.
quoted_term =
  ignore(quot)
  |> times(
    choice([
      ignore(string("\\")) |> string("\""),
      ignore(string("\\")) |> string("\\"),
      string("\\") |> utf8_char([]),
      utf8_char(not: ?")
    ]),
    min: 1
  )
  |> ignore(quot)
  |> reduce({List, :to_string, []})
  |> unwrap_and_tag(:term)

# Anything that ends an unquoted term.
stop_words =
  choice([
    string("\\") |> eos(),
    string(","),
    concat(space, l_and),
    concat(space, l_or),
    concat(space, l_not),
    rparen,
    fuzz,
    boost
  ])

# One or more characters of an unquoted term. Here the piped `times(min: 1)`
# deliberately repeats the whole "not a stop word, then one item" chain so the
# lookahead runs before every item. A parenthesised group recurses into this
# combinator.
defcombinatorp :simple_term,
               lookahead_not(stop_words)
               |> choice([
                 string("\\") |> utf8_char([]),
                 string("(") |> parsec(:simple_term) |> string(")"),
                 utf8_char([])
               ])
               |> times(min: 1)

unquoted_term =
  parsec(:simple_term)
  |> reduce({List, :to_string, []})
  |> unwrap_and_tag(:term)

# Alternatives are tried in order, so operators win over terms.
outer =
  choice([
    l_and,
    l_or,
    l_not,
    lparen,
    rparen,
    boost,
    fuzz,
    space,
    quoted_term,
    unquoted_term
  ])

# A query is one or more tokens followed by end of input.
search =
  times(outer, min: 1)
  |> eos()

defparsec :search, search
end

View file

@ -1,103 +0,0 @@
defmodule Philomena.Search.LexerTwo do
  @moduledoc """
  NimbleParsec-based lexer for search queries.

  `search/1` (generated by `defparsec`) splits a query into tagged tokens:
  `:and`, `:or`, `:not`, `:lparen`, `:rparen`, `:boost`, `:fuzz` and `:term`.
  """

  import NimbleParsec

  # Converts the characters matched by `number` into a float when they parse
  # as one, otherwise into an integer.
  defp to_number(term) do
    {float_val, _} = :string.to_float(term)
    {int_val, _} = :string.to_integer(term)

    cond do
      is_float(float_val) ->
        float_val

      is_integer(int_val) ->
        int_val

      # Neither conversion succeeded; raise a descriptive error instead of
      # letting `cond` fail with no matching clause.
      true ->
        raise ArgumentError, "Expected a number."
    end
  end

  # Boolean operators; each alternative is tagged with the operator it denotes.
  l_and =
    choice([string("AND"), string("&&"), string(",")])
    |> unwrap_and_tag(:and)

  l_or =
    choice([string("OR"), string("||")])
    |> unwrap_and_tag(:or)

  l_not =
    choice([string("NOT"), string("!"), string("-")])
    |> unwrap_and_tag(:not)

  lparen = string("(") |> unwrap_and_tag(:lparen)
  rparen = string(")") |> unwrap_and_tag(:rparen)

  # Optional sign, one or more digits, optionally "." and one or more digits.
  # `times/2` used at the end of a pipe repeats the WHOLE chain before it, so
  # the digit repetition is written with the explicit `times(to_repeat, opts)`
  # form. Otherwise "1.55" would stop after "1.5" and "1-2" would be accepted
  # as a single number.
  number =
    optional(ascii_char('-+'))
    |> times(ascii_char([?0..?9]), min: 1)
    |> optional(ascii_char('.') |> times(ascii_char([?0..?9]), min: 1))
    |> reduce(:to_number)

  boost = ignore(string("^")) |> unwrap_and_tag(number, :boost)
  fuzz = ignore(string("~")) |> unwrap_and_tag(number, :fuzz)

  # Whitespace is matched but produces no token.
  space =
    choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
    |> ignore()

  quot = string("\"")

  # A double-quoted term. `\"` and `\\` are unescaped; a backslash before any
  # other character is kept together with that character. Only the character
  # choice is repeated. Repeating the opening quote as well (the piped
  # `times(min: 1)` form) would only ever match one-character quoted terms.
  quoted_term =
    ignore(quot)
    |> times(
      choice([
        ignore(string("\\")) |> string("\""),
        ignore(string("\\")) |> string("\\"),
        string("\\") |> utf8_char([]),
        utf8_char(not: ?")
      ]),
      min: 1
    )
    |> ignore(quot)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:term)

  # Anything that ends an unquoted term.
  stop_words =
    choice([
      string("\\") |> eos(),
      string(","),
      concat(space, l_and),
      concat(space, l_or),
      concat(space, l_not),
      rparen,
      fuzz,
      boost
    ])

  # One or more characters of an unquoted term. Here the piped `times(min: 1)`
  # deliberately repeats the whole "not a stop word, then one item" chain so
  # the lookahead runs before every item. A parenthesised group recurses into
  # this combinator.
  defcombinatorp :simple_term,
                 lookahead_not(stop_words)
                 |> choice([
                   string("\\") |> utf8_char([]),
                   string("(") |> parsec(:simple_term) |> string(")"),
                   utf8_char([])
                 ])
                 |> times(min: 1)

  unquoted_term =
    parsec(:simple_term)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:term)

  # Alternatives are tried in order, so operators win over terms.
  outer =
    choice([
      l_and,
      l_or,
      l_not,
      lparen,
      rparen,
      boost,
      fuzz,
      space,
      quoted_term,
      unquoted_term
    ])

  # A query is one or more tokens followed by end of input.
  search =
    times(outer, min: 1)
    |> eos()

  defparsec :search, search
end

View file

@ -1,247 +0,0 @@
"""
defmodule Philomena.Search.TermLexer do
def lex(opts, input) do
{:ok, accept(opts, input, :literal)}
#rescue
# e in ArgumentError ->
# {:error, e.message}
# _ ->
# {:error, "Parsing error."}
end
#
# Literal fields
#
defp accept([field | r_fields], opts, input, :literal_field) do
sz = field |> byte_size
case input do
<<^field::binary-size(sz), ":", rest::binary>> ->
[{:literal_field, field} | accept(rest, :literal)]
<<^field::binary-size(sz), ".eq:", rest::binary>> ->
[{:literal_field, field} | accept(rest, :literal)]
_ ->
accept(r_fields, opts, input, :literal_field)
end
end
defp accept([], %{boolean_fields: fields} = opts, input, :literal_field), do: accept(fields, opts, input, :boolean_field)
#
# Boolean fields
#
defp accept([field | r_fields], opts, input, :boolean_field) do
sz = field |> byte_size
case input do
<<^field::binary-size(sz), ":", rest::binary>> ->
[{:boolean_field, field} | accept(rest, :boolean)]
<<^field::binary-size(sz), ".eq:", rest::binary>> ->
[{:boolean_field, field} | accept(rest, :boolean)]
_ ->
accept(r_fields, opts, input, :boolean_field)
end
end
defp accept([], %{ngram_fields: fields} = opts, input, :boolean_field), do: accept(fields, opts, input, :ngram_field)
#
# NLP-analyzed fields
#
defp accept([field | r_fields], opts, input, :ngram_field) do
sz = field |> byte_size
case input do
<<^field::binary-size(sz), ":", rest::binary>> ->
[{:ngram_field, field} | accept(rest, :literal)]
<<^field::binary-size(sz), ".eq:", rest::binary>> ->
[{:ngram_field, field} | accept(rest, :literal)]
_ ->
accept(r_fields, opts, input, :ngram_field)
end
end
defp accept([], %{ip_fields: fields} = opts, input, :ngram_field), do: accept(fields, opts, input, :ip_field)
#
# IP address and CIDR range fields
#
defp accept([field | r_fields], opts, input, :ip_field) do
sz = field |> byte_size
case input do
<<^field::binary-size(sz), ":", rest::binary>> ->
[{:ip_field, field} | accept(rest, :ip)]
<<^field::binary-size(sz), ".eq:", rest::binary>> ->
[{:ip_field, field} | accept(rest, :ip)]
_ ->
accept(r_fields, opts, input, :ip_field)
end
end
defp accept([], %{int_fields: fields} = opts, input, :ip_field), do: accept(fields, opts, input, :int_field)
#
# Integer fields
#
defp accept([field | r_fields], opts, input, :int_field) do
sz = field |> byte_size
case input do
<<^field::binary-size(sz), ":", rest::binary>> ->
[{:int_field, field}, {:range, :eq} | accept(rest, :int)]
<<^field::binary-size(sz), ".eq:", rest::binary>> ->
[{:int_field, field}, {:range, :eq} | accept(rest, :int)]
<<^field::binary-size(sz), ".lt:", rest::binary>> ->
[{:int_field, field}, {:range, :lt} | accept(rest, :int)]
<<^field::binary-size(sz), ".lte:", rest::binary>> ->
[{:int_field, field}, {:range, :lte} | accept(rest, :int)]
<<^field::binary-size(sz), ".gt:", rest::binary>> ->
[{:int_field, field}, {:range, :gt} | accept(rest, :int)]
<<^field::binary-size(sz), ".gte:", rest::binary>> ->
[{:int_field, field}, {:range, :gte} | accept(rest, :int)]
_ ->
accept(r_fields, opts, input, :int_field)
end
end
defp accept([], %{float_fields: fields} = opts, input, :int_field), do: accept(fields, opts, input, :float_field)
#
# Float fields
#
defp accept([field | r_fields], opts, input, :float_field) do
sz = field |> byte_size
case input do
<<^field::binary-size(sz), ":", rest::binary>> ->
[{:float_field, field}, {:range, :eq} | accept(rest, :float)]
<<^field::binary-size(sz), ".eq:", rest::binary>> ->
[{:float_field, field}, {:range, :eq} | accept(rest, :float)]
<<^field::binary-size(sz), ".lt:", rest::binary>> ->
[{:float_field, field}, {:range, :lt} | accept(rest, :float)]
<<^field::binary-size(sz), ".lte:", rest::binary>> ->
[{:float_field, field}, {:range, :lte} | accept(rest, :float)]
<<^field::binary-size(sz), ".gt:", rest::binary>> ->
[{:float_field, field}, {:range, :gt} | accept(rest, :float)]
<<^field::binary-size(sz), ".gte:", rest::binary>> ->
[{:float_field, field}, {:range, :gte} | accept(rest, :float)]
_ ->
accept(r_fields, opts, input, :float_field)
end
end
defp accept([], %{date_fields: fields} = opts, input, :float_field), do: accept(fields, opts, input, :date_field)
#
# Date fields
#
defp accept([field | r_fields], opts, input, :date_field) do
sz = field |> byte_size
case input do
<<^field::binary-size(sz), ":", rest::binary>> ->
[{:date_field, field}, {:range, :eq} | accept(rest, :date)]
<<^field::binary-size(sz), ".eq:", rest::binary>> ->
[{:date_field, field}, {:range, :eq} | accept(rest, :date)]
<<^field::binary-size(sz), ".lt:", rest::binary>> ->
[{:date_field, field}, {:range, :lt} | accept(rest, :date)]
<<^field::binary-size(sz), ".lte:", rest::binary>> ->
[{:date_field, field}, {:range, :lte} | accept(rest, :date)]
<<^field::binary-size(sz), ".gt:", rest::binary>> ->
[{:date_field, field}, {:range, :gt} | accept(rest, :date)]
<<^field::binary-size(sz), ".gte:", rest::binary>> ->
[{:date_field, field}, {:range, :gte} | accept(rest, :date)]
_ ->
accept(r_fields, opts, input, :date_field)
end
end
#
# Default field handling
#
defp accept([], %{default_field: field} = opts, input, :date_field) do
[{:literal_field, field} | accept(input, :literal)]
end
#
# Text and wildcarded text
#
defp accept(input, :literal), do: accept(input, "", :literal)
defp accept(<<"\\", c::utf8, rest::binary>>, term, :literal), do: accept(rest, term <> <<c::utf8>>, :literal)
defp accept(<<c::utf8, rest::binary>>, term, :literal) when c in '*?', do: accept(rest, term <> <<c::utf8>>, :wildcard)
defp accept(<<c::utf8, rest::binary>>, term, :literal), do: accept(rest, term <> <<c::utf8>>, :literal)
defp accept(<<>>, term, :literal), do: [literal: term]
defp accept(<<"\\", c::utf8, rest::binary>>, term, :wildcard) when c in '*?', do: accept(rest, term <> <<"\\", c::utf8>>, :wildcard)
defp accept(<<"\\", c::utf8, rest::binary>>, term, :wildcard), do: accept(rest, term <> <<c::utf8>>, :wildcard)
defp accept(<<c::utf8, rest::binary>>, term, :wildcard), do: accept(rest, term <> <<c::utf8>>, :wildcard)
defp accept(<<>>, term, :wildcard), do: [wildcard: term]
#
# Booleans
#
defp accept("true", :boolean), do: [boolean: true]
defp accept("false", :boolean), do: [boolean: false]
defp accept(input, :boolean), do: raise ArgumentError, "Expected a boolean, got `\#{input}'."
#
# Floats (integers are also considered valid)
#
defp accept(<<"+", rest::binary>>, :float), do: accept(rest, "", :float_w)
defp accept(<<"-", rest::binary>>, :float), do: accept(rest, "-", :float_w)
defp accept(input, :float), do: accept(input, "", :float_w)
defp accept(<<c::utf8, rest::binary>>, term, :float_w) when c in ?0..?9, do: accept(rest, term <> <<c::utf8>>, :float_w)
defp accept(<<".", rest::binary>>, term, :float_w), do: accept(rest, term <> ".", :float_f)
defp accept(<<c::utf8, rest::binary>>, term, :float_w), do: raise ArgumentError, "Expected a float, got `\#{<<term::binary, c::utf8, rest::binary>>}'."
defp accept(<<>>, term, :float_w), do: [float: to_number(term)]
defp accept(<<c::utf8, rest::binary>>, term, :float_f) when c in ?0..?9, do: accept(rest, term <> <<c::utf8>>, :float_f)
defp accept(<<c::utf8, rest::binary>>, term, :float_f), do: raise ArgumentError, "Expected a float, got `\#{<<term::binary, c::utf8, rest::binary>>}'."
defp accept(<<>>, term, :float_f), do: [float: to_number(term)]
#
# Integers
#
defp accept(<<"+", rest::binary>>, :int), do: accept(rest, "", :int_w)
defp accept(<<"-", rest::binary>>, :int), do: accept(rest, "-", :int_w)
defp accept(input, :int), do: accept(input, "", :int_w)
defp accept(<<c::utf8, rest::binary>>, term, :int_w) when c in ?0..?9, do: accept(rest, term <> <<c::utf8>>, :int_w)
defp accept(<<c::utf8, rest::binary>>, term, :int_w), do: raise ArgumentError, "Expected an integer, got `\#{<<term::binary, c::utf8, rest::binary>>}'."
defp accept(<<>>, term, :int_w), do: [int: to_number(term)]
#
# IP addresses
#
defp accept(<<c1::utf8, c2::utf8, c3::utf8, ".", rest::binary>>, :ip) when c1 in ?0..?9 and c2 in ?0..?9 and c3 in ?0..?9, do: accept({})
defp accept(<<"::ffff:", c1::utf8, c2::utf8, c3::utf8, ".", rest::binary>>, :ip) when c1 in ?0..?9 and c2 in ?0..?9 and c3 in ?0..?9, do: accept({})
defp accept(<<c1::utf8, c2::utf8, ".", rest::binary>>, :ip) when c1 in ?0..?9 and c2 in ?0..?9, do: accept({})
defp accept(<<"::ffff:",c1::utf8, c2::utf8, ".", rest::binary>>, :ip) when c1 in ?0..?9 and c2 in ?0..?9, do: accept({})
defp accept(<<c1::utf8, ".", rest::binary>>, :ip) when c1 in ?0..?9, do: accept({})
defp accept(<<"::ffff:", c1::utf8, ".", rest::binary>>, :ip) when c1 in ?0..?9, do: accept({})
defp to_number(term) do
{float_val, _} = :string.to_float(term)
{int_val, _} = :string.to_integer(term)
cond do
is_float(float_val) ->
float_val
is_integer(int_val) ->
int_val
true ->
raise ArgumentError, "Expected a number."
end
end
end
"""