This commit is contained in:
byte[] 2019-08-21 21:07:58 -04:00
parent aef0e7f7d5
commit 108377462e
3 changed files with 60 additions and 38 deletions

View file

@ -8,6 +8,7 @@ defmodule Philomena.Search.Lexer do
cond do cond do
is_float(float_val) -> is_float(float_val) ->
float_val float_val
is_integer(int_val) -> is_integer(int_val) ->
int_val int_val
end end
@ -44,7 +45,7 @@ defmodule Philomena.Search.Lexer do
quot = string("\"") quot = string("\"")
quoted_term = quoted_term =
ignore(quot) ignore(quot)
|> choice([ |> choice([
ignore(string("\\")) |> string("\""), ignore(string("\\")) |> string("\""),
@ -57,47 +58,51 @@ defmodule Philomena.Search.Lexer do
|> reduce({List, :to_string, []}) |> reduce({List, :to_string, []})
|> unwrap_and_tag(:term) |> unwrap_and_tag(:term)
stop_words = choice([ stop_words =
string("\\") |> eos(), choice([
string(","), string("\\") |> eos(),
concat(space, l_and), string(","),
concat(space, l_or), concat(space, l_and),
concat(space, l_not), concat(space, l_or),
rparen, concat(space, l_not),
fuzz, rparen,
boost fuzz,
]) boost
])
defcombinatorp :simple_term, defcombinatorp(
:simple_term,
lookahead_not(stop_words) lookahead_not(stop_words)
|> choice([ |> choice([
string("\\") |> utf8_char([]), string("\\") |> utf8_char([]),
string("(") |> parsec(:simple_term) |> string(")"), string("(") |> parsec(:simple_term) |> string(")"),
utf8_char([]), utf8_char([])
]) ])
|> times(min: 1) |> times(min: 1)
)
unquoted_term = unquoted_term =
parsec(:simple_term) parsec(:simple_term)
|> reduce({List, :to_string, []}) |> reduce({List, :to_string, []})
|> unwrap_and_tag(:term) |> unwrap_and_tag(:term)
outer = choice([ outer =
l_and, choice([
l_or, l_and,
l_not, l_or,
lparen, l_not,
rparen, lparen,
boost, rparen,
fuzz, boost,
space, fuzz,
quoted_term, space,
unquoted_term quoted_term,
]) unquoted_term
])
search = search =
times(outer, min: 1) times(outer, min: 1)
|> eos() |> eos()
defparsec :search, search defparsec(:search, search)
end end

View file

@ -1,13 +1,12 @@
defmodule Philomena.Search.Parser do defmodule Philomena.Search.Parser do
alias Philomena.Search.Lexer
def parse(ctx, tokens) do def parse(ctx, tokens) do
{tree, [eof: "$"]} = search_top(ctx, tokens) {tree, []} = search_top(ctx, tokens)
{:ok, tree} {:ok, tree}
rescue rescue
e in ArgumentError -> e in ArgumentError ->
{:error, e.message} {:error, e.message}
_ -> _ ->
{:error, "Parsing error."} {:error, "Parsing error."}
end end
@ -20,11 +19,13 @@ defmodule Philomena.Search.Parser do
# #
# Boolean OR # Boolean OR
# #
defp search_or(ctx, tokens) do defp search_or(ctx, tokens) do
case search_and(ctx, tokens) do case search_and(ctx, tokens) do
{left, [{:or, _} | r_tokens]} -> {left, [{:or, _} | r_tokens]} ->
{right, rest} = search_top(ctx, r_tokens) {right, rest} = search_top(ctx, r_tokens)
{%{bool: %{should: [left, right]}}, rest} {%{bool: %{should: [left, right]}}, rest}
{child, rest} -> {child, rest} ->
{child, rest} {child, rest}
end end
@ -33,11 +34,13 @@ defmodule Philomena.Search.Parser do
# #
# Boolean AND # Boolean AND
# #
defp search_and(ctx, tokens) do defp search_and(ctx, tokens) do
case search_boost(ctx, tokens) do case search_boost(ctx, tokens) do
{left, [{:and, _} | r_tokens]} -> {left, [{:and, _} | r_tokens]} ->
{right, rest} = search_top(ctx, r_tokens) {right, rest} = search_top(ctx, r_tokens)
{%{bool: %{must: [left, right]}}, rest} {%{bool: %{must: [left, right]}}, rest}
{child, rest} -> {child, rest} ->
{child, rest} {child, rest}
end end
@ -46,10 +49,12 @@ defmodule Philomena.Search.Parser do
# #
# Subquery score boosting # Subquery score boosting
# #
defp search_boost(ctx, tokens) do defp search_boost(ctx, tokens) do
case search_not(ctx, tokens) do case search_not(ctx, tokens) do
{child, [{:boost, _}, {:float, value} | r_tokens]} -> {child, [{:boost, _}, {:float, value} | r_tokens]} ->
{%{function_score: %{query: child, boost_factor: value}}, r_tokens} {%{function_score: %{query: child, boost_factor: value}}, r_tokens}
{child, rest} -> {child, rest} ->
{child, rest} {child, rest}
end end
@ -58,40 +63,52 @@ defmodule Philomena.Search.Parser do
# #
# Boolean NOT # Boolean NOT
# #
defp search_not(ctx, [{:not, _} | r_tokens]) do defp search_not(ctx, [{:not, _} | r_tokens]) do
{child, rest} = search_top(ctx, r_tokens) {child, rest} = search_top(ctx, r_tokens)
{%{bool: %{must_not: child}}, rest} {%{bool: %{must_not: child}}, rest}
end end
defp search_not(ctx, tokens), do: search_group(ctx, tokens) defp search_not(ctx, tokens), do: search_group(ctx, tokens)
# #
# Logical grouping # Logical grouping
# #
defp search_group(ctx, [{:lparen, _} | rest]) do defp search_group(ctx, [{:lparen, _} | rest]) do
case search_top(ctx, rest) do case search_top(ctx, rest) do
{child, [{:rparen, _} | r_tokens]} -> {child, [{:rparen, _} | r_tokens]} ->
{child, r_tokens} {child, r_tokens}
_ -> _ ->
raise ArgumentError, "Imbalanced parentheses." raise ArgumentError, "Imbalanced parentheses."
end end
end end
defp search_group(_ctx, [{:rparen, _} | _rest]), do: raise ArgumentError, "Imbalanced parentheses."
defp search_group(_ctx, [{:rparen, _} | _rest]),
do: raise(ArgumentError, "Imbalanced parentheses.")
defp search_group(ctx, tokens), do: search_fuzz(ctx, tokens) defp search_group(ctx, tokens), do: search_fuzz(ctx, tokens)
# #
# Term fuzzing # Term fuzzing
# #
defp search_fuzz(ctx, tokens) do defp search_fuzz(ctx, tokens) do
nil search_term(ctx, tokens)
end end
# #
# Search terms # Search terms
# #
defp search_term(ctx, [{:term, t} | rest]) do
{TermParser.parse(ctx, t), rest} defp search_term(_ctx, [{:term, _t} | rest]) do
{[], rest}
end end
defp search_term(_ctx, [eof: "$"]), do: raise ArgumentError, "Expected a term, got <end of input>."
defp search_term(_ctx, [{_, text} | _rest]), do: raise ArgumentError, "Expected a term, got `#{text}'." defp search_term(_ctx, []), do: raise(ArgumentError, "Expected a term, got <end of input>.")
end
defp search_term(_ctx, [{_, text} | _rest]),
do: raise(ArgumentError, "Expected a term, got `#{text}'.")
end

View file

@ -50,7 +50,7 @@ defmodule Philomena.MixProject do
{:pot, "~> 0.10.1"}, {:pot, "~> 0.10.1"},
{:secure_compare, "~> 0.1.0"}, {:secure_compare, "~> 0.1.0"},
{:elastix, "~> 0.7.1"}, {:elastix, "~> 0.7.1"},
{:nimble_parsec, "~> 0.5.1"}, {:nimble_parsec, "~> 0.5.1"}
] ]
end end