philomena/lib/search/lexer.ex

119 lines
2.3 KiB
Elixir
Raw Normal View History

2019-11-02 19:34:25 +01:00
defmodule Search.Lexer do
  @moduledoc """
  Tokenizer for search query strings, built with `NimbleParsec`.

  The `defparsec(:lex, ...)` call at the bottom of the module generates the
  public `lex` entry point. The whole input must be consumed (the grammar
  ends with `eos()`), and whitespace between tokens is discarded.

  Tokens emitted on success:

    * `{:and, text}` - `AND` or `&&` surrounded by whitespace, or a bare `,`
    * `{:or, text}` - `OR` or `||` surrounded by whitespace
    * `{:not, text}` - `NOT` followed by whitespace, or a bare `!` / `-`
    * `{:lparen, "("}` and `{:rparen, ")"}`
    * `{:boost, value}` - `^` followed by a number; `value` is whatever
      `Search.Helpers.to_number/1` returns for the matched digits
    * `{:term, text}` - a bare or double-quoted search term
  """

  import NimbleParsec

  # `reduce(:to_number)` below refers to a function in this module by name,
  # so the helper is re-exported locally.
  defp to_number(input), do: Search.Helpers.to_number(input)

  # A single whitespace character; matched text is dropped from the output.
  space =
    choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
    |> ignore()

  # Optional sign, integer digits and an optional fractional part. The pieces
  # are joined into one string and then handed to `to_number/1`.
  float =
    optional(ascii_char([?-, ?+]))
    |> ascii_string([?0..?9], min: 1)
    |> optional(ascii_char([?.]) |> ascii_string([?0..?9], min: 1))
    |> reduce({List, :to_string, []})
    |> reduce(:to_number)

  # Word/symbol operators require whitespace on both sides.
  l_and =
    times(space, min: 1)
    |> choice([string("AND"), string("&&")])
    |> times(space, min: 1)
    |> unwrap_and_tag(:and)

  # A comma is tagged as :and as well, with no surrounding whitespace needed.
  l_comma =
    string(",")
    |> unwrap_and_tag(:and)

  l_or =
    times(space, min: 1)
    |> choice([string("OR"), string("||")])
    |> times(space, min: 1)
    |> unwrap_and_tag(:or)

  # `NOT` must be followed by whitespace; `!` and `-` need none.
  l_not =
    string("NOT")
    |> times(space, min: 1)
    |> unwrap_and_tag(:not)

  l_negate =
    choice([string("!"), string("-")])
    |> unwrap_and_tag(:not)

  lparen = string("(") |> unwrap_and_tag(:lparen)
  rparen = string(")") |> unwrap_and_tag(:rparen)
  quot = string("\"")

  # `^` is dropped; only the numeric value is kept in the token.
  boost =
    ignore(string("^"))
    |> concat(float)
    |> unwrap_and_tag(:boost)

  # Anything that terminates an unquoted term. Checked as a negative
  # lookahead before every piece of `:dirty_text`.
  stop_words =
    choice([
      l_comma,
      l_and,
      l_or,
      repeat(space) |> concat(rparen),
      repeat(space) |> concat(boost)
    ])

  # Body of an unquoted term. Defined as a named private combinator so it can
  # refer to itself for balanced, nested parentheses.
  defcombinatorp(
    :dirty_text,
    lookahead_not(stop_words)
    |> choice([
      # Backslash followed by any character; both are kept in the output.
      string("\\") |> utf8_char([]),
      # A balanced parenthesised group inside the term.
      string("(") |> parsec(:dirty_text) |> string(")"),
      # Any character other than `(` or `)` (the range ?( .. ?) is just those two).
      utf8_char(not: ?(..?))
    ])
    |> times(min: 1)
  )

  text =
    parsec(:dirty_text)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:term)
    |> label("a term, like `safe'")

  # Double-quoted term. `\"` and `\\` are unescaped (the backslash is
  # dropped); any other backslash sequence is kept verbatim.
  quoted_text =
    ignore(quot)
    |> repeat(
      choice([
        ignore(string("\\")) |> string("\""),
        ignore(string("\\")) |> string("\\"),
        string("\\") |> utf8_char([]),
        utf8_char(not: ?")
      ])
    )
    |> ignore(quot)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:term)
    |> label(~s|a term enclosed in quotes, like `"/)^3^(\\\\"'|)

  term =
    choice([
      quoted_text,
      text
    ])

  # Alternatives are tried in order. `l_and` / `l_or` begin with whitespace,
  # so they are listed ahead of the bare `space` alternative, and `term` is
  # last so operators and punctuation are recognised first.
  outer =
    choice([
      l_comma,
      l_negate,
      l_and,
      l_or,
      l_not,
      lparen,
      rparen,
      boost,
      space,
      term
    ])

  search =
    repeat(outer)
    |> eos()

  defparsec(:lex, search)
end