mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-12-03 16:17:59 +01:00
add non-macro-based search parser
This commit is contained in:
parent
6496fd30fa
commit
756599df56
11 changed files with 852 additions and 0 deletions
13
lib/search/bool_parser.ex
Normal file
13
lib/search/bool_parser.ex
Normal file
|
@ -0,0 +1,13 @@
|
|||
defmodule Search.BoolParser do
  # Recognizes exactly the literals "true" and "false" and emits a single
  # {:bool, value} token. Trailing input is rejected via eos().
  import NimbleParsec

  boolean_literal =
    [string("true"), string("false")]
    |> choice()
    |> unwrap_and_tag(:bool)
    |> eos()

  defparsec(:parse, boolean_literal)
end
|
187
lib/search/date_parser.ex
Normal file
187
lib/search/date_parser.ex
Normal file
|
@ -0,0 +1,187 @@
|
|||
defmodule Search.DateParser do
  # Parses absolute (truncated ISO 8601-style) and relative ("3 days ago")
  # date expressions. Both forms produce a {:date, [lower, upper]} token,
  # where the pair of DateTime bounds covers the expression's precision.
  import NimbleParsec

  # Combines a NaiveDateTime with a "+"/"-" sign and integer hour/minute
  # offsets into a DateTime by round-tripping through an ISO 8601 string.
  defp build_datetime(naive, tz_off, tz_hour, tz_minute) do
    tz_hour =
      tz_hour
      |> Integer.to_string()
      |> String.pad_leading(2, "0")

    tz_minute =
      tz_minute
      |> Integer.to_string()
      |> String.pad_leading(2, "0")

    iso8601_string = "#{NaiveDateTime.to_iso8601(naive)}#{tz_off}#{tz_hour}#{tz_minute}"

    # Unbelievable that there is no way to build this with integer arguments.
    # WTF, Elixir?
    {:ok, datetime, _offset} = DateTime.from_iso8601(iso8601_string)

    datetime
  end

  # Fills in missing timezone components; no timezone at all means UTC.
  defp timezone_bounds([]), do: ["+", 0, 0]
  defp timezone_bounds([tz_off, tz_hour]), do: [tz_off, tz_hour, 0]
  defp timezone_bounds([tz_off, tz_hour, tz_minute]), do: [tz_off, tz_hour, tz_minute]

  # Widens a partially-specified date into [lower, upper) bounds one
  # precision step wide.
  # NOTE(review): year/month widths are fixed at 365/30 days, so leap years
  # and actual month lengths are approximated — confirm this is acceptable.
  defp date_bounds([year]) do
    lower = %NaiveDateTime{year: year, month: 1, day: 1, hour: 0, minute: 0, second: 0}
    # 365 days
    upper = NaiveDateTime.add(lower, 31_536_000, :second)
    [lower, upper]
  end

  defp date_bounds([year, month]) do
    lower = %NaiveDateTime{year: year, month: month, day: 1, hour: 0, minute: 0, second: 0}
    # 30 days
    upper = NaiveDateTime.add(lower, 2_592_000, :second)
    [lower, upper]
  end

  defp date_bounds([year, month, day]) do
    lower = %NaiveDateTime{year: year, month: month, day: day, hour: 0, minute: 0, second: 0}
    # 1 day
    upper = NaiveDateTime.add(lower, 86400, :second)
    [lower, upper]
  end

  defp date_bounds([year, month, day, hour]) do
    lower = %NaiveDateTime{year: year, month: month, day: day, hour: hour, minute: 0, second: 0}
    # 1 hour
    upper = NaiveDateTime.add(lower, 3600, :second)
    [lower, upper]
  end

  defp date_bounds([year, month, day, hour, minute]) do
    lower = %NaiveDateTime{
      year: year,
      month: month,
      day: day,
      hour: hour,
      minute: minute,
      second: 0
    }

    # 1 minute
    upper = NaiveDateTime.add(lower, 60, :second)
    [lower, upper]
  end

  defp date_bounds([year, month, day, hour, minute, second]) do
    lower = %NaiveDateTime{
      year: year,
      month: month,
      day: day,
      hour: hour,
      minute: minute,
      second: second
    }

    upper = NaiveDateTime.add(lower, 1, :second)
    [lower, upper]
  end

  # Reduce callback for absolute_date: converts the tagged :date/:timezone
  # components into a pair of timezone-adjusted DateTime bounds.
  defp absolute_datetime(opts) do
    date = Keyword.fetch!(opts, :date)
    timezone = Keyword.get(opts, :timezone, [])

    [lower, upper] = date_bounds(date)
    [tz_off, tz_hour, tz_minute] = timezone_bounds(timezone)

    lower = build_datetime(lower, tz_off, tz_hour, tz_minute)
    upper = build_datetime(upper, tz_off, tz_hour, tz_minute)

    [lower, upper]
  end

  # Reduce callback for relative_date: "N <unit>s ago" becomes a window one
  # scale unit wide, ending (N-1) units before now.
  defp relative_datetime([count, scale]) do
    now = NaiveDateTime.utc_now()

    lower = NaiveDateTime.add(now, count * -scale, :second)
    upper = NaiveDateTime.add(now, (count - 1) * -scale, :second)

    [lower, upper]
  end

  # ASCII whitespace, dropped from output.
  space =
    choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
    |> ignore()

  # Fixed-width numeric date components.
  year = integer(4)
  month = integer(2)
  day = integer(2)

  hour = integer(2)
  minute = integer(2)
  second = integer(2)
  tz_hour = integer(2)
  tz_minute = integer(2)

  # Separators; the "+"/"-" sign is kept because absolute_datetime needs it.
  ymd_sep = ignore(string("-"))
  hms_sep = ignore(string(":"))
  iso8601_sep = ignore(choice([string("T"), string("t"), space]))
  iso8601_tzsep = choice([string("+"), string("-")])
  zulu = ignore(choice([string("Z"), string("z")]))

  # Nested optionals allow truncation at any precision: "2019",
  # "2019-01", ..., down to a full date-time with seconds.
  date_part =
    year
    |> optional(
      ymd_sep
      |> concat(month)
      |> optional(
        ymd_sep
        |> concat(day)
        |> optional(
          iso8601_sep
          |> optional(
            hour
            |> optional(
              hms_sep
              |> concat(minute)
              |> optional(concat(hms_sep, second))
            )
          )
        )
      )
    )
    |> tag(:date)

  # Either a numeric "+hh[:mm]" offset (tagged) or a literal Z (ignored,
  # leaving the empty timezone list that timezone_bounds maps to UTC).
  timezone_part =
    choice([
      iso8601_tzsep
      |> concat(tz_hour)
      |> optional(
        hms_sep
        |> concat(tz_minute)
      )
      |> tag(:timezone),
      zulu
    ])

  absolute_date =
    date_part
    |> optional(timezone_part)
    |> reduce(:absolute_datetime)
    |> unwrap_and_tag(:date)

  # "N <unit>[s] ago"; each unit replaces its text with a scale in seconds.
  relative_date =
    integer(min: 1)
    |> ignore(concat(space, empty()))
    |> choice([
      string("second") |> optional(string("s")) |> replace(1),
      string("minute") |> optional(string("s")) |> replace(60),
      string("hour") |> optional(string("s")) |> replace(3_600),
      string("day") |> optional(string("s")) |> replace(86_400),
      string("week") |> optional(string("s")) |> replace(604_800),
      string("month") |> optional(string("s")) |> replace(2_592_000),
      string("year") |> optional(string("s")) |> replace(31_536_000)
    ])
    |> ignore(string(" ago"))
    |> reduce(:relative_datetime)
    |> unwrap_and_tag(:date)

  date =
    choice([
      absolute_date,
      relative_date
    ])
    |> eos()

  defparsec :parse, date
end
|
30
lib/search/float_parser.ex
Normal file
30
lib/search/float_parser.ex
Normal file
|
@ -0,0 +1,30 @@
|
|||
defmodule Search.FloatParser do
  # Parses a decimal number, optionally followed by "~<deviation>" for a
  # fuzzy range. Emits {:float, value} or {:float_range, [lo, hi]}.
  import NimbleParsec
  import Search.Helpers

  # The "~" sigil only separates center from deviation; drop it.
  fuzz =
    string("~")
    |> ignore()

  # Shared sub-combinators. NimbleParsec combinators are plain data, so
  # reusing them removes the duplicated digit/fraction definitions that the
  # original repeated inside both `float` and `unsigned_float`.
  int_part = ascii_string([?0..?9], min: 1)
  fraction = ascii_char('.') |> ascii_string([?0..?9], min: 1)

  # Deviation term: digits with optional fraction, no sign allowed.
  unsigned_float =
    int_part
    |> optional(fraction)
    |> reduce({List, :to_string, []})
    |> reduce(:to_number)

  # Center term: optional sign, digits, optional fraction.
  float =
    optional(ascii_char('-+'))
    |> concat(int_part)
    |> optional(fraction)
    |> reduce({List, :to_string, []})
    |> reduce(:to_number)

  float_parser =
    choice([
      # "<center>~<deviation>" -> [center - deviation, center + deviation]
      float
      |> concat(fuzz)
      |> concat(unsigned_float)
      |> reduce(:range)
      |> unwrap_and_tag(:float_range),
      float |> unwrap_and_tag(:float)
    ])
    |> eos()

  defparsec :parse, float_parser
end
|
26
lib/search/helpers.ex
Normal file
26
lib/search/helpers.ex
Normal file
|
@ -0,0 +1,26 @@
|
|||
defmodule Search.Helpers do
  # Reduce callbacks shared by the numeric search parsers.

  # Parses a numeric string as a float when it has a fractional part,
  # otherwise as an integer — the standard library cannot parse "1" as a
  # float directly, so both Erlang conversions are attempted.
  def to_number(input) do
    {as_int, _rest} = :string.to_integer(input)
    {as_float, _rest} = :string.to_float(input)

    cond do
      is_float(as_float) -> as_float
      is_integer(as_int) -> as_int
    end
  end

  # Parses the leading integer out of the input, discarding the rest.
  def to_int(input) do
    {value, _rest} = :string.to_integer(input)

    value
  end

  # Converts a [center, deviation] pair into inclusive [lower, upper] bounds.
  def range([center, deviation]), do: [center - deviation, center + deviation]
end
|
23
lib/search/int_parser.ex
Normal file
23
lib/search/int_parser.ex
Normal file
|
@ -0,0 +1,23 @@
|
|||
defmodule Search.IntParser do
  # Parses a (possibly signed) decimal integer, optionally followed by
  # "~<deviation>" for fuzzy matching. Emits {:int, value} or
  # {:int_range, [lo, hi]}.
  import NimbleParsec
  import Search.Helpers

  # The "~" sigil only separates center from deviation; drop it.
  fuzz = ignore(string("~"))

  # Signed decimal digits folded into an integer via Helpers.to_int/1.
  signed_int =
    optional(ascii_char('-+'))
    |> ascii_string([?0..?9], min: 1)
    |> reduce({List, :to_string, []})
    |> reduce(:to_int)

  int_parser =
    choice([
      # "<center>~<deviation>" becomes an inclusive range around the center.
      signed_int
      |> concat(fuzz)
      |> integer(min: 1)
      |> reduce(:range)
      |> unwrap_and_tag(:int_range),
      signed_int |> unwrap_and_tag(:int)
    ])
    |> eos()

  defparsec(:parse, int_parser)
end
|
138
lib/search/ip_parser.ex
Normal file
138
lib/search/ip_parser.ex
Normal file
|
@ -0,0 +1,138 @@
|
|||
defmodule Search.IpParser do
  # Recognizes an IPv4 or IPv6 address with an optional CIDR prefix and
  # emits a single {:ip, string} token containing the joined text.
  import NimbleParsec

  # Decimal octet 0-255; longest alternatives first so the ordered choice
  # does not commit to a shorter match prematurely.
  ipv4_octet =
    choice([
      ascii_char('2') |> ascii_char('5') |> ascii_char([?0..?5]),
      ascii_char('2') |> ascii_char([?0..?4]) |> ascii_char([?0..?9]),
      ascii_char('1') |> ascii_char([?0..?9]) |> ascii_char([?0..?9]),
      ascii_char([?1..?9]) |> ascii_char([?0..?9]),
      ascii_char([?0..?9])
    ])
    |> reduce({List, :to_string, []})

  # Dotted quad: three "octet." groups plus a final octet.
  ipv4_address =
    times(ipv4_octet |> string("."), 3)
    |> concat(ipv4_octet)

  # "/0" .. "/32"
  ipv4_prefix =
    ascii_char('/')
    |> choice([
      ascii_char('3') |> ascii_char([?0..?2]),
      ascii_char([?1..?2]) |> ascii_char([?0..?9]),
      ascii_char([?0..?9])
    ])
    |> reduce({List, :to_string, []})

  # 1-4 hex digits (one IPv6 group).
  ipv6_hexadectet = ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)

  # Least-significant 32 bits: two groups, or an embedded IPv4 address.
  ipv6_ls32 =
    choice([
      ipv6_hexadectet |> string(":") |> concat(ipv6_hexadectet),
      ipv4_address
    ])

  ipv6_fragment = ipv6_hexadectet |> string(":")

  # Every "::" compression position enumerated explicitly, ordered from the
  # most to the fewest groups so the ordered choice finds the longest form
  # first. NOTE(review): order is load-bearing — do not re-sort.
  ipv6_address =
    choice([
      times(ipv6_fragment, 6) |> concat(ipv6_ls32),
      string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
      string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
      times(ipv6_fragment, 1)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> times(ipv6_fragment, 3)
      |> concat(ipv6_ls32),
      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
      string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
      times(ipv6_fragment, 2)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> times(ipv6_fragment, 2)
      |> concat(ipv6_ls32),
      times(ipv6_fragment, 1)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> times(ipv6_fragment, 2)
      |> concat(ipv6_ls32),
      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
      string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
      times(ipv6_fragment, 3)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> concat(ipv6_fragment)
      |> concat(ipv6_ls32),
      times(ipv6_fragment, 2)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> concat(ipv6_fragment)
      |> concat(ipv6_ls32),
      times(ipv6_fragment, 1)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> concat(ipv6_fragment)
      |> concat(ipv6_ls32),
      ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
      string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
      times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
      times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
      ipv6_hexadectet |> string("::") |> concat(ipv6_ls32),
      string("::") |> concat(ipv6_ls32),
      times(ipv6_fragment, 5)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> concat(ipv6_hexadectet),
      times(ipv6_fragment, 4)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> concat(ipv6_hexadectet),
      times(ipv6_fragment, 3)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> concat(ipv6_hexadectet),
      times(ipv6_fragment, 2)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> concat(ipv6_hexadectet),
      times(ipv6_fragment, 1)
      |> concat(ipv6_hexadectet)
      |> string("::")
      |> concat(ipv6_hexadectet),
      ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet),
      string("::") |> concat(ipv6_hexadectet),
      times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"),
      times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"),
      times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"),
      times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::"),
      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::"),
      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::"),
      ipv6_hexadectet |> string("::"),
      string("::")
    ])

  # "/0" .. "/128"
  ipv6_prefix =
    ascii_char('/')
    |> choice([
      ascii_char('1') |> ascii_char('2') |> ascii_char([?0..?8]),
      ascii_char('1') |> ascii_char([?0..?1]) |> ascii_char([?0..?9]),
      ascii_char([?1..?9]) |> ascii_char([?0..?9]),
      ascii_char([?0..?9])
    ])
    |> reduce({List, :to_string, []})

  # Either family with an optional prefix, rejoined into one string.
  ip =
    choice([
      ipv4_address |> optional(ipv4_prefix),
      ipv6_address |> optional(ipv6_prefix)
    ])
    |> reduce({Enum, :join, []})
    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
    |> unwrap_and_tag(:ip)
    |> eos()

  defparsec :parse, ip
end
|
101
lib/search/lexer.ex
Normal file
101
lib/search/lexer.ex
Normal file
|
@ -0,0 +1,101 @@
|
|||
defmodule Search.Lexer do
  # Tokenizes a raw query string into :and/:or/:not operators, parentheses,
  # :boost modifiers, and :text terms (bare or double-quoted).
  import NimbleParsec
  import Search.Helpers

  # Number used by the ^boost modifier (optional sign, optional decimals).
  float =
    optional(ascii_char('-+'))
    |> ascii_string([?0..?9], min: 1)
    |> optional(ascii_char('.') |> ascii_string([?0..?9], min: 1))
    |> reduce({List, :to_string, []})
    |> reduce(:to_number)

  l_and =
    choice([string("AND"), string("&&"), string(",")])
    |> unwrap_and_tag(:and)

  l_or =
    choice([string("OR"), string("||")])
    |> unwrap_and_tag(:or)

  l_not =
    choice([string("NOT"), string("!"), string("-")])
    |> unwrap_and_tag(:not)

  lparen = string("(") |> unwrap_and_tag(:lparen)
  rparen = string(")") |> unwrap_and_tag(:rparen)

  # ASCII whitespace, dropped from the token stream.
  space =
    choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
    |> ignore()

  quot = string("\"")
  backslash = string("\\")

  # "^<float>" relevance boost.
  boost =
    ignore(string("^"))
    |> concat(float)
    |> unwrap_and_tag(:boost)

  # Token boundaries that terminate a bare (unquoted) term.
  stop_words =
    repeat(space)
    |> choice([
      backslash |> eos(),
      l_and,
      l_or,
      rparen,
      boost
    ])

  # Bare term text: consumed while no stop word is ahead. Backslash escapes
  # the next character and parenthesized runs must balance, hence the named
  # recursive combinator.
  defcombinatorp(
    :dirty_text,
    lookahead_not(stop_words)
    |> choice([
      string("\\") |> utf8_char([]),
      string("(") |> parsec(:dirty_text) |> string(")"),
      utf8_char(not: ?(..?))
    ])
    |> times(min: 1)
  )

  text =
    parsec(:dirty_text)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:text)

  # Double-quoted term: \" and \\ are unescaped; other escapes kept verbatim.
  # Bug fix: the repeat must wrap only the character choice. Piping the whole
  # pipeline into repeat/1 (`ignore(quot) |> choice(...) |> repeat()`) makes
  # the opening quote part of the repeated combinator, so any quoted string
  # longer than one character failed to lex.
  quoted_text =
    ignore(quot)
    |> repeat(
      choice([
        ignore(string("\\")) |> string("\""),
        ignore(string("\\")) |> string("\\"),
        string("\\") |> utf8_char([]),
        utf8_char(not: ?")
      ])
    )
    |> ignore(quot)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:text)

  term =
    choice([
      quoted_text,
      text
    ])

  outer =
    choice([
      l_and,
      l_or,
      l_not,
      lparen,
      rparen,
      boost,
      space,
      term
    ])

  search =
    repeat(outer)
    |> eos()

  defparsec :lex, search
end
|
51
lib/search/literal_parser.ex
Normal file
51
lib/search/literal_parser.ex
Normal file
|
@ -0,0 +1,51 @@
|
|||
defmodule Search.LiteralParser do
  # Parses a field value into either {:literal, text} (optionally followed
  # by {:fuzz, n} edit distance) or {:wildcard, text} when the value
  # contains unescaped * / ? metacharacters.
  import NimbleParsec

  # "~<n>" at the very end of the input: requested edit distance.
  edit_distance =
    ignore(string("~"))
    |> integer(min: 1)
    |> unwrap_and_tag(:fuzz)
    |> eos()

  # Input that forces a mode switch: unescaped wildcards, or a trailing
  # edit distance.
  stopwords =
    choice([
      string("*"),
      string("?"),
      edit_distance
    ])

  # Plain literal: characters consumed while no stopword is ahead (the
  # lookahead is re-checked on every repeat iteration); backslash escapes
  # the next character. An optional trailing edit distance is attached.
  normal =
    lookahead_not(stopwords)
    |> choice([
      ignore(string("\\")) |> utf8_char([]),
      utf8_char([])
    ])
    |> repeat()
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:literal)
    |> optional(edit_distance)
    |> eos()

  # Runs of Kleene stars are coalesced.
  # Fuzzy search has no meaning in wildcard mode, so we ignore it.
  wildcard =
    lookahead_not(edit_distance)
    |> choice([
      ignore(string("\\")) |> utf8_char([]),
      string("*") |> ignore(repeat(string("*"))),
      utf8_char([])
    ])
    |> repeat()
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:wildcard)
    |> ignore(optional(edit_distance))
    |> eos()

  # `normal` is attempted first; it fails on any unescaped wildcard, which
  # then falls through to the wildcard interpretation.
  literal =
    choice([
      normal,
      wildcard
    ])

  defparsec :parse, literal
end
|
6
lib/search/ngram_parser.ex
Normal file
6
lib/search/ngram_parser.ex
Normal file
|
@ -0,0 +1,6 @@
|
|||
defmodule Search.NgramParser do
  # Placeholder: ngram fields are currently parsed exactly like literal
  # fields, so parsing is delegated wholesale to Search.LiteralParser.
  defdelegate parse(input), to: Search.LiteralParser
end
|
242
lib/search/parser.ex
Normal file
242
lib/search/parser.ex
Normal file
|
@ -0,0 +1,242 @@
|
|||
defmodule Search.Parser do
  # Top-level query parser: lexes the input with Search.Lexer, then runs a
  # predictive recursive-descent parse over the token stream, producing an
  # Elasticsearch-style query map.

  alias Search.{
    BoolParser,
    DateParser,
    FloatParser,
    IntParser,
    IpParser,
    Lexer,
    LiteralParser,
    # Bug fix: NgramParser was missing from this alias list, so the bare
    # `NgramParser` below resolved to the nonexistent top-level module.
    NgramParser,
    Parser,
    TermRangeParser
  }

  defstruct [
    :default_field,
    bool_fields: [],
    date_fields: [],
    float_fields: [],
    int_fields: [],
    ip_fields: [],
    literal_fields: [],
    ngram_fields: [],
    custom_fields: [],
    transforms: %{},
    aliases: %{},
    __fields__: %{},
    __data__: nil
  ]

  # Builds a %Parser{} from the given options and precomputes the
  # {field_parser, field_name} list consumed by TermRangeParser.
  def parser(options) do
    parser = struct(Parser, options)

    fields =
      Enum.map(parser.bool_fields, fn f -> {BoolParser, f} end) ++
        Enum.map(parser.date_fields, fn f -> {DateParser, f} end) ++
        Enum.map(parser.float_fields, fn f -> {FloatParser, f} end) ++
        Enum.map(parser.int_fields, fn f -> {IntParser, f} end) ++
        Enum.map(parser.ip_fields, fn f -> {IpParser, f} end) ++
        Enum.map(parser.literal_fields, fn f -> {LiteralParser, f} end) ++
        Enum.map(parser.ngram_fields, fn f -> {NgramParser, f} end) ++
        Enum.map(parser.custom_fields, fn f -> {:custom_field, f} end)

    # Bug fix: Map.new/1 keyed these tuples by the parser module, silently
    # discarding all but one field per type. Keep the list instead —
    # TermRangeParser only ever iterates it, and the order determines which
    # field wins when names share a prefix.
    %{parser | __fields__: fields}
  end

  # Parses `input` into {:ok, query_map} or {:error, reason}. `context` is
  # made available to custom-field transforms through __data__.
  def parse(%Parser{} = parser, input, context \\ nil) do
    parser = %{parser | __data__: context}

    with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input),
         {:ok, {tree, []}} <- search_top(parser, tokens) do
      {:ok, tree}
    else
      _ ->
        {:error, "Search parsing error."}
    end
  end

  #
  # Predictive LL(1) RD parser for search grammar
  #

  defp search_top(parser, tokens), do: search_or(parser, tokens)

  defp search_or(parser, tokens) do
    case search_and(parser, tokens) do
      {:ok, {left, [{:or, _} | r_tokens]}} ->
        # Bug fix: the recursive call returns {:ok, _} | {:error, _}; the
        # original matched it as {right, rest}, binding right to :ok and
        # building a broken tree. Unwrap it properly.
        case search_or(parser, r_tokens) do
          {:ok, {right, rest}} ->
            {:ok, {%{bool: %{should: [left, right]}}, rest}}

          err ->
            err
        end

      value ->
        value
    end
  end

  defp search_and(parser, tokens) do
    case search_boost(parser, tokens) do
      {:ok, {left, [{:and, _} | r_tokens]}} ->
        # Same unwrap fix as search_or/2.
        case search_or(parser, r_tokens) do
          {:ok, {right, rest}} ->
            {:ok, {%{bool: %{must: [left, right]}}, rest}}

          err ->
            err
        end

      value ->
        value
    end
  end

  defp search_boost(parser, tokens) do
    case search_not(parser, tokens) do
      {:ok, {child, [{:boost, value} | r_tokens]}} ->
        {:ok, {%{function_score: %{query: child, boost_factor: value}}, r_tokens}}

      value ->
        value
    end
  end

  defp search_not(parser, [{:not, _} | rest]) do
    case search_group(parser, rest) do
      {:ok, {child, r_tokens}} ->
        {:ok, {%{bool: %{must_not: child}}, r_tokens}}

      err ->
        err
    end
  end

  defp search_not(parser, tokens), do: search_group(parser, tokens)

  defp search_group(parser, [{:lparen, _} | rest]) do
    case search_top(parser, rest) do
      {:ok, {child, [{:rparen, _} | r_tokens]}} ->
        {:ok, {child, r_tokens}}

      {:ok, {_child, _tokens}} ->
        {:error, "Imbalanced parentheses."}

      err ->
        err
    end
  end

  defp search_group(_parser, [{:rparen, _} | _rest]) do
    {:error, "Imbalanced parentheses."}
  end

  defp search_group(parser, tokens), do: search_field(parser, tokens)

  # Bug fix: the lexer tags terms as :text, but this clause only matched
  # :term, so every term crashed with a FunctionClauseError. Accept both
  # tags for compatibility.
  defp search_field(parser, [{tag, value} | r_tokens]) when tag in [:term, :text] do
    tokens = TermRangeParser.parse(value, parser.__fields__, parser.default_field)

    case field_top(parser, tokens) do
      {:ok, {child, []}} ->
        {:ok, {child, r_tokens}}

      err ->
        err
    end
  end

  # Anything else (e.g. a dangling NOT or empty input) is a syntax error
  # rather than a crash.
  defp search_field(_parser, _tokens), do: {:error, "Expected a term."}

  #
  # Predictive LL(k) RD parser for search terms in parent grammar
  #

  defp field_top(parser, tokens), do: field_term(parser, tokens)

  # Custom fields delegate to the user-supplied transform, which receives
  # the context passed to parse/3 plus the raw value.
  defp field_term(parser, [custom_field: field_name, range: :eq, value: value]) do
    case parser.transforms[field_name].(parser.__data__, value) do
      {:ok, child} ->
        {:ok, {child, []}}

      err ->
        err
    end
  end

  # Custom fields have no parser module, so ranged lookups are rejected
  # here instead of crashing on `:custom_field.parse/1` below.
  defp field_term(_parser, [{:custom_field, field_name} | _rest]),
    do: {:error, "range specified for custom field " <> field_name}

  defp field_term(parser, [{field_parser, field_name}, {:range, range}, {:value, value}]) do
    # N.B.: field_parser is an atom (a module implementing parse/1)
    case field_parser.parse(value) do
      {:ok, extra_tokens, _1, _2, _3, _4} ->
        field_type(parser, [{field_parser, field_name}, {:range, range}] ++ extra_tokens)

      err ->
        err
    end
  end

  # Types which do not support ranges

  defp field_type(parser, [{LiteralParser, field_name}, range: :eq, literal: value]),
    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}

  defp field_type(parser, [{LiteralParser, field_name}, range: :eq, literal: value, fuzz: fuzz]),
    do: {:ok, {%{fuzzy: %{field(parser, field_name) => %{value: value, fuzziness: fuzz}}}, []}}

  # A bare "*" wildcard matches everything.
  defp field_type(_parser, [{LiteralParser, _field_name}, range: :eq, wildcard: "*"]),
    do: {:ok, {%{match_all: %{}}, []}}

  defp field_type(parser, [{LiteralParser, field_name}, range: :eq, wildcard: value]),
    do: {:ok, {%{wildcard: %{field(parser, field_name) => value}}, []}}

  # Ngram fields use phrase matching; fuzz is meaningless there and dropped.
  defp field_type(parser, [{NgramParser, field_name}, range: :eq, literal: value]),
    do: {:ok, {%{match_phrase: %{field(parser, field_name) => value}}, []}}

  defp field_type(parser, [{NgramParser, field_name}, range: :eq, literal: value, fuzz: _fuzz]),
    do: {:ok, {%{match_phrase: %{field(parser, field_name) => value}}, []}}

  defp field_type(_parser, [{NgramParser, _field_name}, range: :eq, wildcard: "*"]),
    do: {:ok, {%{match_all: %{}}, []}}

  defp field_type(parser, [{NgramParser, field_name}, range: :eq, wildcard: value]),
    do: {:ok, {%{wildcard: %{field(parser, field_name) => value}}, []}}

  defp field_type(parser, [{BoolParser, field_name}, range: :eq, bool: value]),
    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}

  defp field_type(parser, [{IpParser, field_name}, range: :eq, ip: value]),
    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}

  # Types which do support ranges

  defp field_type(parser, [{IntParser, field_name}, range: :eq, int: value]),
    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}

  defp field_type(parser, [{IntParser, field_name}, range: :eq, int_range: [lower, upper]]),
    do: {:ok, {%{range: %{field(parser, field_name) => %{gte: lower, lte: upper}}}, []}}

  defp field_type(parser, [{IntParser, field_name}, range: range, int: value]),
    do: {:ok, {%{range: %{field(parser, field_name) => %{range => value}}}, []}}

  # A fuzzy value combined with an explicit .gt/.lt qualifier is ambiguous.
  defp field_type(_parser, [{IntParser, field_name}, range: _range, int_range: _value]),
    do: {:error, "multiple ranges specified for " <> field_name}

  defp field_type(parser, [{FloatParser, field_name}, range: :eq, float: value]),
    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}

  defp field_type(parser, [{FloatParser, field_name}, range: :eq, float_range: [lower, upper]]),
    do: {:ok, {%{range: %{field(parser, field_name) => %{gte: lower, lte: upper}}}, []}}

  defp field_type(parser, [{FloatParser, field_name}, range: range, float: value]),
    do: {:ok, {%{range: %{field(parser, field_name) => %{range => value}}}, []}}

  defp field_type(_parser, [{FloatParser, field_name}, range: _range, float_range: _value]),
    do: {:error, "multiple ranges specified for " <> field_name}

  # Dates always parse to [lower, upper) bounds covering their precision;
  # an :eq lookup means "inside the interval".
  defp field_type(parser, [{DateParser, field_name}, range: :eq, date: [lower, upper]]),
    do: {:ok, {%{range: %{field(parser, field_name) => %{gte: lower, lte: upper}}}, []}}

  # "<=" and ">" compare against the end of the interval...
  defp field_type(parser, [{DateParser, field_name}, range: r, date: [_lower, upper]])
       when r in [:lte, :gt],
       do: {:ok, {%{range: %{field(parser, field_name) => %{r => upper}}}, []}}

  # ...while ">=" and "<" compare against its start.
  defp field_type(parser, [{DateParser, field_name}, range: r, date: [lower, _upper]])
       when r in [:gte, :lt],
       do: {:ok, {%{range: %{field(parser, field_name) => %{r => lower}}}, []}}

  # Resolves a user-facing field name through the alias table.
  defp field(parser, field_name) do
    parser.aliases[field_name] || field_name
  end
end
|
35
lib/search/term_range_parser.ex
Normal file
35
lib/search/term_range_parser.ex
Normal file
|
@ -0,0 +1,35 @@
|
|||
defmodule Search.TermRangeParser do
  # Splits a raw "name[.range]:value" term into a token list of the form
  # [{field_parser, field_name}, range: range, value: value].
  #
  # Unfortunately, we can't use NimbleParsec here. It requires
  # the compiler, and we're not in a macro environment.

  # Bug fix: LiteralParser was referenced unqualified, which resolved to the
  # nonexistent top-level `LiteralParser` module instead of this one.
  alias Search.LiteralParser

  # Tries each known {field_parser, field_name} pair in order; when none
  # matches, the whole input becomes an :eq lookup on the default field.
  def parse(input, fields, default_field) do
    tokens =
      Enum.find_value(fields, fn {p, f} ->
        field(input, f, p)
      end)

    tokens || [{LiteralParser, default_field}, range: :eq, value: input]
  end

  # Matches "name:", "name.eq:", "name.gt:", "name.gte:", "name.lt:" and
  # "name.lte:" prefixes; returns nil when the input does not reference the
  # given field, so find_value moves on to the next candidate.
  defp field(input, field_name, field_parser) do
    field_sz = byte_size(field_name)

    case input do
      <<^field_name::binary-size(field_sz), ":", value::binary>> ->
        [{field_parser, field_name}, range: :eq, value: value]

      <<^field_name::binary-size(field_sz), ".eq:", value::binary>> ->
        [{field_parser, field_name}, range: :eq, value: value]

      <<^field_name::binary-size(field_sz), ".gt:", value::binary>> ->
        [{field_parser, field_name}, range: :gt, value: value]

      <<^field_name::binary-size(field_sz), ".gte:", value::binary>> ->
        [{field_parser, field_name}, range: :gte, value: value]

      <<^field_name::binary-size(field_sz), ".lt:", value::binary>> ->
        [{field_parser, field_name}, range: :lt, value: value]

      <<^field_name::binary-size(field_sz), ".lte:", value::binary>> ->
        [{field_parser, field_name}, range: :lte, value: value]

      _ ->
        nil
    end
  end
end
|
Loading…
Reference in a new issue