Mirror of https://github.com/philomena-dev/philomena.git, synced 2025-02-17 11:04:22 +01:00
remove old parser

This commit is contained in:
parent c0379ec056
commit ae8b3d2c74

6 changed files with 20 additions and 896 deletions
@@ -1,146 +0,0 @@
defmodule Philomena.Search.Helpers do
  import NimbleParsec

  def to_number(term) do
    {float_val, _} = :string.to_float(term)
    {int_val, _} = :string.to_integer(term)

    cond do
      is_float(float_val) ->
        float_val

      is_integer(int_val) ->
        int_val
    end
  end

  defp build_datetime(naive, tz_off, tz_hour, tz_minute) do
    # Unbelievable that there is no way to build this with integer arguments.
    tz_hour =
      tz_hour
      |> Integer.to_string()
      |> String.pad_leading(2, "0")

    tz_minute =
      tz_minute
      |> Integer.to_string()
      |> String.pad_leading(2, "0")

    iso8601_string = "#{NaiveDateTime.to_iso8601(naive)}#{tz_off}#{tz_hour}#{tz_minute}"

    {:ok, datetime, _offset} = DateTime.from_iso8601(iso8601_string)

    datetime
  end

  defp timezone_bounds([]), do: ["+", 0, 0]
  defp timezone_bounds([tz_off, tz_hour]), do: [tz_off, tz_hour, 0]
  defp timezone_bounds([tz_off, tz_hour, tz_minute]), do: [tz_off, tz_hour, tz_minute]

  defp date_bounds([year]) do
    lower = %NaiveDateTime{year: year, month: 1, day: 1, hour: 0, minute: 0, second: 0}
    upper = NaiveDateTime.add(lower, 31_536_000, :second)
    [lower, upper]
  end

  defp date_bounds([year, month]) do
    lower = %NaiveDateTime{year: year, month: month, day: 1, hour: 0, minute: 0, second: 0}
    upper = NaiveDateTime.add(lower, 2_592_000, :second)
    [lower, upper]
  end

  defp date_bounds([year, month, day]) do
    lower = %NaiveDateTime{year: year, month: month, day: day, hour: 0, minute: 0, second: 0}
    upper = NaiveDateTime.add(lower, 86400, :second)
    [lower, upper]
  end

  defp date_bounds([year, month, day, hour]) do
    lower = %NaiveDateTime{year: year, month: month, day: day, hour: hour, minute: 0, second: 0}
    upper = NaiveDateTime.add(lower, 3600, :second)
    [lower, upper]
  end

  defp date_bounds([year, month, day, hour, minute]) do
    lower = %NaiveDateTime{
      year: year,
      month: month,
      day: day,
      hour: hour,
      minute: minute,
      second: 0
    }

    upper = NaiveDateTime.add(lower, 60, :second)
    [lower, upper]
  end

  defp date_bounds([year, month, day, hour, minute, second]) do
    lower = %NaiveDateTime{
      year: year,
      month: month,
      day: day,
      hour: hour,
      minute: minute,
      second: second
    }

    upper = NaiveDateTime.add(lower, 1, :second)
    [lower, upper]
  end

  def absolute_datetime(opts) do
    date = Keyword.fetch!(opts, :date)
    timezone = Keyword.get(opts, :timezone, [])

    [lower, upper] = date_bounds(date)
    [tz_off, tz_hour, tz_minute] = timezone_bounds(timezone)

    lower = build_datetime(lower, tz_off, tz_hour, tz_minute)
    upper = build_datetime(upper, tz_off, tz_hour, tz_minute)

    [lower, upper]
  end

  def relative_datetime([count, scale]) do
    now = NaiveDateTime.utc_now()

    lower = NaiveDateTime.add(now, count * -scale, :second)
    upper = NaiveDateTime.add(now, (count - 1) * -scale, :second)

    [lower, upper]
  end

  def full_choice(combinator \\ empty(), choices)

  def full_choice(combinator, []) do
    combinator |> eos() |> string("<eos>")
  end

  def full_choice(combinator, [choice]) do
    combinator |> concat(choice)
  end

  def full_choice(combinator, choices) do
    choice(combinator, choices)
  end

  def contains_wildcard?(value) do
    String.match?(value, ~r/(?<!\\)(?:\\\\)*[\*\?]/)
  end

  def unescape_wildcard(value) do
    # '*' and '?' are wildcard characters in the right context;
    # don't unescape them.
    Regex.replace(~r/(?<!\\)(?:\\)*([^\\\*\?])/, value, "\\1")
  end

  def unescape_regular(value) do
    Regex.replace(~r/(?<!\\)(?:\\)*(.)/, value, "\\1")
  end

  def process_term(term) do
    term |> String.trim() |> String.downcase()
  end
end
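For context on the removed helpers: the lexer reduces its :date captures through absolute_datetime/1 and relative_datetime/1, both of which return a [lower, upper] pair of datetime bounds. A minimal sketch of how they behave, with illustrative inputs that are not taken from the diff:

# Assuming the old module is still compiled, e.g. in an IEx session:
alias Philomena.Search.Helpers

# "2019-08-01" with no timezone gives the whole UTC day as [lower, upper)
[lower, upper] = Helpers.absolute_datetime(date: [2019, 8, 1])
# lower is roughly ~U[2019-08-01 00:00:00Z], upper ~U[2019-08-02 00:00:00Z]

# "3 days ago" gives the window from three days ago to two days ago
[lower, upper] = Helpers.relative_datetime([3, 86_400])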
@@ -1,468 +0,0 @@
defmodule Philomena.Search.Lexer do
  defmacro deflexer(name, opts) do
    literal_fields = Keyword.get(opts, :literal, []) |> Macro.expand(__CALLER__)
    ngram_fields = Keyword.get(opts, :ngram, []) |> Macro.expand(__CALLER__)
    bool_fields = Keyword.get(opts, :bool, []) |> Macro.expand(__CALLER__)
    date_fields = Keyword.get(opts, :date, []) |> Macro.expand(__CALLER__)
    float_fields = Keyword.get(opts, :float, []) |> Macro.expand(__CALLER__)
    int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__)
    ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__)
    custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__)

    quote location: :keep do
      import NimbleParsec
      import Philomena.Search.Helpers

      l_and =
        choice([string("AND"), string("&&"), string(",")])
        |> unwrap_and_tag(:and)

      l_or =
        choice([string("OR"), string("||")])
        |> unwrap_and_tag(:or)

      l_not =
        choice([string("NOT"), string("!"), string("-")])
        |> unwrap_and_tag(:not)

      lparen = string("(") |> unwrap_and_tag(:lparen)
      rparen = string(")") |> unwrap_and_tag(:rparen)

      space =
        choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
        |> ignore()

      int =
        optional(ascii_char('-+'))
        |> ascii_string([?0..?9], min: 1)
        |> reduce({List, :to_string, []})
        |> reduce(:to_number)
        |> unwrap_and_tag(:int)
        |> label("an integer, such as `-100' or `5'")

      number =
        optional(ascii_char('-+'))
        |> ascii_string([?0..?9], min: 1)
        |> optional(ascii_char('.') |> ascii_string([?0..?9], min: 1))
        |> reduce({List, :to_string, []})
        |> reduce(:to_number)
        |> unwrap_and_tag(:number)
        |> label("a real number, such as `-2.71828' or `10'")

      bool =
        choice([
          string("true"),
          string("false")
        ])
        |> label("a boolean, such as `false'")
        |> reduce({Jason, :decode!, []})

      ipv4_octet =
        choice([
          ascii_char('2') |> ascii_char('5') |> ascii_char([?0..?5]),
          ascii_char('2') |> ascii_char([?0..?4]) |> ascii_char([?0..?9]),
          ascii_char('1') |> ascii_char([?0..?9]) |> ascii_char([?0..?9]),
          ascii_char([?1..?9]) |> ascii_char([?0..?9]),
          ascii_char([?0..?9])
        ])
        |> reduce({List, :to_string, []})

      ipv4_address =
        times(ipv4_octet |> string("."), 3)
        |> concat(ipv4_octet)

      ipv4_prefix =
        ascii_char('/')
        |> choice([
          ascii_char('3') |> ascii_char([?0..?2]),
          ascii_char([?1..?2]) |> ascii_char([?0..?9]),
          ascii_char([?0..?9])
        ])
        |> reduce({List, :to_string, []})

      ipv6_hexadectet = ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)

      ipv6_ls32 =
        choice([
          ipv6_hexadectet |> string(":") |> concat(ipv6_hexadectet),
          ipv4_address
        ])

      ipv6_fragment = ipv6_hexadectet |> string(":")

      ipv6_address =
        choice([
          times(ipv6_fragment, 6) |> concat(ipv6_ls32),
          string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
          ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
          string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
          times(ipv6_fragment, 1)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> times(ipv6_fragment, 3)
          |> concat(ipv6_ls32),
          ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
          string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
          times(ipv6_fragment, 2)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> times(ipv6_fragment, 2)
          |> concat(ipv6_ls32),
          times(ipv6_fragment, 1)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> times(ipv6_fragment, 2)
          |> concat(ipv6_ls32),
          ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
          string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
          times(ipv6_fragment, 3)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> concat(ipv6_fragment)
          |> concat(ipv6_ls32),
          times(ipv6_fragment, 2)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> concat(ipv6_fragment)
          |> concat(ipv6_ls32),
          times(ipv6_fragment, 1)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> concat(ipv6_fragment)
          |> concat(ipv6_ls32),
          ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
          string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
          times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
          times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
          times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
          times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
          ipv6_hexadectet |> string("::") |> concat(ipv6_ls32),
          string("::") |> concat(ipv6_ls32),
          times(ipv6_fragment, 5)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> concat(ipv6_hexadectet),
          times(ipv6_fragment, 4)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> concat(ipv6_hexadectet),
          times(ipv6_fragment, 3)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> concat(ipv6_hexadectet),
          times(ipv6_fragment, 2)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> concat(ipv6_hexadectet),
          times(ipv6_fragment, 1)
          |> concat(ipv6_hexadectet)
          |> string("::")
          |> concat(ipv6_hexadectet),
          ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet),
          string("::") |> concat(ipv6_hexadectet),
          times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"),
          times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"),
          times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"),
          times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::"),
          times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::"),
          times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::"),
          ipv6_hexadectet |> string("::"),
          string("::")
        ])

      ipv6_prefix =
        ascii_char('/')
        |> choice([
          ascii_char('1') |> ascii_char('2') |> ascii_char([?0..?8]),
          ascii_char('1') |> ascii_char([?0..?1]) |> ascii_char([?0..?9]),
          ascii_char([?1..?9]) |> ascii_char([?0..?9]),
          ascii_char([?0..?9])
        ])
        |> reduce({List, :to_string, []})

      # choice([
      #   ,
      ip_address =
        ipv4_address
        |> optional(ipv4_prefix)
        # ipv6_address |> optional(ipv6_prefix)
        # ])
        |> reduce({Enum, :join, []})
        |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
        |> unwrap_and_tag(:ip)

      year = integer(4)
      month = integer(2)
      day = integer(2)

      hour = integer(2)
      minute = integer(2)
      second = integer(2)
      tz_hour = integer(2)
      tz_minute = integer(2)

      ymd_sep = ignore(string("-"))
      hms_sep = ignore(string(":"))
      iso8601_sep = ignore(choice([string("T"), string("t"), space]))
      iso8601_tzsep = choice([string("+"), string("-")])
      zulu = ignore(choice([string("Z"), string("z")]))

      date_part =
        year
        |> optional(
          ymd_sep
          |> concat(month)
          |> optional(
            ymd_sep
            |> concat(day)
            |> optional(
              iso8601_sep
              |> optional(
                hour
                |> optional(
                  hms_sep
                  |> concat(minute)
                  |> optional(concat(hms_sep, second))
                )
              )
            )
          )
        )
        |> tag(:date)

      timezone_part =
        choice([
          iso8601_tzsep
          |> concat(tz_hour)
          |> optional(
            hms_sep
            |> concat(tz_minute)
          )
          |> tag(:timezone),
          zulu
        ])

      absolute_date =
        date_part
        |> optional(timezone_part)
        |> reduce(:absolute_datetime)
        |> unwrap_and_tag(:date)
        |> label("an RFC3339 date and optional time, such as `2019-08-01'")

      relative_date =
        integer(min: 1)
        |> ignore(concat(space, empty()))
        |> choice([
          string("second") |> optional(string("s")) |> replace(1),
          string("minute") |> optional(string("s")) |> replace(60),
          string("hour") |> optional(string("s")) |> replace(3600),
          string("day") |> optional(string("s")) |> replace(86400),
          string("week") |> optional(string("s")) |> replace(604_800),
          string("month") |> optional(string("s")) |> replace(2_592_000),
          string("year") |> optional(string("s")) |> replace(31_536_000)
        ])
        |> ignore(string(" ago"))
        |> reduce(:relative_datetime)
        |> unwrap_and_tag(:date)
        |> label("a relative date, such as `3 days ago'")

      date =
        choice([
          absolute_date,
          relative_date
        ])

      eq = choice([string(":"), string(".eq:")]) |> unwrap_and_tag(:eq)
      lt = string(".lt:") |> unwrap_and_tag(:lt)
      lte = string(".lte:") |> unwrap_and_tag(:lte)
      gt = string(".gt:") |> unwrap_and_tag(:gt)
      gte = string(".gte:") |> unwrap_and_tag(:gte)

      range_relation =
        choice([
          eq,
          lt,
          lte,
          gt,
          gte
        ])

      boost =
        string("^")
        |> unwrap_and_tag(:boost)
        |> concat(number)

      fuzz =
        string("~")
        |> unwrap_and_tag(:fuzz)
        |> concat(number)

      quot = string("\"")

      bool_value =
        full_choice(unquote(for f <- bool_fields, do: [string: f]))
        |> unwrap_and_tag(:bool_field)
        |> concat(eq)
        |> concat(bool)

      date_value =
        full_choice(unquote(for f <- date_fields, do: [string: f]))
        |> unwrap_and_tag(:date_field)
        |> concat(range_relation)
        |> concat(date)

      float_value =
        full_choice(unquote(for f <- float_fields, do: [string: f]))
        |> unwrap_and_tag(:float_field)
        |> concat(range_relation)
        |> concat(number)

      int_value =
        full_choice(unquote(for f <- int_fields, do: [string: f]))
        |> unwrap_and_tag(:int_field)
        |> concat(range_relation)
        |> concat(int)

      ip_value =
        full_choice(unquote(for f <- ip_fields, do: [string: f]))
        |> unwrap_and_tag(:ip_field)
        |> ignore(eq)
        |> concat(ip_address)

      numeric =
        choice([
          bool_value,
          date_value,
          float_value,
          int_value,
          ip_value
        ])

      quoted_numeric = ignore(quot) |> concat(numeric) |> ignore(quot)

      stop_words =
        repeat(space)
        |> choice([
          string("\\") |> eos(),
          string(","),
          l_and,
          l_or,
          rparen,
          fuzz,
          boost
        ])

      defcombinatorp(
        unquote(:"#{name}_text"),
        lookahead_not(stop_words)
        |> choice([
          string("\\") |> utf8_char([]),
          string("(") |> parsec(unquote(:"#{name}_text")) |> string(")"),
          utf8_char([])
        ])
        |> times(min: 1)
      )

      text =
        parsec(unquote(:"#{name}_text"))
        |> reduce({List, :to_string, []})
        |> unwrap_and_tag(:text)

      quoted_text =
        choice([
          ignore(string("\\")) |> string("\""),
          ignore(string("\\")) |> string("\\"),
          string("\\") |> utf8_char([]),
          utf8_char(not: ?")
        ])
        |> times(min: 1)
        |> reduce({List, :to_string, []})
        |> unwrap_and_tag(:text)

      literal =
        full_choice(unquote(for f <- literal_fields, do: [string: f]))
        |> unwrap_and_tag(:literal_field)
        |> concat(eq)
        |> concat(text)

      ngram =
        full_choice(unquote(for f <- ngram_fields, do: [string: f]))
        |> unwrap_and_tag(:ngram_field)
        |> concat(eq)
        |> concat(text)

      custom =
        full_choice(unquote(for f <- custom_fields, do: [string: f]))
        |> unwrap_and_tag(:custom_field)
        |> ignore(string(":"))
        |> concat(text)

      quoted_literal =
        ignore(quot)
        |> full_choice(unquote(for f <- literal_fields, do: [string: f]))
        |> unwrap_and_tag(:literal_field)
        |> concat(eq)
        |> concat(quoted_text)
        |> ignore(quot)

      quoted_ngram =
        ignore(quot)
        |> full_choice(unquote(for f <- ngram_fields, do: [string: f]))
        |> unwrap_and_tag(:ngram_field)
        |> concat(eq)
        |> concat(quoted_text)
        |> ignore(quot)

      quoted_custom =
        ignore(quot)
        |> full_choice(unquote(for f <- custom_fields, do: [string: f]))
        |> unwrap_and_tag(:custom_field)
        |> ignore(string(":"))
        |> concat(quoted_text)
        |> ignore(quot)

      default =
        text
        |> tag(:default)

      quoted_default =
        ignore(quot)
        |> concat(quoted_text)
        |> ignore(quot)
        |> tag(:default)

      term =
        choice([
          quoted_numeric,
          quoted_literal,
          quoted_ngram,
          quoted_custom,
          quoted_default,
          numeric,
          literal,
          ngram,
          custom,
          default
        ])

      outer =
        choice([
          l_and,
          l_or,
          l_not,
          lparen,
          rparen,
          boost,
          fuzz,
          space,
          term
        ])

      search =
        repeat(outer)
        |> eos()

      defparsec(unquote(:"#{name}_lexer"), search)
    end
  end
end
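For context on the removed lexer: deflexer/2 is a macro that, given lists of field names, defines a NimbleParsec entry point named <name>_lexer/1 which tokenizes a raw query string. A hedged sketch of a direct invocation follows; the module and field names are illustrative, not taken from the diff:

defmodule MyApp.ExampleLexer do
  import Philomena.Search.Lexer

  # Only the option keys read at the top of deflexer/2 matter here.
  deflexer("example",
    int: ~W(id images),
    literal: ~W(name slug),
    ngram: ~W(description),
    bool: ~W(aliased)
  )
end

# example_lexer/1 returns NimbleParsec's standard result tuple,
# {:ok, tokens, rest, context, line, byte_offset}:
{:ok, tokens, _, _, _, _} =
  MyApp.ExampleLexer.example_lexer("id.gte:10 AND aliased:false")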
@@ -1,258 +0,0 @@
defmodule Philomena.Search.Parser do
  defmacro defparser(name, opts) do
    field_transforms = Keyword.get(opts, :transforms, Macro.escape(%{}))
    field_aliases = Keyword.get(opts, :aliases, Macro.escape(%{}))
    default_field = Keyword.fetch!(opts, :default)

    quote location: :keep do
      import Philomena.Search.Lexer
      import Philomena.Search.Helpers

      deflexer(unquote(name), unquote(opts))

      def unquote(:"#{name}_parser")(ctx, input) do
        with {:ok, tree, _1, _2, _3, _4} <- unquote(:"#{name}_lexer")(input) do
          unquote(:"#{name}_parse")(ctx, tree)
        else
          {:error, msg, _1, _2, _3, _4} ->
            {:error, msg}
        end
      end

      defp unquote(:"#{name}_parse")(ctx, tokens) do
        {tree, []} = unquote(:"#{name}_top")(ctx, tokens)

        {:ok, tree}
      rescue
        e in ArgumentError ->
          {:error, e.message}

        _ ->
          {:error, "Parsing error."}
      end

      #
      # Predictive LL(k) parser for search grammar
      #

      defp unquote(:"#{name}_top")(_ctx, []), do: {%{match_none: %{}}, []}

      defp unquote(:"#{name}_top")(ctx, tokens), do: unquote(:"#{name}_or")(ctx, tokens)

      #
      # Boolean OR
      #

      defp unquote(:"#{name}_or")(ctx, tokens) do
        case unquote(:"#{name}_and")(ctx, tokens) do
          {left, [{:or, _} | r_tokens]} ->
            {right, rest} = unquote(:"#{name}_or")(ctx, r_tokens)
            {%{bool: %{should: [left, right]}}, rest}

          {child, rest} ->
            {child, rest}
        end
      end

      #
      # Boolean AND
      #

      defp unquote(:"#{name}_and")(ctx, tokens) do
        case unquote(:"#{name}_boost")(ctx, tokens) do
          {left, [{:and, _} | r_tokens]} ->
            {right, rest} = unquote(:"#{name}_and")(ctx, r_tokens)
            {%{bool: %{must: [left, right]}}, rest}

          {child, rest} ->
            {child, rest}
        end
      end

      #
      # Subquery score boosting
      #

      defp unquote(:"#{name}_boost")(ctx, tokens) do
        case unquote(:"#{name}_not")(ctx, tokens) do
          {child, [{:boost, _}, {:number, value} | r_tokens]} ->
            {%{function_score: %{query: child, boost_factor: value}}, r_tokens}

          {child, rest} ->
            {child, rest}
        end
      end

      #
      # Boolean NOT
      #

      defp unquote(:"#{name}_not")(ctx, [{:not, _} | r_tokens]) do
        {child, rest} = unquote(:"#{name}_not")(ctx, r_tokens)

        {%{bool: %{must_not: child}}, rest}
      end

      defp unquote(:"#{name}_not")(ctx, tokens), do: unquote(:"#{name}_group")(ctx, tokens)

      #
      # Logical grouping
      #

      defp unquote(:"#{name}_group")(ctx, [{:lparen, _} | rest]) do
        case unquote(:"#{name}_top")(ctx, rest) do
          {child, [{:rparen, _} | r_tokens]} ->
            {child, r_tokens}

          _ ->
            raise ArgumentError, "Imbalanced parentheses."
        end
      end

      defp unquote(:"#{name}_group")(_ctx, [{:rparen, _} | _rest]),
        do: raise(ArgumentError, "Imbalanced parentheses.")

      defp unquote(:"#{name}_group")(ctx, tokens), do: unquote(:"#{name}_fuzz")(ctx, tokens)

      #
      # Terms and term fuzzing
      #

      defp unquote(:"#{name}_fuzz")(ctx, tokens) do
        case tokens do
          [{:int_field, field}, {:eq, _}, {:int, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
            {%{
               range: %{
                 unquote(:"#{name}_alias")(field) => %{
                   gte: trunc(value - fuzz),
                   lte: trunc(value + fuzz)
                 }
               }
             }, r_tokens}

          [
            {:float_field, field},
            {:eq, _},
            {:float, value},
            {:fuzz, _},
            {:number, fuzz} | r_tokens
          ] ->
            {%{
               range: %{
                 unquote(:"#{name}_alias")(field) => %{
                   gte: trunc(value - fuzz),
                   lte: trunc(value + fuzz)
                 }
               }
             }, r_tokens}

          [
            {:literal_field, field},
            {:eq, _},
            {:text, value},
            {:fuzz, _},
            {:number, fuzz} | r_tokens
          ] ->
            {%{fuzzy: %{unquote(:"#{name}_alias")(field) => %{value: value, fuzziness: fuzz}}},
             r_tokens}

          [
            {:ngram_field, field},
            {:eq, _},
            {:text, value},
            {:fuzz, _},
            {:number, fuzz} | r_tokens
          ] ->
            {%{fuzzy: %{unquote(:"#{name}_alias")(field) => %{value: value, fuzziness: fuzz}}},
             r_tokens}

          [{:default, [text: value]}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
            {%{fuzzy: %{unquote(default_field) => %{value: value, fuzziness: fuzz}}}, r_tokens}

          _ ->
            unquote(:"#{name}_range")(ctx, tokens)
        end
      end

      #
      # Range queries
      #

      defp unquote(:"#{name}_range")(ctx, tokens) do
        case tokens do
          [{:int_field, field}, {range, _}, {:int, value} | r_tokens]
          when range in [:gt, :gte, :lt, :lte] ->
            {%{range: %{unquote(:"#{name}_alias")(field) => %{range => value}}}, r_tokens}

          [{:float_field, field}, {range, _}, {:number, value} | r_tokens]
          when range in [:gt, :gte, :lt, :lte] ->
            {%{range: %{unquote(:"#{name}_alias")(field) => %{range => value}}}, r_tokens}

          [{:date_field, field}, {range, _}, {:date, [lower, _higher]} | r_tokens]
          when range in [:gt, :gte, :lt, :lte] ->
            {%{range: %{unquote(:"#{name}_alias")(field) => %{range => lower}}}, r_tokens}

          _ ->
            unquote(:"#{name}_custom")(ctx, tokens)
        end
      end

      defp unquote(:"#{name}_custom")(ctx, tokens) do
        case tokens do
          [{:custom_field, field}, {:text, value} | r_tokens] ->
            {unquote(field_transforms)[field].(ctx, value), r_tokens}

          _ ->
            unquote(:"#{name}_term")(ctx, tokens)
        end
      end

      defp unquote(:"#{name}_term")(_ctx, tokens) do
        case tokens do
          [{:date_field, field}, {:eq, _}, {:date, [lower, higher]} | r_tokens] ->
            {%{range: %{unquote(:"#{name}_alias")(field) => %{gte: lower, lte: higher}}},
             r_tokens}

          [{:ngram_field, field}, {:eq, _}, {:text, value} | r_tokens] ->
            value = process_term(value)

            if contains_wildcard?(value) do
              {%{wildcard: %{unquote(:"#{name}_alias")(field) => unescape_wildcard(value)}},
               r_tokens}
            else
              {%{match: %{unquote(:"#{name}_alias")(field) => unescape_regular(value)}}, r_tokens}
            end

          [{:literal_field, field}, {:eq, _}, {:text, value} | r_tokens] ->
            value = process_term(value)

            if contains_wildcard?(value) do
              {%{wildcard: %{unquote(:"#{name}_alias")(field) => unescape_wildcard(value)}},
               r_tokens}
            else
              {%{term: %{unquote(:"#{name}_alias")(field) => unescape_regular(value)}}, r_tokens}
            end

          [{_field_type, field}, {:eq, _}, {_value_type, value} | r_tokens] ->
            {%{term: %{unquote(:"#{name}_alias")(field) => value}}, r_tokens}

          [{:default, [text: value]} | r_tokens] ->
            value = process_term(value)

            if contains_wildcard?(value) do
              {%{wildcard: %{unquote(default_field) => unescape_wildcard(value)}}, r_tokens}
            else
              {%{term: %{unquote(default_field) => unescape_regular(value)}}, r_tokens}
            end

          _ ->
            raise ArgumentError, "Expected a term"
        end
      end

      defp unquote(:"#{name}_alias")(field) do
        unquote(field_aliases)[field] || field
      end
    end
  end
end
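For context on the removed parser: defparser/2 expands into the lexer above plus a recursive-descent walk over the token stream (OR binds loosest, then AND, boost, NOT, grouping, and finally individual terms) and returns an Elasticsearch-style query map. A hedged sketch of direct use; the module, field names, and query string are illustrative:

defmodule MyApp.ExampleQuery do
  import Philomena.Search.Parser

  defparser("example",
    int: ~W(id images),
    literal: ~W(name),
    default: "name"
  )
end

# example_parser/2 takes a context map (passed through to :custom transforms)
# and the raw query string:
{:ok, query} = MyApp.ExampleQuery.example_parser(%{}, "name:fox AND images.gte:10")
# query comes back along the lines of:
#   %{bool: %{must: [%{term: %{"name" => "fox"}},
#                    %{range: %{"images" => %{gte: 10}}}]}}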
@@ -1,9 +0,0 @@
defmodule Philomena.Search.String do
  def normalize(str) do
    str
    |> String.replace("\r", "")
    |> String.split("\n", trim: true)
    |> Enum.map(fn s -> "(#{s})" end)
    |> Enum.join(" || ")
  end
end
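For context: normalize/1 turns each line of a multi-line query into a parenthesised branch of a single OR query. A small illustrative call, not taken from the diff:

Philomena.Search.String.normalize("safe\r\nsolo\n")
#=> "(safe) || (solo)"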
@@ -1,22 +1,29 @@
 defmodule Philomena.Tags.Query do
-  import Philomena.Search.Parser
+  alias Search.Parser
 
-  defparser("tag",
-    int: ~W(id images),
-    literal: ~W(slug name name_in_namespace namespace implies alias_of implied_by aliases category analyzed_name),
-    boolean: ~W(aliased),
-    ngram: ~W(description short_description),
-    aliases: %{
-      "implies" => "implied_tags",
-      "implied_by" => "implied_by_tags",
-      "alias_of" => "aliased_tag"
-    },
-    default: "analyzed_name"
-  )
+  int_fields = ~W(id images)
+  literal_fields = ~W(slug name name_in_namespace namespace implies alias_of implied_by aliases category analyzed_name)
+  bool_fields = ~W(aliased)
+  ngram_fields = ~W(description short_description)
+  default_field = "analyzed_name"
+  aliases = %{
+    "implies" => "implied_tags",
+    "implied_by" => "implied_by_tags",
+    "alias_of" => "aliased_tag"
+  }
+
+  @tag_parser Parser.parser(
+    int_fields: int_fields,
+    literal_fields: literal_fields,
+    bool_fields: bool_fields,
+    ngram_fields: ngram_fields,
+    default_field: default_field,
+    aliases: aliases
+  )
 
   def compile(query_string) do
     query_string = query_string || ""
 
-    tag_parser(%{}, query_string)
+    Parser.parse(@tag_parser, query_string)
   end
 end
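Either way, callers go through compile/1: the old path invoked the macro-generated tag_parser/2 directly, while the new path hands a prebuilt parser to Search.Parser, whose internals are outside this diff. An illustrative call, not taken from the diff:

{:ok, query} = Philomena.Tags.Query.compile("name:safe")
# With the old parser this returned {:ok, query_map} or {:error, message};
# the return shape of the new Search.Parser.parse/2 is not shown in this commit.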
@@ -1,8 +1,6 @@
 defmodule PhilomenaWeb.LayoutView do
   use PhilomenaWeb, :view
 
-  alias Philomena.Search
-
   def render_time(conn) do
     (Time.diff(Time.utc_now(), conn.assigns[:start_time], :microsecond) / 1000.0)
     |> Float.round(3)