generally working field parsing

This commit is contained in:
Liam P. White 2019-08-25 18:53:13 -04:00
parent c66fe0ca39
commit 2f350791e6
3 changed files with 133 additions and 38 deletions

View file

@ -1,8 +1,8 @@
defmodule Philomena.Images.Query do defmodule Philomena.Images.Query do
use Philomena.Search.Lexer, use Philomena.Search.Lexer,
int_fields: ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count), int: ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
float_fields: ~W(aspect_ratio wilson_score), float: ~W(aspect_ratio wilson_score),
date_fields: ~W(created_at updated_at first_seen_at), date: ~W(created_at updated_at first_seen_at),
literal_fields: ~W(namespaced_tags.name faved_by orig_sha512_hash sha512_hash uploader source_url original_format), literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
ngram_fields: ~W(description) ngram: ~W(description)
end end

View file

@ -14,10 +14,92 @@ defmodule Philomena.Search.Helpers do
end end
end end
# Attaches an explicit UTC offset to a naive timestamp and returns the
# `DateTime` produced by parsing the resulting ISO 8601 string.
#
# `tz_off` is the offset sign; `tz_hour` and `tz_minute` are integers that
# are zero-padded to two digits. Crashes with a `MatchError` when
# `DateTime.from_iso8601/1` rejects the assembled string.
defp build_datetime(naive, tz_off, tz_hour, tz_minute) do
  # DateTime has no constructor that takes integer offset components, so the
  # offset is rendered as text and the timestamp goes through the parser.
  two_digits = fn part ->
    String.pad_leading(Integer.to_string(part), 2, "0")
  end

  hours = two_digits.(tz_hour)
  minutes = two_digits.(tz_minute)
  stamp = "#{NaiveDateTime.to_iso8601(naive)}#{tz_off}#{hours}#{minutes}"

  {:ok, parsed, _utc_offset} = DateTime.from_iso8601(stamp)
  parsed
end
# Normalizes the parsed timezone parts to a `[sign, hours, minutes]` triple.
# A missing timezone becomes "+00:00"; a missing minute component becomes 0.
defp timezone_bounds([]), do: ["+", 0, 0]
defp timezone_bounds([sign, hours]), do: timezone_bounds([sign, hours, 0])
defp timezone_bounds([_sign, _hours, _minutes] = complete), do: complete
# Bounds for a year-only date: `[lower, upper]`, where `lower` is midnight on
# January 1st of `year` and `upper` is midnight on January 1st of the
# following year.
defp date_bounds([year]) do
  lower = %NaiveDateTime{year: year, month: 1, day: 1, hour: 0, minute: 0, second: 0}

  # A fixed 365-day span ends one day early in leap years, so the span is
  # derived from the actual length of the year.
  days_in_year = if Calendar.ISO.leap_year?(year), do: 366, else: 365
  upper = NaiveDateTime.add(lower, days_in_year * 86_400, :second)

  [lower, upper]
end
# Bounds for a year-month date: `[lower, upper]`, where `lower` is midnight on
# the first day of `month` and `upper` is midnight on the first day of the
# following month.
defp date_bounds([year, month]) do
  lower = %NaiveDateTime{year: year, month: month, day: 1, hour: 0, minute: 0, second: 0}

  # Months are 28 to 31 days long; a fixed 30-day span would cut off the last
  # day of long months and spill past the end of February.
  days = Calendar.ISO.days_in_month(year, month)
  upper = NaiveDateTime.add(lower, days * 86_400, :second)

  [lower, upper]
end
# Day precision: the range covers 86400 seconds starting at midnight.
defp date_bounds([year, month, day]) do
  midnight = %NaiveDateTime{year: year, month: month, day: day, hour: 0, minute: 0, second: 0}
  [midnight, NaiveDateTime.add(midnight, 86400, :second)]
end

# Hour precision: the range covers 3600 seconds from the top of the hour.
defp date_bounds([year, month, day, hour]) do
  top_of_hour =
    %NaiveDateTime{year: year, month: month, day: day, hour: hour, minute: 0, second: 0}

  [top_of_hour, NaiveDateTime.add(top_of_hour, 3600, :second)]
end

# Minute precision: the range covers 60 seconds from the start of the minute.
defp date_bounds([year, month, day, hour, minute]) do
  start_of_minute =
    %NaiveDateTime{year: year, month: month, day: day, hour: hour, minute: minute, second: 0}

  [start_of_minute, NaiveDateTime.add(start_of_minute, 60, :second)]
end

# Second precision: the range covers exactly one second.
defp date_bounds([year, month, day, hour, minute, second]) do
  instant =
    %NaiveDateTime{
      year: year,
      month: month,
      day: day,
      hour: hour,
      minute: minute,
      second: second
    }

  [instant, NaiveDateTime.add(instant, 1, :second)]
end
@doc """
Turns a parsed absolute date into a `[lower, upper]` pair of `DateTime`s.

`opts` is a keyword list with a required `:date` entry (the list of parsed
date components) and an optional `:timezone` entry (the parsed offset parts,
defaulting to `[]`). Raises `KeyError` when `:date` is absent.
"""
def absolute_datetime(opts) do
  [naive_lower, naive_upper] =
    opts
    |> Keyword.fetch!(:date)
    |> date_bounds()

  [sign, hours, minutes] =
    opts
    |> Keyword.get(:timezone, [])
    |> timezone_bounds()

  # Both ends of the range share the same offset.
  [
    build_datetime(naive_lower, sign, hours, minutes),
    build_datetime(naive_upper, sign, hours, minutes)
  ]
end
@doc """
Turns a relative date such as "3 days ago" into a `[lower, upper]` pair.

`count` is the number of units and `scale` is the unit length in seconds.
The range starts `count` units before the current UTC time and is one unit
wide. Both bounds are `NaiveDateTime` values.
"""
def relative_datetime([count, scale]) do
  reference = NaiveDateTime.utc_now()

  # The first element is `count` units back, the second `count - 1` units back.
  Enum.map([count, count - 1], fn units ->
    NaiveDateTime.add(reference, units * -scale, :second)
  end)
end
def full_choice(combinator \\ empty(), choices) def full_choice(combinator \\ empty(), choices)
def full_choice(combinator, []) do def full_choice(combinator, []) do
combinator |> eos() combinator |> eos() |> string("<eos>")
end end
def full_choice(combinator, [choice]) do def full_choice(combinator, [choice]) do

View file

@ -1,13 +1,13 @@
defmodule Philomena.Search.Lexer do defmodule Philomena.Search.Lexer do
defmacro __using__(opts) do defmacro __using__(opts) do
literal_fields = Keyword.get(opts, :literal, []) literal_fields = Keyword.get(opts, :literal, []) |> Macro.expand(__CALLER__)
ngram_fields = Keyword.get(opts, :ngram, []) ngram_fields = Keyword.get(opts, :ngram, []) |> Macro.expand(__CALLER__)
bool_fields = Keyword.get(opts, :bool, []) bool_fields = Keyword.get(opts, :bool, []) |> Macro.expand(__CALLER__)
date_fields = Keyword.get(opts, :date, []) date_fields = Keyword.get(opts, :date, []) |> Macro.expand(__CALLER__)
float_fields = Keyword.get(opts, :float, []) float_fields = Keyword.get(opts, :float, []) |> Macro.expand(__CALLER__)
int_fields = Keyword.get(opts, :int, []) int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__)
ip_fields = Keyword.get(opts, :ip, []) ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__)
custom_fields = Keyword.get(opts, :custom, []) custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__)
quote location: :keep do quote location: :keep do
import NimbleParsec import NimbleParsec
@ -33,7 +33,12 @@ defmodule Philomena.Search.Lexer do
|> ignore() |> ignore()
int = int =
integer(min: 1) optional(ascii_char('-+'))
|> ascii_char([?0..?9])
|> times(min: 1)
|> reduce({List, :to_string, []})
|> reduce(:to_number)
|> unwrap_and_tag(:int)
|> label("an integer, such as `-100' or `5'") |> label("an integer, such as `-100' or `5'")
number = number =
@ -41,8 +46,10 @@ defmodule Philomena.Search.Lexer do
|> ascii_char([?0..?9]) |> ascii_char([?0..?9])
|> times(min: 1) |> times(min: 1)
|> optional(ascii_char('.') |> ascii_char([?0..?9]) |> times(min: 1)) |> optional(ascii_char('.') |> ascii_char([?0..?9]) |> times(min: 1))
|> label("a real number, such as `-2.71828' or `10'") |> reduce({List, :to_string, []})
|> reduce(:to_number) |> reduce(:to_number)
|> unwrap_and_tag(:number)
|> label("a real number, such as `-2.71828' or `10'")
bool = bool =
choice([ choice([
@ -167,11 +174,7 @@ defmodule Philomena.Search.Lexer do
ymd_sep = ignore(string("-")) ymd_sep = ignore(string("-"))
hms_sep = ignore(string(":")) hms_sep = ignore(string(":"))
iso8601_sep = ignore(choice([string("T"), string("t"), space])) iso8601_sep = ignore(choice([string("T"), string("t"), space]))
iso8601_tzsep = iso8601_tzsep = choice([string("+"), string("-")])
choice([
string("+") |> replace(1),
string("-") |> replace(-1)
])
zulu = ignore(choice([string("Z"), string("z")])) zulu = ignore(choice([string("Z"), string("z")]))
date_part = date_part =
@ -197,7 +200,6 @@ defmodule Philomena.Search.Lexer do
) )
) )
) )
|> label("an RFC3339 date and optional time, such as `2019-08-01'")
|> tag(:date) |> tag(:date)
timezone_part = timezone_part =
@ -215,7 +217,9 @@ defmodule Philomena.Search.Lexer do
absolute_date = absolute_date =
date_part date_part
|> optional(timezone_part) |> optional(timezone_part)
|> tag(:absolute_date) |> reduce(:absolute_datetime)
|> unwrap_and_tag(:date)
|> label("an RFC3339 date and optional time, such as `2019-08-01'")
relative_date = relative_date =
integer(min: 1) integer(min: 1)
@ -226,12 +230,13 @@ defmodule Philomena.Search.Lexer do
string("hour") |> optional(string("s")) |> replace(3600), string("hour") |> optional(string("s")) |> replace(3600),
string("day") |> optional(string("s")) |> replace(86400), string("day") |> optional(string("s")) |> replace(86400),
string("week") |> optional(string("s")) |> replace(604800), string("week") |> optional(string("s")) |> replace(604800),
string("month") |> optional(string("s")) |> replace(2629746), string("month") |> optional(string("s")) |> replace(2592000),
string("year") |> optional(string("s")) |> replace(31556952) string("year") |> optional(string("s")) |> replace(31536000)
]) ])
|> ignore(string(" ago")) |> ignore(string(" ago"))
|> reduce(:relative_datetime)
|> unwrap_and_tag(:date)
|> label("a relative date, such as `3 days ago'") |> label("a relative date, such as `3 days ago'")
|> tag(:relative_date)
date = date =
choice([ choice([
@ -261,27 +266,32 @@ defmodule Philomena.Search.Lexer do
bool_value = bool_value =
full_choice(unquote(for f <- bool_fields, do: [string: f])) full_choice(unquote(for f <- bool_fields, do: [string: f]))
|> unwrap_and_tag(:bool_field)
|> concat(eq) |> concat(eq)
|> concat(bool) |> concat(bool)
date_value = date_value =
full_choice(unquote(for f <- date_fields, do: [string: f])) full_choice(unquote(for f <- date_fields, do: [string: f]))
|> unwrap_and_tag(:date_field)
|> concat(range_relation) |> concat(range_relation)
|> concat(date) |> concat(date)
float_value = float_value =
full_choice(unquote(for f <- float_fields, do: [string: f])) full_choice(unquote(for f <- float_fields, do: [string: f]))
|> unwrap_and_tag(:float_field)
|> concat(range_relation) |> concat(range_relation)
|> concat(number) |> concat(number)
int_value = int_value =
full_choice(unquote(for f <- int_fields, do: [string: f])) full_choice(unquote(for f <- int_fields, do: [string: f]))
|> unwrap_and_tag(:int_field)
|> concat(range_relation) |> concat(range_relation)
|> concat(int) |> concat(int)
ip_value = ip_value =
full_choice(unquote(for f <- ip_fields, do: [string: f])) full_choice(unquote(for f <- ip_fields, do: [string: f]))
|> concat(eq) |> unwrap_and_tag(:ip_field)
|> ignore(eq)
|> concat(ip_address) |> concat(ip_address)
numeric = numeric =
@ -302,7 +312,6 @@ defmodule Philomena.Search.Lexer do
string(","), string(","),
concat(space, l_and), concat(space, l_and),
concat(space, l_or), concat(space, l_or),
concat(space, l_not),
rparen, rparen,
fuzz, fuzz,
boost boost
@ -317,11 +326,12 @@ defmodule Philomena.Search.Lexer do
utf8_char([]) utf8_char([])
]) ])
|> times(min: 1) |> times(min: 1)
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:text)
) )
text = parsec(:text) text =
parsec(:text)
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:text)
quoted_text = quoted_text =
choice([ choice([
@ -336,51 +346,54 @@ defmodule Philomena.Search.Lexer do
literal = literal =
full_choice(unquote(for f <- literal_fields, do: [string: f])) full_choice(unquote(for f <- literal_fields, do: [string: f]))
|> unwrap_and_tag(:literal_field)
|> ignore(eq) |> ignore(eq)
|> concat(text) |> concat(text)
|> tag(:literal)
ngram = ngram =
full_choice(unquote(for f <- ngram_fields, do: [string: f])) full_choice(unquote(for f <- ngram_fields, do: [string: f]))
|> unwrap_and_tag(:ngram_field)
|> ignore(eq) |> ignore(eq)
|> concat(text) |> concat(text)
|> tag(:ngram)
custom = custom =
full_choice(unquote(for f <- custom_fields, do: [string: f])) full_choice(unquote(for f <- custom_fields, do: [string: f]))
|> unwrap_and_tag(:custom_field)
|> ignore(string(":")) |> ignore(string(":"))
|> concat(text) |> concat(text)
quoted_literal = quoted_literal =
ignore(quot) ignore(quot)
|> full_choice(unquote(for f <- literal_fields, do: [string: f])) |> full_choice(unquote(for f <- literal_fields, do: [string: f]))
|> unwrap_and_tag(:literal_field)
|> ignore(eq) |> ignore(eq)
|> concat(quoted_text) |> concat(quoted_text)
|> ignore(quot) |> ignore(quot)
|> tag(:literal)
quoted_ngram = quoted_ngram =
ignore(quot) ignore(quot)
|> full_choice(unquote(for f <- ngram_fields, do: [string: f])) |> full_choice(unquote(for f <- ngram_fields, do: [string: f]))
|> unwrap_and_tag(:ngram_field)
|> ignore(eq) |> ignore(eq)
|> concat(quoted_text) |> concat(quoted_text)
|> ignore(quot) |> ignore(quot)
|> tag(:ngram)
quoted_custom = quoted_custom =
ignore(quot) ignore(quot)
|> full_choice(unquote(for f <- custom_fields, do: [string: f])) |> full_choice(unquote(for f <- custom_fields, do: [string: f]))
|> unwrap_and_tag(:custom_field)
|> ignore(string(":")) |> ignore(string(":"))
|> concat(quoted_text) |> concat(quoted_text)
|> ignore(quot) |> ignore(quot)
|> tag(:custom)
default = default =
text text
|> tag(:default) |> tag(:default)
quoted_default = quoted_default =
quoted_text ignore(quot)
|> concat(quoted_text)
|> ignore(quot)
|> tag(:default) |> tag(:default)
term = term =
@ -414,7 +427,7 @@ defmodule Philomena.Search.Lexer do
times(outer, min: 1) times(outer, min: 1)
|> eos() |> eos()
defparsec(:search, search) defparsec :search, search
end end
end end
end end