add macro api

This commit is contained in:
Liam P. White 2019-08-24 11:35:30 -04:00
parent 1ad4d1c574
commit c66fe0ca39
3 changed files with 399 additions and 253 deletions

View file

@ -0,0 +1,8 @@
defmodule Philomena.Images.Query do
  @moduledoc """
  Search lexer for image queries.

  Declares which image fields may appear in a search string, grouped by the
  kind of value each one takes, and hands them to `Philomena.Search.Lexer`,
  which generates the `search` parser in this module.
  """

  # The option keys must be exactly the ones `Philomena.Search.Lexer.__using__/1`
  # looks up (`:int`, `:float`, `:date`, `:literal`, `:ngram`, ...). Any other
  # key is silently ignored and that field group falls back to an empty list,
  # which means no field-specific tokens would ever be produced.
  #
  # The lists are written as literals rather than `~W` sigils because the macro
  # enumerates them while it is being expanded; at that point a sigil is still
  # an unexpanded AST node, not a list of strings.
  use Philomena.Search.Lexer,
    int: [
      "id",
      "width",
      "height",
      "comment_count",
      "score",
      "upvotes",
      "downvotes",
      "faves",
      "uploader_id",
      "faved_by_id",
      "tag_count"
    ],
    float: ["aspect_ratio", "wilson_score"],
    date: ["created_at", "updated_at", "first_seen_at"],
    literal: [
      "namespaced_tags.name",
      "faved_by",
      "orig_sha512_hash",
      "sha512_hash",
      "uploader",
      "source_url",
      "original_format"
    ],
    ngram: ["description"]
end

View file

@ -13,4 +13,18 @@ defmodule Philomena.Search.Helpers do
int_val int_val
end end
end end
@doc """
Appends a "one of `choices`" step to `combinator`, for any number of choices.

  * no choices - appends `eos()` to `combinator`
  * exactly one choice - appends that choice with `concat/2`
  * two or more choices - delegates to `choice/2`

`combinator` defaults to `empty()`.
"""
def full_choice(combinator \\ empty(), choices)

def full_choice(combinator, choices) do
  # NOTE(review): `eos/1`, `concat/2`, `choice/2` and `empty/0` are presumably
  # NimbleParsec's, imported at the top of this module - confirm.
  case choices do
    [] -> eos(combinator)
    [only] -> concat(combinator, only)
    several -> choice(combinator, several)
  end
end
end end

View file

@ -1,296 +1,420 @@
defmodule Philomena.Search.Lexer do defmodule Philomena.Search.Lexer do
import NimbleParsec defmacro __using__(opts) do
import Philomena.Search.Helpers literal_fields = Keyword.get(opts, :literal, [])
ngram_fields = Keyword.get(opts, :ngram, [])
bool_fields = Keyword.get(opts, :bool, [])
date_fields = Keyword.get(opts, :date, [])
float_fields = Keyword.get(opts, :float, [])
int_fields = Keyword.get(opts, :int, [])
ip_fields = Keyword.get(opts, :ip, [])
custom_fields = Keyword.get(opts, :custom, [])
l_and = quote location: :keep do
choice([string("AND"), string("&&"), string(",")]) import NimbleParsec
|> unwrap_and_tag(:and) import Philomena.Search.Helpers
l_or = l_and =
choice([string("OR"), string("||")]) choice([string("AND"), string("&&"), string(",")])
|> unwrap_and_tag(:or) |> unwrap_and_tag(:and)
l_not = l_or =
choice([string("NOT"), string("!"), string("-")]) choice([string("OR"), string("||")])
|> unwrap_and_tag(:not) |> unwrap_and_tag(:or)
lparen = string("(") |> unwrap_and_tag(:lparen) l_not =
rparen = string(")") |> unwrap_and_tag(:rparen) choice([string("NOT"), string("!"), string("-")])
|> unwrap_and_tag(:not)
space = lparen = string("(") |> unwrap_and_tag(:lparen)
choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")]) rparen = string(")") |> unwrap_and_tag(:rparen)
|> ignore()
int = space =
integer(min: 1) choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
|> label("an integer, such as `-100' or `5'") |> ignore()
number = int =
optional(ascii_char('-+')) integer(min: 1)
|> ascii_char([?0..?9]) |> label("an integer, such as `-100' or `5'")
|> times(min: 1)
|> optional(ascii_char('.') |> ascii_char([?0..?9]) |> times(min: 1))
|> label("a real number, such as `-2.71828' or `10'")
|> reduce(:to_number)
bool = number =
choice([ optional(ascii_char('-+'))
string("true"), |> ascii_char([?0..?9])
string("false") |> times(min: 1)
]) |> optional(ascii_char('.') |> ascii_char([?0..?9]) |> times(min: 1))
|> label("a boolean, such as `false'") |> label("a real number, such as `-2.71828' or `10'")
|> reduce({Jason, :decode!, []}) |> reduce(:to_number)
ipv4_octet = bool =
choice([ choice([
ascii_char('2') |> ascii_char('5') |> ascii_char([?0..?5]), string("true"),
ascii_char('2') |> ascii_char([?0..?4]) |> ascii_char([?0..?9]), string("false")
ascii_char('1') |> ascii_char([?0..?9]) |> ascii_char([?0..?9]), ])
ascii_char([?1..?9]) |> ascii_char([?0..?9]), |> label("a boolean, such as `false'")
ascii_char([?0..?9]) |> reduce({Jason, :decode!, []})
])
|> reduce({List, :to_string, []})
ipv4_address = ipv4_octet =
times(ipv4_octet |> string("."), 3) choice([
|> concat(ipv4_octet) ascii_char('2') |> ascii_char('5') |> ascii_char([?0..?5]),
ascii_char('2') |> ascii_char([?0..?4]) |> ascii_char([?0..?9]),
ascii_char('1') |> ascii_char([?0..?9]) |> ascii_char([?0..?9]),
ascii_char([?1..?9]) |> ascii_char([?0..?9]),
ascii_char([?0..?9])
])
|> reduce({List, :to_string, []})
ipv4_prefix = ipv4_address =
ascii_char('/') times(ipv4_octet |> string("."), 3)
|> choice([ |> concat(ipv4_octet)
ascii_char('3') |> ascii_char([?0..?2]),
ascii_char([?1..?2]) |> ascii_char([?0..?9]),
ascii_char([?0..?9])
])
|> reduce({List, :to_string, []})
ipv6_hexadectet = ipv4_prefix =
ascii_string('0123456789abcdefABCDEF', min: 1, max: 4) ascii_char('/')
|> choice([
ascii_char('3') |> ascii_char([?0..?2]),
ascii_char([?1..?2]) |> ascii_char([?0..?9]),
ascii_char([?0..?9])
])
|> reduce({List, :to_string, []})
ipv6_ls32 = ipv6_hexadectet =
choice([ ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)
ipv6_hexadectet |> string(":") |> concat(ipv6_hexadectet),
ipv4_address
])
ipv6_fragment = ipv6_ls32 =
ipv6_hexadectet |> string(":") choice([
ipv6_hexadectet |> string(":") |> concat(ipv6_hexadectet),
ipv4_address
])
ipv6_address = ipv6_fragment =
choice([ ipv6_hexadectet |> string(":")
times(ipv6_fragment, 6) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32), ipv6_address =
string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32), choice([
times(ipv6_fragment, 6) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32), string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32), times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32), string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32), times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32), times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32), times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> concat(ipv6_ls32), string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), ipv6_hexadectet |> string("::") |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet), string("::") |> concat(ipv6_ls32),
string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"), times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"), times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"), times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::"), times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::"), times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::"), ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet),
ipv6_hexadectet |> string("::"), string("::") |> concat(ipv6_hexadectet),
string("::")
])
ipv6_prefix = times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"),
ascii_char('/') times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"),
|> choice([ times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"),
ascii_char('1') |> ascii_char('2') |> ascii_char([?0..?8]), times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::"),
ascii_char('1') |> ascii_char([?0..?1]) |> ascii_char([?0..?9]), times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::"),
ascii_char([?1..?9]) |> ascii_char([?0..?9]), times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::"),
ascii_char([?0..?9]) ipv6_hexadectet |> string("::"),
]) string("::")
|> reduce({List, :to_string, []}) ])
ip_address = ipv6_prefix =
choice([ ascii_char('/')
ipv4_address |> optional(ipv4_prefix), |> choice([
ipv6_address |> optional(ipv6_prefix) ascii_char('1') |> ascii_char('2') |> ascii_char([?0..?8]),
]) ascii_char('1') |> ascii_char([?0..?1]) |> ascii_char([?0..?9]),
|> reduce({Enum, :join, []}) ascii_char([?1..?9]) |> ascii_char([?0..?9]),
|> label("a valid IPv4 or IPv6 address and optional CIDR prefix") ascii_char([?0..?9])
|> unwrap_and_tag(:ip) ])
|> reduce({List, :to_string, []})
year = integer(4) ip_address =
month = integer(2) choice([
day = integer(2) ipv4_address |> optional(ipv4_prefix),
ipv6_address |> optional(ipv6_prefix)
])
|> reduce({Enum, :join, []})
|> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
|> unwrap_and_tag(:ip)
hour = integer(2) year = integer(4)
minute = integer(2) month = integer(2)
second = integer(2) day = integer(2)
tz_hour = integer(2)
tz_minute = integer(2)
ymd_sep = ignore(string("-")) hour = integer(2)
hms_sep = ignore(string(":")) minute = integer(2)
iso8601_sep = ignore(choice([string("T"), string("t"), space])) second = integer(2)
iso8601_tzsep = tz_hour = integer(2)
choice([ tz_minute = integer(2)
string("+") |> replace(1),
string("-") |> replace(-1)
])
zulu = ignore(choice([string("Z"), string("z")]))
date_part = ymd_sep = ignore(string("-"))
year hms_sep = ignore(string(":"))
|> optional( iso8601_sep = ignore(choice([string("T"), string("t"), space]))
ymd_sep iso8601_tzsep =
|> concat(month) choice([
|> optional( string("+") |> replace(1),
ymd_sep string("-") |> replace(-1)
|> concat(day) ])
zulu = ignore(choice([string("Z"), string("z")]))
date_part =
year
|> optional( |> optional(
iso8601_sep ymd_sep
|> concat(month)
|> optional( |> optional(
hour ymd_sep
|> concat(day)
|> optional( |> optional(
hms_sep iso8601_sep
|> concat(minute)
|> optional( |> optional(
concat(hms_sep, second) hour
|> optional(
hms_sep
|> concat(minute)
|> optional(
concat(hms_sep, second)
)
)
) )
) )
) )
) )
|> label("an RFC3339 date and optional time, such as `2019-08-01'")
|> tag(:date)
timezone_part =
choice([
iso8601_tzsep
|> concat(tz_hour)
|> optional(
hms_sep
|> concat(tz_minute)
)
|> tag(:timezone),
zulu
])
absolute_date =
date_part
|> optional(timezone_part)
|> tag(:absolute_date)
relative_date =
integer(min: 1)
|> ignore(concat(space, empty()))
|> choice([
string("second") |> optional(string("s")) |> replace(1),
string("minute") |> optional(string("s")) |> replace(60),
string("hour") |> optional(string("s")) |> replace(3600),
string("day") |> optional(string("s")) |> replace(86400),
string("week") |> optional(string("s")) |> replace(604800),
string("month") |> optional(string("s")) |> replace(2629746),
string("year") |> optional(string("s")) |> replace(31556952)
])
|> ignore(string(" ago"))
|> label("a relative date, such as `3 days ago'")
|> tag(:relative_date)
date =
choice([
absolute_date,
relative_date
])
eq = choice([string(":"), string(".eq:")]) |> unwrap_and_tag(:eq)
lt = string(".lt:") |> unwrap_and_tag(:lt)
lte = string(".lte:") |> unwrap_and_tag(:lte)
gt = string(".gt:") |> unwrap_and_tag(:gt)
gte = string(".gte:") |> unwrap_and_tag(:gte)
range_relation =
choice([
eq,
lt,
lte,
gt,
gte
])
boost = ignore(string("^")) |> unwrap_and_tag(number, :boost)
fuzz = ignore(string("~")) |> unwrap_and_tag(number, :fuzz)
quot = string("\"")
bool_value =
full_choice(unquote(for f <- bool_fields, do: [string: f]))
|> concat(eq)
|> concat(bool)
date_value =
full_choice(unquote(for f <- date_fields, do: [string: f]))
|> concat(range_relation)
|> concat(date)
float_value =
full_choice(unquote(for f <- float_fields, do: [string: f]))
|> concat(range_relation)
|> concat(number)
int_value =
full_choice(unquote(for f <- int_fields, do: [string: f]))
|> concat(range_relation)
|> concat(int)
ip_value =
full_choice(unquote(for f <- ip_fields, do: [string: f]))
|> concat(eq)
|> concat(ip_address)
numeric =
choice([
bool_value,
date_value,
float_value,
int_value,
ip_value
])
quoted_numeric =
ignore(quot) |> concat(numeric) |> ignore(quot)
stop_words =
choice([
string("\\") |> eos(),
string(","),
concat(space, l_and),
concat(space, l_or),
concat(space, l_not),
rparen,
fuzz,
boost
])
defcombinatorp(
:text,
lookahead_not(stop_words)
|> choice([
string("\\") |> utf8_char([]),
string("(") |> parsec(:text) |> string(")"),
utf8_char([])
])
|> times(min: 1)
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:text)
) )
)
|> label("an RFC3339 date and optional time, such as `2019-08-01'")
|> tag(:date)
timezone_part = text = parsec(:text)
choice([
iso8601_tzsep
|> concat(tz_hour)
|> optional(
hms_sep
|> concat(tz_minute)
)
|> tag(:timezone),
zulu
])
absolute_date = quoted_text =
date_part choice([
|> optional(timezone_part) ignore(string("\\")) |> string("\""),
|> tag(:absolute_date) ignore(string("\\")) |> string("\\"),
string("\\") |> utf8_char([]),
utf8_char(not: ?")
])
|> times(min: 1)
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:text)
relative_date = literal =
integer(min: 1) full_choice(unquote(for f <- literal_fields, do: [string: f]))
|> ignore(concat(space, empty())) |> ignore(eq)
|> choice([ |> concat(text)
string("second") |> optional(string("s")) |> replace(1), |> tag(:literal)
string("minute") |> optional(string("s")) |> replace(60),
string("hour") |> optional(string("s")) |> replace(3600),
string("day") |> optional(string("s")) |> replace(86400),
string("week") |> optional(string("s")) |> replace(604800),
string("month") |> optional(string("s")) |> replace(2629746),
string("year") |> optional(string("s")) |> replace(31556952)
])
|> ignore(string(" ago"))
|> label("a relative date, such as `3 days ago'")
|> tag(:relative_date)
date = ngram =
choice([ full_choice(unquote(for f <- ngram_fields, do: [string: f]))
absolute_date, |> ignore(eq)
relative_date |> concat(text)
]) |> tag(:ngram)
boost = ignore(string("^")) |> unwrap_and_tag(number, :boost) custom =
fuzz = ignore(string("~")) |> unwrap_and_tag(number, :fuzz) full_choice(unquote(for f <- custom_fields, do: [string: f]))
|> ignore(string(":"))
|> concat(text)
quot = string("\"") quoted_literal =
ignore(quot)
|> full_choice(unquote(for f <- literal_fields, do: [string: f]))
|> ignore(eq)
|> concat(quoted_text)
|> ignore(quot)
|> tag(:literal)
quoted_term = quoted_ngram =
ignore(quot) ignore(quot)
|> choice([ |> full_choice(unquote(for f <- ngram_fields, do: [string: f]))
ignore(string("\\")) |> string("\""), |> ignore(eq)
ignore(string("\\")) |> string("\\"), |> concat(quoted_text)
string("\\") |> utf8_char([]), |> ignore(quot)
utf8_char(not: ?") |> tag(:ngram)
])
|> times(min: 1)
|> ignore(quot)
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:term)
stop_words = quoted_custom =
choice([ ignore(quot)
string("\\") |> eos(), |> full_choice(unquote(for f <- custom_fields, do: [string: f]))
string(","), |> ignore(string(":"))
concat(space, l_and), |> concat(quoted_text)
concat(space, l_or), |> ignore(quot)
concat(space, l_not), |> tag(:custom)
rparen,
fuzz,
boost
])
defcombinatorp( default =
:simple_term, text
lookahead_not(stop_words) |> tag(:default)
|> choice([
string("\\") |> utf8_char([]),
string("(") |> parsec(:simple_term) |> string(")"),
utf8_char([])
])
|> times(min: 1)
)
unquoted_term = quoted_default =
parsec(:simple_term) quoted_text
|> reduce({List, :to_string, []}) |> tag(:default)
|> unwrap_and_tag(:term)
outer = term =
choice([ choice([
l_and, quoted_numeric,
l_or, quoted_literal,
l_not, quoted_ngram,
lparen, quoted_custom,
rparen, quoted_default,
boost, numeric,
fuzz, literal,
space, ngram,
quoted_term, custom,
unquoted_term default
]) ])
search = outer =
times(outer, min: 1) choice([
|> eos() l_and,
l_or,
l_not,
lparen,
rparen,
boost,
fuzz,
space,
term
])
defparsec(:search, search) search =
times(outer, min: 1)
|> eos()
defparsec(:search, search)
end
end
end end