diff --git a/lib/philomena/images/query.ex b/lib/philomena/images/query.ex
new file mode 100644
index 00000000..c12354da
--- /dev/null
+++ b/lib/philomena/images/query.ex
@@ -0,0 +1,13 @@
+defmodule Philomena.Images.Query do
+  @moduledoc """
+  Image search lexer, generated by `Philomena.Search.Lexer` from the field
+  lists given below.
+  """
+
+  use Philomena.Search.Lexer,
+    int_fields: ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
+    float_fields: ~W(aspect_ratio wilson_score),
+    date_fields: ~W(created_at updated_at first_seen_at),
+    literal_fields: ~W(namespaced_tags.name faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
+    ngram_fields: ~W(description)
+end
\ No newline at end of file
diff --git a/lib/philomena/search/helpers.ex b/lib/philomena/search/helpers.ex
index 578420af..4d8a3041 100644
--- a/lib/philomena/search/helpers.ex
+++ b/lib/philomena/search/helpers.ex
@@ -13,4 +13,28 @@ defmodule Philomena.Search.Helpers do
       int_val
     end
   end
+
+  @doc """
+  Builds a combinator matching any one of `choices`, for a list of any length.
+
+  `NimbleParsec.choice/2` only accepts two or more alternatives, so shorter
+  lists are handled separately:
+
+    * `[]` - there is nothing to match, so `eos/1` is appended instead
+    * `[choice]` - the lone alternative is appended with `concat/2`
+    * two or more - passed to `choice/2` unchanged
+  """
+  def full_choice(combinator \\ empty(), choices)
+
+  def full_choice(combinator, []) do
+    combinator |> eos()
+  end
+
+  def full_choice(combinator, [choice]) do
+    combinator |> concat(choice)
+  end
+
+  def full_choice(combinator, choices) do
+    choice(combinator, choices)
+  end
 end
\ No newline at end of file
diff --git a/lib/philomena/search/lexer.ex b/lib/philomena/search/lexer.ex
index 9ba7b63b..c751f4ce 100644
--- a/lib/philomena/search/lexer.ex
+++ b/lib/philomena/search/lexer.ex
@@ -1,296 +1,443 @@
 defmodule Philomena.Search.Lexer do
-  import NimbleParsec
-  import Philomena.Search.Helpers
+  @moduledoc """
+  Generates a NimbleParsec search lexer inside the module that `use`s it.
+
+  Field names are supplied per type, under either the `<type>_fields` key (as
+  `Philomena.Images.Query` does) or the bare `<type>` key, where `<type>` is
+  one of `literal`, `ngram`, `bool`, `date`, `float`, `int`, `ip` or `custom`.
+  Types that are not given default to an empty list.
+  """
+
+  defmacro __using__(opts) do
+    caller = __CALLER__
+
+    # `opts` arrives as quoted code, so a `~W(...)` value is still an unexpanded
+    # sigil call at this point. Expand it in the caller's environment to get the
+    # actual list of names before the comprehensions below iterate over it.
+    field_names = fn long_key, short_key ->
+      opts
+      |> Keyword.get(long_key, Keyword.get(opts, short_key, []))
+      |> Macro.expand(caller)
+    end
+
+    literal_fields = field_names.(:literal_fields, :literal)
+    ngram_fields = field_names.(:ngram_fields, :ngram)
+    bool_fields = field_names.(:bool_fields, :bool)
+    date_fields = field_names.(:date_fields, :date)
+    float_fields = field_names.(:float_fields, :float)
+    int_fields = field_names.(:int_fields, :int)
+    ip_fields = field_names.(:ip_fields, :ip)
+    custom_fields = field_names.(:custom_fields, :custom)
 
-  l_and =
-    choice([string("AND"), string("&&"), string(",")])
-    |> unwrap_and_tag(:and)
+    quote location: :keep do
+      import NimbleParsec
+      import Philomena.Search.Helpers
 
-  l_or =
-    choice([string("OR"), string("||")])
-    |> unwrap_and_tag(:or)
+      l_and =
+        choice([string("AND"), string("&&"), string(",")])
+        |> unwrap_and_tag(:and)
 
-  l_not =
-    choice([string("NOT"), string("!"), string("-")])
-    |> unwrap_and_tag(:not)
+      l_or =
+        choice([string("OR"), string("||")])
+        |> unwrap_and_tag(:or)
 
-  lparen = string("(") |> unwrap_and_tag(:lparen)
-  rparen = string(")") |> unwrap_and_tag(:rparen)
+      l_not =
+        choice([string("NOT"), string("!"), string("-")])
+        |> unwrap_and_tag(:not)
 
-  space =
-    choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
-    |> ignore()
+      lparen = string("(") |> unwrap_and_tag(:lparen)
+      rparen = string(")") |> unwrap_and_tag(:rparen)
 
-  int =
-    integer(min: 1)
-    |> label("an integer, such as `-100' or `5'")
+      space =
+        choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
+        |> ignore()
 
-  number =
-    optional(ascii_char('-+'))
-    |> ascii_char([?0..?9])
-    |> times(min: 1)
-    |> optional(ascii_char('.') |> ascii_char([?0..?9]) |> times(min: 1))
-    |> label("a real number, such as `-2.71828' or `10'")
-    |> reduce(:to_number)
+      int =
+        integer(min: 1)
+        |> label("an integer, such as `-100' or `5'")
 
-  bool =
-    choice([
-      string("true"),
-      string("false")
-    ])
-    |> label("a boolean, such as `false'")
-    |> reduce({Jason, :decode!, []})
+      number =
+        optional(ascii_char('-+'))
+        |> ascii_char([?0..?9])
+        |> times(min: 1)
+        |> optional(ascii_char('.') |> ascii_char([?0..?9]) |> times(min: 1))
+        |> label("a real number, such as `-2.71828' or `10'")
+        |> reduce(:to_number)
 
-  ipv4_octet =
-    choice([
-      ascii_char('2') |> ascii_char('5') |> ascii_char([?0..?5]),
-      ascii_char('2') |> ascii_char([?0..?4]) |> ascii_char([?0..?9]),
-      ascii_char('1') |> ascii_char([?0..?9]) |> ascii_char([?0..?9]),
-      ascii_char([?1..?9]) |> ascii_char([?0..?9]),
-      ascii_char([?0..?9])
-    ])
-    |> reduce({List, :to_string, []})
+      bool =
+        choice([
+          string("true"),
+          string("false")
+        ])
+        |> label("a boolean, such as `false'")
+        |> reduce({Jason, :decode!, []})
 
-  ipv4_address =
-    times(ipv4_octet |> string("."), 3)
-    |> concat(ipv4_octet)
+      ipv4_octet =
+        choice([
+          ascii_char('2') |> ascii_char('5') |> ascii_char([?0..?5]),
+          ascii_char('2') |> ascii_char([?0..?4]) |> ascii_char([?0..?9]),
+          ascii_char('1') |> ascii_char([?0..?9]) |> ascii_char([?0..?9]),
+          ascii_char([?1..?9]) |> ascii_char([?0..?9]),
+          ascii_char([?0..?9])
+        ])
+        |> reduce({List, :to_string, []})
 
-  ipv4_prefix =
-    ascii_char('/')
-    |> choice([
-      ascii_char('3') |> ascii_char([?0..?2]),
-      ascii_char([?1..?2]) |> ascii_char([?0..?9]),
-      ascii_char([?0..?9])
-    ])
-    |> reduce({List, :to_string, []})
+      ipv4_address =
+        times(ipv4_octet |> string("."), 3)
+        |> concat(ipv4_octet)
 
-  ipv6_hexadectet =
-    ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)
+      ipv4_prefix =
+        ascii_char('/')
+        |> choice([
+          ascii_char('3') |> ascii_char([?0..?2]),
+          ascii_char([?1..?2]) |> ascii_char([?0..?9]),
+          ascii_char([?0..?9])
+        ])
+        |> reduce({List, :to_string, []})
 
-  ipv6_ls32 =
-    choice([
-      ipv6_hexadectet |> string(":") |> concat(ipv6_hexadectet),
-      ipv4_address
-    ])
+      ipv6_hexadectet =
+        ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)
 
-  ipv6_fragment =
-    ipv6_hexadectet |> string(":")
+      ipv6_ls32 =
+        choice([
+          ipv6_hexadectet |> string(":") |> concat(ipv6_hexadectet),
+          ipv4_address
+        ])
 
-  ipv6_address =
-    choice([
-      times(ipv6_fragment, 6) |> concat(ipv6_ls32),
-      string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
+      ipv6_fragment =
+        ipv6_hexadectet |> string(":")
 
-      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
-      string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
+      ipv6_address =
+        choice([
+          times(ipv6_fragment, 6) |> concat(ipv6_ls32),
+          string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
+
+          ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
+          string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
+
+          times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
+          ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
+          string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
+
+          times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
+          times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
+          ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
+          string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
+
+          times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
+          times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
+          times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
+          ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
+          string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
+
+          times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
+          times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
+          times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
+          times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
+          ipv6_hexadectet |> string("::") |> concat(ipv6_ls32),
+          string("::") |> concat(ipv6_ls32),
+
+          times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
+          times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
+          times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
+          times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
+          times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
+          ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet),
+          string("::") |> concat(ipv6_hexadectet),
+
+          times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"),
+          times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"),
+          times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"),
+          times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::"),
+          times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::"),
+          times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::"),
+          ipv6_hexadectet |> string("::"),
+          string("::")
+        ])
+
+      ipv6_prefix =
+        ascii_char('/')
+        |> choice([
+          ascii_char('1') |> ascii_char('2') |> ascii_char([?0..?8]),
+          ascii_char('1') |> ascii_char([?0..?1]) |> ascii_char([?0..?9]),
+          ascii_char([?1..?9]) |> ascii_char([?0..?9]),
+          ascii_char([?0..?9])
+        ])
+        |> reduce({List, :to_string, []})
+
+      ip_address =
+        choice([
+          ipv4_address |> optional(ipv4_prefix),
+          ipv6_address |> optional(ipv6_prefix)
+        ])
+        |> reduce({Enum, :join, []})
+        |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
+        |> unwrap_and_tag(:ip)
+
+      year = integer(4)
+      month = integer(2)
+      day = integer(2)
 
-      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
-      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
-      string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
+      hour = integer(2)
+      minute = integer(2)
+      second = integer(2)
+      tz_hour = integer(2)
+      tz_minute = integer(2)
 
-      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
-      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
-      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
-      string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
+      ymd_sep = ignore(string("-"))
+      hms_sep = ignore(string(":"))
+      iso8601_sep = ignore(choice([string("T"), string("t"), space]))
+      iso8601_tzsep =
+        choice([
+          string("+") |> replace(1),
+          string("-") |> replace(-1)
+        ])
+      zulu = ignore(choice([string("Z"), string("z")]))
 
-      times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
-      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
-      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
-      ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
-      string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
-
-      times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
-      times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
-      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
-      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
-      ipv6_hexadectet |> string("::") |> concat(ipv6_ls32),
-      string("::") |> concat(ipv6_ls32),
-
-      times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
-      times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
-      times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
-      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
-      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
-      ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet),
-      string("::") |> concat(ipv6_hexadectet),
-
-      times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"),
-      times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"),
-      times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"),
-      times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::"),
-      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::"),
-      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::"),
-      ipv6_hexadectet |> string("::"),
-      string("::")
-    ])
-
-  ipv6_prefix =
-    ascii_char('/')
-    |> choice([
-      ascii_char('1') |> ascii_char('2') |> ascii_char([?0..?8]),
-      ascii_char('1') |> ascii_char([?0..?1]) |> ascii_char([?0..?9]),
-      ascii_char([?1..?9]) |> ascii_char([?0..?9]),
-      ascii_char([?0..?9])
-    ])
-    |> reduce({List, :to_string, []})
-
-  ip_address =
-    choice([
-      ipv4_address |> optional(ipv4_prefix),
-      ipv6_address |> optional(ipv6_prefix)
-    ])
-    |> reduce({Enum, :join, []})
-    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
-    |> unwrap_and_tag(:ip)
-
-  year = integer(4)
-  month = integer(2)
-  day = integer(2)
-
-  hour = integer(2)
-  minute = integer(2)
-  second = integer(2)
-  tz_hour = integer(2)
-  tz_minute = integer(2)
-
-  ymd_sep = ignore(string("-"))
-  hms_sep = ignore(string(":"))
-  iso8601_sep = ignore(choice([string("T"), string("t"), space]))
-  iso8601_tzsep =
-    choice([
-      string("+") |> replace(1),
-      string("-") |> replace(-1)
-    ])
-  zulu = ignore(choice([string("Z"), string("z")]))
-
-  date_part =
-    year
-    |> optional(
-      ymd_sep
-      |> concat(month)
-      |> optional(
-        ymd_sep
-        |> concat(day)
+      date_part =
+        year
         |> optional(
-          iso8601_sep
+          ymd_sep
+          |> concat(month)
           |> optional(
-            hour
+            ymd_sep
+            |> concat(day)
             |> optional(
-              hms_sep
-              |> concat(minute)
+              iso8601_sep
               |> optional(
-                concat(hms_sep, second)
+                hour
+                |> optional(
+                  hms_sep
+                  |> concat(minute)
+                  |> optional(
+                    concat(hms_sep, second)
+                  )
+                )
               )
             )
           )
         )
+        |> label("an RFC3339 date and optional time, such as `2019-08-01'")
+        |> tag(:date)
+
+      timezone_part =
+        choice([
+          iso8601_tzsep
+          |> concat(tz_hour)
+          |> optional(
+            hms_sep
+            |> concat(tz_minute)
+          )
+          |> tag(:timezone),
+          zulu
+        ])
+
+      absolute_date =
+        date_part
+        |> optional(timezone_part)
+        |> tag(:absolute_date)
+
+      relative_date =
+        integer(min: 1)
+        |> ignore(concat(space, empty()))
+        |> choice([
+          string("second") |> optional(string("s")) |> replace(1),
+          string("minute") |> optional(string("s")) |> replace(60),
+          string("hour") |> optional(string("s")) |> replace(3600),
+          string("day") |> optional(string("s")) |> replace(86400),
+          string("week") |> optional(string("s")) |> replace(604800),
+          string("month") |> optional(string("s")) |> replace(2629746),
+          string("year") |> optional(string("s")) |> replace(31556952)
+        ])
+        |> ignore(string(" ago"))
+        |> label("a relative date, such as `3 days ago'")
+        |> tag(:relative_date)
+
+      date =
+        choice([
+          absolute_date,
+          relative_date
+        ])
+
+      eq = choice([string(":"), string(".eq:")]) |> unwrap_and_tag(:eq)
+      lt = string(".lt:") |> unwrap_and_tag(:lt)
+      lte = string(".lte:") |> unwrap_and_tag(:lte)
+      gt = string(".gt:") |> unwrap_and_tag(:gt)
+      gte = string(".gte:") |> unwrap_and_tag(:gte)
+
+      range_relation =
+        choice([
+          eq,
+          lt,
+          lte,
+          gt,
+          gte
+        ])
+
+      boost = ignore(string("^")) |> unwrap_and_tag(number, :boost)
+      fuzz = ignore(string("~")) |> unwrap_and_tag(number, :fuzz)
+
+      quot = string("\"")
+
+      bool_value =
+        full_choice(unquote(for f <- bool_fields, do: [string: f]))
+        |> concat(eq)
+        |> concat(bool)
+
+      date_value =
+        full_choice(unquote(for f <- date_fields, do: [string: f]))
+        |> concat(range_relation)
+        |> concat(date)
+
+      float_value =
+        full_choice(unquote(for f <- float_fields, do: [string: f]))
+        |> concat(range_relation)
+        |> concat(number)
+
+      int_value =
+        full_choice(unquote(for f <- int_fields, do: [string: f]))
+        |> concat(range_relation)
+        |> concat(int)
+
+      ip_value =
+        full_choice(unquote(for f <- ip_fields, do: [string: f]))
+        |> concat(eq)
+        |> concat(ip_address)
+
+      numeric =
+        choice([
+          bool_value,
+          date_value,
+          float_value,
+          int_value,
+          ip_value
+        ])
+
+      quoted_numeric =
+        ignore(quot) |> concat(numeric) |> ignore(quot)
+
+      stop_words =
+        choice([
+          string("\\") |> eos(),
+          string(","),
+          concat(space, l_and),
+          concat(space, l_or),
+          concat(space, l_not),
+          rparen,
+          fuzz,
+          boost
+        ])
+
+      defcombinatorp(
+        :text,
+        lookahead_not(stop_words)
+        |> choice([
+          string("\\") |> utf8_char([]),
+          string("(") |> parsec(:text) |> string(")"),
+          utf8_char([])
+        ])
+        |> times(min: 1)
+        |> reduce({List, :to_string, []})
+        |> unwrap_and_tag(:text)
       )
-    )
-    |> label("an RFC3339 date and optional time, such as `2019-08-01'")
-    |> tag(:date)
 
-  timezone_part =
-    choice([
-      iso8601_tzsep
-      |> concat(tz_hour)
-      |> optional(
-        hms_sep
-        |> concat(tz_minute)
-      )
-      |> tag(:timezone),
-      zulu
-    ])
+      text = parsec(:text)
 
-  absolute_date =
-    date_part
-    |> optional(timezone_part)
-    |> tag(:absolute_date)
+      quoted_text =
+        choice([
+          ignore(string("\\")) |> string("\""),
+          ignore(string("\\")) |> string("\\"),
+          string("\\") |> utf8_char([]),
+          utf8_char(not: ?")
+        ])
+        |> times(min: 1)
+        |> reduce({List, :to_string, []})
+        |> unwrap_and_tag(:text)
 
-  relative_date =
-    integer(min: 1)
-    |> ignore(concat(space, empty()))
-    |> choice([
-      string("second") |> optional(string("s")) |> replace(1),
-      string("minute") |> optional(string("s")) |> replace(60),
-      string("hour") |> optional(string("s")) |> replace(3600),
-      string("day") |> optional(string("s")) |> replace(86400),
-      string("week") |> optional(string("s")) |> replace(604800),
-      string("month") |> optional(string("s")) |> replace(2629746),
-      string("year") |> optional(string("s")) |> replace(31556952)
-    ])
-    |> ignore(string(" ago"))
-    |> label("a relative date, such as `3 days ago'")
-    |> tag(:relative_date)
+      literal =
+        full_choice(unquote(for f <- literal_fields, do: [string: f]))
+        |> ignore(eq)
+        |> concat(text)
+        |> tag(:literal)
 
-  date =
-    choice([
-      absolute_date,
-      relative_date
-    ])
+      ngram =
+        full_choice(unquote(for f <- ngram_fields, do: [string: f]))
+        |> ignore(eq)
+        |> concat(text)
+        |> tag(:ngram)
 
-  boost = ignore(string("^")) |> unwrap_and_tag(number, :boost)
-  fuzz = ignore(string("~")) |> unwrap_and_tag(number, :fuzz)
+      # NOTE(review): unlike `quoted_custom` below, this result is not wrapped
+      # in a `:custom` tag - confirm that the difference is intended.
+      custom =
+        full_choice(unquote(for f <- custom_fields, do: [string: f]))
+        |> ignore(string(":"))
+        |> concat(text)
 
-  quot = string("\"")
+      quoted_literal =
+        ignore(quot)
+        |> full_choice(unquote(for f <- literal_fields, do: [string: f]))
+        |> ignore(eq)
+        |> concat(quoted_text)
+        |> ignore(quot)
+        |> tag(:literal)
 
-  quoted_term =
-    ignore(quot)
-    |> choice([
-      ignore(string("\\")) |> string("\""),
-      ignore(string("\\")) |> string("\\"),
-      string("\\") |> utf8_char([]),
-      utf8_char(not: ?")
-    ])
-    |> times(min: 1)
-    |> ignore(quot)
-    |> reduce({List, :to_string, []})
-    |> unwrap_and_tag(:term)
+      quoted_ngram =
+        ignore(quot)
+        |> full_choice(unquote(for f <- ngram_fields, do: [string: f]))
+        |> ignore(eq)
+        |> concat(quoted_text)
+        |> ignore(quot)
+        |> tag(:ngram)
 
-  stop_words =
-    choice([
-      string("\\") |> eos(),
-      string(","),
-      concat(space, l_and),
-      concat(space, l_or),
-      concat(space, l_not),
-      rparen,
-      fuzz,
-      boost
-    ])
+      quoted_custom =
+        ignore(quot)
+        |> full_choice(unquote(for f <- custom_fields, do: [string: f]))
+        |> ignore(string(":"))
+        |> concat(quoted_text)
+        |> ignore(quot)
+        |> tag(:custom)
 
-  defcombinatorp(
-    :simple_term,
-    lookahead_not(stop_words)
-    |> choice([
-      string("\\") |> utf8_char([]),
-      string("(") |> parsec(:simple_term) |> string(")"),
-      utf8_char([])
-    ])
-    |> times(min: 1)
-  )
+      default =
+        text
+        |> tag(:default)
 
-  unquoted_term =
-    parsec(:simple_term)
-    |> reduce({List, :to_string, []})
-    |> unwrap_and_tag(:term)
+      quoted_default =
+        quoted_text
+        |> tag(:default)
 
-  outer =
-    choice([
-      l_and,
-      l_or,
-      l_not,
-      lparen,
-      rparen,
-      boost,
-      fuzz,
-      space,
-      quoted_term,
-      unquoted_term
-    ])
+      term =
+        choice([
+          quoted_numeric,
+          quoted_literal,
+          quoted_ngram,
+          quoted_custom,
+          quoted_default,
+          numeric,
+          literal,
+          ngram,
+          custom,
+          default
+        ])
 
-  search =
-    times(outer, min: 1)
-    |> eos()
+      outer =
+        choice([
+          l_and,
+          l_or,
+          l_not,
+          lparen,
+          rparen,
+          boost,
+          fuzz,
+          space,
+          term
+        ])
 
-  defparsec(:search, search)
+      search =
+        times(outer, min: 1)
+        |> eos()
+
+      defparsec(:search, search)
+    end
+  end
 end