From 756599df56c75de0c2e8ec269f01a72421f37b5e Mon Sep 17 00:00:00 2001
From: "byte[]"
Date: Sat, 2 Nov 2019 14:34:25 -0400
Subject: [PATCH] add non-macro-based search parser

---
 lib/search/bool_parser.ex       |  17 ++
 lib/search/date_parser.ex       | 196 ++++++++++++++++++++++++++
 lib/search/float_parser.ex      |  35 +++++
 lib/search/helpers.ex           |  30 ++++
 lib/search/int_parser.ex        |  28 ++++
 lib/search/ip_parser.ex         | 143 +++++++++++++++++++
 lib/search/lexer.ex             | 109 ++++++++++++++
 lib/search/literal_parser.ex    |  56 +++++++
 lib/search/ngram_parser.ex      |  10 +
 lib/search/parser.ex            | 274 ++++++++++++++++++++++++++++++++++++
 lib/search/term_range_parser.ex |  53 +++++++
 11 files changed, 951 insertions(+)
 create mode 100644 lib/search/bool_parser.ex
 create mode 100644 lib/search/date_parser.ex
 create mode 100644 lib/search/float_parser.ex
 create mode 100644 lib/search/helpers.ex
 create mode 100644 lib/search/int_parser.ex
 create mode 100644 lib/search/ip_parser.ex
 create mode 100644 lib/search/lexer.ex
 create mode 100644 lib/search/literal_parser.ex
 create mode 100644 lib/search/ngram_parser.ex
 create mode 100644 lib/search/parser.ex
 create mode 100644 lib/search/term_range_parser.ex

diff --git a/lib/search/bool_parser.ex b/lib/search/bool_parser.ex
new file mode 100644
index 00000000..6daa9257
--- /dev/null
+++ b/lib/search/bool_parser.ex
@@ -0,0 +1,17 @@
+defmodule Search.BoolParser do
+  @moduledoc """
+  Parses the exact strings `true` or `false` into a `bool:` token.
+  """
+
+  import NimbleParsec
+
+  bool =
+    choice([
+      string("true"),
+      string("false")
+    ])
+    |> unwrap_and_tag(:bool)
+    |> eos()
+
+  defparsec :parse, bool
+end
\ No newline at end of file
diff --git a/lib/search/date_parser.ex b/lib/search/date_parser.ex
new file mode 100644
index 00000000..30a7cd22
--- /dev/null
+++ b/lib/search/date_parser.ex
@@ -0,0 +1,196 @@
+defmodule Search.DateParser do
+  @moduledoc """
+  Parses absolute (ISO 8601-like) and relative (`3 days ago`) dates into a
+  `date: [lower, upper]` token holding the bounds of the matched interval.
+  """
+
+  import NimbleParsec
+
+  defp build_datetime(naive, tz_off, tz_hour, tz_minute) do
+    tz_hour =
+      tz_hour
+      |> Integer.to_string()
+      |> String.pad_leading(2, "0")
+
+    tz_minute =
+      tz_minute
+      |> Integer.to_string()
+      |> String.pad_leading(2, "0")
+
+    iso8601_string = "#{NaiveDateTime.to_iso8601(naive)}#{tz_off}#{tz_hour}#{tz_minute}"
+
+    # DateTime has no constructor taking integer offset components, so the
+    # offset is rendered into an ISO 8601 string and parsed back.
+    {:ok, datetime, _offset} = DateTime.from_iso8601(iso8601_string)
+
+    datetime
+  end
+
+  defp timezone_bounds([]), do: ["+", 0, 0]
+  defp timezone_bounds([tz_off, tz_hour]), do: [tz_off, tz_hour, 0]
+  defp timezone_bounds([tz_off, tz_hour, tz_minute]), do: [tz_off, tz_hour, tz_minute]
+
+  # Each clause returns [lower, upper] for the most specific component given,
+  # where upper is the start of the next year/month/day/hour/minute/second.
+  defp date_bounds([year]) do
+    lower = %NaiveDateTime{year: year, month: 1, day: 1, hour: 0, minute: 0, second: 0}
+    upper = %NaiveDateTime{lower | year: year + 1}
+    [lower, upper]
+  end
+
+  defp date_bounds([year, month]) do
+    lower = %NaiveDateTime{year: year, month: month, day: 1, hour: 0, minute: 0, second: 0}
+    {next_year, next_month} = if month == 12, do: {year + 1, 1}, else: {year, month + 1}
+    upper = %NaiveDateTime{lower | year: next_year, month: next_month}
+    [lower, upper]
+  end
+
+  defp date_bounds([year, month, day]) do
+    lower = %NaiveDateTime{year: year, month: month, day: day, hour: 0, minute: 0, second: 0}
+    upper = NaiveDateTime.add(lower, 86400, :second)
+    [lower, upper]
+  end
+
+  defp date_bounds([year, month, day, hour]) do
+    lower = %NaiveDateTime{year: year, month: month, day: day, hour: hour, minute: 0, second: 0}
+    upper = NaiveDateTime.add(lower, 3600, :second)
+    [lower, upper]
+  end
+
+  defp date_bounds([year, month, day, hour, minute]) do
+    lower = %NaiveDateTime{
+      year: year,
+      month: month,
+      day: day,
+      hour: hour,
+      minute: minute,
+      second: 0
+    }
+
+    upper = NaiveDateTime.add(lower, 60, :second)
+    [lower, upper]
+  end
+
+  defp date_bounds([year, month, day, hour, minute, second]) do
+    lower = %NaiveDateTime{
+      year: year,
+      month: month,
+      day: day,
+      hour: hour,
+      minute: minute,
+      second: second
+    }
+
+    upper = NaiveDateTime.add(lower, 1, :second)
+    [lower, upper]
+  end
+
+  defp absolute_datetime(opts) do
+    date = Keyword.fetch!(opts, :date)
+    timezone = Keyword.get(opts, :timezone, [])
+
+    [lower, upper] = date_bounds(date)
+    [tz_off, tz_hour, tz_minute] = timezone_bounds(timezone)
+
+    lower = build_datetime(lower, tz_off, tz_hour, tz_minute)
+    upper = build_datetime(upper, tz_off, tz_hour, tz_minute)
+
+    [lower, upper]
+  end
+
+  defp relative_datetime([count, scale]) do
+    # Use DateTime so relative and absolute dates produce the same type.
+    now = DateTime.utc_now()
+
+    lower = DateTime.add(now, count * -scale, :second)
+    upper = DateTime.add(now, (count - 1) * -scale, :second)
+
+    [lower, upper]
+  end
+
+  space =
+    choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
+    |> ignore()
+
+  year = integer(4)
+  month = integer(2)
+  day = integer(2)
+
+  hour = integer(2)
+  minute = integer(2)
+  second = integer(2)
+  tz_hour = integer(2)
+  tz_minute = integer(2)
+
+  ymd_sep = ignore(string("-"))
+  hms_sep = ignore(string(":"))
+  iso8601_sep = ignore(choice([string("T"), string("t"), space]))
+  iso8601_tzsep = choice([string("+"), string("-")])
+  zulu = ignore(choice([string("Z"), string("z")]))
+
+  date_part =
+    year
+    |> optional(
+      ymd_sep
+      |> concat(month)
+      |> optional(
+        ymd_sep
+        |> concat(day)
+        |> optional(
+          iso8601_sep
+          |> optional(
+            hour
+            |> optional(
+              hms_sep
+              |> concat(minute)
+              |> optional(concat(hms_sep, second))
+            )
+          )
+        )
+      )
+    )
+    |> tag(:date)
+
+  timezone_part =
+    choice([
+      iso8601_tzsep
+      |> concat(tz_hour)
+      |> optional(
+        hms_sep
+        |> concat(tz_minute)
+      )
+      |> tag(:timezone),
+      zulu
+    ])
+
+  absolute_date =
+    date_part
+    |> optional(timezone_part)
+    |> reduce(:absolute_datetime)
+    |> unwrap_and_tag(:date)
+
+  relative_date =
+    integer(min: 1)
+    |> ignore(concat(space, empty()))
+    |> choice([
+      string("second") |> optional(string("s")) |> replace(1),
+      string("minute") |> optional(string("s")) |> replace(60),
+      string("hour") |> optional(string("s")) |> replace(3_600),
+      string("day") |> optional(string("s")) |> replace(86_400),
+      string("week") |> optional(string("s")) |> replace(604_800),
+      string("month") |> optional(string("s")) |> replace(2_592_000),
+      string("year") |> optional(string("s")) |> replace(31_536_000)
+    ])
+    |> ignore(string(" ago"))
+    |> reduce(:relative_datetime)
+    |> unwrap_and_tag(:date)
+
+  date =
+    choice([
+      absolute_date,
+      relative_date
+    ])
+    |> eos()
+
+  defparsec :parse, date
+end
\ No newline at end of file
diff --git a/lib/search/float_parser.ex b/lib/search/float_parser.ex
new file mode 100644
index 00000000..03e518de
--- /dev/null
+++ b/lib/search/float_parser.ex
@@ -0,0 +1,35 @@
+defmodule Search.FloatParser do
+  @moduledoc """
+  Parses a number into a `float:` token, or a fuzzy number such as `1.5~0.5`
+  into a `float_range: [lower, upper]` token.
+  """
+
+  import NimbleParsec
+  import Search.Helpers
+
+  fuzz =
+    string("~")
+    |> ignore()
+
+  unsigned_float =
+    ascii_string([?0..?9], min: 1)
+    |> optional(ascii_char('.') |> ascii_string([?0..?9], min: 1))
+    |> reduce({List, :to_string, []})
+    |> reduce(:to_number)
+
+  float =
+    optional(ascii_char('-+'))
+    |> ascii_string([?0..?9], min: 1)
+    |> optional(ascii_char('.') |> ascii_string([?0..?9], min: 1))
+    |> reduce({List, :to_string, []})
+    |> reduce(:to_number)
+
+  float_parser =
+    choice([
+      float |> concat(fuzz) |> concat(unsigned_float) |> reduce(:range) |> unwrap_and_tag(:float_range),
+      float |> unwrap_and_tag(:float)
+    ])
+    |> eos()
+
+  defparsec :parse, float_parser
+end
\ No newline at end of file
diff --git a/lib/search/helpers.ex b/lib/search/helpers.ex
new file mode 100644
index 00000000..c2d66d8f
--- /dev/null
+++ b/lib/search/helpers.ex
@@ -0,0 +1,30 @@
+defmodule Search.Helpers do
+  @moduledoc """
+  Reducer functions shared by the NimbleParsec-based parsers.
+  """
+
+  # :string.to_float/1 rejects input without a decimal point, so fall back
+  # to :string.to_integer/1 when the float parse fails.
+  def to_number(term) do
+    {float_val, _} = :string.to_float(term)
+    {int_val, _} = :string.to_integer(term)
+
+    cond do
+      is_float(float_val) ->
+        float_val
+
+      is_integer(int_val) ->
+        int_val
+    end
+  end
+
+  def to_int(term) do
+    {int, _} = :string.to_integer(term)
+
+    int
+  end
+
+  def range([center, deviation]) do
+    [center - deviation, center + deviation]
+  end
+end
\ No newline at end of file
diff --git a/lib/search/int_parser.ex b/lib/search/int_parser.ex
new file mode 100644
index 00000000..d616f49b
--- /dev/null
+++ b/lib/search/int_parser.ex
@@ -0,0 +1,28 @@
+defmodule Search.IntParser do
+  @moduledoc """
+  Parses an integer into an `int:` token, or a fuzzy integer such as `10~2`
+  into an `int_range: [lower, upper]` token.
+  """
+
+  import NimbleParsec
+  import Search.Helpers
+
+  fuzz =
+    string("~")
+    |> ignore()
+
+  int =
+    optional(ascii_char('-+'))
+    |> ascii_string([?0..?9], min: 1)
+    |> reduce({List, :to_string, []})
+    |> reduce(:to_int)
+
+  int_parser =
+    choice([
+      int |> concat(fuzz) |> integer(min: 1) |> reduce(:range) |> unwrap_and_tag(:int_range),
+      int |> unwrap_and_tag(:int)
+    ])
+    |> eos()
+
+  defparsec :parse, int_parser
+end
\ No newline at end of file
diff --git a/lib/search/ip_parser.ex b/lib/search/ip_parser.ex
new file mode 100644
index 00000000..3ae7b61e
--- /dev/null
+++ b/lib/search/ip_parser.ex
@@ -0,0 +1,143 @@
+defmodule Search.IpParser do
+  @moduledoc """
+  Parses an IPv4 or IPv6 address with an optional CIDR prefix into an `ip:`
+  token holding the matched text.
+  """
+
+  import NimbleParsec
+
+  ipv4_octet =
+    choice([
+      ascii_char('2') |> ascii_char('5') |> ascii_char([?0..?5]),
+      ascii_char('2') |> ascii_char([?0..?4]) |> ascii_char([?0..?9]),
+      ascii_char('1') |> ascii_char([?0..?9]) |> ascii_char([?0..?9]),
+      ascii_char([?1..?9]) |> ascii_char([?0..?9]),
+      ascii_char([?0..?9])
+    ])
+    |> reduce({List, :to_string, []})
+
+  ipv4_address =
+    times(ipv4_octet |> string("."), 3)
+    |> concat(ipv4_octet)
+
+  ipv4_prefix =
+    ascii_char('/')
+    |> choice([
+      ascii_char('3') |> ascii_char([?0..?2]),
+      ascii_char([?1..?2]) |> ascii_char([?0..?9]),
+      ascii_char([?0..?9])
+    ])
+    |> reduce({List, :to_string, []})
+
+  ipv6_hexadectet = ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)
+
+  ipv6_ls32 =
+    choice([
+      ipv6_hexadectet |> string(":") |> concat(ipv6_hexadectet),
+      ipv4_address
+    ])
+
+  ipv6_fragment = ipv6_hexadectet |> string(":")
+
+  ipv6_address =
+    choice([
+      times(ipv6_fragment, 6) |> concat(ipv6_ls32),
+      string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
+      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
+      string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
+      times(ipv6_fragment, 1)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> times(ipv6_fragment, 3)
+      |> concat(ipv6_ls32),
+      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
+      string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
+      times(ipv6_fragment, 2)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> times(ipv6_fragment, 2)
+      |> concat(ipv6_ls32),
+      times(ipv6_fragment, 1)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> times(ipv6_fragment, 2)
+      |> concat(ipv6_ls32),
+      ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
+      string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
+      times(ipv6_fragment, 3)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> concat(ipv6_fragment)
+      |> concat(ipv6_ls32),
+      times(ipv6_fragment, 2)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> concat(ipv6_fragment)
+      |> concat(ipv6_ls32),
+      times(ipv6_fragment, 1)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> concat(ipv6_fragment)
+      |> concat(ipv6_ls32),
+      ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
+      string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
+      times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
+      times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
+      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
+      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
+      ipv6_hexadectet |> string("::") |> concat(ipv6_ls32),
+      string("::") |> concat(ipv6_ls32),
+      times(ipv6_fragment, 5)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> concat(ipv6_hexadectet),
+      times(ipv6_fragment, 4)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> concat(ipv6_hexadectet),
+      times(ipv6_fragment, 3)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> concat(ipv6_hexadectet),
+      times(ipv6_fragment, 2)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> concat(ipv6_hexadectet),
+      times(ipv6_fragment, 1)
+      |> concat(ipv6_hexadectet)
+      |> string("::")
+      |> concat(ipv6_hexadectet),
+      ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet),
+      string("::") |> concat(ipv6_hexadectet),
+      times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"),
+      times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"),
+      times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"),
+      times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::"),
+      times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::"),
+      times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::"),
+      ipv6_hexadectet |> string("::"),
+      string("::")
+    ])
+
+  ipv6_prefix =
+    ascii_char('/')
+    |> choice([
+      ascii_char('1') |> ascii_char('2') |> ascii_char([?0..?8]),
+      ascii_char('1') |> ascii_char([?0..?1]) |> ascii_char([?0..?9]),
+      ascii_char([?1..?9]) |> ascii_char([?0..?9]),
+      ascii_char([?0..?9])
+    ])
+    |> reduce({List, :to_string, []})
+
+  ip =
+    choice([
+      ipv4_address |> optional(ipv4_prefix),
+      ipv6_address |> optional(ipv6_prefix)
+    ])
+    |> reduce({Enum, :join, []})
+    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
+    |> unwrap_and_tag(:ip)
+    |> eos()
+
+  defparsec :parse, ip
+end
\ No newline at end of file
diff --git a/lib/search/lexer.ex b/lib/search/lexer.ex
new file mode 100644
index 00000000..653c3a8a
--- /dev/null
+++ b/lib/search/lexer.ex
@@ -0,0 +1,109 @@
+defmodule Search.Lexer do
+  @moduledoc """
+  Splits a search string into `and`, `or`, `not`, `lparen`, `rparen`, `boost`
+  and `text` tokens.
+  """
+
+  import NimbleParsec
+  import Search.Helpers
+
+  float =
+    optional(ascii_char('-+'))
+    |> ascii_string([?0..?9], min: 1)
+    |> optional(ascii_char('.') |> ascii_string([?0..?9], min: 1))
+    |> reduce({List, :to_string, []})
+    |> reduce(:to_number)
+
+  l_and =
+    choice([string("AND"), string("&&"), string(",")])
+    |> unwrap_and_tag(:and)
+
+  l_or =
+    choice([string("OR"), string("||")])
+    |> unwrap_and_tag(:or)
+
+  l_not =
+    choice([string("NOT"), string("!"), string("-")])
+    |> unwrap_and_tag(:not)
+
+  lparen = string("(") |> unwrap_and_tag(:lparen)
+  rparen = string(")") |> unwrap_and_tag(:rparen)
+
+  space =
+    choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")])
+    |> ignore()
+
+  quot = string("\"")
+  backslash = string("\\")
+
+  boost =
+    ignore(string("^"))
+    |> concat(float)
+    |> unwrap_and_tag(:boost)
+
+  stop_words =
+    repeat(space)
+    |> choice([
+      backslash |> eos(),
+      l_and,
+      l_or,
+      rparen,
+      boost
+    ])
+
+  defcombinatorp(
+    :dirty_text,
+    lookahead_not(stop_words)
+    |> choice([
+      string("\\") |> utf8_char([]),
+      string("(") |> parsec(:dirty_text) |> string(")"),
+      utf8_char(not: ?(..?))
+    ])
+    |> times(min: 1)
+  )
+
+  text =
+    parsec(:dirty_text)
+    |> reduce({List, :to_string, []})
+    |> unwrap_and_tag(:text)
+
+  # The character choice must be the argument of repeat/2: piping the whole
+  # `ignore(quot) |> choice(...)` chain into repeat/1 would repeat the quote too.
+  quoted_text =
+    ignore(quot)
+    |> repeat(
+      choice([
+        ignore(string("\\")) |> string("\""),
+        ignore(string("\\")) |> string("\\"),
+        string("\\") |> utf8_char([]),
+        utf8_char(not: ?")
+      ])
+    )
+    |> ignore(quot)
+    |> reduce({List, :to_string, []})
+    |> unwrap_and_tag(:text)
+
+  term =
+    choice([
+      quoted_text,
+      text
+    ])
+
+  outer =
+    choice([
+      l_and,
+      l_or,
+      l_not,
+      lparen,
+      rparen,
+      boost,
+      space,
+      term
+    ])
+
+  search =
+    repeat(outer)
+    |> eos()
+
+  defparsec :lex, search
+end
\ No newline at end of file
diff --git a/lib/search/literal_parser.ex b/lib/search/literal_parser.ex
new file mode 100644
index 00000000..a8eccb61
--- /dev/null
+++ b/lib/search/literal_parser.ex
@@ -0,0 +1,56 @@
+defmodule Search.LiteralParser do
+  @moduledoc """
+  Parses a term value into a `literal:` token (optionally followed by a
+  `fuzz:` edit distance) or, when it contains `*` or `?`, a `wildcard:` token.
+  """
+
+  import NimbleParsec
+
+  edit_distance =
+    ignore(string("~"))
+    |> integer(min: 1)
+    |> unwrap_and_tag(:fuzz)
+    |> eos()
+
+  stopwords =
+    choice([
+      string("*"),
+      string("?"),
+      edit_distance
+    ])
+
+  normal =
+    lookahead_not(stopwords)
+    |> choice([
+      ignore(string("\\")) |> utf8_char([]),
+      utf8_char([])
+    ])
+    |> repeat()
+    |> reduce({List, :to_string, []})
+    |> unwrap_and_tag(:literal)
+    |> optional(edit_distance)
+    |> eos()
+
+  # Runs of Kleene stars are coalesced.
+  # Fuzzy search has no meaning in wildcard mode, so we ignore it.
+  wildcard =
+    lookahead_not(edit_distance)
+    |> choice([
+      ignore(string("\\")) |> utf8_char([]),
+      string("*") |> ignore(repeat(string("*"))),
+      utf8_char([])
+    ])
+    |> repeat()
+    |> reduce({List, :to_string, []})
+    |> unwrap_and_tag(:wildcard)
+    |> ignore(optional(edit_distance))
+    |> eos()
+
+  literal =
+    choice([
+      normal,
+      wildcard
+    ])
+
+  defparsec :parse, literal
+end
\ No newline at end of file
diff --git a/lib/search/ngram_parser.ex b/lib/search/ngram_parser.ex
new file mode 100644
index 00000000..370e1e42
--- /dev/null
+++ b/lib/search/ngram_parser.ex
@@ -0,0 +1,10 @@
+defmodule Search.NgramParser do
+  @moduledoc """
+  Parses ngram field values using the same grammar as `Search.LiteralParser`.
+  """
+
+  alias Search.LiteralParser
+
+  # Dummy stub. Used for convenient parser implementation.
+  def parse(input), do: LiteralParser.parse(input)
+end
\ No newline at end of file
diff --git a/lib/search/parser.ex b/lib/search/parser.ex
new file mode 100644
index 00000000..3608a512
--- /dev/null
+++ b/lib/search/parser.ex
@@ -0,0 +1,274 @@
+defmodule Search.Parser do
+  @moduledoc """
+  Parses a search string into a nested query map (`bool`, `term`, `range`,
+  `wildcard`, ...).
+
+  The input is tokenized by `Search.Lexer` and reduced by a recursive descent
+  parser. Each term is split into field, range operator and value by
+  `Search.TermRangeParser`, and the value is parsed by the field's parser.
+  """
+
+  alias Search.{
+    BoolParser,
+    DateParser,
+    FloatParser,
+    IntParser,
+    IpParser,
+    Lexer,
+    LiteralParser,
+    NgramParser,
+    Parser,
+    TermRangeParser
+  }
+
+  defstruct [
+    :default_field,
+    bool_fields: [],
+    date_fields: [],
+    float_fields: [],
+    int_fields: [],
+    ip_fields: [],
+    literal_fields: [],
+    ngram_fields: [],
+    custom_fields: [],
+    transforms: %{},
+    aliases: %{},
+    __fields__: [],
+    __data__: nil
+  ]
+
+  @doc """
+  Builds a parser struct from `options` and indexes its declared fields.
+  """
+  def parser(options) do
+    parser = struct(Parser, options)
+
+    # Kept as a list of {parser_module, field_name} pairs: a map keyed by
+    # parser module would retain only one field per field type.
+    fields =
+      Enum.map(parser.bool_fields, fn f -> {BoolParser, f} end) ++
+        Enum.map(parser.date_fields, fn f -> {DateParser, f} end) ++
+        Enum.map(parser.float_fields, fn f -> {FloatParser, f} end) ++
+        Enum.map(parser.int_fields, fn f -> {IntParser, f} end) ++
+        Enum.map(parser.ip_fields, fn f -> {IpParser, f} end) ++
+        Enum.map(parser.literal_fields, fn f -> {LiteralParser, f} end) ++
+        Enum.map(parser.ngram_fields, fn f -> {NgramParser, f} end) ++
+        Enum.map(parser.custom_fields, fn f -> {:custom_field, f} end)
+
+    %{parser | __fields__: fields}
+  end
+
+  @doc """
+  Parses `input`, returning `{:ok, tree}` or `{:error, message}`.
+
+  `context` is passed as the first argument to custom field transforms.
+  """
+  def parse(%Parser{} = parser, input, context \\ nil) do
+    parser = %{parser | __data__: context}
+
+    with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input),
+         {:ok, {tree, []}} <- search_top(parser, tokens) do
+      {:ok, tree}
+    else
+      _ ->
+        {:error, "Search parsing error."}
+    end
+  end
+
+  #
+  # Predictive LL(1) RD parser for search grammar
+  #
+
+  defp search_top(parser, tokens), do: search_or(parser, tokens)
+
+  defp search_or(parser, tokens) do
+    case search_and(parser, tokens) do
+      {:ok, {left, [{:or, _} | r_tokens]}} ->
+        # The recursive call returns a tagged tuple; unwrap it or pass the
+        # error through.
+        with {:ok, {right, rest}} <- search_or(parser, r_tokens) do
+          {:ok, {%{bool: %{should: [left, right]}}, rest}}
+        end
+
+      value ->
+        value
+    end
+  end
+
+  defp search_and(parser, tokens) do
+    case search_boost(parser, tokens) do
+      {:ok, {left, [{:and, _} | r_tokens]}} ->
+        # Recurse into search_and/2, not search_or/2, so that AND binds
+        # tighter than OR on both sides of the operator.
+        with {:ok, {right, rest}} <- search_and(parser, r_tokens) do
+          {:ok, {%{bool: %{must: [left, right]}}, rest}}
+        end
+
+      value ->
+        value
+    end
+  end
+
+  defp search_boost(parser, tokens) do
+    case search_not(parser, tokens) do
+      {:ok, {child, [{:boost, value} | r_tokens]}} ->
+        {:ok, {%{function_score: %{query: child, boost_factor: value}}, r_tokens}}
+
+      value ->
+        value
+    end
+  end
+
+  defp search_not(parser, [{:not, _} | rest]) do
+    case search_group(parser, rest) do
+      {:ok, {child, r_tokens}} ->
+        {:ok, {%{bool: %{must_not: child}}, r_tokens}}
+
+      err ->
+        err
+    end
+  end
+
+  defp search_not(parser, tokens), do: search_group(parser, tokens)
+
+  defp search_group(parser, [{:lparen, _} | rest]) do
+    case search_top(parser, rest) do
+      {:ok, {child, [{:rparen, _} | r_tokens]}} ->
+        {:ok, {child, r_tokens}}
+
+      {:ok, {_child, _tokens}} ->
+        {:error, "Imbalanced parentheses."}
+
+      err ->
+        err
+    end
+  end
+
+  defp search_group(_parser, [{:rparen, _} | _rest]) do
+    {:error, "Imbalanced parentheses."}
+  end
+
+  defp search_group(parser, tokens), do: search_field(parser, tokens)
+
+  # Search.Lexer tags terms as :text.
+  defp search_field(parser, [{:text, value} | r_tokens]) do
+    tokens = TermRangeParser.parse(value, parser.__fields__, parser.default_field)
+
+    case field_top(parser, tokens) do
+      {:ok, {child, []}} ->
+        {:ok, {child, r_tokens}}
+
+      err ->
+        err
+    end
+  end
+
+  # Empty input, or an operator where a term is required.
+  defp search_field(_parser, _tokens), do: {:error, "Expected a term."}
+
+  #
+  # Predictive LL(k) RD parser for search terms in parent grammar
+  #
+
+  defp field_top(parser, tokens), do: field_term(parser, tokens)
+
+  defp field_term(parser, [custom_field: field_name, range: :eq, value: value]) do
+    case parser.transforms[field_name].(parser.__data__, value) do
+      {:ok, child} ->
+        {:ok, {child, []}}
+
+      err ->
+        err
+    end
+  end
+
+  # Custom fields have no parser module, so they must not reach the clause below.
+  defp field_term(_parser, [{:custom_field, field_name} | _rest]),
+    do: {:error, "unsupported range for " <> field_name}
+
+  defp field_term(parser, [{field_parser, field_name}, {:range, range}, {:value, value}]) do
+    # N.B.: field_parser is an atom
+    case field_parser.parse(value) do
+      {:ok, extra_tokens, _1, _2, _3, _4} ->
+        field_type(parser, [{field_parser, field_name}, {:range, range}] ++ extra_tokens)
+
+      err ->
+        err
+    end
+  end
+
+  # Types which do not support ranges
+
+  defp field_type(parser, [{LiteralParser, field_name}, range: :eq, literal: value]),
+    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}
+
+  defp field_type(parser, [{LiteralParser, field_name}, range: :eq, literal: value, fuzz: fuzz]),
+    do: {:ok, {%{fuzzy: %{field(parser, field_name) => %{value: value, fuzziness: fuzz}}}, []}}
+
+  defp field_type(_parser, [{LiteralParser, _field_name}, range: :eq, wildcard: "*"]),
+    do: {:ok, {%{match_all: %{}}, []}}
+
+  defp field_type(parser, [{LiteralParser, field_name}, range: :eq, wildcard: value]),
+    do: {:ok, {%{wildcard: %{field(parser, field_name) => value}}, []}}
+
+  defp field_type(parser, [{NgramParser, field_name}, range: :eq, literal: value]),
+    do: {:ok, {%{match_phrase: %{field(parser, field_name) => value}}, []}}
+
+  defp field_type(parser, [{NgramParser, field_name}, range: :eq, literal: value, fuzz: _fuzz]),
+    do: {:ok, {%{match_phrase: %{field(parser, field_name) => value}}, []}}
+
+  defp field_type(_parser, [{NgramParser, _field_name}, range: :eq, wildcard: "*"]),
+    do: {:ok, {%{match_all: %{}}, []}}
+
+  defp field_type(parser, [{NgramParser, field_name}, range: :eq, wildcard: value]),
+    do: {:ok, {%{wildcard: %{field(parser, field_name) => value}}, []}}
+
+  defp field_type(parser, [{BoolParser, field_name}, range: :eq, bool: value]),
+    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}
+
+  defp field_type(parser, [{IpParser, field_name}, range: :eq, ip: value]),
+    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}
+
+  # Types which do support ranges
+
+  defp field_type(parser, [{IntParser, field_name}, range: :eq, int: value]),
+    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}
+
+  defp field_type(parser, [{IntParser, field_name}, range: :eq, int_range: [lower, upper]]),
+    do: {:ok, {%{range: %{field(parser, field_name) => %{gte: lower, lte: upper}}}, []}}
+
+  defp field_type(parser, [{IntParser, field_name}, range: range, int: value]),
+    do: {:ok, {%{range: %{field(parser, field_name) => %{range => value}}}, []}}
+
+  defp field_type(_parser, [{IntParser, field_name}, range: _range, int_range: _value]),
+    do: {:error, "multiple ranges specified for " <> field_name}
+
+  defp field_type(parser, [{FloatParser, field_name}, range: :eq, float: value]),
+    do: {:ok, {%{term: %{field(parser, field_name) => value}}, []}}
+
+  defp field_type(parser, [{FloatParser, field_name}, range: :eq, float_range: [lower, upper]]),
+    do: {:ok, {%{range: %{field(parser, field_name) => %{gte: lower, lte: upper}}}, []}}
+
+  defp field_type(parser, [{FloatParser, field_name}, range: range, float: value]),
+    do: {:ok, {%{range: %{field(parser, field_name) => %{range => value}}}, []}}
+
+  defp field_type(_parser, [{FloatParser, field_name}, range: _range, float_range: _value]),
+    do: {:error, "multiple ranges specified for " <> field_name}
+
+  defp field_type(parser, [{DateParser, field_name}, range: :eq, date: [lower, upper]]),
+    do: {:ok, {%{range: %{field(parser, field_name) => %{gte: lower, lte: upper}}}, []}}
+
+  defp field_type(parser, [{DateParser, field_name}, range: r, date: [_lower, upper]]) when r in [:lte, :gt],
+    do: {:ok, {%{range: %{field(parser, field_name) => %{r => upper}}}, []}}
+
+  defp field_type(parser, [{DateParser, field_name}, range: r, date: [lower, _upper]]) when r in [:gte, :lt],
+    do: {:ok, {%{range: %{field(parser, field_name) => %{r => lower}}}, []}}
+
+  # Any remaining combination is a range operator on a type without ranges.
+  defp field_type(_parser, [{_field_parser, field_name} | _rest]),
+    do: {:error, "unsupported range for " <> field_name}
+
+  defp field(parser, field_name) do
+    parser.aliases[field_name] || field_name
+  end
+end
\ No newline at end of file
diff --git a/lib/search/term_range_parser.ex b/lib/search/term_range_parser.ex
new file mode 100644
index 00000000..2a9f7fb3
--- /dev/null
+++ b/lib/search/term_range_parser.ex
@@ -0,0 +1,53 @@
+defmodule Search.TermRangeParser do
+  @moduledoc """
+  Splits a raw search term into field, range operator and value tokens.
+  """
+
+  alias Search.LiteralParser
+
+  # Unfortunately, we can't use NimbleParsec here. It requires
+  # the compiler, and we're not in a macro environment.
+
+  @doc """
+  Returns `[{field_parser, field_name}, range: op, value: value]` for the
+  first `{field_parser, field_name}` pair in `fields` whose name prefixes
+  `input` as `name:` or `name.op:` (`op` being `eq`, `gt`, `gte`, `lt` or
+  `lte`). Otherwise the whole input is matched literally against
+  `default_field`.
+  """
+  def parse(input, fields, default_field) do
+    tokens =
+      Enum.find_value(fields, fn {p, f} ->
+        field(input, f, p)
+      end)
+
+    tokens || [{LiteralParser, default_field}, range: :eq, value: input]
+  end
+
+  defp field(input, field_name, field_parser) do
+    field_sz = byte_size(field_name)
+
+    case input do
+      <<^field_name::binary-size(field_sz), ":", value::binary>> ->
+        [{field_parser, field_name}, range: :eq, value: value]
+
+      <<^field_name::binary-size(field_sz), ".eq:", value::binary>> ->
+        [{field_parser, field_name}, range: :eq, value: value]
+
+      <<^field_name::binary-size(field_sz), ".gt:", value::binary>> ->
+        [{field_parser, field_name}, range: :gt, value: value]
+
+      <<^field_name::binary-size(field_sz), ".gte:", value::binary>> ->
+        [{field_parser, field_name}, range: :gte, value: value]
+
+      <<^field_name::binary-size(field_sz), ".lt:", value::binary>> ->
+        [{field_parser, field_name}, range: :lt, value: value]
+
+      <<^field_name::binary-size(field_sz), ".lte:", value::binary>> ->
+        [{field_parser, field_name}, range: :lte, value: value]
+
+      _ ->
+        nil
+    end
+  end
+end
\ No newline at end of file