defmodule PhilomenaQuery.Parse.Lexer do
  @moduledoc false

  # Tokenizer for query strings, assembled at compile time from NimbleParsec
  # combinators. The generated `lex` parser produces a flat list of tagged
  # tokens: `:and`, `:or`, `:not`, `:lparen`, `:rparen`, `:boost` and `:term`.

  import NimbleParsec

  alias PhilomenaQuery.Parse.Helpers

  # Referenced by name from `reduce(:to_number)` in the numeric combinator
  # below; simply forwards its input to the shared helper.
  defp to_number(input) do
    Helpers.to_number(input)
  end

  # A single whitespace character. It is matched but never emitted.
  whitespace =
    [" ", "\t", "\n", "\r", "\v", "\f"]
    |> Enum.map(&string/1)
    |> choice()
    |> ignore()

  # Optionally signed run of digits with an optional `.digits` fraction.
  # The matched pieces are joined into one string, then run through
  # to_number/1.
  sign = optional(ascii_char([?-, ?+]))
  fraction = ascii_char([?.]) |> ascii_string([?0..?9], min: 1)

  number =
    sign
    |> ascii_string([?0..?9], min: 1)
    |> optional(fraction)
    |> reduce({List, :to_string, []})
    |> reduce(:to_number)

  # Builds a binary operator token: any of `spellings`, with at least one
  # whitespace character required on both sides, tagged with `tag`.
  spaced_operator = fn spellings, tag ->
    times(whitespace, min: 1)
    |> choice(Enum.map(spellings, &string/1))
    |> times(whitespace, min: 1)
    |> unwrap_and_tag(tag)
  end

  and_word = spaced_operator.(["AND", "&&"], :and)
  or_word = spaced_operator.(["OR", "||"], :or)

  # A comma is also tagged :and, and needs no surrounding whitespace.
  comma = string(",") |> unwrap_and_tag(:and)

  # `NOT` has to be followed by whitespace; `!` and `-` negate as bare prefixes.
  not_word = string("NOT") |> times(whitespace, min: 1) |> unwrap_and_tag(:not)
  not_prefix = choice([string("!"), string("-")]) |> unwrap_and_tag(:not)

  open_paren = string("(") |> unwrap_and_tag(:lparen)
  close_paren = string(")") |> unwrap_and_tag(:rparen)

  double_quote = string("\"")
  backslash = string("\\")

  # `^` followed by a number; the caret is dropped and the number is kept.
  boost = ignore(string("^")) |> concat(number) |> unwrap_and_tag(:boost)

  # Everything that terminates an unquoted term. A closing paren or a boost
  # still terminates the term when preceded by whitespace.
  term_terminator =
    choice([
      comma,
      and_word,
      or_word,
      repeat(whitespace) |> concat(close_paren),
      repeat(whitespace) |> concat(boost)
    ])

  # One piece of an unquoted term, accepted only where no terminator starts.
  # Alternatives are tried in order:
  #   1. a backslash plus the following codepoint (both are kept),
  #   2. a parenthesised group, recursing into :dirty_text for its contents,
  #   3. any single codepoint outside the `(`..`)` range.
  text_piece =
    lookahead_not(term_terminator)
    |> choice([
      backslash |> utf8_char([]),
      string("(") |> parsec(:dirty_text) |> string(")"),
      utf8_char(not: ?(..?))
    ])

  defcombinatorp(:dirty_text, times(text_piece, min: 1))

  # Unquoted term: the collected pieces joined into a single string.
  bare_term =
    parsec(:dirty_text)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:term)
    |> label("a term, like `safe'")

  # One element inside a quoted term. `\"` and `\\` lose their escaping
  # backslash; any other backslash sequence is kept as written.
  quoted_char =
    choice([
      ignore(backslash) |> string("\""),
      ignore(backslash) |> string("\\"),
      backslash |> utf8_char([]),
      utf8_char(not: ?")
    ])

  # Quoted term: the surrounding quotes are dropped and the contents joined.
  quoted_term =
    ignore(double_quote)
    |> repeat(quoted_char)
    |> ignore(double_quote)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:term)
    |> label(~s|a term enclosed in quotes, like `"/)^3^(\\\\"'|)

  # The quoted form is tried before the bare form.
  term = choice([quoted_term, bare_term])

  # Order matters: operators and punctuation are attempted before falling
  # back to whitespace and finally to a term.
  token =
    choice([
      comma,
      not_prefix,
      and_word,
      or_word,
      not_word,
      open_paren,
      close_paren,
      boost,
      whitespace,
      term
    ])

  # The whole input must be consumed as a sequence of tokens.
  defparsec(:lex, repeat(token) |> eos())
end