parser bundle

This commit is contained in:
Liam P. White 2019-08-26 20:00:39 -04:00
parent 675a7de13b
commit a0fa66628a
3 changed files with 243 additions and 202 deletions

View file

@ -1,9 +1,21 @@
defmodule Philomena.Images.Query do defmodule Philomena.Images.Query do
use Philomena.Search.Lexer, use Philomena.Search.Parser,
int: int:
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count), ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
float: ~W(aspect_ratio wilson_score), float: ~W(aspect_ratio wilson_score),
date: ~W(created_at updated_at first_seen_at), date: ~W(created_at updated_at first_seen_at),
literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format), literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
ngram: ~W(description) ngram: ~W(description),
custom: ~W(gallery_id),
transforms: %{
"gallery_id" => fn _ctx, value ->
%{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
end
},
aliases: %{
"faved_by" => "favourited_by_users",
"faved_by_id" => "favourited_by_user_ids"
},
default: "namespaced_tags.name",
name: "anonymous"
end end

View file

@ -8,8 +8,9 @@ defmodule Philomena.Search.Lexer do
int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__) int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__)
ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__) ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__)
custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__) custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__)
lexer_name = :"#{Keyword.fetch!(opts, :name)}_lexer"
quote location: :keep do quote do
import NimbleParsec import NimbleParsec
import Philomena.Search.Helpers import Philomena.Search.Helpers
@ -181,10 +182,10 @@ defmodule Philomena.Search.Lexer do
|> reduce({List, :to_string, []}) |> reduce({List, :to_string, []})
ip_address = ip_address =
choice([ #choice([
ipv4_address |> optional(ipv4_prefix), ipv4_address |> optional(ipv4_prefix)#,
ipv6_address |> optional(ipv6_prefix) #ipv6_address |> optional(ipv6_prefix)
]) #])
|> reduce({Enum, :join, []}) |> reduce({Enum, :join, []})
|> label("a valid IPv4 or IPv6 address and optional CIDR prefix") |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
|> unwrap_and_tag(:ip) |> unwrap_and_tag(:ip)
@ -459,7 +460,7 @@ defmodule Philomena.Search.Lexer do
times(outer, min: 1) times(outer, min: 1)
|> eos() |> eos()
defparsec(:search, search) defparsec(unquote(lexer_name), search)
end end
end end
end end

View file

@ -1,5 +1,27 @@
defmodule Philomena.Search.Parser do defmodule Philomena.Search.Parser do
def parse(ctx, tokens) do defmacro __using__(opts) do
lexer_name = :"#{Keyword.fetch!(opts, :name)}_lexer"
parser_name = :"#{Keyword.fetch!(opts, :name)}_parser"
field_transforms = Keyword.get(opts, :transforms, %{})
field_aliases = Keyword.get(opts, :aliases, %{})
default_field = Keyword.fetch!(opts, :default)
quote location: :keep do
use Philomena.Search.Lexer, unquote(opts)
# Public entry point generated into the using module; its name is
# `<name>_parser`, built from the `:name` option.
#
# Runs the generated `<name>_lexer` function on `input`. On success the
# token list is handed to `parse/2` together with `ctx`, and that result is
# returned as-is. On lexer failure the six-element error tuple is reduced
# to `{:error, msg}`.
def unquote(parser_name)(ctx, input) do
with {:ok, tree, _1, _2, _3, _4} <- unquote(lexer_name)(input) do
parse(ctx, tree)
else
# Lexer failure: keep only the message, drop the remaining tuple elements.
{:error, msg, _1, _2, _3, _4} ->
{:error, msg}
# NOTE(review): only the lexer result flows into `else`, so this clause
# looks unreachable if the lexer always returns six-element tuples —
# confirm against the NimbleParsec-generated function before removing it.
{:error, msg} ->
{:error, msg}
end
end
defp parse(ctx, tokens) do
{tree, []} = search_top(ctx, tokens) {tree, []} = search_top(ctx, tokens)
{:ok, tree} {:ok, tree}
@ -95,22 +117,22 @@ defmodule Philomena.Search.Parser do
# Terms and term fuzzing # Terms and term fuzzing
# #
defp search_fuzz(%{default_field: default_field} = ctx, tokens) do defp search_fuzz(ctx, tokens) do
case tokens do case tokens do
[{:int_field, field}, {:eq, _}, {:int, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:int_field, field}, {:eq, _}, {:int, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{range: %{field => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens} {%{range: %{try_alias(field) => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens}
[{:float_field, field}, {:eq, _}, {:float, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:float_field, field}, {:eq, _}, {:float, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{range: %{field => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens} {%{range: %{try_alias(field) => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens}
[{:literal_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:literal_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{field => %{value: value, fuzziness: fuzz}}}, r_tokens} {%{fuzzy: %{try_alias(field) => %{value: value, fuzziness: fuzz}}}, r_tokens}
[{:ngram_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:ngram_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{field => %{value: value, fuzziness: fuzz}}}, r_tokens} {%{fuzzy: %{try_alias(field) => %{value: value, fuzziness: fuzz}}}, r_tokens}
[{:default, [text: value]}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:default, [text: value]}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{default_field => %{value: value, fuzziness: fuzz}}}, r_tokens} {%{fuzzy: %{unquote(default_field) => %{value: value, fuzziness: fuzz}}}, r_tokens}
_ -> _ ->
search_range(ctx, tokens) search_range(ctx, tokens)
@ -125,15 +147,15 @@ defmodule Philomena.Search.Parser do
case tokens do case tokens do
[{:int_field, field}, {range, _}, {:int, value} | r_tokens] [{:int_field, field}, {range, _}, {:int, value} | r_tokens]
when range in [:gt, :gte, :lt, :lte] -> when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => value}}}, r_tokens} {%{range: %{try_alias(field) => %{range => value}}}, r_tokens}
[{:float_field, field}, {range, _}, {:number, value} | r_tokens] [{:float_field, field}, {range, _}, {:number, value} | r_tokens]
when range in [:gt, :gte, :lt, :lte] -> when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => value}}}, r_tokens} {%{range: %{try_alias(field) => %{range => value}}}, r_tokens}
[{:date_field, field}, {range, _}, {:date, [lower, _higher]} | r_tokens] [{:date_field, field}, {range, _}, {:date, [lower, _higher]} | r_tokens]
when range in [:gt, :gte, :lt, :lte] -> when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => lower}}}, r_tokens} {%{range: %{try_alias(field) => %{range => lower}}}, r_tokens}
_ -> _ ->
search_custom(ctx, tokens) search_custom(ctx, tokens)
@ -143,46 +165,46 @@ defmodule Philomena.Search.Parser do
defp search_custom(ctx, tokens) do defp search_custom(ctx, tokens) do
case tokens do case tokens do
[{:custom_field, field}, {:text, value} | r_tokens] -> [{:custom_field, field}, {:text, value} | r_tokens] ->
{ctx[:field_transforms][field].(value), r_tokens} {unquote(field_transforms)[field].(ctx, value), r_tokens}
_ -> _ ->
search_term(ctx, tokens) search_term(ctx, tokens)
end end
end end
defp search_term(ctx, tokens) do defp search_term(_ctx, tokens) do
case tokens do case tokens do
[{:date_field, field}, {:eq, _}, {:date, [lower, higher]} | r_tokens] -> [{:date_field, field}, {:eq, _}, {:date, [lower, higher]} | r_tokens] ->
{%{range: %{field => %{gte: lower, lte: higher}}}, r_tokens} {%{range: %{try_alias(field) => %{gte: lower, lte: higher}}}, r_tokens}
[{:ngram_field, field}, {:eq, _}, {:text, value} | r_tokens] -> [{:ngram_field, field}, {:eq, _}, {:text, value} | r_tokens] ->
value = process_term(value) value = process_term(value)
if contains_wildcard?(value) do if contains_wildcard?(value) do
{%{wildcard: %{field => unescape_wildcard(value)}}, r_tokens} {%{wildcard: %{try_alias(field) => unescape_wildcard(value)}}, r_tokens}
else else
{%{match: %{field => unescape_regular(value)}}, r_tokens} {%{match: %{try_alias(field) => unescape_regular(value)}}, r_tokens}
end end
[{:literal_field, field}, {:eq, _}, {:text, value} | r_tokens] -> [{:literal_field, field}, {:eq, _}, {:text, value} | r_tokens] ->
value = process_term(value) value = process_term(value)
if contains_wildcard?(value) do if contains_wildcard?(value) do
{%{wildcard: %{field => unescape_wildcard(value)}}, r_tokens} {%{wildcard: %{try_alias(field) => unescape_wildcard(value)}}, r_tokens}
else else
{%{term: %{field => unescape_regular(value)}}, r_tokens} {%{term: %{try_alias(field) => unescape_regular(value)}}, r_tokens}
end end
[{_field_type, field}, {:eq, _}, {_value_type, value} | r_tokens] -> [{_field_type, field}, {:eq, _}, {_value_type, value} | r_tokens] ->
{%{term: %{field => value}}, r_tokens} {%{term: %{try_alias(field) => value}}, r_tokens}
[{:default, [text: value]} | r_tokens] -> [{:default, [text: value]} | r_tokens] ->
value = process_term(value) value = process_term(value)
if contains_wildcard?(value) do if contains_wildcard?(value) do
{%{wildcard: %{ctx[:default_field] => unescape_wildcard(value)}}, r_tokens} {%{wildcard: %{unquote(default_field) => unescape_wildcard(value)}}, r_tokens}
else else
{%{term: %{ctx[:default_field] => unescape_regular(value)}}, r_tokens} {%{term: %{unquote(default_field) => unescape_regular(value)}}, r_tokens}
end end
_ -> _ ->
@ -207,4 +229,10 @@ defmodule Philomena.Search.Parser do
defp process_term(term) do defp process_term(term) do
term |> String.trim() |> String.downcase() term |> String.trim() |> String.downcase()
end end
# Maps a user-facing field name to its aliased index field name.
# Looks `field` up in the alias table injected at compile time and falls
# back to `field` itself when the lookup yields `nil` or `false`.
defp try_alias(field) do
  case unquote(field_aliases)[field] do
    missing when missing in [nil, false] -> field
    aliased -> aliased
  end
end
end
end
end end