From 8cb0cbc244dd49cb9c912505d7d719481d542b54 Mon Sep 17 00:00:00 2001
From: "Liam P. White" <byteslice@airmail.cc>
Date: Tue, 27 Aug 2019 19:37:26 -0400
Subject: [PATCH] Finish search parser bundle with named defparser/deflexer macros

---
 lib/philomena/images/query.ex   |  68 +++++++++++++++++-
 lib/philomena/search/helpers.ex |  18 +++++
 lib/philomena/search/lexer.ex   |  20 +++---
 lib/philomena/search/parser.ex  | 121 ++++++++++++++------------------
 4 files changed, 145 insertions(+), 82 deletions(-)
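
Notes (commentary below is not part of the commit message):

The use-based parser generation is replaced with explicit defparser/deflexer
macros so that one module can host several parsers with different field
permissions (anonymous, user, moderator). Every generated function and
combinator is prefixed with the parser name, which is why the lexer's :text
combinator becomes :"#{name}_text" as well.

A rough usage sketch, assuming a ctx map carrying the current user as the
"my" transform expects (illustrative only, not part of this patch):

    ctx = %{user: %{id: 1}}

    case Philomena.Images.Query.user_parser(ctx, "my:faves, score.gte:100") do
      {:ok, query} -> query    # map shaped like an Elasticsearch bool query
      {:error, msg} -> msg     # human-readable lex/parse error
    end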

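The escaping helpers now live in Philomena.Search.Helpers so every generated
parser shares one copy. Their intended behaviour, inferred from the regexes
(hedged; outputs shown as Elixir string literals, not verified output):

    import Philomena.Search.Helpers

    # "*" and "?" are wildcards unless preceded by a backslash
    contains_wildcard?("rose*")      # => true
    contains_wildcard?("rose\\*")    # => false

    # unescape_wildcard/1 strips escapes but keeps \* and \? so escaped
    # wildcards reach the wildcard query as literal characters
    unescape_wildcard("\\(rose\\*")  # => "(rose\\*"

    # unescape_regular/1 strips every escape for non-wildcard queries
    unescape_regular("\\(rose\\)")   # => "(rose)"
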
diff --git a/lib/philomena/images/query.ex b/lib/philomena/images/query.ex
index a4114851..d84f135a 100644
--- a/lib/philomena/images/query.ex
+++ b/lib/philomena/images/query.ex
@@ -1,5 +1,7 @@
 defmodule Philomena.Images.Query do
-  use Philomena.Search.Parser,
+  import Philomena.Search.Parser
+
+  defparser "anonymous",
     int:
       ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
     float: ~W(aspect_ratio wilson_score),
@@ -16,6 +18,66 @@ defmodule Philomena.Images.Query do
       "faved_by" => "favourited_by_users",
       "faved_by_id" => "favourited_by_user_ids"
     },
-    default: "namespaced_tags.name",
-    name: "anonymous"
+    default: "namespaced_tags.name"
+
+  defparser "user",
+    int:
+      ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
+    float: ~W(aspect_ratio wilson_score),
+    date: ~W(created_at updated_at first_seen_at),
+    literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
+    ngram: ~W(description),
+    custom: ~W(gallery_id my),
+    transforms: %{
+      "gallery_id" => fn _ctx, value ->
+        %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
+      end,
+      "my" => fn
+        %{user: %{id: id}}, "faves" -> %{term: %{favourited_by_user_ids: id}}
+        %{user: %{id: id}}, "upvotes" -> %{term: %{upvoter_ids: id}}
+        %{user: %{id: id}}, "downvotes" -> %{term: %{downvoter_ids: id}}
+        %{user: _u}, "watched" ->
+          %{query: %{match_all: %{}}} # todo
+      end
+    },
+    aliases: %{
+      "faved_by" => "favourited_by_users",
+      "faved_by_id" => "favourited_by_user_ids"
+    },
+    default: "namespaced_tags.name"
+
+  defparser "moderator",
+    int:
+      ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id upvoted_by_id downvoted_by_id tag_count true_uploader_id hidden_by_id deleted_by_user_id),
+    float: ~W(aspect_ratio wilson_score),
+    date: ~W(created_at updated_at first_seen_at),
+    literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user),
+    ngram: ~W(description deletion_reason),
+    ip: ~W(ip),
+    bool: ~W(deleted),
+    custom: ~W(gallery_id my),
+    transforms: %{
+      "gallery_id" => fn _ctx, value ->
+        %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
+      end,
+      "my" => fn
+        %{user: %{id: id}}, "faves" -> %{term: %{favourited_by_user_ids: id}}
+        %{user: %{id: id}}, "upvotes" -> %{term: %{upvoter_ids: id}}
+        %{user: %{id: id}}, "downvotes" -> %{term: %{downvoter_ids: id}}
+        %{user: _u}, "watched" ->
+          %{query: %{match_all: %{}}} # todo
+      end
+    },
+    aliases: %{
+      "faved_by" =>        "favourited_by_users",
+      "upvoted_by" =>      "upvoters",
+      "downvoted_by" =>    "downvoters",
+      "faved_by_id" =>     "favourited_by_user_ids",
+      "upvoted_by_id" =>   "upvoter_ids",
+      "downvoted_by_id" => "downvoter_ids",
+      "hidden_by" =>       "hidden_by_users",
+      "hidden_by_id" =>    "hidden_by_user_ids",
+      "deleted" =>         "hidden_from_users"
+    },
+    default: "namespaced_tags.name"
 end
diff --git a/lib/philomena/search/helpers.ex b/lib/philomena/search/helpers.ex
index 96b2abc3..58b4af9e 100644
--- a/lib/philomena/search/helpers.ex
+++ b/lib/philomena/search/helpers.ex
@@ -125,4 +125,22 @@ defmodule Philomena.Search.Helpers do
   def full_choice(combinator, choices) do
     choice(combinator, choices)
   end
+
+  def contains_wildcard?(value) do
+    String.match?(value, ~r/(?<!\\)(?:\\\\)*[\*\?]/)
+  end
+
+  def unescape_wildcard(value) do
+    # '*' and '?' are wildcard characters in the right context;
+    # don't unescape them.
+    Regex.replace(~r/(?<!\\)(?:\\)*([^\\\*\?])/, value, "\\1")
+  end
+
+  def unescape_regular(value) do
+    Regex.replace(~r/(?<!\\)(?:\\)*(.)/, value, "\\1")
+  end
+
+  def process_term(term) do
+    term |> String.trim() |> String.downcase()
+  end
 end
diff --git a/lib/philomena/search/lexer.ex b/lib/philomena/search/lexer.ex
index ebe7a90c..83b21fe7 100644
--- a/lib/philomena/search/lexer.ex
+++ b/lib/philomena/search/lexer.ex
@@ -1,5 +1,5 @@
 defmodule Philomena.Search.Lexer do
-  defmacro __using__(opts) do
+  defmacro deflexer(name, opts) do
     literal_fields = Keyword.get(opts, :literal, []) |> Macro.expand(__CALLER__)
     ngram_fields = Keyword.get(opts, :ngram, []) |> Macro.expand(__CALLER__)
     bool_fields = Keyword.get(opts, :bool, []) |> Macro.expand(__CALLER__)
@@ -8,9 +8,8 @@ defmodule Philomena.Search.Lexer do
     int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__)
     ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__)
     custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__)
-    lexer_name = :"#{Keyword.fetch!(opts, :name)}_lexer"
 
-    quote do
+    quote location: :keep do
       import NimbleParsec
       import Philomena.Search.Helpers
 
@@ -342,29 +341,30 @@ defmodule Philomena.Search.Lexer do
       quoted_numeric = ignore(quot) |> concat(numeric) |> ignore(quot)
 
       stop_words =
-        choice([
+        repeat(space)
+        |> choice([
           string("\\") |> eos(),
           string(","),
-          concat(space, l_and),
-          concat(space, l_or),
+          l_and,
+          l_or,
           rparen,
           fuzz,
           boost
         ])
 
       defcombinatorp(
-        :text,
+        unquote(:"#{name}_text"),
         lookahead_not(stop_words)
         |> choice([
           string("\\") |> utf8_char([]),
-          string("(") |> parsec(:text) |> string(")"),
+          string("(") |> parsec(unquote(:"#{name}_text")) |> string(")"),
           utf8_char([])
         ])
         |> times(min: 1)
       )
 
       text =
-        parsec(:text)
+        parsec(unquote(:"#{name}_text"))
         |> reduce({List, :to_string, []})
         |> unwrap_and_tag(:text)
 
@@ -462,7 +462,7 @@ defmodule Philomena.Search.Lexer do
         times(outer, min: 1)
         |> eos()
 
-      defparsec(unquote(lexer_name), search)
+      defparsec(unquote(:"#{name}_lexer"), search)
     end
   end
 end
diff --git a/lib/philomena/search/parser.ex b/lib/philomena/search/parser.ex
index 9dbfe024..57e7eb3d 100644
--- a/lib/philomena/search/parser.ex
+++ b/lib/philomena/search/parser.ex
@@ -1,49 +1,50 @@
 defmodule Philomena.Search.Parser do
-  defmacro __using__(opts) do
-    lexer_name = :"#{Keyword.fetch!(opts, :name)}_lexer"
-    parser_name = :"#{Keyword.fetch!(opts, :name)}_parser"
+  defmacro defparser(name, opts) do
     field_transforms = Keyword.get(opts, :transforms, %{})
     field_aliases = Keyword.get(opts, :aliases, %{})
     default_field = Keyword.fetch!(opts, :default)
 
     quote location: :keep do
-      use Philomena.Search.Lexer, unquote(opts)
+      import Philomena.Search.Lexer
+      import Philomena.Search.Helpers
 
-      def unquote(parser_name)(ctx, input) do
-        with {:ok, tree, _1, _2, _3, _4} <- unquote(lexer_name)(input) do
-          parse(ctx, tree)
+      deflexer unquote(name), unquote(opts)
+
+      def unquote(:"#{name}_parser")(ctx, input) do
+        with {:ok, tree, _1, _2, _3, _4} <- unquote(:"#{name}_lexer")(input) do
+          unquote(:"#{name}_parse")(ctx, tree)
         else
           {:error, msg, _1, _2, _3, _4} ->
             {:error, msg}
         end
       end
 
-      defp parse(ctx, tokens) do
-        {tree, []} = search_top(ctx, tokens)
+      defp unquote(:"#{name}_parse")(ctx, tokens) do
+        {tree, []} = unquote(:"#{name}_top")(ctx, tokens)
 
         {:ok, tree}
       rescue
         e in ArgumentError ->
           {:error, e.message}

         _ ->
           {:error, "Parsing error."}
       end
 
       #
       # Predictive LL(k) parser for search grammar
       #
 
-      defp search_top(ctx, tokens), do: search_or(ctx, tokens)
+      defp unquote(:"#{name}_top")(ctx, tokens), do: unquote(:"#{name}_or")(ctx, tokens)
 
       #
       # Boolean OR
       #
 
-      defp search_or(ctx, tokens) do
-        case search_and(ctx, tokens) do
+      defp unquote(:"#{name}_or")(ctx, tokens) do
+        case unquote(:"#{name}_and")(ctx, tokens) do
           {left, [{:or, _} | r_tokens]} ->
-            {right, rest} = search_or(ctx, r_tokens)
+            {right, rest} = unquote(:"#{name}_or")(ctx, r_tokens)
             {%{bool: %{should: [left, right]}}, rest}
 
           {child, rest} ->
@@ -55,10 +56,10 @@ defmodule Philomena.Search.Parser do
       # Boolean AND
       #
 
-      defp search_and(ctx, tokens) do
-        case search_boost(ctx, tokens) do
+      defp unquote(:"#{name}_and")(ctx, tokens) do
+        case unquote(:"#{name}_boost")(ctx, tokens) do
           {left, [{:and, _} | r_tokens]} ->
-            {right, rest} = search_and(ctx, r_tokens)
+            {right, rest} = unquote(:"#{name}_and")(ctx, r_tokens)
             {%{bool: %{must: [left, right]}}, rest}
 
           {child, rest} ->
@@ -70,8 +71,8 @@ defmodule Philomena.Search.Parser do
       # Subquery score boosting
       #
 
-      defp search_boost(ctx, tokens) do
-        case search_not(ctx, tokens) do
+      defp unquote(:"#{name}_boost")(ctx, tokens) do
+        case unquote(:"#{name}_not")(ctx, tokens) do
           {child, [{:boost, _}, {:number, value} | r_tokens]} ->
             {%{function_score: %{query: child, boost_factor: value}}, r_tokens}
 
@@ -84,20 +85,20 @@ defmodule Philomena.Search.Parser do
       # Boolean NOT
       #
 
-      defp search_not(ctx, [{:not, _} | r_tokens]) do
-        {child, rest} = search_top(ctx, r_tokens)
+      defp unquote(:"#{name}_not")(ctx, [{:not, _} | r_tokens]) do
+        {child, rest} = unquote(:"#{name}_not")(ctx, r_tokens)
 
         {%{bool: %{must_not: child}}, rest}
       end
 
-      defp search_not(ctx, tokens), do: search_group(ctx, tokens)
+      defp unquote(:"#{name}_not")(ctx, tokens), do: unquote(:"#{name}_group")(ctx, tokens)
 
       #
       # Logical grouping
       #
 
-      defp search_group(ctx, [{:lparen, _} | rest]) do
-        case search_top(ctx, rest) do
+      defp unquote(:"#{name}_group")(ctx, [{:lparen, _} | rest]) do
+        case unquote(:"#{name}_top")(ctx, rest) do
           {child, [{:rparen, _} | r_tokens]} ->
             {child, r_tokens}
 
@@ -106,20 +107,20 @@ defmodule Philomena.Search.Parser do
         end
       end
 
-      defp search_group(_ctx, [{:rparen, _} | _rest]),
+      defp unquote(:"#{name}_group")(_ctx, [{:rparen, _} | _rest]),
         do: raise(ArgumentError, "Imbalanced parentheses.")
 
-      defp search_group(ctx, tokens), do: search_fuzz(ctx, tokens)
+      defp unquote(:"#{name}_group")(ctx, tokens), do: unquote(:"#{name}_fuzz")(ctx, tokens)
 
       #
       # Terms and term fuzzing
       #
 
-      defp search_fuzz(ctx, tokens) do
+      defp unquote(:"#{name}_fuzz")(ctx, tokens) do
         case tokens do
           [{:int_field, field}, {:eq, _}, {:int, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
             {%{
-               range: %{try_alias(field) => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}
+               range: %{unquote(:"#{name}_alias")(field) => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}
              }, r_tokens}
 
           [
@@ -130,7 +131,7 @@ defmodule Philomena.Search.Parser do
             {:number, fuzz} | r_tokens
           ] ->
             {%{
-               range: %{try_alias(field) => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}
+               range: %{unquote(:"#{name}_alias")(field) => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}
              }, r_tokens}
 
           [
@@ -140,7 +141,7 @@ defmodule Philomena.Search.Parser do
             {:fuzz, _},
             {:number, fuzz} | r_tokens
           ] ->
-            {%{fuzzy: %{try_alias(field) => %{value: value, fuzziness: fuzz}}}, r_tokens}
+            {%{fuzzy: %{unquote(:"#{name}_alias")(field) => %{value: value, fuzziness: fuzz}}}, r_tokens}
 
           [
             {:ngram_field, field},
@@ -149,13 +150,13 @@ defmodule Philomena.Search.Parser do
             {:fuzz, _},
             {:number, fuzz} | r_tokens
           ] ->
-            {%{fuzzy: %{try_alias(field) => %{value: value, fuzziness: fuzz}}}, r_tokens}
+            {%{fuzzy: %{unquote(:"#{name}_alias")(field) => %{value: value, fuzziness: fuzz}}}, r_tokens}
 
           [{:default, [text: value]}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
             {%{fuzzy: %{unquote(default_field) => %{value: value, fuzziness: fuzz}}}, r_tokens}
 
           _ ->
-            search_range(ctx, tokens)
+            unquote(:"#{name}_range")(ctx, tokens)
         end
       end
 
@@ -163,60 +164,60 @@ defmodule Philomena.Search.Parser do
       # Range queries
       #
 
-      defp search_range(ctx, tokens) do
+      defp unquote(:"#{name}_range")(ctx, tokens) do
         case tokens do
           [{:int_field, field}, {range, _}, {:int, value} | r_tokens]
           when range in [:gt, :gte, :lt, :lte] ->
-            {%{range: %{try_alias(field) => %{range => value}}}, r_tokens}
+            {%{range: %{unquote(:"#{name}_alias")(field) => %{range => value}}}, r_tokens}
 
           [{:float_field, field}, {range, _}, {:number, value} | r_tokens]
           when range in [:gt, :gte, :lt, :lte] ->
-            {%{range: %{try_alias(field) => %{range => value}}}, r_tokens}
+            {%{range: %{unquote(:"#{name}_alias")(field) => %{range => value}}}, r_tokens}
 
           [{:date_field, field}, {range, _}, {:date, [lower, _higher]} | r_tokens]
           when range in [:gt, :gte, :lt, :lte] ->
-            {%{range: %{try_alias(field) => %{range => lower}}}, r_tokens}
+            {%{range: %{unquote(:"#{name}_alias")(field) => %{range => lower}}}, r_tokens}
 
           _ ->
-            search_custom(ctx, tokens)
+            unquote(:"#{name}_custom")(ctx, tokens)
         end
       end
 
-      defp search_custom(ctx, tokens) do
+      defp unquote(:"#{name}_custom")(ctx, tokens) do
         case tokens do
           [{:custom_field, field}, {:text, value} | r_tokens] ->
             {unquote(field_transforms)[field].(ctx, value), r_tokens}
 
           _ ->
-            search_term(ctx, tokens)
+            unquote(:"#{name}_term")(ctx, tokens)
         end
       end
 
-      defp search_term(_ctx, tokens) do
+      defp unquote(:"#{name}_term")(_ctx, tokens) do
         case tokens do
           [{:date_field, field}, {:eq, _}, {:date, [lower, higher]} | r_tokens] ->
-            {%{range: %{try_alias(field) => %{gte: lower, lte: higher}}}, r_tokens}
+            {%{range: %{unquote(:"#{name}_alias")(field) => %{gte: lower, lte: higher}}}, r_tokens}
 
           [{:ngram_field, field}, {:eq, _}, {:text, value} | r_tokens] ->
             value = process_term(value)
 
             if contains_wildcard?(value) do
-              {%{wildcard: %{try_alias(field) => unescape_wildcard(value)}}, r_tokens}
+              {%{wildcard: %{unquote(:"#{name}_alias")(field) => unescape_wildcard(value)}}, r_tokens}
             else
-              {%{match: %{try_alias(field) => unescape_regular(value)}}, r_tokens}
+              {%{match: %{unquote(:"#{name}_alias")(field) => unescape_regular(value)}}, r_tokens}
             end
 
           [{:literal_field, field}, {:eq, _}, {:text, value} | r_tokens] ->
             value = process_term(value)
 
             if contains_wildcard?(value) do
-              {%{wildcard: %{try_alias(field) => unescape_wildcard(value)}}, r_tokens}
+              {%{wildcard: %{unquote(:"#{name}_alias")(field) => unescape_wildcard(value)}}, r_tokens}
             else
-              {%{term: %{try_alias(field) => unescape_regular(value)}}, r_tokens}
+              {%{term: %{unquote(:"#{name}_alias")(field) => unescape_regular(value)}}, r_tokens}
             end
 
           [{_field_type, field}, {:eq, _}, {_value_type, value} | r_tokens] ->
-            {%{term: %{try_alias(field) => value}}, r_tokens}
+            {%{term: %{unquote(:"#{name}_alias")(field) => value}}, r_tokens}
 
           [{:default, [text: value]} | r_tokens] ->
             value = process_term(value)
@@ -232,25 +233,7 @@ defmodule Philomena.Search.Parser do
         end
       end
 
-      defp contains_wildcard?(value) do
-        String.match?(value, ~r/(?<!\\)(?:\\\\)*[\*\?]/)
-      end
-
-      defp unescape_wildcard(value) do
-        # '*' and '?' are wildcard characters in the right context;
-        # don't unescape them.
-        Regex.replace(~r/(?<!\\)(?:\\)*([^\\\*\?])/, value, "\\1")
-      end
-
-      defp unescape_regular(value) do
-        Regex.replace(~r/(?<!\\)(?:\\)*(.)/, value, "\\1")
-      end
-
-      defp process_term(term) do
-        term |> String.trim() |> String.downcase()
-      end
-
-      defp try_alias(field) do
+      defp unquote(:"#{name}_alias")(field) do
         unquote(field_aliases)[field] || field
       end
     end