byte[] 2019-11-02 16:31:55 -04:00
parent 756599df56
commit c46cceab03
11 changed files with 203 additions and 193 deletions

@@ -1,187 +1,162 @@
 defmodule Philomena.Images.Query do
-  import Philomena.Search.Parser
-  import Philomena.Search.String
+  alias Search.Parser
   alias Philomena.Repo

-  defparser("anonymous",
-    int:
-      ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
-    float: ~W(aspect_ratio wilson_score),
-    date: ~W(created_at updated_at first_seen_at),
-    literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
-    ngram: ~W(description),
-    custom: ~W(gallery_id),
-    transforms: %{
-      "gallery_id" => fn _ctx, value ->
-        %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
+  def gallery_id_transform(_ctx, value),
+    do: {:ok, %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}}
+
+  def user_my_transform(%{user: %{id: id}}, "faves"),
+    do: {:ok, %{term: %{favourited_by_user_ids: id}}}
+
+  def user_my_transform(%{user: %{id: id}}, "upvotes"),
+    do: {:ok, %{term: %{upvoter_ids: id}}}
+
+  def user_my_transform(%{user: %{id: id}}, "downvotes"),
+    do: {:ok, %{term: %{downvoter_ids: id}}}
+
+  def user_my_transform(%{watch: true}, "watched"),
+    do: {:error, "Recursive watchlists are not allowed."}
+
+  def user_my_transform(%{user: user} = ctx, "watched") do
+    ctx = Map.merge(ctx, %{watch: true})
+    tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
+
+    {:ok, include_query} =
+      Philomena.Images.Query.parse_user(ctx, user.watched_images_query_str |> Search.String.normalize())
+
+    {:ok, exclude_query} =
+      Philomena.Images.Query.parse_user(
+        ctx,
+        user.watched_images_exclude_str |> Search.String.normalize()
+      )
+
+    should = [tag_include, include_query]
+    must_not = [exclude_query]
+
+    must_not =
+      if user.no_spoilered_in_watched do
+        user = user |> Repo.preload(:current_filter)
+        tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
+
+        {:ok, spoiler_query} =
+          Philomena.Images.Query.parse_user(
+            ctx,
+            user.current_filter.spoilered_complex_str |> Search.String.normalize()
+          )
+
+        [tag_exclude, spoiler_query | must_not]
+      else
+        must_not
+      end
-      end
-    },
-    aliases: %{
-      "faved_by" => "favourited_by_users",
-      "faved_by_id" => "favourited_by_user_ids"
-    },
-    default: "namespaced_tags.name"
-  )
+
+    %{bool: %{should: should, must_not: must_not}}
+  end
+
+  def user_my_transform(_ctx, _value),
+    do: {:error, "Unknown `my' value."}
+
+  int_fields = ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count)
+  float_fields = ~W(aspect_ratio wilson_score)
+  date_fields = ~W(created_at updated_at first_seen_at)
+  literal_fields = ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format)
+  ngram_fields = ~W(description)
+  custom_fields = ~W(gallery_id)
+  default_field = "namespaced_tags.name"
+
+  transforms = %{
+    "gallery_id" => &Philomena.Images.Query.gallery_id_transform/2
+  }
+
+  aliases = %{
+    "faved_by" => "favourited_by_users",
+    "faved_by_id" => "favourited_by_user_ids"
+  }
+
+  user_custom = custom_fields ++ ~W(my)
+
+  user_transforms = Map.merge(transforms, %{
+    "my" => &Philomena.Images.Query.user_my_transform/2
+  })
+
+  mod_int_fields = int_fields ++ ~W(upvoted_by_id downvoted_by_id true_uploader_id hidden_by_id deleted_by_user_id)
+  mod_literal_fields = literal_fields ++ ~W(fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user)
+  mod_ip_fields = ~W(ip)
+  mod_bool_fields = ~W(deleted)
+
+  mod_aliases = Map.merge(aliases, %{
+    "upvoted_by" => "upvoters",
+    "downvoted_by" => "downvoters",
+    "upvoted_by_id" => "upvoter_ids",
+    "downvoted_by_id" => "downvoter_ids",
+    "hidden_by" => "hidden_by_users",
+    "hidden_by_id" => "hidden_by_user_ids",
+    "deleted" => "hidden_from_users"
+  })
+
+  @anonymous_parser Parser.parser(
+    int_fields: int_fields,
+    float_fields: float_fields,
+    date_fields: date_fields,
+    literal_fields: literal_fields,
+    ngram_fields: ngram_fields,
+    custom_fields: custom_fields,
+    transforms: transforms,
+    aliases: aliases,
+    default_field: default_field
+  )
defparser("user",
int:
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
float: ~W(aspect_ratio wilson_score),
date: ~W(created_at updated_at first_seen_at),
literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
ngram: ~W(description),
custom: ~W(gallery_id my),
transforms: %{
"gallery_id" => fn _ctx, value ->
%{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
end,
"my" => fn
%{user: %{id: id}}, "faves" ->
%{term: %{favourited_by_user_ids: id}}
%{user: %{id: id}}, "upvotes" ->
%{term: %{upvoter_ids: id}}
%{user: %{id: id}}, "downvotes" ->
%{term: %{downvoter_ids: id}}
%{watch: true}, "watched" ->
raise ArgumentError, "Recursive watchlists are not allowed."
%{user: user} = ctx, "watched" ->
ctx = Map.merge(ctx, %{watch: true})
tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
{:ok, include_query} =
Philomena.Images.Query.user_parser(ctx, user.watched_images_query_str |> normalize())
{:ok, exclude_query} =
Philomena.Images.Query.user_parser(
ctx,
user.watched_images_exclude_str |> normalize()
)
should = [tag_include, include_query]
must_not = [exclude_query]
must_not =
if user.no_spoilered_in_watched do
user = user |> Repo.preload(:current_filter)
tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
{:ok, spoiler_query} =
Philomena.Images.Query.user_parser(
ctx,
user.current_filter.spoilered_complex_str |> normalize()
)
[tag_exclude, spoiler_query | must_not]
else
must_not
end
%{bool: %{should: should, must_not: must_not}}
end
},
aliases: %{
"faved_by" => "favourited_by_users",
"faved_by_id" => "favourited_by_user_ids"
},
default: "namespaced_tags.name"
@user_parser Parser.parser(
int_fields: int_fields,
float_fields: float_fields,
date_fields: date_fields,
literal_fields: literal_fields,
ngram_fields: ngram_fields,
custom_fields: user_custom,
transforms: user_transforms,
aliases: aliases,
default_field: default_field
)
defparser("moderator",
int:
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id upvoted_by_id downvoted_by_id tag_count true_uploader_id hidden_by_id deleted_by_user-id),
float: ~W(aspect_ratio wilson_score),
date: ~W(created_at updated_at first_seen_at),
literal:
~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user),
ngram: ~W(description deletion_reason),
ip: ~W(ip),
bool: ~W(deleted),
custom: ~W(gallery_id my),
transforms: %{
"gallery_id" => fn _ctx, value ->
%{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
end,
"my" => fn
%{user: %{id: id}}, "faves" ->
%{term: %{favourited_by_user_ids: id}}
%{user: %{id: id}}, "upvotes" ->
%{term: %{upvoter_ids: id}}
%{user: %{id: id}}, "downvotes" ->
%{term: %{downvoter_ids: id}}
%{watch: true}, "watched" ->
raise ArgumentError, "Recursive watchlists are not allowed."
%{user: user} = ctx, "watched" ->
ctx = Map.merge(ctx, %{watch: true})
tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
{:ok, include_query} =
Philomena.Images.Query.moderator_parser(ctx, user.watched_images_query_str |> normalize())
{:ok, exclude_query} =
Philomena.Images.Query.moderator_parser(
ctx,
user.watched_images_exclude_str |> normalize()
)
should = [tag_include, include_query]
must_not = [exclude_query]
must_not =
if user.no_spoilered_in_watched do
user = user |> Repo.preload(:current_filter)
tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
{:ok, spoiler_query} =
Philomena.Images.Query.moderator_parser(
ctx,
user.current_filter.spoilered_complex_str |> normalize()
)
[tag_exclude, spoiler_query | must_not]
else
must_not
end
%{bool: %{should: should, must_not: must_not}}
end
},
aliases: %{
"faved_by" => "favourited_by_users",
"upvoted_by" => "upvoters",
"downvoted_by" => "downvoters",
"faved_by_id" => "favourited_by_user_ids",
"upvoted_by_id" => "upvoter_ids",
"downvoted_by_id" => "downvoter_ids",
"hidden_by" => "hidden_by_users",
"hidden_by_id" => "hidden_by_user_ids",
"deleted" => "hidden_from_users"
},
default: "namespaced_tags.name"
@moderator_parser Parser.parser(
int_fields: mod_int_fields,
float_fields: float_fields,
date_fields: date_fields,
literal_fields: mod_literal_fields,
ip_fields: mod_ip_fields,
ngram_fields: ngram_fields,
bool_fields: mod_bool_fields,
custom_fields: user_custom,
transforms: user_transforms,
aliases: mod_aliases,
default_field: default_field
)
+
+  def parse_anonymous(context, query_string) do
+    Parser.parse(@anonymous_parser, query_string, context)
+  end
+
+  def parse_user(context, query_string) do
+    Parser.parse(@user_parser, query_string, context)
+  end
+
+  def parse_moderator(context, query_string) do
+    Parser.parse(@moderator_parser, query_string, context)
+  end
+
   def compile(user, query_string, watch \\ false) do
     query_string = query_string || ""

     case user do
       nil ->
-        anonymous_parser(%{user: nil, watch: watch}, query_string)
+        parse_anonymous(%{user: nil, watch: watch}, query_string)

       %{role: role} when role in ~W(user assistant) ->
-        user_parser(%{user: user, watch: watch}, query_string)
+        parse_user(%{user: user, watch: watch}, query_string)

       %{role: role} when role in ~W(moderator admin) ->
-        moderator_parser(%{user: user, watch: watch}, query_string)
+        parse_moderator(%{user: user, watch: watch}, query_string)

       _ ->
         raise ArgumentError, "Unknown user role."
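
The net effect of the rewrite above: the `defparser` macro is gone, the field lists and transform maps are plain module-body data frozen into `@anonymous_parser`, `@user_parser`, and `@moderator_parser`, and the named transform functions return `{:ok, query}` or `{:error, msg}` tuples instead of raising. A minimal sketch of how a caller might drive the new entry points; the `%{role: ...}` maps are stand-ins for the real user struct, assumed from the `compile/3` clauses above:

alias Philomena.Images.Query

# Anonymous visitors get the most restricted parser.
{:ok, query} = Query.compile(nil, "safe, score.gte:100")

# Signed-in users additionally get the `my:` custom field.
{:ok, query} = Query.compile(%{role: "user", id: 1}, "my:faves")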

@@ -8,6 +8,7 @@ defmodule Search.BoolParser do
     ])
     |> unwrap_and_tag(:bool)
     |> eos()
+    |> label("a boolean, like `true' or `false'")

   defparsec :parse, bool
 end
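
The `label/2` call added here (and in the date, float, and int parsers below) replaces NimbleParsec's generated expectation text with a human-readable one. A sketch of the assumed effect on the error tuple:

# Without the label: {:error, "expected string \"true\" or string \"false\" ...", ...}
# With the label (assumed): {:error, "expected a boolean, like `true' or `false'", ...}
Search.BoolParser.parse("maybe")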

@@ -182,6 +182,7 @@ defmodule Search.DateParser do
       relative_date
     ])
     |> eos()
+    |> label("an RFC3339 datetime fragment, like `2019-01-01', or relative date, like `3 days ago'")

   defparsec :parse, date
 end

@@ -25,6 +25,7 @@ defmodule Search.FloatParser do
       float |> unwrap_and_tag(:float)
     ])
     |> eos()
+    |> label("a real number, like `2.7182818' or `-10'")

   defparsec :parse, float_parser
 end

@@ -18,6 +18,7 @@ defmodule Search.IntParser do
       int |> unwrap_and_tag(:int)
     ])
     |> eos()
+    |> label("an integer, like `3' or `-10'")

   defparsec :parse, int_parser
 end

@@ -130,9 +130,9 @@ defmodule Search.IpParser do
       ipv6_address |> optional(ipv6_prefix)
     ])
     |> reduce({Enum, :join, []})
-    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
     |> unwrap_and_tag(:ip)
     |> eos()
+    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")

   defparsec :parse, ip
 end
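
Moving `label/2` after `eos()` widens its scope to the whole rule, so trailing junk after an otherwise valid address should also fail with the friendly message rather than a bare end-of-string expectation (behavior assumed from NimbleParsec's label semantics):

Search.IpParser.parse("192.168.1.1 junk")
#=> {:error, "expected a valid IPv4 or IPv6 address and optional CIDR prefix ...", ...}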

@@ -29,7 +29,6 @@ defmodule Search.Lexer do
     |> ignore()

   quot = string("\"")
-  backslash = string("\\")

   boost =
     ignore(string("^"))
@@ -39,7 +38,6 @@
   stop_words =
     repeat(space)
     |> choice([
-      backslash |> eos(),
       l_and,
       l_or,
       rparen,
@@ -60,20 +58,21 @@
   text =
     parsec(:dirty_text)
     |> reduce({List, :to_string, []})
-    |> unwrap_and_tag(:text)
+    |> unwrap_and_tag(:term)
+    |> label("a term, like `safe'")

   quoted_text =
     ignore(quot)
-    |> choice([
+    |> repeat(choice([
         ignore(string("\\")) |> string("\""),
         ignore(string("\\")) |> string("\\"),
         string("\\") |> utf8_char([]),
         utf8_char(not: ?")
-      ])
-    |> repeat()
+      ]))
     |> ignore(quot)
     |> reduce({List, :to_string, []})
-    |> unwrap_and_tag(:text)
+    |> unwrap_and_tag(:term)
+    |> label(~s|a term enclosed in quotes, like `"/)^3^(\\\\"'|)

   term =
     choice([
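
The switch from `choice([...]) |> repeat()` to `repeat(choice([...]))` is what fixes escape handling: each iteration consumes one escaped quote, one escaped backslash, one verbatim backslash escape, or one plain character, stopping only at the closing quote. A self-contained sketch of the same rule (module name is ours; output shape assumed):

defmodule QuotedTermSketch do
  import NimbleParsec

  quot = string("\"")

  quoted_text =
    ignore(quot)
    |> repeat(
      choice([
        ignore(string("\\")) |> string("\""),
        ignore(string("\\")) |> string("\\"),
        string("\\") |> utf8_char([]),
        utf8_char(not: ?")
      ])
    )
    |> ignore(quot)
    |> reduce({List, :to_string, []})

  defparsec :parse, quoted_text
end

# QuotedTermSketch.parse(~S("a \" b")) should return {:ok, [~S(a " b)], ...}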

@@ -1,6 +1,8 @@
 defmodule Search.LiteralParser do
   import NimbleParsec

+  defp trim([term]), do: String.trim(term)
+
   edit_distance =
     ignore(string("~"))
     |> integer(min: 1)
@@ -22,6 +24,7 @@
     ])
     |> repeat()
     |> reduce({List, :to_string, []})
+    |> reduce(:trim)
     |> unwrap_and_tag(:literal)
     |> optional(edit_distance)
     |> eos()
@@ -37,6 +40,7 @@
     ])
     |> repeat()
     |> reduce({List, :to_string, []})
+    |> reduce(:trim)
     |> unwrap_and_tag(:wildcard)
     |> ignore(optional(edit_distance))
     |> eos()
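
Because `reduce(:trim)` runs after the matched characters are joined, `defp trim([term])` receives a single binary and strips surrounding whitespace before the value is tagged, so padded and clean input index identically. An assumed round trip:

Search.LiteralParser.parse("fluffy mane  ")
#=> {:ok, [literal: "fluffy mane"], ...}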

@@ -30,14 +30,14 @@ defmodule Search.Parser do
   def parser(options) do
     parser = struct(Parser, options)

     fields =
-      Enum.map(parser.bool_fields, fn f -> {BoolParser, f} end) ++
-        Enum.map(parser.date_fields, fn f -> {DateParser, f} end) ++
-        Enum.map(parser.float_fields, fn f -> {FloatParser, f} end) ++
-        Enum.map(parser.int_fields, fn f -> {IntParser, f} end) ++
-        Enum.map(parser.ip_fields, fn f -> {IpParser, f} end) ++
-        Enum.map(parser.literal_fields, fn f -> {LiteralParser, f} end) ++
-        Enum.map(parser.ngram_fields, fn f -> {NgramParser, f} end) ++
-        Enum.map(parser.custom_fields, fn f -> {:custom_field, f} end)
+      Enum.map(parser.bool_fields, fn f -> {f, BoolParser} end) ++
+        Enum.map(parser.date_fields, fn f -> {f, DateParser} end) ++
+        Enum.map(parser.float_fields, fn f -> {f, FloatParser} end) ++
+        Enum.map(parser.int_fields, fn f -> {f, IntParser} end) ++
+        Enum.map(parser.ip_fields, fn f -> {f, IpParser} end) ++
+        Enum.map(parser.literal_fields, fn f -> {f, LiteralParser} end) ++
+        Enum.map(parser.ngram_fields, fn f -> {f, NgramParser} end) ++
+        Enum.map(parser.custom_fields, fn f -> {f, :custom_field} end)

     %{parser | __fields__: Map.new(fields)}
   end
@@ -50,11 +50,26 @@
     do
       {:ok, tree}
     else
+      {:ok, {_tree, tokens}} ->
+        {:error, "Junk at end of expression: " <> debug_tokens(tokens)}
+
+      {:error, msg, start_pos, _1, _2, _3} ->
+        {:error, msg <> ", starting at: " <> start_pos}
+
       {:error, msg} ->
         {:error, msg}

       _ ->
         {:error, "Search parsing error."}
     end
   end

+  defp debug_tokens(tokens) do
+    tokens
+    |> Enum.map(fn {_k, v} -> v end)
+    |> Enum.join("")
+  end
+
   #
   # Predictive LL(1) RD parser for search grammar
   #
@@ -62,22 +77,22 @@
   defp search_top(parser, tokens), do: search_or(parser, tokens)

   defp search_or(parser, tokens) do
-    case search_and(parser, tokens) do
-      {:ok, {left, [{:or, _} | r_tokens]}} ->
-        {right, rest} = search_or(parser, r_tokens)
-        {:ok, {%{bool: %{should: [left, right]}}, rest}}
+    with {:ok, {left, [{:or, _} | r_tokens]}} <- search_and(parser, tokens),
+         {:ok, {right, rest}} <- search_or(parser, r_tokens)
+    do
+      {:ok, {%{bool: %{should: [left, right]}}, rest}}
+    else
       value ->
         value
     end
   end

   defp search_and(parser, tokens) do
-    case search_boost(parser, tokens) do
-      {:ok, {left, [{:and, _} | r_tokens]}} ->
-        {right, rest} = search_or(parser, r_tokens)
-        {:ok, {%{bool: %{must: [left, right]}}, rest}}
+    with {:ok, {left, [{:and, _} | r_tokens]}} <- search_boost(parser, tokens),
+         {:ok, {right, rest}} <- search_and(parser, r_tokens)
+    do
+      {:ok, {%{bool: %{must: [left, right]}}, rest}}
+    else
       value ->
         value
     end
@@ -136,6 +151,9 @@
     end
   end

+  defp search_field(_parser, _tokens),
+    do: {:error, "Expected a term."}
+
   #
   # Predictive LL(k) RD parser for search terms in parent grammar
   #
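
Three changes above work together: `parser/1` now emits `{field, parser}` tuples so `Map.new/1` keys `__fields__` by field name, the OR/AND productions use `with` so an `{:error, _}` from either side propagates instead of crashing a `case` match, and leftover tokens surface as a readable message. A sketch of the assumed error behavior:

# A dangling operator now reports an error tuple rather than raising:
Search.Parser.parse(parser, "safe &&", %{})
#=> {:error, "Expected a term."}

# Unconsumed input is reported through debug_tokens/1, e.g.:
#=> {:error, "Junk at end of expression: )"}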

lib/search/string.ex (new file, +9)

@@ -0,0 +1,9 @@
+defmodule Search.String do
+  def normalize(str) do
+    str
+    |> String.replace("\r", "")
+    |> String.split("\n", trim: true)
+    |> Enum.map(fn s -> "(#{s})" end)
+    |> Enum.join(" || ")
+  end
+end
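
`normalize/1` is what the watched-list transforms in images/query.ex feed multi-line saved queries through: each line becomes a parenthesized clause and the clauses are OR'd together, so a one-filter-per-line list parses as a single disjunction:

Search.String.normalize("rating:safe\r\nscore.gte:100")
#=> "(rating:safe) || (score.gte:100)"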

@@ -1,11 +1,12 @@
 defmodule Search.TermRangeParser do
   alias Search.LiteralParser

+  # Unfortunately, we can't use NimbleParsec here. It requires
+  # the compiler, and we're not in a macro environment.
   def parse(input, fields, default_field) do
     tokens =
-      Enum.find_value(fields, fn {p, f} ->
+      Enum.find_value(fields, fn {f, p} ->
         field(input, f, p)
       end)
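
The `{f, p}` flip keeps this module in step with `Search.Parser.parser/1` above, which now produces `{field, parser}` tuples; the field name must come first on both sides. A minimal illustration of why the order matters once the tuples reach `Map.new/1`:

# {field, parser} keys the lookup map by field name:
Map.new([{"score", Search.IntParser}])
#=> %{"score" => Search.IntParser}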