From c46cceab03901b24c9a317d5e42cbb09838680db Mon Sep 17 00:00:00 2001
From: "byte[]"
Date: Sat, 2 Nov 2019 16:31:55 -0400
Subject: [PATCH] fixes

---
 lib/philomena/images/query.ex   | 307 +++++++++++++++-----------------
 lib/search/bool_parser.ex       |   1 +
 lib/search/date_parser.ex       |   1 +
 lib/search/float_parser.ex      |   1 +
 lib/search/int_parser.ex        |   1 +
 lib/search/ip_parser.ex         |   2 +-
 lib/search/lexer.ex             |  13 +-
 lib/search/literal_parser.ex    |   4 +
 lib/search/parser.ex            |  54 ++++--
 lib/search/string.ex            |   9 +
 lib/search/term_range_parser.ex |   3 +-
 11 files changed, 203 insertions(+), 193 deletions(-)
 create mode 100644 lib/search/string.ex

diff --git a/lib/philomena/images/query.ex b/lib/philomena/images/query.ex
index 28a4db54..71749b58 100644
--- a/lib/philomena/images/query.ex
+++ b/lib/philomena/images/query.ex
@@ -1,187 +1,162 @@
 defmodule Philomena.Images.Query do
-  import Philomena.Search.Parser
-  import Philomena.Search.String
+  alias Search.Parser
+  alias Philomena.Repo
 
-  defparser("anonymous",
-    int:
-      ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
-    float: ~W(aspect_ratio wilson_score),
-    date: ~W(created_at updated_at first_seen_at),
-    literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
-    ngram: ~W(description),
-    custom: ~W(gallery_id),
-    transforms: %{
-      "gallery_id" => fn _ctx, value ->
-        %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
+  def gallery_id_transform(_ctx, value),
+    do: {:ok, %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}}
+
+  def user_my_transform(%{user: %{id: id}}, "faves"),
+    do: {:ok, %{term: %{favourited_by_user_ids: id}}}
+
+  def user_my_transform(%{user: %{id: id}}, "upvotes"),
+    do: {:ok, %{term: %{upvoter_ids: id}}}
+
+  def user_my_transform(%{user: %{id: id}}, "downvotes"),
+    do: {:ok, %{term: %{downvoter_ids: id}}}
+
+  def user_my_transform(%{watch: true}, "watched"),
+    do: {:error, "Recursive watchlists are not allowed."}
+
+  def user_my_transform(%{user: user} = ctx, "watched") do
+    ctx = Map.merge(ctx, %{watch: true})
+
+    tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
+
+    {:ok, include_query} =
+      Philomena.Images.Query.parse_user(ctx, user.watched_images_query_str |> Search.String.normalize())
+
+    {:ok, exclude_query} =
+      Philomena.Images.Query.parse_user(
+        ctx,
+        user.watched_images_exclude_str |> Search.String.normalize()
+      )
+
+    should = [tag_include, include_query]
+    must_not = [exclude_query]
+
+    must_not =
+      if user.no_spoilered_in_watched do
+        user = user |> Repo.preload(:current_filter)
+
+        tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
+
+        {:ok, spoiler_query} =
+          Philomena.Images.Query.parse_user(
+            ctx,
+            user.current_filter.spoilered_complex_str |> Search.String.normalize()
+          )
+
+        [tag_exclude, spoiler_query | must_not]
+      else
+        must_not
       end
-    },
-    aliases: %{
-      "faved_by" => "favourited_by_users",
-      "faved_by_id" => "favourited_by_user_ids"
-    },
-    default: "namespaced_tags.name"
+
+    {:ok, %{bool: %{should: should, must_not: must_not}}}
+  end
+
+  def user_my_transform(_ctx, _value),
+    do: {:error, "Unknown `my' value."}
+
+
+  int_fields = ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count)
+  float_fields = ~W(aspect_ratio wilson_score)
+  date_fields = ~W(created_at updated_at first_seen_at)
+  literal_fields = ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format)
+  ngram_fields = ~W(description)
+  custom_fields = ~W(gallery_id)
+  default_field = "namespaced_tags.name"
+  transforms = %{
+    "gallery_id" => &Philomena.Images.Query.gallery_id_transform/2
+  }
+  aliases = %{
+    "faved_by" => "favourited_by_users",
+    "faved_by_id" => "favourited_by_user_ids"
+  }
+
+
+  user_custom = custom_fields ++ ~W(my)
+  user_transforms = Map.merge(transforms, %{
+    "my" => &Philomena.Images.Query.user_my_transform/2
+  })
+
+
+  mod_int_fields = int_fields ++ ~W(upvoted_by_id downvoted_by_id true_uploader_id hidden_by_id deleted_by_user_id)
+  mod_literal_fields = literal_fields ++ ~W(fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user)
+  mod_ip_fields = ~W(ip)
+  mod_bool_fields = ~W(deleted)
+  mod_aliases = Map.merge(aliases, %{
+    "upvoted_by" => "upvoters",
+    "downvoted_by" => "downvoters",
+    "upvoted_by_id" => "upvoter_ids",
+    "downvoted_by_id" => "downvoter_ids",
+    "hidden_by" => "hidden_by_users",
+    "hidden_by_id" => "hidden_by_user_ids",
+    "deleted" => "hidden_from_users"
+  })
+
+
+  @anonymous_parser Parser.parser(
+    int_fields: int_fields,
+    float_fields: float_fields,
+    date_fields: date_fields,
+    literal_fields: literal_fields,
+    ngram_fields: ngram_fields,
+    custom_fields: custom_fields,
+    transforms: transforms,
+    aliases: aliases,
+    default_field: default_field
   )
 
-  defparser("user",
-    int:
-      ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
-    float: ~W(aspect_ratio wilson_score),
-    date: ~W(created_at updated_at first_seen_at),
-    literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
-    ngram: ~W(description),
-    custom: ~W(gallery_id my),
-    transforms: %{
-      "gallery_id" => fn _ctx, value ->
-        %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
-      end,
-      "my" => fn
-        %{user: %{id: id}}, "faves" ->
-          %{term: %{favourited_by_user_ids: id}}
-
-        %{user: %{id: id}}, "upvotes" ->
-          %{term: %{upvoter_ids: id}}
-
-        %{user: %{id: id}}, "downvotes" ->
-          %{term: %{downvoter_ids: id}}
-
-        %{watch: true}, "watched" ->
-          raise ArgumentError, "Recursive watchlists are not allowed."
-
-        %{user: user} = ctx, "watched" ->
-          ctx = Map.merge(ctx, %{watch: true})
-
-          tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
-
-          {:ok, include_query} =
-            Philomena.Images.Query.user_parser(ctx, user.watched_images_query_str |> normalize())
-
-          {:ok, exclude_query} =
-            Philomena.Images.Query.user_parser(
-              ctx,
-              user.watched_images_exclude_str |> normalize()
-            )
-
-          should = [tag_include, include_query]
-          must_not = [exclude_query]
-
-          must_not =
-            if user.no_spoilered_in_watched do
-              user = user |> Repo.preload(:current_filter)
-
-              tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
-
-              {:ok, spoiler_query} =
-                Philomena.Images.Query.user_parser(
-                  ctx,
-                  user.current_filter.spoilered_complex_str |> normalize()
-                )
-
-              [tag_exclude, spoiler_query | must_not]
-            else
-              must_not
-            end
-
-          %{bool: %{should: should, must_not: must_not}}
-      end
-    },
-    aliases: %{
-      "faved_by" => "favourited_by_users",
-      "faved_by_id" => "favourited_by_user_ids"
-    },
-    default: "namespaced_tags.name"
+  @user_parser Parser.parser(
+    int_fields: int_fields,
+    float_fields: float_fields,
+    date_fields: date_fields,
+    literal_fields: literal_fields,
+    ngram_fields: ngram_fields,
+    custom_fields: user_custom,
+    transforms: user_transforms,
+    aliases: aliases,
+    default_field: default_field
   )
 
-  defparser("moderator",
-    int:
-      ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id upvoted_by_id downvoted_by_id tag_count true_uploader_id hidden_by_id deleted_by_user-id),
-    float: ~W(aspect_ratio wilson_score),
-    date: ~W(created_at updated_at first_seen_at),
-    literal:
-      ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user),
-    ngram: ~W(description deletion_reason),
-    ip: ~W(ip),
-    bool: ~W(deleted),
-    custom: ~W(gallery_id my),
-    transforms: %{
-      "gallery_id" => fn _ctx, value ->
-        %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
-      end,
-      "my" => fn
-        %{user: %{id: id}}, "faves" ->
-          %{term: %{favourited_by_user_ids: id}}
-
-        %{user: %{id: id}}, "upvotes" ->
-          %{term: %{upvoter_ids: id}}
-
-        %{user: %{id: id}}, "downvotes" ->
-          %{term: %{downvoter_ids: id}}
-
-        %{watch: true}, "watched" ->
-          raise ArgumentError, "Recursive watchlists are not allowed."
-
-        %{user: user} = ctx, "watched" ->
-          ctx = Map.merge(ctx, %{watch: true})
-
-          tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
-
-          {:ok, include_query} =
-            Philomena.Images.Query.moderator_parser(ctx, user.watched_images_query_str |> normalize())
-
-          {:ok, exclude_query} =
-            Philomena.Images.Query.moderator_parser(
-              ctx,
-              user.watched_images_exclude_str |> normalize()
-            )
-
-          should = [tag_include, include_query]
-          must_not = [exclude_query]
-
-          must_not =
-            if user.no_spoilered_in_watched do
-              user = user |> Repo.preload(:current_filter)
-
-              tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
-
-              {:ok, spoiler_query} =
-                Philomena.Images.Query.moderator_parser(
-                  ctx,
-                  user.current_filter.spoilered_complex_str |> normalize()
-                )
-
-              [tag_exclude, spoiler_query | must_not]
-            else
-              must_not
-            end
-
-          %{bool: %{should: should, must_not: must_not}}
-      end
-    },
-    aliases: %{
-      "faved_by" => "favourited_by_users",
-      "upvoted_by" => "upvoters",
-      "downvoted_by" => "downvoters",
-      "faved_by_id" => "favourited_by_user_ids",
-      "upvoted_by_id" => "upvoter_ids",
-      "downvoted_by_id" => "downvoter_ids",
-      "hidden_by" => "hidden_by_users",
-      "hidden_by_id" => "hidden_by_user_ids",
-      "deleted" => "hidden_from_users"
-    },
-    default: "namespaced_tags.name"
+  @moderator_parser Parser.parser(
+    int_fields: mod_int_fields,
+    float_fields: float_fields,
+    date_fields: date_fields,
+    literal_fields: mod_literal_fields,
+    ip_fields: mod_ip_fields,
+    ngram_fields: ngram_fields,
+    bool_fields: mod_bool_fields,
+    custom_fields: user_custom,
+    transforms: user_transforms,
+    aliases: mod_aliases,
+    default_field: default_field
   )
 
+  def parse_anonymous(context, query_string) do
+    Parser.parse(@anonymous_parser, query_string, context)
+  end
+
+  def parse_user(context, query_string) do
+    Parser.parse(@user_parser, query_string, context)
+  end
+
+  def parse_moderator(context, query_string) do
+    Parser.parse(@moderator_parser, query_string, context)
+  end
+
   def compile(user, query_string, watch \\ false) do
     query_string = query_string || ""
 
     case user do
       nil ->
-        anonymous_parser(%{user: nil, watch: watch}, query_string)
+        parse_anonymous(%{user: nil, watch: watch}, query_string)
 
       %{role: role} when role in ~W(user assistant) ->
-        user_parser(%{user: user, watch: watch}, query_string)
+        parse_user(%{user: user, watch: watch}, query_string)
 
       %{role: role} when role in ~W(moderator admin) ->
-        moderator_parser(%{user: user, watch: watch}, query_string)
+        parse_moderator(%{user: user, watch: watch}, query_string)
 
       _ ->
         raise ArgumentError, "Unknown user role."
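
The three defparser/2 macro calls above become shared field lists, three Parser.parser/1 structs held in module attributes, and thin parse_anonymous/2, parse_user/2 and parse_moderator/2 wrappers; compile/3 keeps its existing shape. A rough usage sketch of that entry point (illustrative only: the query strings and the `moderator` variable are assumptions, not part of this patch; it presumes a user struct with a `role` field and the existing `field:value` / comma-as-AND search syntax):

    # A nil user selects the anonymous parser; the context map still carries the watch flag.
    {:ok, query} = Philomena.Images.Query.compile(nil, "safe, solo")

    # A moderator/admin role selects @moderator_parser, which also knows the
    # ip and bool fields introduced in this patch.
    case Philomena.Images.Query.compile(moderator, "deleted:true, ip:192.168.0.0/24") do
      {:ok, query} -> query   # Elasticsearch-compatible query map
      {:error, msg} -> msg    # human-readable diagnostic from Search.Parser
    end
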
diff --git a/lib/search/bool_parser.ex b/lib/search/bool_parser.ex
index 6daa9257..b9d76204 100644
--- a/lib/search/bool_parser.ex
+++ b/lib/search/bool_parser.ex
@@ -8,6 +8,7 @@ defmodule Search.BoolParser do
     ])
     |> unwrap_and_tag(:bool)
     |> eos()
+    |> label("a boolean, like `true' or `false'")
 
   defparsec :parse, bool
 end
\ No newline at end of file
diff --git a/lib/search/date_parser.ex b/lib/search/date_parser.ex
index 30a7cd22..4693d320 100644
--- a/lib/search/date_parser.ex
+++ b/lib/search/date_parser.ex
@@ -182,6 +182,7 @@ defmodule Search.DateParser do
       relative_date
     ])
     |> eos()
+    |> label("an RFC3339 datetime fragment, like `2019-01-01', or a relative date, like `3 days ago'")
 
   defparsec :parse, date
 end
\ No newline at end of file
diff --git a/lib/search/float_parser.ex b/lib/search/float_parser.ex
index 03e518de..51294e8f 100644
--- a/lib/search/float_parser.ex
+++ b/lib/search/float_parser.ex
@@ -25,6 +25,7 @@ defmodule Search.FloatParser do
       float |> unwrap_and_tag(:float)
     ])
     |> eos()
+    |> label("a real number, like `2.7182818' or `-10'")
 
   defparsec :parse, float_parser
 end
\ No newline at end of file
diff --git a/lib/search/int_parser.ex b/lib/search/int_parser.ex
index d616f49b..9dc416ab 100644
--- a/lib/search/int_parser.ex
+++ b/lib/search/int_parser.ex
@@ -18,6 +18,7 @@ defmodule Search.IntParser do
       int |> unwrap_and_tag(:int)
     ])
     |> eos()
+    |> label("an integer, like `3' or `-10'")
 
   defparsec :parse, int_parser
 end
\ No newline at end of file
diff --git a/lib/search/ip_parser.ex b/lib/search/ip_parser.ex
index 3ae7b61e..75e5d0e2 100644
--- a/lib/search/ip_parser.ex
+++ b/lib/search/ip_parser.ex
@@ -130,9 +130,9 @@ defmodule Search.IpParser do
       ipv6_address |> optional(ipv6_prefix)
     ])
     |> reduce({Enum, :join, []})
-    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
     |> unwrap_and_tag(:ip)
     |> eos()
+    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
 
   defparsec :parse, ip
 end
\ No newline at end of file
diff --git a/lib/search/lexer.ex b/lib/search/lexer.ex
index 653c3a8a..60a3129e 100644
--- a/lib/search/lexer.ex
+++ b/lib/search/lexer.ex
@@ -29,7 +29,6 @@ defmodule Search.Lexer do
     |> ignore()
 
   quot = string("\"")
-  backslash = string("\\")
 
   boost = ignore(string("^"))
 
@@ -39,7 +38,6 @@ defmodule Search.Lexer do
   stop_words =
     repeat(space)
     |> choice([
-      backslash |> eos(),
       l_and,
       l_or,
       rparen,
@@ -60,20 +58,21 @@ defmodule Search.Lexer do
   text =
     parsec(:dirty_text)
     |> reduce({List, :to_string, []})
-    |> unwrap_and_tag(:text)
+    |> unwrap_and_tag(:term)
+    |> label("a term, like `safe'")
 
   quoted_text =
     ignore(quot)
-    |> choice([
+    |> repeat(choice([
       ignore(string("\\")) |> string("\""),
       ignore(string("\\")) |> string("\\"),
      string("\\") |> utf8_char([]),
       utf8_char(not: ?")
-    ])
-    |> repeat()
+    ]))
    |> ignore(quot)
    |> reduce({List, :to_string, []})
-    |> unwrap_and_tag(:text)
+    |> unwrap_and_tag(:term)
+    |> label(~s|a term enclosed in quotes, like `"/)^3^(\\\\"'|)
 
   term =
     choice([
diff --git a/lib/search/literal_parser.ex b/lib/search/literal_parser.ex
index a8eccb61..c5eee148 100644
--- a/lib/search/literal_parser.ex
+++ b/lib/search/literal_parser.ex
@@ -1,6 +1,8 @@
 defmodule Search.LiteralParser do
   import NimbleParsec
 
+  defp trim([term]), do: String.trim(term)
+
   edit_distance =
     ignore(string("~"))
     |> integer(min: 1)
@@ -22,6 +24,7 @@ defmodule Search.LiteralParser do
     ])
     |> repeat()
     |> reduce({List, :to_string, []})
+    |> reduce(:trim)
     |> unwrap_and_tag(:literal)
     |> optional(edit_distance)
     |> eos()
@@ -37,6 +40,7 @@ defmodule Search.LiteralParser do
     ])
     |> repeat()
     |> reduce({List, :to_string, []})
+    |> reduce(:trim)
     |> unwrap_and_tag(:wildcard)
     |> ignore(optional(edit_distance))
     |> eos()
diff --git a/lib/search/parser.ex b/lib/search/parser.ex
index 3608a512..5f5422fb 100644
--- a/lib/search/parser.ex
+++ b/lib/search/parser.ex
@@ -30,14 +30,14 @@ defmodule Search.Parser do
   def parser(options) do
     parser = struct(Parser, options)
     fields =
-      Enum.map(parser.bool_fields, fn f -> {BoolParser, f} end) ++
-        Enum.map(parser.date_fields, fn f -> {DateParser, f} end) ++
-        Enum.map(parser.float_fields, fn f -> {FloatParser, f} end) ++
-        Enum.map(parser.int_fields, fn f -> {IntParser, f} end) ++
-        Enum.map(parser.ip_fields, fn f -> {IpParser, f} end) ++
-        Enum.map(parser.literal_fields, fn f -> {LiteralParser, f} end) ++
-        Enum.map(parser.ngram_fields, fn f -> {NgramParser, f} end) ++
-        Enum.map(parser.custom_fields, fn f -> {:custom_field, f} end)
+      Enum.map(parser.bool_fields, fn f -> {f, BoolParser} end) ++
+        Enum.map(parser.date_fields, fn f -> {f, DateParser} end) ++
+        Enum.map(parser.float_fields, fn f -> {f, FloatParser} end) ++
+        Enum.map(parser.int_fields, fn f -> {f, IntParser} end) ++
+        Enum.map(parser.ip_fields, fn f -> {f, IpParser} end) ++
+        Enum.map(parser.literal_fields, fn f -> {f, LiteralParser} end) ++
+        Enum.map(parser.ngram_fields, fn f -> {f, NgramParser} end) ++
+        Enum.map(parser.custom_fields, fn f -> {f, :custom_field} end)
 
     %{parser | __fields__: Map.new(fields)}
   end
@@ -50,11 +50,26 @@ defmodule Search.Parser do
     do
       {:ok, tree}
     else
+      {:ok, {_tree, tokens}} ->
+        {:error, "Junk at end of expression: " <> debug_tokens(tokens)}
+
+      {:error, msg, start_pos, _1, _2, _3} ->
+        {:error, msg <> ", starting at: " <> start_pos}
+
+      {:error, msg} ->
+        {:error, msg}
+
       _ ->
        {:error, "Search parsing error."}
     end
   end
 
+  defp debug_tokens(tokens) do
+    tokens
+    |> Enum.map(fn {_k, v} -> v end)
+    |> Enum.join("")
+  end
+
   #
   # Predictive LL(1) RD parser for search grammar
   #
@@ -62,22 +77,22 @@ defp search_top(parser, tokens), do: search_or(parser, tokens)
 
   defp search_or(parser, tokens) do
-    case search_and(parser, tokens) do
-      {:ok, {left, [{:or, _} | r_tokens]}} ->
-        {right, rest} = search_or(parser, r_tokens)
-        {:ok, {%{bool: %{should: [left, right]}}, rest}}
-
+    with {:ok, {left, [{:or, _} | r_tokens]}} <- search_and(parser, tokens),
+         {:ok, {right, rest}} <- search_or(parser, r_tokens)
+    do
+      {:ok, {%{bool: %{should: [left, right]}}, rest}}
+    else
       value ->
         value
     end
   end
 
   defp search_and(parser, tokens) do
-    case search_boost(parser, tokens) do
-      {:ok, {left, [{:and, _} | r_tokens]}} ->
-        {right, rest} = search_or(parser, r_tokens)
-        {:ok, {%{bool: %{must: [left, right]}}, rest}}
-
+    with {:ok, {left, [{:and, _} | r_tokens]}} <- search_boost(parser, tokens),
+         {:ok, {right, rest}} <- search_and(parser, r_tokens)
+    do
+      {:ok, {%{bool: %{must: [left, right]}}, rest}}
+    else
       value ->
         value
     end
   end
@@ -136,6 +151,9 @@
     end
   end
 
+  defp search_field(_parser, _tokens), do:
+    {:error, "Expected a term."}
+
   #
   # Predictive LL(k) RD parser for search terms in parent grammar
   #
diff --git a/lib/search/string.ex b/lib/search/string.ex
new file mode 100644
index 00000000..adbe35c5
--- /dev/null
+++ b/lib/search/string.ex
@@ -0,0 +1,9 @@
+defmodule Search.String do
+  def normalize(str) do
+    str
+    |> String.replace("\r", "")
+    |> String.split("\n", trim: true)
+    |> Enum.map(fn s -> "(#{s})" end)
+    |> Enum.join(" || ")
+  end
+end
diff --git a/lib/search/term_range_parser.ex b/lib/search/term_range_parser.ex
index 2a9f7fb3..e8aa0717 100644
--- a/lib/search/term_range_parser.ex
+++ b/lib/search/term_range_parser.ex
@@ -1,11 +1,12 @@
 defmodule Search.TermRangeParser do
+  alias Search.LiteralParser
 
   # Unfortunately, we can't use NimbleParsec here. It requires
   # the compiler, and we're not in a macro environment.
   def parse(input, fields, default_field) do
     tokens =
-      Enum.find_value(fields, fn {p, f} ->
+      Enum.find_value(fields, fn {f, p} ->
         field(input, f, p)
       end)
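
The new Search.String.normalize/1 above is what lets the stored multi-line watchlist strings be handed to the parsers as a single expression: each line becomes a parenthesised group and the groups are joined with `||`. A worked example of that pipeline (the input string is illustrative):

    Search.String.normalize("safe\r\nsolo, fluffy\r\n")
    #=> "(safe) || (solo, fluffy)"

On the parser side, a failed parse now reports either the underlying NimbleParsec message together with the input remaining at the failure point, or a "Junk at end of expression: ..." message built from the unconsumed tokens, instead of the blanket "Search parsing error."; the label/2 calls added to the field parsers give those messages friendlier wording.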