This commit is contained in:
byte[] 2019-11-02 16:31:55 -04:00
parent 756599df56
commit c46cceab03
11 changed files with 203 additions and 193 deletions

View file

@ -1,187 +1,162 @@
defmodule Philomena.Images.Query do defmodule Philomena.Images.Query do
import Philomena.Search.Parser alias Search.Parser
import Philomena.Search.String alias Philomena.Repo
defparser("anonymous", def gallery_id_transform(_ctx, value),
int: do: {:ok, %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}}
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
float: ~W(aspect_ratio wilson_score), def user_my_transform(%{user: %{id: id}}, "faves"),
date: ~W(created_at updated_at first_seen_at), do: {:ok, %{term: %{favourited_by_user_ids: id}}}
literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
ngram: ~W(description), def user_my_transform(%{user: %{id: id}}, "upvotes"),
custom: ~W(gallery_id), do: {:ok, %{term: %{upvoter_ids: id}}}
transforms: %{
"gallery_id" => fn _ctx, value -> def user_my_transform(%{user: %{id: id}}, "downvotes"),
%{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}} do: {:ok, %{term: %{downvoter_ids: id}}}
def user_my_transform(%{watch: true}, "watched"),
do: {:error, "Recursive watchlists are not allowed."}
def user_my_transform(%{user: user} = ctx, "watched") do
ctx = Map.merge(ctx, %{watch: true})
tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
{:ok, include_query} =
Philomena.Images.Query.parse_user(ctx, user.watched_images_query_str |> Search.String.normalize())
{:ok, exclude_query} =
Philomena.Images.Query.parse_user(
ctx,
user.watched_images_exclude_str |> Search.String.normalize()
)
should = [tag_include, include_query]
must_not = [exclude_query]
must_not =
if user.no_spoilered_in_watched do
user = user |> Repo.preload(:current_filter)
tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
{:ok, spoiler_query} =
Philomena.Images.Query.parse_user(
ctx,
user.current_filter.spoilered_complex_str |> Search.String.normalize()
)
[tag_exclude, spoiler_query | must_not]
else
must_not
end end
},
aliases: %{ %{bool: %{should: should, must_not: must_not}}
"faved_by" => "favourited_by_users", end
"faved_by_id" => "favourited_by_user_ids"
}, def user_my_transform(_ctx, _value),
default: "namespaced_tags.name" do: {:error, "Unknown `my' value."}
int_fields = ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count)
float_fields = ~W(aspect_ratio wilson_score)
date_fields = ~W(created_at updated_at first_seen_at)
literal_fields = ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format)
ngram_fields = ~W(description)
custom_fields = ~W(gallery_id)
default_field = "namespaced_tags.name"
transforms = %{
"gallery_id" => &Philomena.Images.Query.gallery_id_transform/2
}
aliases = %{
"faved_by" => "favourited_by_users",
"faved_by_id" => "favourited_by_user_ids"
}
user_custom = custom_fields ++ ~W(my)
user_transforms = Map.merge(transforms, %{
"my" => &Philomena.Images.Query.user_my_transform/2
})
mod_int_fields = int_fields ++ ~W(upvoted_by_id downvoted_by_id true_uploader_id hidden_by_id deleted_by_user_id)
mod_literal_fields = literal_fields ++ ~W(fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user)
mod_ip_fields = ~W(ip)
mod_bool_fields = ~W(deleted)
mod_aliases = Map.merge(aliases, %{
"upvoted_by" => "upvoters",
"downvoted_by" => "downvoters",
"upvoted_by_id" => "upvoter_ids",
"downvoted_by_id" => "downvoter_ids",
"hidden_by" => "hidden_by_users",
"hidden_by_id" => "hidden_by_user_ids",
"deleted" => "hidden_from_users"
})
@anonymous_parser Parser.parser(
int_fields: int_fields,
float_fields: float_fields,
date_fields: date_fields,
literal_fields: literal_fields,
ngram_fields: ngram_fields,
custom_fields: custom_fields,
transforms: transforms,
aliases: aliases,
default_field: default_field
) )
defparser("user", @user_parser Parser.parser(
int: int_fields: int_fields,
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count), float_fields: float_fields,
float: ~W(aspect_ratio wilson_score), date_fields: date_fields,
date: ~W(created_at updated_at first_seen_at), literal_fields: literal_fields,
literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format), ngram_fields: ngram_fields,
ngram: ~W(description), custom_fields: user_custom,
custom: ~W(gallery_id my), transforms: user_transforms,
transforms: %{ aliases: aliases,
"gallery_id" => fn _ctx, value -> default_field: default_field
%{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
end,
"my" => fn
%{user: %{id: id}}, "faves" ->
%{term: %{favourited_by_user_ids: id}}
%{user: %{id: id}}, "upvotes" ->
%{term: %{upvoter_ids: id}}
%{user: %{id: id}}, "downvotes" ->
%{term: %{downvoter_ids: id}}
%{watch: true}, "watched" ->
raise ArgumentError, "Recursive watchlists are not allowed."
%{user: user} = ctx, "watched" ->
ctx = Map.merge(ctx, %{watch: true})
tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
{:ok, include_query} =
Philomena.Images.Query.user_parser(ctx, user.watched_images_query_str |> normalize())
{:ok, exclude_query} =
Philomena.Images.Query.user_parser(
ctx,
user.watched_images_exclude_str |> normalize()
)
should = [tag_include, include_query]
must_not = [exclude_query]
must_not =
if user.no_spoilered_in_watched do
user = user |> Repo.preload(:current_filter)
tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
{:ok, spoiler_query} =
Philomena.Images.Query.user_parser(
ctx,
user.current_filter.spoilered_complex_str |> normalize()
)
[tag_exclude, spoiler_query | must_not]
else
must_not
end
%{bool: %{should: should, must_not: must_not}}
end
},
aliases: %{
"faved_by" => "favourited_by_users",
"faved_by_id" => "favourited_by_user_ids"
},
default: "namespaced_tags.name"
) )
defparser("moderator", @moderator_parser Parser.parser(
int: int_fields: mod_int_fields,
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id upvoted_by_id downvoted_by_id tag_count true_uploader_id hidden_by_id deleted_by_user-id), float_fields: float_fields,
float: ~W(aspect_ratio wilson_score), date_fields: date_fields,
date: ~W(created_at updated_at first_seen_at), literal_fields: mod_literal_fields,
literal: ip_fields: mod_ip_fields,
~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user), ngram_fields: ngram_fields,
ngram: ~W(description deletion_reason), bool_fields: mod_bool_fields,
ip: ~W(ip), custom_fields: user_custom,
bool: ~W(deleted), transforms: user_transforms,
custom: ~W(gallery_id my), aliases: mod_aliases,
transforms: %{ default_field: default_field
"gallery_id" => fn _ctx, value ->
%{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
end,
"my" => fn
%{user: %{id: id}}, "faves" ->
%{term: %{favourited_by_user_ids: id}}
%{user: %{id: id}}, "upvotes" ->
%{term: %{upvoter_ids: id}}
%{user: %{id: id}}, "downvotes" ->
%{term: %{downvoter_ids: id}}
%{watch: true}, "watched" ->
raise ArgumentError, "Recursive watchlists are not allowed."
%{user: user} = ctx, "watched" ->
ctx = Map.merge(ctx, %{watch: true})
tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
{:ok, include_query} =
Philomena.Images.Query.moderator_parser(ctx, user.watched_images_query_str |> normalize())
{:ok, exclude_query} =
Philomena.Images.Query.moderator_parser(
ctx,
user.watched_images_exclude_str |> normalize()
)
should = [tag_include, include_query]
must_not = [exclude_query]
must_not =
if user.no_spoilered_in_watched do
user = user |> Repo.preload(:current_filter)
tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
{:ok, spoiler_query} =
Philomena.Images.Query.moderator_parser(
ctx,
user.current_filter.spoilered_complex_str |> normalize()
)
[tag_exclude, spoiler_query | must_not]
else
must_not
end
%{bool: %{should: should, must_not: must_not}}
end
},
aliases: %{
"faved_by" => "favourited_by_users",
"upvoted_by" => "upvoters",
"downvoted_by" => "downvoters",
"faved_by_id" => "favourited_by_user_ids",
"upvoted_by_id" => "upvoter_ids",
"downvoted_by_id" => "downvoter_ids",
"hidden_by" => "hidden_by_users",
"hidden_by_id" => "hidden_by_user_ids",
"deleted" => "hidden_from_users"
},
default: "namespaced_tags.name"
) )
def parse_anonymous(context, query_string) do
Parser.parse(@anonymous_parser, query_string, context)
end
def parse_user(context, query_string) do
Parser.parse(@user_parser, query_string, context)
end
def parse_moderator(context, query_string) do
Parser.parse(@moderator_parser, query_string, context)
end
def compile(user, query_string, watch \\ false) do def compile(user, query_string, watch \\ false) do
query_string = query_string || "" query_string = query_string || ""
case user do case user do
nil -> nil ->
anonymous_parser(%{user: nil, watch: watch}, query_string) parse_anonymous(%{user: nil, watch: watch}, query_string)
%{role: role} when role in ~W(user assistant) -> %{role: role} when role in ~W(user assistant) ->
user_parser(%{user: user, watch: watch}, query_string) parse_user(%{user: user, watch: watch}, query_string)
%{role: role} when role in ~W(moderator admin) -> %{role: role} when role in ~W(moderator admin) ->
moderator_parser(%{user: user, watch: watch}, query_string) parse_moderator(%{user: user, watch: watch}, query_string)
_ -> _ ->
raise ArgumentError, "Unknown user role." raise ArgumentError, "Unknown user role."

View file

@ -8,6 +8,7 @@ defmodule Search.BoolParser do
]) ])
|> unwrap_and_tag(:bool) |> unwrap_and_tag(:bool)
|> eos() |> eos()
|> label("a boolean, like `true' or `false'")
defparsec :parse, bool defparsec :parse, bool
end end

View file

@ -182,6 +182,7 @@ defmodule Search.DateParser do
relative_date relative_date
]) ])
|> eos() |> eos()
|> label("a RFC3339 datetime fragment, like `2019-01-01', or relative date, like `3 days ago'")
defparsec :parse, date defparsec :parse, date
end end

View file

@ -25,6 +25,7 @@ defmodule Search.FloatParser do
float |> unwrap_and_tag(:float) float |> unwrap_and_tag(:float)
]) ])
|> eos() |> eos()
|> label("a real number, like `2.7182818' or `-10'")
defparsec :parse, float_parser defparsec :parse, float_parser
end end

View file

@ -18,6 +18,7 @@ defmodule Search.IntParser do
int |> unwrap_and_tag(:int) int |> unwrap_and_tag(:int)
]) ])
|> eos() |> eos()
|> label("an integer, like `3' or `-10'")
defparsec :parse, int_parser defparsec :parse, int_parser
end end

View file

@ -130,9 +130,9 @@ defmodule Search.IpParser do
ipv6_address |> optional(ipv6_prefix) ipv6_address |> optional(ipv6_prefix)
]) ])
|> reduce({Enum, :join, []}) |> reduce({Enum, :join, []})
|> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
|> unwrap_and_tag(:ip) |> unwrap_and_tag(:ip)
|> eos() |> eos()
|> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
defparsec :parse, ip defparsec :parse, ip
end end

View file

@ -29,7 +29,6 @@ defmodule Search.Lexer do
|> ignore() |> ignore()
quot = string("\"") quot = string("\"")
backslash = string("\\")
boost = boost =
ignore(string("^")) ignore(string("^"))
@ -39,7 +38,6 @@ defmodule Search.Lexer do
stop_words = stop_words =
repeat(space) repeat(space)
|> choice([ |> choice([
backslash |> eos(),
l_and, l_and,
l_or, l_or,
rparen, rparen,
@ -60,20 +58,21 @@ defmodule Search.Lexer do
text = text =
parsec(:dirty_text) parsec(:dirty_text)
|> reduce({List, :to_string, []}) |> reduce({List, :to_string, []})
|> unwrap_and_tag(:text) |> unwrap_and_tag(:term)
|> label("a term, like `safe'")
quoted_text = quoted_text =
ignore(quot) ignore(quot)
|> choice([ |> repeat(choice([
ignore(string("\\")) |> string("\""), ignore(string("\\")) |> string("\""),
ignore(string("\\")) |> string("\\"), ignore(string("\\")) |> string("\\"),
string("\\") |> utf8_char([]), string("\\") |> utf8_char([]),
utf8_char(not: ?") utf8_char(not: ?")
]) ]))
|> repeat()
|> ignore(quot) |> ignore(quot)
|> reduce({List, :to_string, []}) |> reduce({List, :to_string, []})
|> unwrap_and_tag(:text) |> unwrap_and_tag(:term)
|> label(~s|a term enclosed in quotes, like `"/)^3^(\\\\"'|)
term = term =
choice([ choice([

View file

@ -1,6 +1,8 @@
defmodule Search.LiteralParser do defmodule Search.LiteralParser do
import NimbleParsec import NimbleParsec
defp trim([term]), do: String.trim(term)
edit_distance = edit_distance =
ignore(string("~")) ignore(string("~"))
|> integer(min: 1) |> integer(min: 1)
@ -22,6 +24,7 @@ defmodule Search.LiteralParser do
]) ])
|> repeat() |> repeat()
|> reduce({List, :to_string, []}) |> reduce({List, :to_string, []})
|> reduce(:trim)
|> unwrap_and_tag(:literal) |> unwrap_and_tag(:literal)
|> optional(edit_distance) |> optional(edit_distance)
|> eos() |> eos()
@ -37,6 +40,7 @@ defmodule Search.LiteralParser do
]) ])
|> repeat() |> repeat()
|> reduce({List, :to_string, []}) |> reduce({List, :to_string, []})
|> reduce(:trim)
|> unwrap_and_tag(:wildcard) |> unwrap_and_tag(:wildcard)
|> ignore(optional(edit_distance)) |> ignore(optional(edit_distance))
|> eos() |> eos()

View file

@ -30,14 +30,14 @@ defmodule Search.Parser do
def parser(options) do def parser(options) do
parser = struct(Parser, options) parser = struct(Parser, options)
fields = fields =
Enum.map(parser.bool_fields, fn f -> {BoolParser, f} end) ++ Enum.map(parser.bool_fields, fn f -> {f, BoolParser} end) ++
Enum.map(parser.date_fields, fn f -> {DateParser, f} end) ++ Enum.map(parser.date_fields, fn f -> {f, DateParser} end) ++
Enum.map(parser.float_fields, fn f -> {FloatParser, f} end) ++ Enum.map(parser.float_fields, fn f -> {f, FloatParser} end) ++
Enum.map(parser.int_fields, fn f -> {IntParser, f} end) ++ Enum.map(parser.int_fields, fn f -> {f, IntParser} end) ++
Enum.map(parser.ip_fields, fn f -> {IpParser, f} end) ++ Enum.map(parser.ip_fields, fn f -> {f, IpParser} end) ++
Enum.map(parser.literal_fields, fn f -> {LiteralParser, f} end) ++ Enum.map(parser.literal_fields, fn f -> {f, LiteralParser} end) ++
Enum.map(parser.ngram_fields, fn f -> {NgramParser, f} end) ++ Enum.map(parser.ngram_fields, fn f -> {f, NgramParser} end) ++
Enum.map(parser.custom_fields, fn f -> {:custom_field, f} end) Enum.map(parser.custom_fields, fn f -> {f, :custom_field} end)
%{parser | __fields__: Map.new(fields)} %{parser | __fields__: Map.new(fields)}
end end
@ -50,11 +50,26 @@ defmodule Search.Parser do
do do
{:ok, tree} {:ok, tree}
else else
{:ok, {_tree, tokens}} ->
{:error, "Junk at end of expression: " <> debug_tokens(tokens)}
{:error, msg, start_pos, _1, _2, _3} ->
{:error, msg <> ", starting at: " <> start_pos}
{:error, msg} ->
{:error, msg}
_ -> _ ->
{:error, "Search parsing error."} {:error, "Search parsing error."}
end end
end end
defp debug_tokens(tokens) do
tokens
|> Enum.map(fn {_k, v} -> v end)
|> Enum.join("")
end
# #
# Predictive LL(1) RD parser for search grammar # Predictive LL(1) RD parser for search grammar
# #
@ -62,22 +77,22 @@ defmodule Search.Parser do
defp search_top(parser, tokens), do: search_or(parser, tokens) defp search_top(parser, tokens), do: search_or(parser, tokens)
defp search_or(parser, tokens) do defp search_or(parser, tokens) do
case search_and(parser, tokens) do with {:ok, {left, [{:or, _} | r_tokens]}} <- search_and(parser, tokens),
{:ok, {left, [{:or, _} | r_tokens]}} -> {:ok, {right, rest}} <- search_or(parser, r_tokens)
{right, rest} = search_or(parser, r_tokens) do
{:ok, {%{bool: %{should: [left, right]}}, rest}} {:ok, {%{bool: %{should: [left, right]}}, rest}}
else
value -> value ->
value value
end end
end end
defp search_and(parser, tokens) do defp search_and(parser, tokens) do
case search_boost(parser, tokens) do with {:ok, {left, [{:and, _} | r_tokens]}} <- search_boost(parser, tokens),
{:ok, {left, [{:and, _} | r_tokens]}} -> {:ok, {right, rest}} <- search_and(parser, r_tokens)
{right, rest} = search_or(parser, r_tokens) do
{:ok, {%{bool: %{must: [left, right]}}, rest}} {:ok, {%{bool: %{must: [left, right]}}, rest}}
else
value -> value ->
value value
end end
@ -136,6 +151,9 @@ defmodule Search.Parser do
end end
end end
defp search_field(_parser, _tokens), do:
{:error, "Expected a term."}
# #
# Predictive LL(k) RD parser for search terms in parent grammar # Predictive LL(k) RD parser for search terms in parent grammar
# #

9
lib/search/string.ex Normal file
View file

@ -0,0 +1,9 @@
defmodule Search.String do
  @moduledoc """
  Helpers for normalizing user-supplied, multi-line search strings
  before they are handed to the search parser.
  """

  @doc """
  Collapses a multi-line query string into a single-line expression.

  Strips carriage returns, drops blank lines, wraps each remaining line
  in parentheses, and joins them with `" || "` so that every input line
  becomes one OR-alternative of a single search expression.

  Returns `""` when the input contains no non-empty lines.

  ## Examples

      iex> Search.String.normalize("safe\\nscore.gt:100")
      "(safe) || (score.gt:100)"
  """
  def normalize(str) do
    str
    |> String.replace("\r", "")
    |> String.split("\n", trim: true)
    # map_join/3 avoids building the intermediate list that map |> join would.
    |> Enum.map_join(" || ", &"(#{&1})")
  end
end

View file

@ -1,11 +1,12 @@
defmodule Search.TermRangeParser do defmodule Search.TermRangeParser do
alias Search.LiteralParser
# Unfortunately, we can't use NimbleParsec here. It requires # Unfortunately, we can't use NimbleParsec here. It requires
# the compiler, and we're not in a macro environment. # the compiler, and we're not in a macro environment.
def parse(input, fields, default_field) do def parse(input, fields, default_field) do
tokens = tokens =
Enum.find_value(fields, fn {p, f} -> Enum.find_value(fields, fn {f, p} ->
field(input, f, p) field(input, f, p)
end) end)