byte[] 2019-11-02 16:31:55 -04:00
parent 756599df56
commit c46cceab03
11 changed files with 203 additions and 193 deletions

@@ -1,187 +1,162 @@
 defmodule Philomena.Images.Query do
-  import Philomena.Search.Parser
-  import Philomena.Search.String
+  alias Search.Parser
   alias Philomena.Repo

-  defparser("anonymous",
-    int:
-      ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
-    float: ~W(aspect_ratio wilson_score),
-    date: ~W(created_at updated_at first_seen_at),
-    literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
-    ngram: ~W(description),
-    custom: ~W(gallery_id),
-    transforms: %{
-      "gallery_id" => fn _ctx, value ->
-        %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
+  def gallery_id_transform(_ctx, value),
+    do: {:ok, %{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}}
+
+  def user_my_transform(%{user: %{id: id}}, "faves"),
+    do: {:ok, %{term: %{favourited_by_user_ids: id}}}
+
+  def user_my_transform(%{user: %{id: id}}, "upvotes"),
+    do: {:ok, %{term: %{upvoter_ids: id}}}
+
+  def user_my_transform(%{user: %{id: id}}, "downvotes"),
+    do: {:ok, %{term: %{downvoter_ids: id}}}
+
+  def user_my_transform(%{watch: true}, "watched"),
+    do: {:error, "Recursive watchlists are not allowed."}
+
+  def user_my_transform(%{user: user} = ctx, "watched") do
+    ctx = Map.merge(ctx, %{watch: true})
+    tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
+
+    {:ok, include_query} =
+      Philomena.Images.Query.parse_user(ctx, user.watched_images_query_str |> Search.String.normalize())
+
+    {:ok, exclude_query} =
+      Philomena.Images.Query.parse_user(
+        ctx,
+        user.watched_images_exclude_str |> Search.String.normalize()
+      )
+
+    should = [tag_include, include_query]
+    must_not = [exclude_query]
+
+    must_not =
+      if user.no_spoilered_in_watched do
+        user = user |> Repo.preload(:current_filter)
+        tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
+
+        {:ok, spoiler_query} =
+          Philomena.Images.Query.parse_user(
+            ctx,
+            user.current_filter.spoilered_complex_str |> Search.String.normalize()
+          )
+
+        [tag_exclude, spoiler_query | must_not]
+      else
+        must_not
+      end
-      end
-    },
-    aliases: %{
-      "faved_by" => "favourited_by_users",
-      "faved_by_id" => "favourited_by_user_ids"
-    },
-    default: "namespaced_tags.name"
-  )
+
+    %{bool: %{should: should, must_not: must_not}}
+  end
+
+  def user_my_transform(_ctx, _value),
+    do: {:error, "Unknown `my' value."}
+
+  int_fields = ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count)
+  float_fields = ~W(aspect_ratio wilson_score)
+  date_fields = ~W(created_at updated_at first_seen_at)
+  literal_fields = ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format)
+  ngram_fields = ~W(description)
+  custom_fields = ~W(gallery_id)
+  default_field = "namespaced_tags.name"
+
+  transforms = %{
+    "gallery_id" => &Philomena.Images.Query.gallery_id_transform/2
+  }
+
+  aliases = %{
+    "faved_by" => "favourited_by_users",
+    "faved_by_id" => "favourited_by_user_ids"
+  }
+
+  user_custom = custom_fields ++ ~W(my)
+
+  user_transforms = Map.merge(transforms, %{
+    "my" => &Philomena.Images.Query.user_my_transform/2
+  })
+
+  mod_int_fields = int_fields ++ ~W(upvoted_by_id downvoted_by_id true_uploader_id hidden_by_id deleted_by_user_id)
+  mod_literal_fields = literal_fields ++ ~W(fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user)
+  mod_ip_fields = ~W(ip)
+  mod_bool_fields = ~W(deleted)
+
+  mod_aliases = Map.merge(aliases, %{
+    "upvoted_by" => "upvoters",
+    "downvoted_by" => "downvoters",
+    "upvoted_by_id" => "upvoter_ids",
+    "downvoted_by_id" => "downvoter_ids",
+    "hidden_by" => "hidden_by_users",
+    "hidden_by_id" => "hidden_by_user_ids",
+    "deleted" => "hidden_from_users"
+  })
+
+  @anonymous_parser Parser.parser(
+    int_fields: int_fields,
+    float_fields: float_fields,
+    date_fields: date_fields,
+    literal_fields: literal_fields,
+    ngram_fields: ngram_fields,
+    custom_fields: custom_fields,
+    transforms: transforms,
+    aliases: aliases,
+    default_field: default_field
+  )
defparser("user",
int:
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
float: ~W(aspect_ratio wilson_score),
date: ~W(created_at updated_at first_seen_at),
literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
ngram: ~W(description),
custom: ~W(gallery_id my),
transforms: %{
"gallery_id" => fn _ctx, value ->
%{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
end,
"my" => fn
%{user: %{id: id}}, "faves" ->
%{term: %{favourited_by_user_ids: id}}
%{user: %{id: id}}, "upvotes" ->
%{term: %{upvoter_ids: id}}
%{user: %{id: id}}, "downvotes" ->
%{term: %{downvoter_ids: id}}
%{watch: true}, "watched" ->
raise ArgumentError, "Recursive watchlists are not allowed."
%{user: user} = ctx, "watched" ->
ctx = Map.merge(ctx, %{watch: true})
tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
{:ok, include_query} =
Philomena.Images.Query.user_parser(ctx, user.watched_images_query_str |> normalize())
{:ok, exclude_query} =
Philomena.Images.Query.user_parser(
ctx,
user.watched_images_exclude_str |> normalize()
)
should = [tag_include, include_query]
must_not = [exclude_query]
must_not =
if user.no_spoilered_in_watched do
user = user |> Repo.preload(:current_filter)
tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
{:ok, spoiler_query} =
Philomena.Images.Query.user_parser(
ctx,
user.current_filter.spoilered_complex_str |> normalize()
)
[tag_exclude, spoiler_query | must_not]
else
must_not
end
%{bool: %{should: should, must_not: must_not}}
end
},
aliases: %{
"faved_by" => "favourited_by_users",
"faved_by_id" => "favourited_by_user_ids"
},
default: "namespaced_tags.name"
@user_parser Parser.parser(
int_fields: int_fields,
float_fields: float_fields,
date_fields: date_fields,
literal_fields: literal_fields,
ngram_fields: ngram_fields,
custom_fields: user_custom,
transforms: user_transforms,
aliases: aliases,
default_field: default_field
)
defparser("moderator",
int:
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id upvoted_by_id downvoted_by_id tag_count true_uploader_id hidden_by_id deleted_by_user-id),
float: ~W(aspect_ratio wilson_score),
date: ~W(created_at updated_at first_seen_at),
literal:
~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format fingerprint upvoted_by downvoted_by true_uploader hidden_by deleted_by_user),
ngram: ~W(description deletion_reason),
ip: ~W(ip),
bool: ~W(deleted),
custom: ~W(gallery_id my),
transforms: %{
"gallery_id" => fn _ctx, value ->
%{nested: %{path: :galleries, query: %{term: %{"galleries.id" => value}}}}
end,
"my" => fn
%{user: %{id: id}}, "faves" ->
%{term: %{favourited_by_user_ids: id}}
%{user: %{id: id}}, "upvotes" ->
%{term: %{upvoter_ids: id}}
%{user: %{id: id}}, "downvotes" ->
%{term: %{downvoter_ids: id}}
%{watch: true}, "watched" ->
raise ArgumentError, "Recursive watchlists are not allowed."
%{user: user} = ctx, "watched" ->
ctx = Map.merge(ctx, %{watch: true})
tag_include = %{terms: %{tag_ids: user.watched_tag_ids}}
{:ok, include_query} =
Philomena.Images.Query.moderator_parser(ctx, user.watched_images_query_str |> normalize())
{:ok, exclude_query} =
Philomena.Images.Query.moderator_parser(
ctx,
user.watched_images_exclude_str |> normalize()
)
should = [tag_include, include_query]
must_not = [exclude_query]
must_not =
if user.no_spoilered_in_watched do
user = user |> Repo.preload(:current_filter)
tag_exclude = %{terms: %{tag_ids: user.current_filter.spoilered_tag_ids}}
{:ok, spoiler_query} =
Philomena.Images.Query.moderator_parser(
ctx,
user.current_filter.spoilered_complex_str |> normalize()
)
[tag_exclude, spoiler_query | must_not]
else
must_not
end
%{bool: %{should: should, must_not: must_not}}
end
},
aliases: %{
"faved_by" => "favourited_by_users",
"upvoted_by" => "upvoters",
"downvoted_by" => "downvoters",
"faved_by_id" => "favourited_by_user_ids",
"upvoted_by_id" => "upvoter_ids",
"downvoted_by_id" => "downvoter_ids",
"hidden_by" => "hidden_by_users",
"hidden_by_id" => "hidden_by_user_ids",
"deleted" => "hidden_from_users"
},
default: "namespaced_tags.name"
@moderator_parser Parser.parser(
int_fields: mod_int_fields,
float_fields: float_fields,
date_fields: date_fields,
literal_fields: mod_literal_fields,
ip_fields: mod_ip_fields,
ngram_fields: ngram_fields,
bool_fields: mod_bool_fields,
custom_fields: user_custom,
transforms: user_transforms,
aliases: mod_aliases,
default_field: default_field
)
+
+  def parse_anonymous(context, query_string) do
+    Parser.parse(@anonymous_parser, query_string, context)
+  end
+
+  def parse_user(context, query_string) do
+    Parser.parse(@user_parser, query_string, context)
+  end
+
+  def parse_moderator(context, query_string) do
+    Parser.parse(@moderator_parser, query_string, context)
+  end
+
   def compile(user, query_string, watch \\ false) do
     query_string = query_string || ""

     case user do
       nil ->
-        anonymous_parser(%{user: nil, watch: watch}, query_string)
+        parse_anonymous(%{user: nil, watch: watch}, query_string)

       %{role: role} when role in ~W(user assistant) ->
-        user_parser(%{user: user, watch: watch}, query_string)
+        parse_user(%{user: user, watch: watch}, query_string)

       %{role: role} when role in ~W(moderator admin) ->
-        moderator_parser(%{user: user, watch: watch}, query_string)
+        parse_moderator(%{user: user, watch: watch}, query_string)

       _ ->
         raise ArgumentError, "Unknown user role."
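
The net effect of the rewrite above: the `defparser` macro is gone, the field lists and transform maps are plain module-body data frozen into `@anonymous_parser`, `@user_parser`, and `@moderator_parser`, and the named transform functions return `{:ok, query}` or `{:error, msg}` tuples instead of raising. A minimal sketch of how a caller might drive the new entry points; the `%{role: ...}` maps are stand-ins for the real user struct, assumed from the `compile/3` clauses above:

alias Philomena.Images.Query

# Anonymous visitors get the most restricted parser.
{:ok, query} = Query.compile(nil, "safe, score.gte:100")

# Signed-in users additionally get the `my:` custom field.
{:ok, query} = Query.compile(%{role: "user", id: 1}, "my:faves")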

@@ -8,6 +8,7 @@ defmodule Search.BoolParser do
     ])
     |> unwrap_and_tag(:bool)
     |> eos()
+    |> label("a boolean, like `true' or `false'")

   defparsec :parse, bool
 end
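
The `label/2` call added here (and in the date, float, and int parsers below) replaces NimbleParsec's generated expectation text with a human-readable one. A sketch of the assumed effect on the error tuple:

# Without the label: {:error, "expected string \"true\" or string \"false\" ...", ...}
# With the label (assumed): {:error, "expected a boolean, like `true' or `false'", ...}
Search.BoolParser.parse("maybe")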

@@ -182,6 +182,7 @@ defmodule Search.DateParser do
       relative_date
     ])
     |> eos()
+    |> label("an RFC3339 datetime fragment, like `2019-01-01', or relative date, like `3 days ago'")

   defparsec :parse, date
 end

@@ -25,6 +25,7 @@ defmodule Search.FloatParser do
       float |> unwrap_and_tag(:float)
     ])
     |> eos()
+    |> label("a real number, like `2.7182818' or `-10'")

   defparsec :parse, float_parser
 end

@@ -18,6 +18,7 @@ defmodule Search.IntParser do
       int |> unwrap_and_tag(:int)
     ])
     |> eos()
+    |> label("an integer, like `3' or `-10'")

   defparsec :parse, int_parser
 end

@@ -130,9 +130,9 @@ defmodule Search.IpParser do
       ipv6_address |> optional(ipv6_prefix)
     ])
     |> reduce({Enum, :join, []})
-    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")
     |> unwrap_and_tag(:ip)
     |> eos()
+    |> label("a valid IPv4 or IPv6 address and optional CIDR prefix")

   defparsec :parse, ip
 end
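
Moving `label/2` after `eos()` widens its scope to the whole rule, so trailing junk after an otherwise valid address should also fail with the friendly message rather than a bare end-of-string expectation (behavior assumed from NimbleParsec's label semantics):

Search.IpParser.parse("192.168.1.1 junk")
#=> {:error, "expected a valid IPv4 or IPv6 address and optional CIDR prefix ...", ...}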

@@ -29,7 +29,6 @@ defmodule Search.Lexer do
     |> ignore()

   quot = string("\"")
-  backslash = string("\\")

   boost =
     ignore(string("^"))
@@ -39,7 +38,6 @@
   stop_words =
     repeat(space)
     |> choice([
-      backslash |> eos(),
       l_and,
       l_or,
       rparen,
@@ -60,20 +58,21 @@
   text =
     parsec(:dirty_text)
     |> reduce({List, :to_string, []})
-    |> unwrap_and_tag(:text)
+    |> unwrap_and_tag(:term)
+    |> label("a term, like `safe'")

   quoted_text =
     ignore(quot)
-    |> choice([
+    |> repeat(choice([
         ignore(string("\\")) |> string("\""),
         ignore(string("\\")) |> string("\\"),
         string("\\") |> utf8_char([]),
         utf8_char(not: ?")
-      ])
-    |> repeat()
+      ]))
     |> ignore(quot)
     |> reduce({List, :to_string, []})
-    |> unwrap_and_tag(:text)
+    |> unwrap_and_tag(:term)
+    |> label(~s|a term enclosed in quotes, like `"/)^3^(\\\\"'|)

   term =
     choice([
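
The switch from `choice([...]) |> repeat()` to `repeat(choice([...]))` is what fixes escape handling: each iteration consumes one escaped quote, one escaped backslash, one verbatim backslash escape, or one plain character, stopping only at the closing quote. A self-contained sketch of the same rule (module name is ours; output shape assumed):

defmodule QuotedTermSketch do
  import NimbleParsec

  quot = string("\"")

  quoted_text =
    ignore(quot)
    |> repeat(
      choice([
        ignore(string("\\")) |> string("\""),
        ignore(string("\\")) |> string("\\"),
        string("\\") |> utf8_char([]),
        utf8_char(not: ?")
      ])
    )
    |> ignore(quot)
    |> reduce({List, :to_string, []})

  defparsec :parse, quoted_text
end

# QuotedTermSketch.parse(~S("a \" b")) should return {:ok, [~S(a " b)], ...}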

@@ -1,6 +1,8 @@
 defmodule Search.LiteralParser do
   import NimbleParsec

+  defp trim([term]), do: String.trim(term)
+
   edit_distance =
     ignore(string("~"))
     |> integer(min: 1)
@@ -22,6 +24,7 @@
     ])
     |> repeat()
     |> reduce({List, :to_string, []})
+    |> reduce(:trim)
     |> unwrap_and_tag(:literal)
     |> optional(edit_distance)
     |> eos()
@@ -37,6 +40,7 @@
     ])
     |> repeat()
     |> reduce({List, :to_string, []})
+    |> reduce(:trim)
     |> unwrap_and_tag(:wildcard)
     |> ignore(optional(edit_distance))
     |> eos()
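
Because `reduce(:trim)` runs after the matched characters are joined, `defp trim([term])` receives a single binary and strips surrounding whitespace before the value is tagged, so padded and clean input index identically. An assumed round trip:

Search.LiteralParser.parse("fluffy mane  ")
#=> {:ok, [literal: "fluffy mane"], ...}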

@@ -30,14 +30,14 @@ defmodule Search.Parser do
   def parser(options) do
     parser = struct(Parser, options)

     fields =
-      Enum.map(parser.bool_fields, fn f -> {BoolParser, f} end) ++
-        Enum.map(parser.date_fields, fn f -> {DateParser, f} end) ++
-        Enum.map(parser.float_fields, fn f -> {FloatParser, f} end) ++
-        Enum.map(parser.int_fields, fn f -> {IntParser, f} end) ++
-        Enum.map(parser.ip_fields, fn f -> {IpParser, f} end) ++
-        Enum.map(parser.literal_fields, fn f -> {LiteralParser, f} end) ++
-        Enum.map(parser.ngram_fields, fn f -> {NgramParser, f} end) ++
-        Enum.map(parser.custom_fields, fn f -> {:custom_field, f} end)
+      Enum.map(parser.bool_fields, fn f -> {f, BoolParser} end) ++
+        Enum.map(parser.date_fields, fn f -> {f, DateParser} end) ++
+        Enum.map(parser.float_fields, fn f -> {f, FloatParser} end) ++
+        Enum.map(parser.int_fields, fn f -> {f, IntParser} end) ++
+        Enum.map(parser.ip_fields, fn f -> {f, IpParser} end) ++
+        Enum.map(parser.literal_fields, fn f -> {f, LiteralParser} end) ++
+        Enum.map(parser.ngram_fields, fn f -> {f, NgramParser} end) ++
+        Enum.map(parser.custom_fields, fn f -> {f, :custom_field} end)

     %{parser | __fields__: Map.new(fields)}
   end
@@ -50,11 +50,26 @@
     do
       {:ok, tree}
     else
+      {:ok, {_tree, tokens}} ->
+        {:error, "Junk at end of expression: " <> debug_tokens(tokens)}
+
+      {:error, msg, start_pos, _1, _2, _3} ->
+        {:error, msg <> ", starting at: " <> start_pos}
+
       {:error, msg} ->
         {:error, msg}

       _ ->
         {:error, "Search parsing error."}
     end
   end

+  defp debug_tokens(tokens) do
+    tokens
+    |> Enum.map(fn {_k, v} -> v end)
+    |> Enum.join("")
+  end
+
   #
   # Predictive LL(1) RD parser for search grammar
   #
@@ -62,22 +77,22 @@
   defp search_top(parser, tokens), do: search_or(parser, tokens)

   defp search_or(parser, tokens) do
-    case search_and(parser, tokens) do
-      {:ok, {left, [{:or, _} | r_tokens]}} ->
-        {right, rest} = search_or(parser, r_tokens)
-        {:ok, {%{bool: %{should: [left, right]}}, rest}}
+    with {:ok, {left, [{:or, _} | r_tokens]}} <- search_and(parser, tokens),
+         {:ok, {right, rest}} <- search_or(parser, r_tokens)
+    do
+      {:ok, {%{bool: %{should: [left, right]}}, rest}}
+    else
       value ->
         value
     end
   end

   defp search_and(parser, tokens) do
-    case search_boost(parser, tokens) do
-      {:ok, {left, [{:and, _} | r_tokens]}} ->
-        {right, rest} = search_or(parser, r_tokens)
-        {:ok, {%{bool: %{must: [left, right]}}, rest}}
+    with {:ok, {left, [{:and, _} | r_tokens]}} <- search_boost(parser, tokens),
+         {:ok, {right, rest}} <- search_and(parser, r_tokens)
+    do
+      {:ok, {%{bool: %{must: [left, right]}}, rest}}
+    else
       value ->
         value
     end
@@ -136,6 +151,9 @@
     end
   end

+  defp search_field(_parser, _tokens),
+    do: {:error, "Expected a term."}
+
   #
   # Predictive LL(k) RD parser for search terms in parent grammar
   #
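
Three changes above work together: `parser/1` now emits `{field, parser}` tuples so `Map.new/1` keys `__fields__` by field name, the OR/AND productions use `with` so an `{:error, _}` from either side propagates instead of crashing a `case` match, and leftover tokens surface as a readable message. A sketch of the assumed error behavior:

# A dangling operator now reports an error tuple rather than raising:
Search.Parser.parse(parser, "safe &&", %{})
#=> {:error, "Expected a term."}

# Unconsumed input is reported through debug_tokens/1, e.g.:
#=> {:error, "Junk at end of expression: )"}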

lib/search/string.ex (new file, +9)

@@ -0,0 +1,9 @@
+defmodule Search.String do
+  def normalize(str) do
+    str
+    |> String.replace("\r", "")
+    |> String.split("\n", trim: true)
+    |> Enum.map(fn s -> "(#{s})" end)
+    |> Enum.join(" || ")
+  end
+end
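
`normalize/1` is what the watched-list transforms in images/query.ex feed multi-line saved queries through: each line becomes a parenthesized clause and the clauses are OR'd together, so a one-filter-per-line list parses as a single disjunction:

Search.String.normalize("rating:safe\r\nscore.gte:100")
#=> "(rating:safe) || (score.gte:100)"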

@@ -1,11 +1,12 @@
 defmodule Search.TermRangeParser do
   alias Search.LiteralParser

+  # Unfortunately, we can't use NimbleParsec here. It requires
+  # the compiler, and we're not in a macro environment.
   def parse(input, fields, default_field) do
     tokens =
-      Enum.find_value(fields, fn {p, f} ->
+      Enum.find_value(fields, fn {f, p} ->
         field(input, f, p)
       end)
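
The `{f, p}` flip keeps this module in step with `Search.Parser.parser/1` above, which now produces `{field, parser}` tuples; the field name must come first on both sides. A minimal illustration of why the order matters once the tuples reach `Map.new/1`:

# {field, parser} keys the lookup map by field name:
Map.new([{"score", Search.IntParser}])
#=> %{"score" => Search.IntParser}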