This commit is contained in:
Liam P. White 2019-08-26 09:57:04 -04:00
parent fe69b708db
commit a4c4afa350
4 changed files with 112 additions and 52 deletions

View file

@ -1,6 +1,7 @@
defmodule Philomena.Images.Query do defmodule Philomena.Images.Query do
use Philomena.Search.Lexer, use Philomena.Search.Lexer,
int: ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count), int:
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
float: ~W(aspect_ratio wilson_score), float: ~W(aspect_ratio wilson_score),
date: ~W(created_at updated_at first_seen_at), date: ~W(created_at updated_at first_seen_at),
literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format), literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),

View file

@ -19,12 +19,12 @@ defmodule Philomena.Search.Helpers do
tz_hour = tz_hour =
tz_hour tz_hour
|> Integer.to_string |> Integer.to_string()
|> String.pad_leading(2, "0") |> String.pad_leading(2, "0")
tz_minute = tz_minute =
tz_minute tz_minute
|> Integer.to_string |> Integer.to_string()
|> String.pad_leading(2, "0") |> String.pad_leading(2, "0")
iso8601_string = "#{NaiveDateTime.to_iso8601(naive)}#{tz_off}#{tz_hour}#{tz_minute}" iso8601_string = "#{NaiveDateTime.to_iso8601(naive)}#{tz_off}#{tz_hour}#{tz_minute}"
@ -40,13 +40,13 @@ defmodule Philomena.Search.Helpers do
defp date_bounds([year]) do defp date_bounds([year]) do
lower = %NaiveDateTime{year: year, month: 1, day: 1, hour: 0, minute: 0, second: 0} lower = %NaiveDateTime{year: year, month: 1, day: 1, hour: 0, minute: 0, second: 0}
upper = NaiveDateTime.add(lower, 31536000, :second) upper = NaiveDateTime.add(lower, 31_536_000, :second)
[lower, upper] [lower, upper]
end end
defp date_bounds([year, month]) do defp date_bounds([year, month]) do
lower = %NaiveDateTime{year: year, month: month, day: 1, hour: 0, minute: 0, second: 0} lower = %NaiveDateTime{year: year, month: month, day: 1, hour: 0, minute: 0, second: 0}
upper = NaiveDateTime.add(lower, 2592000, :second) upper = NaiveDateTime.add(lower, 2_592_000, :second)
[lower, upper] [lower, upper]
end end
@ -63,13 +63,29 @@ defmodule Philomena.Search.Helpers do
end end
defp date_bounds([year, month, day, hour, minute]) do defp date_bounds([year, month, day, hour, minute]) do
lower = %NaiveDateTime{year: year, month: month, day: day, hour: hour, minute: minute, second: 0} lower = %NaiveDateTime{
year: year,
month: month,
day: day,
hour: hour,
minute: minute,
second: 0
}
upper = NaiveDateTime.add(lower, 60, :second) upper = NaiveDateTime.add(lower, 60, :second)
[lower, upper] [lower, upper]
end end
defp date_bounds([year, month, day, hour, minute, second]) do defp date_bounds([year, month, day, hour, minute, second]) do
lower = %NaiveDateTime{year: year, month: month, day: day, hour: hour, minute: minute, second: second} lower = %NaiveDateTime{
year: year,
month: month,
day: day,
hour: hour,
minute: minute,
second: second
}
upper = NaiveDateTime.add(lower, 1, :second) upper = NaiveDateTime.add(lower, 1, :second)
[lower, upper] [lower, upper]
end end

View file

@ -1,13 +1,13 @@
defmodule Philomena.Search.Lexer do defmodule Philomena.Search.Lexer do
defmacro __using__(opts) do defmacro __using__(opts) do
literal_fields = Keyword.get(opts, :literal, []) |> Macro.expand(__CALLER__) literal_fields = Keyword.get(opts, :literal, []) |> Macro.expand(__CALLER__)
ngram_fields = Keyword.get(opts, :ngram, []) |> Macro.expand(__CALLER__) ngram_fields = Keyword.get(opts, :ngram, []) |> Macro.expand(__CALLER__)
bool_fields = Keyword.get(opts, :bool, []) |> Macro.expand(__CALLER__) bool_fields = Keyword.get(opts, :bool, []) |> Macro.expand(__CALLER__)
date_fields = Keyword.get(opts, :date, []) |> Macro.expand(__CALLER__) date_fields = Keyword.get(opts, :date, []) |> Macro.expand(__CALLER__)
float_fields = Keyword.get(opts, :float, []) |> Macro.expand(__CALLER__) float_fields = Keyword.get(opts, :float, []) |> Macro.expand(__CALLER__)
int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__) int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__)
ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__) ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__)
custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__) custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__)
quote location: :keep do quote location: :keep do
import NimbleParsec import NimbleParsec
@ -82,8 +82,7 @@ defmodule Philomena.Search.Lexer do
]) ])
|> reduce({List, :to_string, []}) |> reduce({List, :to_string, []})
ipv6_hexadectet = ipv6_hexadectet = ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)
ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)
ipv6_ls32 = ipv6_ls32 =
choice([ choice([
@ -91,47 +90,78 @@ defmodule Philomena.Search.Lexer do
ipv4_address ipv4_address
]) ])
ipv6_fragment = ipv6_fragment = ipv6_hexadectet |> string(":")
ipv6_hexadectet |> string(":")
ipv6_address = ipv6_address =
choice([ choice([
times(ipv6_fragment, 6) |> concat(ipv6_ls32), times(ipv6_fragment, 6) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32), string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32), string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
times(ipv6_fragment, 1)
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32), |> concat(ipv6_hexadectet)
|> string("::")
|> times(ipv6_fragment, 3)
|> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32), string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
times(ipv6_fragment, 2)
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32), |> concat(ipv6_hexadectet)
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32), |> string("::")
|> times(ipv6_fragment, 2)
|> concat(ipv6_ls32),
times(ipv6_fragment, 1)
|> concat(ipv6_hexadectet)
|> string("::")
|> times(ipv6_fragment, 2)
|> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32), string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
times(ipv6_fragment, 3)
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), |> concat(ipv6_hexadectet)
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), |> string("::")
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), |> concat(ipv6_fragment)
|> concat(ipv6_ls32),
times(ipv6_fragment, 2)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_fragment)
|> concat(ipv6_ls32),
times(ipv6_fragment, 1)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_fragment)
|> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32), string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32), times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32), times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32), times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32), times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> concat(ipv6_ls32), ipv6_hexadectet |> string("::") |> concat(ipv6_ls32),
string("::") |> concat(ipv6_ls32), string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 5)
times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), |> concat(ipv6_hexadectet)
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), |> string("::")
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), |> concat(ipv6_hexadectet),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), times(ipv6_fragment, 4)
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet), |> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
times(ipv6_fragment, 3)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
times(ipv6_fragment, 2)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
times(ipv6_fragment, 1)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet), ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet),
string("::") |> concat(ipv6_hexadectet), string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"), times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"),
times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"), times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"),
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"), times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"),
@ -192,9 +222,7 @@ defmodule Philomena.Search.Lexer do
|> optional( |> optional(
hms_sep hms_sep
|> concat(minute) |> concat(minute)
|> optional( |> optional(concat(hms_sep, second))
concat(hms_sep, second)
)
) )
) )
) )
@ -229,9 +257,9 @@ defmodule Philomena.Search.Lexer do
string("minute") |> optional(string("s")) |> replace(60), string("minute") |> optional(string("s")) |> replace(60),
string("hour") |> optional(string("s")) |> replace(3600), string("hour") |> optional(string("s")) |> replace(3600),
string("day") |> optional(string("s")) |> replace(86400), string("day") |> optional(string("s")) |> replace(86400),
string("week") |> optional(string("s")) |> replace(604800), string("week") |> optional(string("s")) |> replace(604_800),
string("month") |> optional(string("s")) |> replace(2592000), string("month") |> optional(string("s")) |> replace(2_592_000),
string("year") |> optional(string("s")) |> replace(31536000) string("year") |> optional(string("s")) |> replace(31_536_000)
]) ])
|> ignore(string(" ago")) |> ignore(string(" ago"))
|> reduce(:relative_datetime) |> reduce(:relative_datetime)
@ -310,8 +338,7 @@ defmodule Philomena.Search.Lexer do
ip_value ip_value
]) ])
quoted_numeric = quoted_numeric = ignore(quot) |> concat(numeric) |> ignore(quot)
ignore(quot) |> concat(numeric) |> ignore(quot)
stop_words = stop_words =
choice([ choice([
@ -434,7 +461,7 @@ defmodule Philomena.Search.Lexer do
times(outer, min: 1) times(outer, min: 1)
|> eos() |> eos()
defparsec :search, search defparsec(:search, search)
end end
end end
end end

View file

@ -99,14 +99,19 @@ defmodule Philomena.Search.Parser do
case tokens do case tokens do
[{:int_field, field}, {:eq, _}, {:int, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:int_field, field}, {:eq, _}, {:int, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{range: %{field => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens} {%{range: %{field => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens}
[{:float_field, field}, {:eq, _}, {:float, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:float_field, field}, {:eq, _}, {:float, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{range: %{field => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens} {%{range: %{field => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens}
[{:literal_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:literal_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{field => %{value: value, fuzziness: fuzz}}}, r_tokens} {%{fuzzy: %{field => %{value: value, fuzziness: fuzz}}}, r_tokens}
[{:ngram_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:ngram_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{field => %{value: value, fuzziness: fuzz}}}, r_tokens} {%{fuzzy: %{field => %{value: value, fuzziness: fuzz}}}, r_tokens}
[{:default, [text: value]}, {:fuzz, _}, {:number, fuzz} | r_tokens] -> [{:default, [text: value]}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{default_field => %{value: value, fuzziness: fuzz}}}, r_tokens} {%{fuzzy: %{default_field => %{value: value, fuzziness: fuzz}}}, r_tokens}
_ -> _ ->
search_range(ctx, tokens) search_range(ctx, tokens)
end end
@ -118,12 +123,18 @@ defmodule Philomena.Search.Parser do
defp search_range(ctx, tokens) do defp search_range(ctx, tokens) do
case tokens do case tokens do
[{:int_field, field}, {range, _}, {:int, value} | r_tokens] when range in [:gt, :gte, :lt, :lte] -> [{:int_field, field}, {range, _}, {:int, value} | r_tokens]
when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => value}}}, r_tokens} {%{range: %{field => %{range => value}}}, r_tokens}
[{:float_field, field}, {range, _}, {:float, value} | r_tokens] when range in [:gt, :gte, :lt, :lte] ->
[{:float_field, field}, {range, _}, {:number, value} | r_tokens]
when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => value}}}, r_tokens} {%{range: %{field => %{range => value}}}, r_tokens}
[{:date_field, field}, {range, _}, {:date, [lower, _higher]} | r_tokens] when range in [:gt, :gte, :lt, :lte] ->
[{:date_field, field}, {range, _}, {:date, [lower, _higher]} | r_tokens]
when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => lower}}}, r_tokens} {%{range: %{field => %{range => lower}}}, r_tokens}
_ -> _ ->
search_custom(ctx, tokens) search_custom(ctx, tokens)
end end
@ -133,6 +144,7 @@ defmodule Philomena.Search.Parser do
case tokens do case tokens do
[{:custom_field, field}, {:text, value} | r_tokens] -> [{:custom_field, field}, {:text, value} | r_tokens] ->
{ctx[:field_transforms][field].(value), r_tokens} {ctx[:field_transforms][field].(value), r_tokens}
_ -> _ ->
search_term(ctx, tokens) search_term(ctx, tokens)
end end
@ -142,12 +154,16 @@ defmodule Philomena.Search.Parser do
case tokens do case tokens do
[{:date_field, field}, {:eq, _}, {:date, [lower, higher]} | r_tokens] -> [{:date_field, field}, {:eq, _}, {:date, [lower, higher]} | r_tokens] ->
{%{range: %{field => %{gte: lower, lte: higher}}}, r_tokens} {%{range: %{field => %{gte: lower, lte: higher}}}, r_tokens}
[{:ngram_field, field}, {:eq, _}, {:text, value} | r_tokens] -> [{:ngram_field, field}, {:eq, _}, {:text, value} | r_tokens] ->
{%{match: %{field => value}}, r_tokens} {%{match: %{field => value}}, r_tokens}
[{_field_type, field}, {:eq, _}, {_value_type, value} | r_tokens] -> [{_field_type, field}, {:eq, _}, {_value_type, value} | r_tokens] ->
{%{term: %{field => value}}, r_tokens} {%{term: %{field => value}}, r_tokens}
[{:default, [text: value]} | r_tokens] -> [{:default, [text: value]} | r_tokens] ->
{%{term: %{ctx[:default_field] => value}}, r_tokens} {%{term: %{ctx[:default_field] => value}}, r_tokens}
_ -> _ ->
raise ArgumentError, "Expected a term" raise ArgumentError, "Expected a term"
end end