This commit is contained in:
Liam P. White 2019-08-26 09:57:04 -04:00
parent fe69b708db
commit a4c4afa350
4 changed files with 112 additions and 52 deletions

View file

@ -1,8 +1,9 @@
# Image search query module: pulls in Philomena.Search.Lexer and hands it the
# image field names grouped by the kind of value each field holds
# (:int, :float, :date, :literal and :ngram).
defmodule Philomena.Images.Query do
use Philomena.Search.Lexer,
int: ~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
int:
~W(id width height comment_count score upvotes downvotes faves uploader_id faved_by_id tag_count),
float: ~W(aspect_ratio wilson_score),
date: ~W(created_at updated_at first_seen_at),
literal: ~W(faved_by orig_sha512_hash sha512_hash uploader source_url original_format),
ngram: ~W(description)
end
end

View file

@ -17,14 +17,14 @@ defmodule Philomena.Search.Helpers do
defp build_datetime(naive, tz_off, tz_hour, tz_minute) do
# Unbelievable that there is no way to build this with integer arguments.
tz_hour =
tz_hour =
tz_hour
|> Integer.to_string
|> Integer.to_string()
|> String.pad_leading(2, "0")
tz_minute =
tz_minute
|> Integer.to_string
|> Integer.to_string()
|> String.pad_leading(2, "0")
iso8601_string = "#{NaiveDateTime.to_iso8601(naive)}#{tz_off}#{tz_hour}#{tz_minute}"
@ -40,13 +40,13 @@ defmodule Philomena.Search.Helpers do
# Bounds for a date given only as a year.
#
# Returns `[lower, upper]`, where `lower` is midnight on January 1st of `year`
# and `upper` is midnight on January 1st of the following year.
defp date_bounds([year]) do
  lower = %NaiveDateTime{year: year, month: 1, day: 1, hour: 0, minute: 0, second: 0}

  # A fixed 365-day offset stops one day short in leap years, so use the
  # actual length of this particular year.
  days_in_year = if Calendar.ISO.leap_year?(year), do: 366, else: 365
  upper = NaiveDateTime.add(lower, days_in_year * 86_400, :second)

  [lower, upper]
end
# Bounds for a date given as a year and a month.
#
# Returns `[lower, upper]`, where `lower` is midnight on the first day of the
# month and `upper` is midnight on the first day of the next month.
defp date_bounds([year, month]) do
  lower = %NaiveDateTime{year: year, month: month, day: 1, hour: 0, minute: 0, second: 0}

  # Months are 28 to 31 days long; a fixed 30-day offset would cut off the
  # 31st of long months and spill into March for February.
  days_in_month = Calendar.ISO.days_in_month(year, month)
  upper = NaiveDateTime.add(lower, days_in_month * 86_400, :second)

  [lower, upper]
end
@ -63,13 +63,29 @@ defmodule Philomena.Search.Helpers do
end
# Bounds for a date given down to the minute.
#
# Returns `[lower, upper]`: the start of that minute and the instant
# 60 seconds later.
defp date_bounds([year, month, day, hour, minute]) do
  minute_start =
    %NaiveDateTime{
      second: 0,
      minute: minute,
      hour: hour,
      day: day,
      month: month,
      year: year
    }

  [minute_start, NaiveDateTime.add(minute_start, 60, :second)]
end
# Bounds for a date given down to the second.
#
# Returns `[lower, upper]`: the given instant and the instant one second
# later.
defp date_bounds([year, month, day, hour, minute, second]) do
  instant =
    %NaiveDateTime{
      second: second,
      minute: minute,
      hour: hour,
      day: day,
      month: month,
      year: year
    }

  [instant, NaiveDateTime.add(instant, 1, :second)]
end
@ -109,4 +125,4 @@ defmodule Philomena.Search.Helpers do
# Appends a choice between the given `choices` to `combinator` by delegating
# straight to `choice/2`.
def full_choice(combinator, choices), do: choice(combinator, choices)
end
end

View file

@ -1,13 +1,13 @@
defmodule Philomena.Search.Lexer do
defmacro __using__(opts) do
literal_fields = Keyword.get(opts, :literal, []) |> Macro.expand(__CALLER__)
ngram_fields = Keyword.get(opts, :ngram, []) |> Macro.expand(__CALLER__)
bool_fields = Keyword.get(opts, :bool, []) |> Macro.expand(__CALLER__)
date_fields = Keyword.get(opts, :date, []) |> Macro.expand(__CALLER__)
float_fields = Keyword.get(opts, :float, []) |> Macro.expand(__CALLER__)
int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__)
ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__)
custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__)
ngram_fields = Keyword.get(opts, :ngram, []) |> Macro.expand(__CALLER__)
bool_fields = Keyword.get(opts, :bool, []) |> Macro.expand(__CALLER__)
date_fields = Keyword.get(opts, :date, []) |> Macro.expand(__CALLER__)
float_fields = Keyword.get(opts, :float, []) |> Macro.expand(__CALLER__)
int_fields = Keyword.get(opts, :int, []) |> Macro.expand(__CALLER__)
ip_fields = Keyword.get(opts, :ip, []) |> Macro.expand(__CALLER__)
custom_fields = Keyword.get(opts, :custom, []) |> Macro.expand(__CALLER__)
quote location: :keep do
import NimbleParsec
@ -82,8 +82,7 @@ defmodule Philomena.Search.Lexer do
])
|> reduce({List, :to_string, []})
ipv6_hexadectet =
ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)
ipv6_hexadectet = ascii_string('0123456789abcdefABCDEF', min: 1, max: 4)
ipv6_ls32 =
choice([
@ -91,47 +90,78 @@ defmodule Philomena.Search.Lexer do
ipv4_address
])
ipv6_fragment =
ipv6_hexadectet |> string(":")
ipv6_fragment = ipv6_hexadectet |> string(":")
ipv6_address =
choice([
times(ipv6_fragment, 6) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 5) |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 4) |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
times(ipv6_fragment, 1)
|> concat(ipv6_hexadectet)
|> string("::")
|> times(ipv6_fragment, 3)
|> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 3) |> concat(ipv6_ls32),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
times(ipv6_fragment, 2)
|> concat(ipv6_hexadectet)
|> string("::")
|> times(ipv6_fragment, 2)
|> concat(ipv6_ls32),
times(ipv6_fragment, 1)
|> concat(ipv6_hexadectet)
|> string("::")
|> times(ipv6_fragment, 2)
|> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
string("::") |> times(ipv6_fragment, 2) |> concat(ipv6_ls32),
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 3)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_fragment)
|> concat(ipv6_ls32),
times(ipv6_fragment, 2)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_fragment)
|> concat(ipv6_ls32),
times(ipv6_fragment, 1)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_fragment)
|> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
string("::") |> concat(ipv6_fragment) |> concat(ipv6_ls32),
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_ls32),
ipv6_hexadectet |> string("::") |> concat(ipv6_ls32),
string("::") |> concat(ipv6_ls32),
times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 3) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 2) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 1) |> concat(ipv6_hexadectet) |> string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 5)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
times(ipv6_fragment, 4)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
times(ipv6_fragment, 3)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
times(ipv6_fragment, 2)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
times(ipv6_fragment, 1)
|> concat(ipv6_hexadectet)
|> string("::")
|> concat(ipv6_hexadectet),
ipv6_hexadectet |> string("::") |> concat(ipv6_hexadectet),
string("::") |> concat(ipv6_hexadectet),
times(ipv6_fragment, 6) |> concat(ipv6_hexadectet) |> string("::"),
times(ipv6_fragment, 5) |> concat(ipv6_hexadectet) |> string("::"),
times(ipv6_fragment, 4) |> concat(ipv6_hexadectet) |> string("::"),
@ -164,7 +194,7 @@ defmodule Philomena.Search.Lexer do
year = integer(4)
month = integer(2)
day = integer(2)
hour = integer(2)
minute = integer(2)
second = integer(2)
@ -192,9 +222,7 @@ defmodule Philomena.Search.Lexer do
|> optional(
hms_sep
|> concat(minute)
|> optional(
concat(hms_sep, second)
)
|> optional(concat(hms_sep, second))
)
)
)
@ -229,9 +257,9 @@ defmodule Philomena.Search.Lexer do
string("minute") |> optional(string("s")) |> replace(60),
string("hour") |> optional(string("s")) |> replace(3600),
string("day") |> optional(string("s")) |> replace(86400),
string("week") |> optional(string("s")) |> replace(604800),
string("month") |> optional(string("s")) |> replace(2592000),
string("year") |> optional(string("s")) |> replace(31536000)
string("week") |> optional(string("s")) |> replace(604_800),
string("month") |> optional(string("s")) |> replace(2_592_000),
string("year") |> optional(string("s")) |> replace(31_536_000)
])
|> ignore(string(" ago"))
|> reduce(:relative_datetime)
@ -310,8 +338,7 @@ defmodule Philomena.Search.Lexer do
ip_value
])
quoted_numeric =
ignore(quot) |> concat(numeric) |> ignore(quot)
quoted_numeric = ignore(quot) |> concat(numeric) |> ignore(quot)
stop_words =
choice([
@ -434,7 +461,7 @@ defmodule Philomena.Search.Lexer do
times(outer, min: 1)
|> eos()
defparsec :search, search
defparsec(:search, search)
end
end
end

View file

@ -99,14 +99,19 @@ defmodule Philomena.Search.Parser do
case tokens do
[{:int_field, field}, {:eq, _}, {:int, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{range: %{field => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens}
[{:float_field, field}, {:eq, _}, {:float, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{range: %{field => %{gte: trunc(value - fuzz), lte: trunc(value + fuzz)}}}, r_tokens}
[{:literal_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{field => %{value: value, fuzziness: fuzz}}}, r_tokens}
[{:ngram_field, field}, {:eq, _}, {:text, value}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{field => %{value: value, fuzziness: fuzz}}}, r_tokens}
[{:default, [text: value]}, {:fuzz, _}, {:number, fuzz} | r_tokens] ->
{%{fuzzy: %{default_field => %{value: value, fuzziness: fuzz}}}, r_tokens}
_ ->
search_range(ctx, tokens)
end
@ -118,12 +123,18 @@ defmodule Philomena.Search.Parser do
defp search_range(ctx, tokens) do
case tokens do
[{:int_field, field}, {range, _}, {:int, value} | r_tokens] when range in [:gt, :gte, :lt, :lte] ->
[{:int_field, field}, {range, _}, {:int, value} | r_tokens]
when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => value}}}, r_tokens}
[{:float_field, field}, {range, _}, {:float, value} | r_tokens] when range in [:gt, :gte, :lt, :lte] ->
[{:float_field, field}, {range, _}, {:number, value} | r_tokens]
when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => value}}}, r_tokens}
[{:date_field, field}, {range, _}, {:date, [lower, _higher]} | r_tokens] when range in [:gt, :gte, :lt, :lte] ->
[{:date_field, field}, {range, _}, {:date, [lower, _higher]} | r_tokens]
when range in [:gt, :gte, :lt, :lte] ->
{%{range: %{field => %{range => lower}}}, r_tokens}
_ ->
search_custom(ctx, tokens)
end
@ -133,6 +144,7 @@ defmodule Philomena.Search.Parser do
case tokens do
[{:custom_field, field}, {:text, value} | r_tokens] ->
{ctx[:field_transforms][field].(value), r_tokens}
_ ->
search_term(ctx, tokens)
end
@ -142,12 +154,16 @@ defmodule Philomena.Search.Parser do
case tokens do
[{:date_field, field}, {:eq, _}, {:date, [lower, higher]} | r_tokens] ->
{%{range: %{field => %{gte: lower, lte: higher}}}, r_tokens}
[{:ngram_field, field}, {:eq, _}, {:text, value} | r_tokens] ->
{%{match: %{field => value}}, r_tokens}
[{_field_type, field}, {:eq, _}, {_value_type, value} | r_tokens] ->
{%{term: %{field => value}}, r_tokens}
[{:default, [text: value]} | r_tokens] ->
{%{term: %{ctx[:default_field] => value}}, r_tokens}
_ ->
raise ArgumentError, "Expected a term"
end