From 566ba9d4c1c0e62803040dd728a51d8cb6ba3900 Mon Sep 17 00:00:00 2001 From: Liam Date: Sat, 25 May 2024 14:03:45 -0400 Subject: [PATCH 1/2] Split out query features to PhilomenaQuery namespace --- README.md | 2 +- docker/app/run-test | 4 +- lib/mix/tasks/reindex_all.ex | 16 +- lib/mix/tasks/upload_to_s3.ex | 2 +- lib/philomena/batch.ex | 56 -- lib/philomena/comments.ex | 8 +- lib/philomena/comments/query.ex | 2 +- ...elasticsearch_index.ex => search_index.ex} | 4 +- lib/philomena/elasticsearch.ex | 294 -------- lib/philomena/filters.ex | 10 +- lib/philomena/filters/query.ex | 2 +- ...elasticsearch_index.ex => search_index.ex} | 4 +- lib/philomena/galleries.ex | 10 +- lib/philomena/galleries/query.ex | 2 +- ...elasticsearch_index.ex => search_index.ex} | 4 +- lib/philomena/images.ex | 8 +- lib/philomena/images/query.ex | 4 +- ...elasticsearch_index.ex => search_index.ex} | 4 +- lib/philomena/posts.ex | 8 +- lib/philomena/posts/query.ex | 2 +- ...elasticsearch_index.ex => search_index.ex} | 4 +- lib/philomena/reports.ex | 8 +- lib/philomena/reports/query.ex | 2 +- ...elasticsearch_index.ex => search_index.ex} | 4 +- lib/philomena/schema/search.ex | 2 +- lib/philomena/schema/time.ex | 2 +- lib/philomena/search/string.ex | 13 - lib/philomena/tags.ex | 12 +- lib/philomena/tags/query.ex | 2 +- ...elasticsearch_index.ex => search_index.ex} | 4 +- lib/philomena/user_downvote_wipe.ex | 6 +- lib/philomena/workers/index_worker.ex | 2 +- .../workers/tag_change_revert_worker.ex | 2 +- lib/philomena_query/batch.ex | 111 +++ .../parse}/bool_parser.ex | 4 +- .../parse}/date_parser.ex | 10 +- .../parse}/evaluator.ex | 25 +- .../parse}/float_parser.ex | 8 +- .../parse}/helpers.ex | 4 +- .../parse}/int_parser.ex | 8 +- .../parse}/ip_parser.ex | 4 +- .../search => philomena_query/parse}/lexer.ex | 6 +- .../parse}/literal_parser.ex | 6 +- .../parse}/ngram_parser.ex | 6 +- .../parse}/parser.ex | 117 +++- lib/philomena_query/parse/string.ex | 32 + .../parse}/term_range_parser.ex | 8 +- 
.../relative_date.ex | 48 +- lib/philomena_query/search.ex | 654 ++++++++++++++++++ .../search_index.ex} | 5 +- .../controllers/activity_controller.ex | 8 +- .../controllers/admin/report_controller.ex | 6 +- .../api/json/search/comment_controller.ex | 6 +- .../api/json/search/filter_controller.ex | 6 +- .../api/json/search/gallery_controller.ex | 6 +- .../api/json/search/image_controller.ex | 4 +- .../api/json/search/post_controller.ex | 6 +- .../api/json/search/tag_controller.ex | 6 +- .../controllers/api/rss/watched_controller.ex | 4 +- .../autocomplete/tag_controller.ex | 6 +- .../controllers/comment_controller.ex | 6 +- .../controllers/filter_controller.ex | 6 +- .../controllers/gallery_controller.ex | 12 +- .../controllers/image/navigate_controller.ex | 4 +- .../controllers/image/random_controller.ex | 4 +- .../controllers/image/related_controller.ex | 4 +- .../controllers/image_controller.ex | 4 +- .../controllers/post_controller.ex | 6 +- .../controllers/profile_controller.ex | 10 +- .../controllers/search_controller.ex | 6 +- .../controllers/tag_controller.ex | 8 +- lib/philomena_web/image_loader.ex | 4 +- lib/philomena_web/image_navigator.ex | 6 +- .../plugs/filter_forced_users_plug.ex | 4 +- lib/philomena_web/plugs/image_filter_plug.ex | 2 +- lib/philomena_web/stats_updater.ex | 6 +- .../views/api/json/image_view.ex | 2 +- lib/philomena_web/views/image_view.ex | 2 +- lib/philomena_web/views/layout_view.ex | 4 +- lib/philomena_web/views/tag_view.ex | 6 +- priv/repo/seeds.exs | 10 +- 81 files changed, 1185 insertions(+), 554 deletions(-) delete mode 100644 lib/philomena/batch.ex rename lib/philomena/comments/{elasticsearch_index.ex => search_index.ex} (94%) delete mode 100644 lib/philomena/elasticsearch.ex rename lib/philomena/filters/{elasticsearch_index.ex => search_index.ex} (95%) rename lib/philomena/galleries/{elasticsearch_index.ex => search_index.ex} (94%) rename lib/philomena/images/{elasticsearch_index.ex => search_index.ex} (98%) rename 
lib/philomena/posts/{elasticsearch_index.ex => search_index.ex} (96%) rename lib/philomena/reports/{elasticsearch_index.ex => search_index.ex} (96%) delete mode 100644 lib/philomena/search/string.ex rename lib/philomena/tags/{elasticsearch_index.ex => search_index.ex} (95%) create mode 100644 lib/philomena_query/batch.ex rename lib/{philomena/search => philomena_query/parse}/bool_parser.ex (84%) rename lib/{philomena/search => philomena_query/parse}/date_parser.ex (96%) rename lib/{philomena/search => philomena_query/parse}/evaluator.ex (90%) rename lib/{philomena/search => philomena_query/parse}/float_parser.ex (81%) rename lib/{philomena/search => philomena_query/parse}/helpers.ex (90%) rename lib/{philomena/search => philomena_query/parse}/int_parser.ex (75%) rename lib/{philomena/search => philomena_query/parse}/ip_parser.ex (98%) rename lib/{philomena/search => philomena_query/parse}/lexer.ex (94%) rename lib/{philomena/search => philomena_query/parse}/literal_parser.ex (89%) rename lib/{philomena/search => philomena_query/parse}/ngram_parser.ex (51%) rename lib/{philomena/search => philomena_query/parse}/parser.ex (73%) create mode 100644 lib/philomena_query/parse/string.ex rename lib/{philomena/search => philomena_query/parse}/term_range_parser.ex (90%) rename lib/{philomena => philomena_query}/relative_date.ex (63%) create mode 100644 lib/philomena_query/search.ex rename lib/{philomena/elasticsearch_index.ex => philomena_query/search_index.ex} (66%) diff --git a/README.md b/README.md index 2cf81be3..966d5e06 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Once the application has started, navigate to http://localhost:8080 and login wi If you are running Docker on Windows and the application crashes immediately upon startup, please ensure that `autocrlf` is set to `false` in your Git config, and then re-clone the repository. Additionally, it is recommended that you allocate at least 4GB of RAM to your Docker VM. 
-If you run into an Elasticsearch bootstrap error, you may need to increase your `max_map_count` on the host as follows: +If you run into an OpenSearch bootstrap error, you may need to increase your `max_map_count` on the host as follows: ``` sudo sysctl -w vm.max_map_count=262144 ``` diff --git a/docker/app/run-test b/docker/app/run-test index 679a438c..e55fa838 100755 --- a/docker/app/run-test +++ b/docker/app/run-test @@ -5,9 +5,9 @@ export MIX_ENV=test # Always install mix dependencies (cd /srv/philomena && mix deps.get) -# Sleep to allow Elasticsearch to finish initializing +# Sleep to allow OpenSearch to finish initializing # if it's not done doing whatever it does yet -echo -n "Waiting for Elasticsearch" +echo -n "Waiting for OpenSearch" until wget -qO - opensearch:9200; do echo -n "." diff --git a/lib/mix/tasks/reindex_all.ex b/lib/mix/tasks/reindex_all.ex index 7208c27a..c1f24114 100644 --- a/lib/mix/tasks/reindex_all.ex +++ b/lib/mix/tasks/reindex_all.ex @@ -1,7 +1,7 @@ defmodule Mix.Tasks.ReindexAll do use Mix.Task - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.{ Comments.Comment, @@ -27,7 +27,7 @@ defmodule Mix.Tasks.ReindexAll do {Filters, Filter} ] - @shortdoc "Destroys and recreates all Elasticsearch indices." + @shortdoc "Destroys and recreates all OpenSearch indices." 
@requirements ["app.start"] @impl Mix.Task def run(args) do @@ -38,23 +38,23 @@ defmodule Mix.Tasks.ReindexAll do @indices |> Enum.map(fn {context, schema} -> Task.async(fn -> - Elasticsearch.delete_index!(schema) - Elasticsearch.create_index!(schema) + Search.delete_index!(schema) + Search.create_index!(schema) - Elasticsearch.reindex(preload(schema, ^context.indexing_preloads()), schema) + Search.reindex(preload(schema, ^context.indexing_preloads()), schema) end) end) |> Task.await_many(:infinity) # Reports are a bit special - Elasticsearch.delete_index!(Report) - Elasticsearch.create_index!(Report) + Search.delete_index!(Report) + Search.create_index!(Report) Report |> preload([:user, :admin]) |> Repo.all() |> Polymorphic.load_polymorphic(reportable: [reportable_id: :reportable_type]) - |> Enum.map(&Elasticsearch.index_document(&1, Report)) + |> Enum.map(&Search.index_document(&1, Report)) end end diff --git a/lib/mix/tasks/upload_to_s3.ex b/lib/mix/tasks/upload_to_s3.ex index 0155514f..ae6b22fd 100644 --- a/lib/mix/tasks/upload_to_s3.ex +++ b/lib/mix/tasks/upload_to_s3.ex @@ -11,7 +11,7 @@ defmodule Mix.Tasks.UploadToS3 do alias Philomena.Images.Thumbnailer alias Philomena.Objects - alias Philomena.Batch + alias PhilomenaQuery.Batch import Ecto.Query @shortdoc "Dumps existing image files to S3 storage backend" diff --git a/lib/philomena/batch.ex b/lib/philomena/batch.ex deleted file mode 100644 index 29d03582..00000000 --- a/lib/philomena/batch.ex +++ /dev/null @@ -1,56 +0,0 @@ -defmodule Philomena.Batch do - alias Philomena.Repo - import Ecto.Query - - @doc """ - Load records from the given queryable in batches, to avoid locking. - - Valid options: - * :batch_size - * :id_field - """ - def record_batches(queryable, opts \\ [], callback) do - query_batches(queryable, opts, &callback.(Repo.all(&1))) - end - - @doc """ - Load queries from the given queryable in batches, to avoid locking. 
- - Valid options: - * :batch_size - * :id_field - """ - def query_batches(queryable, opts \\ [], callback) do - ids = load_ids(queryable, -1, opts) - - query_batches(queryable, opts, callback, ids) - end - - defp query_batches(_queryable, _opts, _callback, []), do: [] - - defp query_batches(queryable, opts, callback, ids) do - id_field = Keyword.get(opts, :id_field, :id) - - queryable - |> where([m], field(m, ^id_field) in ^ids) - |> callback.() - - ids = load_ids(queryable, Enum.max(ids), opts) - - query_batches(queryable, opts, callback, ids) - end - - defp load_ids(queryable, max_id, opts) do - id_field = Keyword.get(opts, :id_field, :id) - batch_size = Keyword.get(opts, :batch_size, 1000) - - queryable - |> exclude(:preload) - |> exclude(:order_by) - |> order_by(asc: ^id_field) - |> where([m], field(m, ^id_field) > ^max_id) - |> select([m], field(m, ^id_field)) - |> limit(^batch_size) - |> Repo.all() - end -end diff --git a/lib/philomena/comments.ex b/lib/philomena/comments.ex index b353a181..1d606ff0 100644 --- a/lib/philomena/comments.ex +++ b/lib/philomena/comments.ex @@ -7,11 +7,11 @@ defmodule Philomena.Comments do alias Ecto.Multi alias Philomena.Repo - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Reports.Report alias Philomena.UserStatistics alias Philomena.Comments.Comment - alias Philomena.Comments.ElasticsearchIndex, as: CommentIndex + alias Philomena.Comments.SearchIndex, as: CommentIndex alias Philomena.IndexWorker alias Philomena.Images.Image alias Philomena.Images @@ -265,7 +265,7 @@ defmodule Philomena.Comments do def user_name_reindex(old_name, new_name) do data = CommentIndex.user_name_update_by_query(old_name, new_name) - Elasticsearch.update_by_query(Comment, data.query, data.set_replacements, data.replacements) + Search.update_by_query(Comment, data.query, data.set_replacements, data.replacements) end def reindex_comment(%Comment{} = comment) do @@ -288,6 +288,6 @@ defmodule Philomena.Comments do Comment |> 
preload(^indexing_preloads()) |> where([c], field(c, ^column) in ^condition) - |> Elasticsearch.reindex(Comment) + |> Search.reindex(Comment) end end diff --git a/lib/philomena/comments/query.ex b/lib/philomena/comments/query.ex index 21df1ed1..6b2bea42 100644 --- a/lib/philomena/comments/query.ex +++ b/lib/philomena/comments/query.ex @@ -1,5 +1,5 @@ defmodule Philomena.Comments.Query do - alias Philomena.Search.Parser + alias PhilomenaQuery.Parse.Parser defp user_id_transform(_ctx, data) do case Integer.parse(data) do diff --git a/lib/philomena/comments/elasticsearch_index.ex b/lib/philomena/comments/search_index.ex similarity index 94% rename from lib/philomena/comments/elasticsearch_index.ex rename to lib/philomena/comments/search_index.ex index 82d65bcc..b08b1594 100644 --- a/lib/philomena/comments/elasticsearch_index.ex +++ b/lib/philomena/comments/search_index.ex @@ -1,5 +1,5 @@ -defmodule Philomena.Comments.ElasticsearchIndex do - @behaviour Philomena.ElasticsearchIndex +defmodule Philomena.Comments.SearchIndex do + @behaviour PhilomenaQuery.SearchIndex @impl true def index_name do diff --git a/lib/philomena/elasticsearch.ex b/lib/philomena/elasticsearch.ex deleted file mode 100644 index a85bfc50..00000000 --- a/lib/philomena/elasticsearch.ex +++ /dev/null @@ -1,294 +0,0 @@ -defmodule Philomena.Elasticsearch do - alias Philomena.Batch - alias Philomena.Repo - require Logger - import Ecto.Query - import Elastix.HTTP - - alias Philomena.Comments.Comment - alias Philomena.Galleries.Gallery - alias Philomena.Images.Image - alias Philomena.Posts.Post - alias Philomena.Reports.Report - alias Philomena.Tags.Tag - alias Philomena.Filters.Filter - - alias Philomena.Comments.ElasticsearchIndex, as: CommentIndex - alias Philomena.Galleries.ElasticsearchIndex, as: GalleryIndex - alias Philomena.Images.ElasticsearchIndex, as: ImageIndex - alias Philomena.Posts.ElasticsearchIndex, as: PostIndex - alias Philomena.Reports.ElasticsearchIndex, as: ReportIndex - alias 
Philomena.Tags.ElasticsearchIndex, as: TagIndex - alias Philomena.Filters.ElasticsearchIndex, as: FilterIndex - - defp index_for(Comment), do: CommentIndex - defp index_for(Gallery), do: GalleryIndex - defp index_for(Image), do: ImageIndex - defp index_for(Post), do: PostIndex - defp index_for(Report), do: ReportIndex - defp index_for(Tag), do: TagIndex - defp index_for(Filter), do: FilterIndex - - defp elastic_url do - Application.get_env(:philomena, :opensearch_url) - end - - def create_index!(module) do - index = index_for(module) - - Elastix.Index.create( - elastic_url(), - index.index_name(), - index.mapping() - ) - end - - def delete_index!(module) do - index = index_for(module) - - Elastix.Index.delete(elastic_url(), index.index_name()) - end - - def update_mapping!(module) do - index = index_for(module) - - index_name = index.index_name() - mapping = index.mapping().mappings.properties - - Elastix.Mapping.put(elastic_url(), index_name, "_doc", %{properties: mapping}, - include_type_name: true - ) - end - - def index_document(doc, module) do - index = index_for(module) - data = index.as_json(doc) - - Elastix.Document.index( - elastic_url(), - index.index_name(), - "_doc", - data.id, - data - ) - end - - def delete_document(id, module) do - index = index_for(module) - - Elastix.Document.delete( - elastic_url(), - index.index_name(), - "_doc", - id - ) - end - - def reindex(queryable, module, opts \\ []) do - index = index_for(module) - - Batch.record_batches(queryable, opts, fn records -> - lines = - Enum.flat_map(records, fn record -> - doc = index.as_json(record) - - [ - %{index: %{_index: index.index_name(), _id: doc.id}}, - doc - ] - end) - - Elastix.Bulk.post( - elastic_url(), - lines, - index: index.index_name(), - httpoison_options: [timeout: 30_000] - ) - end) - end - - def update_by_query(module, query_body, set_replacements, replacements) do - index = index_for(module) - - url = - elastic_url() - |> prepare_url([index.index_name(), 
"_update_by_query"]) - |> append_query_string(%{conflicts: "proceed", wait_for_completion: "false"}) - - # Elasticsearch "Painless" scripting language - script = """ - // Replace values in "sets" (arrays in the source document) - for (int i = 0; i < params.set_replacements.length; ++i) { - def replacement = params.set_replacements[i]; - def path = replacement.path; - def old_value = replacement.old; - def new_value = replacement.new; - def reference = ctx._source; - - for (int j = 0; j < path.length; ++j) { - reference = reference[path[j]]; - } - - for (int j = 0; j < reference.length; ++j) { - if (reference[j].equals(old_value)) { - reference[j] = new_value; - } - } - } - - // Replace values in standalone fields - for (int i = 0; i < params.replacements.length; ++i) { - def replacement = params.replacements[i]; - def path = replacement.path; - def old_value = replacement.old; - def new_value = replacement.new; - def reference = ctx._source; - - // A little bit more complicated: go up to the last one before it - // so that the value can actually be replaced - - for (int j = 0; j < path.length - 1; ++j) { - reference = reference[path[j]]; - } - - if (reference[path[path.length - 1]] != null && reference[path[path.length - 1]].equals(old_value)) { - reference[path[path.length - 1]] = new_value; - } - } - """ - - body = - Jason.encode!(%{ - script: %{ - source: script, - params: %{ - set_replacements: set_replacements, - replacements: replacements - } - }, - query: query_body - }) - - {:ok, %{status_code: 200}} = Elastix.HTTP.post(url, body) - end - - def search(module, query_body) do - index = index_for(module) - - {:ok, %{body: results, status_code: 200}} = - Elastix.Search.search( - elastic_url(), - index.index_name(), - [], - query_body - ) - - results - end - - def msearch(definitions) do - msearch_body = - Enum.flat_map(definitions, fn def -> - [ - %{index: index_for(def.module).index_name()}, - def.body - ] - end) - - {:ok, %{body: results, status_code: 200}} = 
- Elastix.Search.search( - elastic_url(), - "_all", - [], - msearch_body - ) - - results["responses"] - end - - def search_definition(module, elastic_query, pagination_params \\ %{}) do - page_number = pagination_params[:page_number] || 1 - page_size = pagination_params[:page_size] || 25 - - elastic_query = - Map.merge(elastic_query, %{ - from: (page_number - 1) * page_size, - size: page_size, - _source: false, - track_total_hits: true - }) - - %{ - module: module, - body: elastic_query, - page_number: page_number, - page_size: page_size - } - end - - defp process_results(results, definition) do - time = results["took"] - count = results["hits"]["total"]["value"] - entries = Enum.map(results["hits"]["hits"], &{String.to_integer(&1["_id"]), &1}) - - Logger.debug("[Elasticsearch] Query took #{time}ms") - Logger.debug("[Elasticsearch] #{Jason.encode!(definition.body)}") - - %Scrivener.Page{ - entries: entries, - page_number: definition.page_number, - page_size: definition.page_size, - total_entries: count, - total_pages: div(count + definition.page_size - 1, definition.page_size) - } - end - - def search_results(definition) do - process_results(search(definition.module, definition.body), definition) - end - - def msearch_results(definitions) do - Enum.map(Enum.zip(msearch(definitions), definitions), fn {result, definition} -> - process_results(result, definition) - end) - end - - defp load_records_from_results(results, ecto_queries) do - Enum.map(Enum.zip(results, ecto_queries), fn {page, ecto_query} -> - {ids, hits} = Enum.unzip(page.entries) - - records = - ecto_query - |> where([m], m.id in ^ids) - |> Repo.all() - |> Enum.sort_by(&Enum.find_index(ids, fn el -> el == &1.id end)) - - %{page | entries: Enum.zip(records, hits)} - end) - end - - def search_records_with_hits(definition, ecto_query) do - [page] = load_records_from_results([search_results(definition)], [ecto_query]) - - page - end - - def msearch_records_with_hits(definitions, ecto_queries) do - 
load_records_from_results(msearch_results(definitions), ecto_queries) - end - - def search_records(definition, ecto_query) do - page = search_records_with_hits(definition, ecto_query) - {records, _hits} = Enum.unzip(page.entries) - - %{page | entries: records} - end - - def msearch_records(definitions, ecto_queries) do - Enum.map(load_records_from_results(msearch_results(definitions), ecto_queries), fn page -> - {records, _hits} = Enum.unzip(page.entries) - - %{page | entries: records} - end) - end -end diff --git a/lib/philomena/filters.ex b/lib/philomena/filters.ex index 363926f9..6ff7daa4 100644 --- a/lib/philomena/filters.ex +++ b/lib/philomena/filters.ex @@ -7,8 +7,8 @@ defmodule Philomena.Filters do alias Philomena.Repo alias Philomena.Filters.Filter - alias Philomena.Elasticsearch - alias Philomena.Filters.ElasticsearchIndex, as: FilterIndex + alias PhilomenaQuery.Search + alias Philomena.Filters.SearchIndex, as: FilterIndex alias Philomena.IndexWorker @doc """ @@ -223,7 +223,7 @@ defmodule Philomena.Filters do def user_name_reindex(old_name, new_name) do data = FilterIndex.user_name_update_by_query(old_name, new_name) - Elasticsearch.update_by_query(Filter, data.query, data.set_replacements, data.replacements) + Search.update_by_query(Filter, data.query, data.set_replacements, data.replacements) end def reindex_filter(%Filter{} = filter) do @@ -233,7 +233,7 @@ defmodule Philomena.Filters do end def unindex_filter(%Filter{} = filter) do - Elasticsearch.delete_document(filter.id, Filter) + Search.delete_document(filter.id, Filter) filter end @@ -246,6 +246,6 @@ defmodule Philomena.Filters do Filter |> preload(^indexing_preloads()) |> where([f], field(f, ^column) in ^condition) - |> Elasticsearch.reindex(Filter) + |> Search.reindex(Filter) end end diff --git a/lib/philomena/filters/query.ex b/lib/philomena/filters/query.ex index d5cc0e4e..adf53b09 100644 --- a/lib/philomena/filters/query.ex +++ b/lib/philomena/filters/query.ex @@ -1,5 +1,5 @@ defmodule 
Philomena.Filters.Query do - alias Philomena.Search.Parser + alias PhilomenaQuery.Parse.Parser defp user_my_transform(%{user: %{id: id}}, "filters"), do: {:ok, %{term: %{user_id: id}}} diff --git a/lib/philomena/filters/elasticsearch_index.ex b/lib/philomena/filters/search_index.ex similarity index 95% rename from lib/philomena/filters/elasticsearch_index.ex rename to lib/philomena/filters/search_index.ex index 94f60f15..12a59385 100644 --- a/lib/philomena/filters/elasticsearch_index.ex +++ b/lib/philomena/filters/search_index.ex @@ -1,5 +1,5 @@ -defmodule Philomena.Filters.ElasticsearchIndex do - @behaviour Philomena.ElasticsearchIndex +defmodule Philomena.Filters.SearchIndex do + @behaviour PhilomenaQuery.SearchIndex @impl true def index_name do diff --git a/lib/philomena/galleries.ex b/lib/philomena/galleries.ex index d3c060ce..b0a61bd9 100644 --- a/lib/philomena/galleries.ex +++ b/lib/philomena/galleries.ex @@ -7,10 +7,10 @@ defmodule Philomena.Galleries do alias Ecto.Multi alias Philomena.Repo - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Galleries.Gallery alias Philomena.Galleries.Interaction - alias Philomena.Galleries.ElasticsearchIndex, as: GalleryIndex + alias Philomena.Galleries.SearchIndex, as: GalleryIndex alias Philomena.IndexWorker alias Philomena.GalleryReorderWorker alias Philomena.Notifications @@ -135,7 +135,7 @@ defmodule Philomena.Galleries do def user_name_reindex(old_name, new_name) do data = GalleryIndex.user_name_update_by_query(old_name, new_name) - Elasticsearch.update_by_query(Gallery, data.query, data.set_replacements, data.replacements) + Search.update_by_query(Gallery, data.query, data.set_replacements, data.replacements) end defp reindex_after_update({:ok, gallery}) do @@ -155,7 +155,7 @@ defmodule Philomena.Galleries do end def unindex_gallery(%Gallery{} = gallery) do - Elasticsearch.delete_document(gallery.id, Gallery) + Search.delete_document(gallery.id, Gallery) gallery end @@ -168,7 +168,7 @@ 
defmodule Philomena.Galleries do Gallery |> preload(^indexing_preloads()) |> where([g], field(g, ^column) in ^condition) - |> Elasticsearch.reindex(Gallery) + |> Search.reindex(Gallery) end def add_image_to_gallery(gallery, image) do diff --git a/lib/philomena/galleries/query.ex b/lib/philomena/galleries/query.ex index 8fd38c31..9baad469 100644 --- a/lib/philomena/galleries/query.ex +++ b/lib/philomena/galleries/query.ex @@ -1,5 +1,5 @@ defmodule Philomena.Galleries.Query do - alias Philomena.Search.Parser + alias PhilomenaQuery.Parse.Parser defp fields do [ diff --git a/lib/philomena/galleries/elasticsearch_index.ex b/lib/philomena/galleries/search_index.ex similarity index 94% rename from lib/philomena/galleries/elasticsearch_index.ex rename to lib/philomena/galleries/search_index.ex index c2071b45..37485b20 100644 --- a/lib/philomena/galleries/elasticsearch_index.ex +++ b/lib/philomena/galleries/search_index.ex @@ -1,5 +1,5 @@ -defmodule Philomena.Galleries.ElasticsearchIndex do - @behaviour Philomena.ElasticsearchIndex +defmodule Philomena.Galleries.SearchIndex do + @behaviour PhilomenaQuery.SearchIndex @impl true def index_name do diff --git a/lib/philomena/images.ex b/lib/philomena/images.ex index fe2e0ac2..95788ad5 100644 --- a/lib/philomena/images.ex +++ b/lib/philomena/images.ex @@ -9,7 +9,7 @@ defmodule Philomena.Images do alias Ecto.Multi alias Philomena.Repo - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.ThumbnailWorker alias Philomena.ImagePurgeWorker alias Philomena.DuplicateReports.DuplicateReport @@ -18,7 +18,7 @@ defmodule Philomena.Images do alias Philomena.Images.Tagging alias Philomena.Images.Thumbnailer alias Philomena.Images.Source - alias Philomena.Images.ElasticsearchIndex, as: ImageIndex + alias Philomena.Images.SearchIndex, as: ImageIndex alias Philomena.IndexWorker alias Philomena.ImageFeatures.ImageFeature alias Philomena.SourceChanges.SourceChange @@ -812,7 +812,7 @@ defmodule Philomena.Images do def 
user_name_reindex(old_name, new_name) do data = ImageIndex.user_name_update_by_query(old_name, new_name) - Elasticsearch.update_by_query(Image, data.query, data.set_replacements, data.replacements) + Search.update_by_query(Image, data.query, data.set_replacements, data.replacements) end def reindex_image(%Image{} = image) do @@ -845,7 +845,7 @@ defmodule Philomena.Images do Image |> preload(^indexing_preloads()) |> where([i], field(i, ^column) in ^condition) - |> Elasticsearch.reindex(Image) + |> Search.reindex(Image) end def purge_files(image, hidden_key) do diff --git a/lib/philomena/images/query.ex b/lib/philomena/images/query.ex index db00a20b..575c6e71 100644 --- a/lib/philomena/images/query.ex +++ b/lib/philomena/images/query.ex @@ -1,5 +1,5 @@ defmodule Philomena.Images.Query do - alias Philomena.Search.Parser + alias PhilomenaQuery.Parse.Parser alias Philomena.Repo defp gallery_id_transform(_ctx, value) do @@ -60,7 +60,7 @@ defmodule Philomena.Images.Query do do: {:error, "Unknown `my' value."} defp invalid_filter_guard(ctx, search_string) do - case parse(user_fields(), ctx, Philomena.Search.String.normalize(search_string)) do + case parse(user_fields(), ctx, PhilomenaQuery.Parse.String.normalize(search_string)) do {:ok, query} -> query _error -> %{match_all: %{}} end diff --git a/lib/philomena/images/elasticsearch_index.ex b/lib/philomena/images/search_index.ex similarity index 98% rename from lib/philomena/images/elasticsearch_index.ex rename to lib/philomena/images/search_index.ex index 0e5fb296..9fb29dc2 100644 --- a/lib/philomena/images/elasticsearch_index.ex +++ b/lib/philomena/images/search_index.ex @@ -1,5 +1,5 @@ -defmodule Philomena.Images.ElasticsearchIndex do - @behaviour Philomena.ElasticsearchIndex +defmodule Philomena.Images.SearchIndex do + @behaviour PhilomenaQuery.SearchIndex @impl true def index_name do diff --git a/lib/philomena/posts.ex b/lib/philomena/posts.ex index 7a7fb7a5..16795e6c 100644 --- a/lib/philomena/posts.ex +++ 
b/lib/philomena/posts.ex @@ -7,12 +7,12 @@ defmodule Philomena.Posts do alias Ecto.Multi alias Philomena.Repo - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Topics.Topic alias Philomena.Topics alias Philomena.UserStatistics alias Philomena.Posts.Post - alias Philomena.Posts.ElasticsearchIndex, as: PostIndex + alias Philomena.Posts.SearchIndex, as: PostIndex alias Philomena.IndexWorker alias Philomena.Forums.Forum alias Philomena.Notifications @@ -309,7 +309,7 @@ defmodule Philomena.Posts do def user_name_reindex(old_name, new_name) do data = PostIndex.user_name_update_by_query(old_name, new_name) - Elasticsearch.update_by_query(Post, data.query, data.set_replacements, data.replacements) + Search.update_by_query(Post, data.query, data.set_replacements, data.replacements) end defp reindex_after_update({:ok, post}) do @@ -336,6 +336,6 @@ defmodule Philomena.Posts do Post |> preload(^indexing_preloads()) |> where([p], field(p, ^column) in ^condition) - |> Elasticsearch.reindex(Post) + |> Search.reindex(Post) end end diff --git a/lib/philomena/posts/query.ex b/lib/philomena/posts/query.ex index 463216b9..27773776 100644 --- a/lib/philomena/posts/query.ex +++ b/lib/philomena/posts/query.ex @@ -1,5 +1,5 @@ defmodule Philomena.Posts.Query do - alias Philomena.Search.Parser + alias PhilomenaQuery.Parse.Parser defp user_id_transform(_ctx, data) do case Integer.parse(data) do diff --git a/lib/philomena/posts/elasticsearch_index.ex b/lib/philomena/posts/search_index.ex similarity index 96% rename from lib/philomena/posts/elasticsearch_index.ex rename to lib/philomena/posts/search_index.ex index 43a14284..b0fdb94c 100644 --- a/lib/philomena/posts/elasticsearch_index.ex +++ b/lib/philomena/posts/search_index.ex @@ -1,5 +1,5 @@ -defmodule Philomena.Posts.ElasticsearchIndex do - @behaviour Philomena.ElasticsearchIndex +defmodule Philomena.Posts.SearchIndex do + @behaviour PhilomenaQuery.SearchIndex @impl true def index_name do diff --git 
a/lib/philomena/reports.ex b/lib/philomena/reports.ex index 25a04e74..1639929d 100644 --- a/lib/philomena/reports.ex +++ b/lib/philomena/reports.ex @@ -6,9 +6,9 @@ defmodule Philomena.Reports do import Ecto.Query, warn: false alias Philomena.Repo - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Reports.Report - alias Philomena.Reports.ElasticsearchIndex, as: ReportIndex + alias Philomena.Reports.SearchIndex, as: ReportIndex alias Philomena.IndexWorker alias Philomena.Polymorphic @@ -152,7 +152,7 @@ defmodule Philomena.Reports do def user_name_reindex(old_name, new_name) do data = ReportIndex.user_name_update_by_query(old_name, new_name) - Elasticsearch.update_by_query(Report, data.query, data.set_replacements, data.replacements) + Search.update_by_query(Report, data.query, data.set_replacements, data.replacements) end defp reindex_after_update({:ok, report}) do @@ -183,7 +183,7 @@ defmodule Philomena.Reports do |> preload([:user, :admin]) |> Repo.all() |> Polymorphic.load_polymorphic(reportable: [reportable_id: :reportable_type]) - |> Enum.map(&Elasticsearch.index_document(&1, Report)) + |> Enum.map(&Search.index_document(&1, Report)) end def count_reports(user) do diff --git a/lib/philomena/reports/query.ex b/lib/philomena/reports/query.ex index c085a604..d5adc2cc 100644 --- a/lib/philomena/reports/query.ex +++ b/lib/philomena/reports/query.ex @@ -1,5 +1,5 @@ defmodule Philomena.Reports.Query do - alias Philomena.Search.Parser + alias PhilomenaQuery.Parse.Parser defp fields do [ diff --git a/lib/philomena/reports/elasticsearch_index.ex b/lib/philomena/reports/search_index.ex similarity index 96% rename from lib/philomena/reports/elasticsearch_index.ex rename to lib/philomena/reports/search_index.ex index bafcf673..15a08708 100644 --- a/lib/philomena/reports/elasticsearch_index.ex +++ b/lib/philomena/reports/search_index.ex @@ -1,5 +1,5 @@ -defmodule Philomena.Reports.ElasticsearchIndex do - @behaviour Philomena.ElasticsearchIndex 
+defmodule Philomena.Reports.SearchIndex do + @behaviour PhilomenaQuery.SearchIndex @impl true def index_name do diff --git a/lib/philomena/schema/search.ex b/lib/philomena/schema/search.ex index 47887b75..9b4e7e08 100644 --- a/lib/philomena/schema/search.ex +++ b/lib/philomena/schema/search.ex @@ -1,6 +1,6 @@ defmodule Philomena.Schema.Search do alias Philomena.Images.Query - alias Philomena.Search.String + alias PhilomenaQuery.Parse.String import Ecto.Changeset def validate_search(changeset, field, user, watched \\ false) do diff --git a/lib/philomena/schema/time.ex b/lib/philomena/schema/time.ex index af36538a..fff11419 100644 --- a/lib/philomena/schema/time.ex +++ b/lib/philomena/schema/time.ex @@ -1,5 +1,5 @@ defmodule Philomena.Schema.Time do - alias Philomena.RelativeDate + alias PhilomenaQuery.RelativeDate import Ecto.Changeset def assign_time(changeset, field, target_field) do diff --git a/lib/philomena/search/string.ex b/lib/philomena/search/string.ex deleted file mode 100644 index a7f1441c..00000000 --- a/lib/philomena/search/string.ex +++ /dev/null @@ -1,13 +0,0 @@ -defmodule Philomena.Search.String do - def normalize(nil) do - "" - end - - def normalize(str) do - str - |> String.replace("\r", "") - |> String.split("\n", trim: true) - |> Enum.map(fn s -> "(#{s})" end) - |> Enum.join(" || ") - end -end diff --git a/lib/philomena/tags.ex b/lib/philomena/tags.ex index d4d938b7..1f6e80c6 100644 --- a/lib/philomena/tags.ex +++ b/lib/philomena/tags.ex @@ -6,7 +6,7 @@ defmodule Philomena.Tags do import Ecto.Query, warn: false alias Philomena.Repo - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.IndexWorker alias Philomena.TagAliasWorker alias Philomena.TagUnaliasWorker @@ -194,12 +194,12 @@ defmodule Philomena.Tags do {:ok, tag} = Repo.delete(tag) - Elasticsearch.delete_document(tag.id, Tag) + Search.delete_document(tag.id, Tag) Image |> where([i], i.id in ^image_ids) |> preload(^Images.indexing_preloads()) - |> 
Elasticsearch.reindex(Image) + |> Search.reindex(Image) end def alias_tag(%Tag{} = tag, attrs) do @@ -301,13 +301,13 @@ defmodule Philomena.Tags do |> join(:inner, [i], _ in assoc(i, :tags)) |> where([_i, t], t.id == ^tag.id) |> preload(^Images.indexing_preloads()) - |> Elasticsearch.reindex(Image) + |> Search.reindex(Image) Filter |> where([f], fragment("? @> ARRAY[?]::integer[]", f.hidden_tag_ids, ^tag.id)) |> or_where([f], fragment("? @> ARRAY[?]::integer[]", f.spoilered_tag_ids, ^tag.id)) |> preload(^Filters.indexing_preloads()) - |> Elasticsearch.reindex(Filter) + |> Search.reindex(Filter) end def unalias_tag(%Tag{} = tag) do @@ -416,7 +416,7 @@ defmodule Philomena.Tags do Tag |> preload(^indexing_preloads()) |> where([t], field(t, ^column) in ^condition) - |> Elasticsearch.reindex(Tag) + |> Search.reindex(Tag) end alias Philomena.Tags.Implication diff --git a/lib/philomena/tags/query.ex b/lib/philomena/tags/query.ex index fd905775..5bfd2126 100644 --- a/lib/philomena/tags/query.ex +++ b/lib/philomena/tags/query.ex @@ -1,5 +1,5 @@ defmodule Philomena.Tags.Query do - alias Philomena.Search.Parser + alias PhilomenaQuery.Parse.Parser defp fields do [ diff --git a/lib/philomena/tags/elasticsearch_index.ex b/lib/philomena/tags/search_index.ex similarity index 95% rename from lib/philomena/tags/elasticsearch_index.ex rename to lib/philomena/tags/search_index.ex index 86a7e11e..be592d36 100644 --- a/lib/philomena/tags/elasticsearch_index.ex +++ b/lib/philomena/tags/search_index.ex @@ -1,5 +1,5 @@ -defmodule Philomena.Tags.ElasticsearchIndex do - @behaviour Philomena.ElasticsearchIndex +defmodule Philomena.Tags.SearchIndex do + @behaviour PhilomenaQuery.SearchIndex @impl true def index_name do diff --git a/lib/philomena/user_downvote_wipe.ex b/lib/philomena/user_downvote_wipe.ex index dd796c44..6fb217fe 100644 --- a/lib/philomena/user_downvote_wipe.ex +++ b/lib/philomena/user_downvote_wipe.ex @@ -1,6 +1,6 @@ defmodule Philomena.UserDownvoteWipe do - alias 
Philomena.Batch - alias Philomena.Elasticsearch + alias PhilomenaQuery.Batch + alias PhilomenaQuery.Search alias Philomena.Users alias Philomena.Users.User alias Philomena.Images.Image @@ -63,7 +63,7 @@ defmodule Philomena.UserDownvoteWipe do Image |> where([i], i.id in ^image_ids) |> preload(^Images.indexing_preloads()) - |> Elasticsearch.reindex(Image) + |> Search.reindex(Image) # allow time for indexing to catch up :timer.sleep(:timer.seconds(10)) diff --git a/lib/philomena/workers/index_worker.ex b/lib/philomena/workers/index_worker.ex index 3544d122..91ceaa3f 100644 --- a/lib/philomena/workers/index_worker.ex +++ b/lib/philomena/workers/index_worker.ex @@ -15,7 +15,7 @@ defmodule Philomena.IndexWorker do # Image # |> preload(^indexing_preloads()) # |> where([i], field(i, ^column) in ^condition) - # |> Elasticsearch.reindex(Image) + # |> Search.reindex(Image) # end # def perform(module, column, condition) do diff --git a/lib/philomena/workers/tag_change_revert_worker.ex b/lib/philomena/workers/tag_change_revert_worker.ex index 774bfd6a..519b8404 100644 --- a/lib/philomena/workers/tag_change_revert_worker.ex +++ b/lib/philomena/workers/tag_change_revert_worker.ex @@ -1,7 +1,7 @@ defmodule Philomena.TagChangeRevertWorker do alias Philomena.TagChanges.TagChange alias Philomena.TagChanges - alias Philomena.Batch + alias PhilomenaQuery.Batch alias Philomena.Repo import Ecto.Query diff --git a/lib/philomena_query/batch.ex b/lib/philomena_query/batch.ex new file mode 100644 index 00000000..ce78cb6a --- /dev/null +++ b/lib/philomena_query/batch.ex @@ -0,0 +1,111 @@ +defmodule PhilomenaQuery.Batch do + @moduledoc """ + Locking-reduced database batch operations. + + These operations are non-transactional by their very nature. This prevents inadvertent + downtimes due to blocking, but can result in consistency errors in the database, + and so should be used sparingly. 
+ + They are best suited for when large numbers of rows can be expected to be processed, + as doing so may otherwise result in Ecto timing out the query. + """ + + alias Philomena.Repo + import Ecto.Query + + @type queryable :: any() + + @type batch_size :: {:batch_size, integer()} + @type id_field :: {:id_field, atom()} + @type batch_options :: [batch_size() | id_field()] + + @type record_batch_callback :: ([struct()] -> any()) + @type query_batch_callback :: ([Ecto.Query.t()] -> any()) + + @doc """ + Execute a callback with lists of schema structures on a queryable, + using batches to avoid locking. + + Valid options: + * `batch_size` (integer) - the number of records to load per batch + * `id_field` (atom) - the name of the field containing the ID + + ## Example + + queryable = from i in Image, where: i.image_width >= 1920 + + cb = fn images -> + Enum.each(images, &IO.inspect(&1.id)) + end + + PhilomenaQuery.Batch.record_batches(queryable, cb) + + """ + @spec record_batches(queryable(), batch_options(), record_batch_callback()) :: [] + def record_batches(queryable, opts \\ [], callback) do + query_batches(queryable, opts, &callback.(Repo.all(&1))) + end + + @doc """ + Execute a callback with bulk queries on a queryable, using batches to avoid locking. + + Valid options: + * `batch_size` (integer) - the number of records to load per batch + * `id_field` (atom) - the name of the field containing the ID + + > #### Info {: .info} + > + > If you are looking to receive schema structures (e.g., you are querying for `Image`s, + > and you want to receive `Image` objects, then use `record_batches/3` instead. + + An `m:Ecto.Query` which selects all IDs in the current batch is passed into the callback + during each invocation. 
+ + ## Example + + queryable = from ui in ImageVote, where: ui.user_id == 1234 + + opts = [id_field: :image_id] + + cb = fn bulk_query -> + Repo.delete_all(bulk_query) + end + + PhilomenaQuery.Batch.query_batches(queryable, opts, cb) + + """ + @spec query_batches(queryable(), batch_options(), query_batch_callback()) :: [] + def query_batches(queryable, opts \\ [], callback) do + ids = load_ids(queryable, -1, opts) + + query_batches(queryable, opts, callback, ids) + end + + defp query_batches(_queryable, _opts, _callback, []), do: [] + + defp query_batches(queryable, opts, callback, ids) do + id_field = Keyword.get(opts, :id_field, :id) + + queryable + |> where([m], field(m, ^id_field) in ^ids) + |> callback.() + + ids = load_ids(queryable, Enum.max(ids), opts) + + query_batches(queryable, opts, callback, ids) + end + + defp load_ids(queryable, max_id, opts) do + id_field = Keyword.get(opts, :id_field, :id) + batch_size = Keyword.get(opts, :batch_size, 1000) + + queryable + |> exclude(:preload) + |> exclude(:order_by) + |> order_by(asc: ^id_field) + |> where([m], field(m, ^id_field) > ^max_id) + |> select([m], field(m, ^id_field)) + |> limit(^batch_size) + |> Repo.all() + end +end diff --git a/lib/philomena/search/bool_parser.ex b/lib/philomena_query/parse/bool_parser.ex similarity index 84% rename from lib/philomena/search/bool_parser.ex rename to lib/philomena_query/parse/bool_parser.ex index bb389096..c897f7e3 100644 --- a/lib/philomena/search/bool_parser.ex +++ b/lib/philomena_query/parse/bool_parser.ex @@ -1,4 +1,6 @@ -defmodule Philomena.Search.BoolParser do +defmodule PhilomenaQuery.Parse.BoolParser do + @moduledoc false + import NimbleParsec space = diff --git a/lib/philomena/search/date_parser.ex b/lib/philomena_query/parse/date_parser.ex similarity index 96% rename from lib/philomena/search/date_parser.ex rename to lib/philomena_query/parse/date_parser.ex index 5c8a8cb1..db590af0 100644 --- a/lib/philomena/search/date_parser.ex +++ 
b/lib/philomena_query/parse/date_parser.ex @@ -1,4 +1,6 @@ -defmodule Philomena.Search.DateParser do +defmodule PhilomenaQuery.Parse.DateParser do + @moduledoc false + import NimbleParsec @dialyzer [:no_match, :no_unused] @@ -100,9 +102,9 @@ defmodule Philomena.Search.DateParser do end defp relative_datetime(_rest, [count, scale], context, _line, _offset) do - millenium_seconds = 31_536_000_000 + millennium_seconds = 31_536_000_000 - case count * scale <= millenium_seconds do + case count * scale <= millennium_seconds do true -> now = DateTime.utc_now() @@ -113,7 +115,7 @@ defmodule Philomena.Search.DateParser do _false -> {:error, - "invalid date format in input; requested time #{count * scale} seconds is over a millenium ago"} + "invalid date format in input; requested time #{count * scale} seconds is over a millennium ago"} end end diff --git a/lib/philomena/search/evaluator.ex b/lib/philomena_query/parse/evaluator.ex similarity index 90% rename from lib/philomena/search/evaluator.ex rename to lib/philomena_query/parse/evaluator.ex index 25ef2337..6cb6d6cc 100644 --- a/lib/philomena/search/evaluator.ex +++ b/lib/philomena_query/parse/evaluator.ex @@ -1,6 +1,25 @@ -defmodule Philomena.Search.Evaluator do +defmodule PhilomenaQuery.Parse.Evaluator do + @moduledoc """ + Tools to evaluate whether a search query matches a document. + """ + # TODO: rethink the necessity of this module. - # Can we do this in elasticsearch instead? + # Can we do this in the search engine instead? + + @doc """ + Check whether a hit is matched by a query. + + - `doc` - a document definition. This could be returned by the index's `as_json/1` function. 
+ - `query` - a search query + + ## Example + + iex> Evaluator.hits?(def, %{term: %{tags: "safe"}}) + true + + """ + @spec hits?(map(), map()) :: boolean() + def hits?(doc, query) def hits?(doc, %{bool: bool_query}) do must(doc, bool_query[:must]) and @@ -101,7 +120,7 @@ defmodule Philomena.Search.Evaluator do defp atomify(atom) when is_atom(atom), do: atom defp atomify(string) when is_binary(string), do: String.to_existing_atom(string) - def levenshtein(s1, s2) do + defp levenshtein(s1, s2) do {dist, _lookup} = levenshtein_lookup(s1, s2, %{}, 0) dist diff --git a/lib/philomena/search/float_parser.ex b/lib/philomena_query/parse/float_parser.ex similarity index 81% rename from lib/philomena/search/float_parser.ex rename to lib/philomena_query/parse/float_parser.ex index 8b5321e2..2fa253fc 100644 --- a/lib/philomena/search/float_parser.ex +++ b/lib/philomena_query/parse/float_parser.ex @@ -1,8 +1,10 @@ -defmodule Philomena.Search.FloatParser do +defmodule PhilomenaQuery.Parse.FloatParser do + @moduledoc false + import NimbleParsec - defp to_number(input), do: Philomena.Search.Helpers.to_number(input) - defp range(input), do: Philomena.Search.Helpers.range(input) + defp to_number(input), do: PhilomenaQuery.Parse.Helpers.to_number(input) + defp range(input), do: PhilomenaQuery.Parse.Helpers.range(input) space = choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")]) diff --git a/lib/philomena/search/helpers.ex b/lib/philomena_query/parse/helpers.ex similarity index 90% rename from lib/philomena/search/helpers.ex rename to lib/philomena_query/parse/helpers.ex index 33a8597b..eb1e72b9 100644 --- a/lib/philomena/search/helpers.ex +++ b/lib/philomena_query/parse/helpers.ex @@ -1,4 +1,6 @@ -defmodule Philomena.Search.Helpers do +defmodule PhilomenaQuery.Parse.Helpers do + @moduledoc false + # Apparently, it's too hard for the standard library to to parse a number # as a float if it doesn't contain a decimal point. 
WTF def to_number(term) do diff --git a/lib/philomena/search/int_parser.ex b/lib/philomena_query/parse/int_parser.ex similarity index 75% rename from lib/philomena/search/int_parser.ex rename to lib/philomena_query/parse/int_parser.ex index d6e4f509..a3f7842a 100644 --- a/lib/philomena/search/int_parser.ex +++ b/lib/philomena_query/parse/int_parser.ex @@ -1,8 +1,10 @@ -defmodule Philomena.Search.IntParser do +defmodule PhilomenaQuery.Parse.IntParser do + @moduledoc false + import NimbleParsec - defp to_int(input), do: Philomena.Search.Helpers.to_int(input) - defp range(input), do: Philomena.Search.Helpers.range(input) + defp to_int(input), do: PhilomenaQuery.Parse.Helpers.to_int(input) + defp range(input), do: PhilomenaQuery.Parse.Helpers.range(input) space = choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), string("\f")]) diff --git a/lib/philomena/search/ip_parser.ex b/lib/philomena_query/parse/ip_parser.ex similarity index 98% rename from lib/philomena/search/ip_parser.ex rename to lib/philomena_query/parse/ip_parser.ex index c7476a0b..28472407 100644 --- a/lib/philomena/search/ip_parser.ex +++ b/lib/philomena_query/parse/ip_parser.ex @@ -1,4 +1,6 @@ -defmodule Philomena.Search.IpParser do +defmodule PhilomenaQuery.Parse.IpParser do + @moduledoc false + import NimbleParsec ipv4_octet = diff --git a/lib/philomena/search/lexer.ex b/lib/philomena_query/parse/lexer.ex similarity index 94% rename from lib/philomena/search/lexer.ex rename to lib/philomena_query/parse/lexer.ex index 9096ee0e..648099a4 100644 --- a/lib/philomena/search/lexer.ex +++ b/lib/philomena_query/parse/lexer.ex @@ -1,7 +1,9 @@ -defmodule Philomena.Search.Lexer do +defmodule PhilomenaQuery.Parse.Lexer do + @moduledoc false + import NimbleParsec - defp to_number(input), do: Philomena.Search.Helpers.to_number(input) + defp to_number(input), do: PhilomenaQuery.Parse.Helpers.to_number(input) space = choice([string(" "), string("\t"), string("\n"), string("\r"), string("\v"), 
string("\f")]) diff --git a/lib/philomena/search/literal_parser.ex b/lib/philomena_query/parse/literal_parser.ex similarity index 89% rename from lib/philomena/search/literal_parser.ex rename to lib/philomena_query/parse/literal_parser.ex index 59224960..253d647f 100644 --- a/lib/philomena/search/literal_parser.ex +++ b/lib/philomena_query/parse/literal_parser.ex @@ -1,8 +1,10 @@ -defmodule Philomena.Search.LiteralParser do +defmodule PhilomenaQuery.Parse.LiteralParser do + @moduledoc false + import NimbleParsec @dialyzer [:no_match, :no_unused] - defp to_number(input), do: Philomena.Search.Helpers.to_number(input) + defp to_number(input), do: PhilomenaQuery.Parse.Helpers.to_number(input) float = ascii_string([?0..?9], min: 1) diff --git a/lib/philomena/search/ngram_parser.ex b/lib/philomena_query/parse/ngram_parser.ex similarity index 51% rename from lib/philomena/search/ngram_parser.ex rename to lib/philomena_query/parse/ngram_parser.ex index dd8d41ca..7f2060e2 100644 --- a/lib/philomena/search/ngram_parser.ex +++ b/lib/philomena_query/parse/ngram_parser.ex @@ -1,5 +1,7 @@ -defmodule Philomena.Search.NgramParser do - alias Philomena.Search.LiteralParser +defmodule PhilomenaQuery.Parse.NgramParser do + @moduledoc false + + alias PhilomenaQuery.Parse.LiteralParser # Dummy stub. Used for convenient parser implementation. def parse(input), do: LiteralParser.parse(input) diff --git a/lib/philomena/search/parser.ex b/lib/philomena_query/parse/parser.ex similarity index 73% rename from lib/philomena/search/parser.ex rename to lib/philomena_query/parse/parser.ex index b2f5e47e..ba4b0597 100644 --- a/lib/philomena/search/parser.ex +++ b/lib/philomena_query/parse/parser.ex @@ -1,5 +1,34 @@ -defmodule Philomena.Search.Parser do - alias Philomena.Search.{ +defmodule PhilomenaQuery.Parse.Parser do + @moduledoc """ + A search language for safely evaluating user-input queries. 
+ + The query language supports the following features: + - Disjunction (OR/||) + - Conjunction (AND/&&/,) + - Negation (NOT/-/!) + - Expression boosting + - Parenthetical grouping + + Several types of terms are supported: + - Booleans + - Dates (absolute and relative, time points and ranges) + - Floats + - Integers + - IP Addresses + - Literal text + - Stemmed text + + Specific terms can support the following features: + - Range queries (.lte/.lt/.gte/.gt) + - Fuzzing (~0.5) + - Wildcarding (*?) + - CIDR masks (/27) + + The rich search expression grammar is arguably a defining feature of Philomena, and its + feature set makes it stand out in comparison to traditional boorus. + """ + + alias PhilomenaQuery.Parse.{ BoolParser, DateParser, FloatParser, @@ -12,6 +41,31 @@ defmodule Philomena.Search.Parser do TermRangeParser } + @type context :: any() + @type query :: map() + + @type default_field_type :: :term | :ngram + + @type transform_result :: {:ok, query()} | {:error, String.t()} + @type transform :: (context, String.t() -> transform_result()) + + @type t :: %__MODULE__{ + default_field: {String.t(), default_field_type()}, + bool_fields: [String.t()], + date_fields: [String.t()], + float_fields: [String.t()], + int_fields: [String.t()], + ip_fields: [String.t()], + literal_fields: [String.t()], + ngram_fields: [String.t()], + custom_fields: [String.t()], + transforms: %{String.t() => transform()}, + aliases: %{String.t() => String.t()}, + no_downcase_fields: [String.t()], + __fields__: map(), + __data__: context() + } + defstruct [ :default_field, bool_fields: [], @@ -31,6 +85,37 @@ defmodule Philomena.Search.Parser do @max_clause_count 512 + @doc """ + Creates a `Parser` suitable for safely parsing user-input queries. + + Fields refer to attributes of the indexed document which will be searchable with + `m:PhilomenaQuery.Search`. 
+ + Available options: + - `bool_fields` - a list of field names parsed as booleans + - `float_fields` - a list of field names parsed as floats + - `int_fields` - a list of field names parsed as integers + - `ip_fields` - a list of field names parsed as IP CIDR masks + - `literal_fields` - wildcardable fields which are searched as the exact value + - `ngram_fields` - wildcardable fields which are searched as stemmed values + - `custom_fields` - fields which do not exist on the document and are created by a callback + - `transforms` - a map of custom field names to transform functions + - `aliases` - a map of field names to the names they should have in the search engine + - `no_downcase_fields` - a list of field names which do not have string downcasing applied + + ## Example + + options = [ + bool_fields: ["hidden"], + custom_fields: ["example"], + transforms: %{"example" => fn _ctx, term -> {:ok, %{term: %{term => "example"}}} end}, + aliases: %{"hidden" => "hidden_from_users"} + ] + + Parser.parser(options) + + """ + @spec parser(keyword()) :: t() def parser(options) do parser = struct(Parser, options) @@ -47,6 +132,34 @@ defmodule Philomena.Search.Parser do %{parser | __fields__: Map.new(fields)} end + @doc """ + Parse the query into a definition suitable for the search engine. + + The parser argument should have been created with a previous call to `parser/1`. When the + `context` argument is passed, it becomes the first argument to any transform functions defined + in the `transforms` option. 
+ + ## Example + + iex> Parser.parse(parser, "safe") + {:ok, %{term: %{"namespaced_tags.name" => "safe"}}} + + iex> Parser.parse(parser, "safe OR solo") + {:ok, + %{ + bool: %{ + should: [ + %{term: %{"namespaced_tags.name" => "safe"}}, + %{term: %{"namespaced_tags.name" => "solo"}} + ] + } + }} + + iex> Parser.parse(parser, ")") + {:error, "Imbalanced parentheses."} + + """ + @spec parse(t(), String.t(), context()) :: {:ok, query()} | {:error, String.t()} def parse(parser, input, context \\ nil) # Empty search should emit a match_none. diff --git a/lib/philomena_query/parse/string.ex b/lib/philomena_query/parse/string.ex new file mode 100644 index 00000000..f6dc2fa0 --- /dev/null +++ b/lib/philomena_query/parse/string.ex @@ -0,0 +1,32 @@ +defmodule PhilomenaQuery.Parse.String do + @moduledoc """ + Search string normalization utilities. + """ + + @doc """ + Convert a multiline or empty search string into a single search string. + + ## Examples + + iex> PhilomenaQuery.Parse.String.normalize(nil) + "" + + iex> PhilomenaQuery.Parse.String.normalize("foo\nbar") + "(foo) || (bar)" + + """ + @spec normalize(String.t() | nil) :: String.t() + def normalize(str) + + def normalize(nil) do + "" + end + + def normalize(str) do + str + |> String.replace("\r", "") + |> String.split("\n", trim: true) + |> Enum.map(fn s -> "(#{s})" end) + |> Enum.join(" || ") + end +end diff --git a/lib/philomena/search/term_range_parser.ex b/lib/philomena_query/parse/term_range_parser.ex similarity index 90% rename from lib/philomena/search/term_range_parser.ex rename to lib/philomena_query/parse/term_range_parser.ex index e956aed5..f5613da4 100644 --- a/lib/philomena/search/term_range_parser.ex +++ b/lib/philomena_query/parse/term_range_parser.ex @@ -1,6 +1,8 @@ -defmodule Philomena.Search.TermRangeParser do - alias Philomena.Search.LiteralParser - alias Philomena.Search.NgramParser +defmodule PhilomenaQuery.Parse.TermRangeParser do + @moduledoc false + + alias PhilomenaQuery.Parse.LiteralParser + alias 
PhilomenaQuery.Parse.NgramParser # Unfortunately, we can't use NimbleParsec here. It requires # the compiler, and we're not in a macro environment. diff --git a/lib/philomena/relative_date.ex b/lib/philomena_query/relative_date.ex similarity index 63% rename from lib/philomena/relative_date.ex rename to lib/philomena_query/relative_date.ex index 1c678d15..35b0fc82 100644 --- a/lib/philomena/relative_date.ex +++ b/lib/philomena_query/relative_date.ex @@ -1,4 +1,8 @@ -defmodule Philomena.RelativeDate do +defmodule PhilomenaQuery.RelativeDate do + @moduledoc """ + Relative date parsing, for strings like "a week ago" or "5 years from now". + """ + import NimbleParsec number_words = @@ -72,6 +76,13 @@ defmodule Philomena.RelativeDate do defparsecp(:relative_date, relative_date) + @doc """ + Parse an absolute date in valid ISO 8601 format, or an English-language relative date. + + See `parse_absolute/1` and `parse_relative/1` for examples of what may be accepted + by this function. + """ + @spec parse(String.t()) :: {:ok, DateTime.t()} | {:error, any()} def parse(input) do input = input @@ -87,6 +98,22 @@ defmodule Philomena.RelativeDate do end end + @doc """ + Parse an absolute date, given in a valid ISO 8601 format. + + ## Example + + iex> PhilomenaQuery.RelativeDate.parse_absolute("2024-01-01T00:00:00Z") + {:ok, ~U[2024-01-01 00:00:00Z]} + + iex> PhilomenaQuery.RelativeDate.parse_absolute("2024-01-01T00:00:00-01:00") + {:ok, ~U[2024-01-01 01:00:00Z]} + + iex> PhilomenaQuery.RelativeDate.parse_absolute("2024") + {:error, "Parse error"} + + """ + @spec parse_absolute(String.t()) :: {:ok, DateTime.t()} | {:error, any()} def parse_absolute(input) do case DateTime.from_iso8601(input) do {:ok, datetime, _offset} -> @@ -97,6 +124,25 @@ defmodule Philomena.RelativeDate do end end + @doc """ + Parse an English-language relative date. Accepts "moon" to mean 1000 years from now. 
+ + ## Example + + iex> PhilomenaQuery.RelativeDate.parse_relative("a year ago") + {:ok, ~U[2023-01-01 00:00:00Z]} + + iex> PhilomenaQuery.RelativeDate.parse_relative("three days from now") + {:ok, ~U[2024-01-04 00:00:00Z]} + + iex> PhilomenaQuery.RelativeDate.parse_relative("moon") + {:ok, ~U[3024-01-01 00:00:00Z]} + + iex> PhilomenaQuery.RelativeDate.parse_relative("2024") + {:error, "Parse error"} + + """ + @spec parse_relative(String.t()) :: {:ok, DateTime.t()} | {:error, any()} def parse_relative(input) do case relative_date(input) do {:ok, [moon: _moon], _1, _2, _3, _4} -> diff --git a/lib/philomena_query/search.ex b/lib/philomena_query/search.ex new file mode 100644 index 00000000..13a34f44 --- /dev/null +++ b/lib/philomena_query/search.ex @@ -0,0 +1,654 @@ +defmodule PhilomenaQuery.Search do + @moduledoc """ + Low-level search engine interaction. + + This module generates and delivers search bodies to the OpenSearch backend. + + Note that before an index can be used to index or query documents, a call to + `create_index!/1` must be made. When setting up an application, or dealing with data loss + in the search engine, you must call `create_index!/1` before running an indexing task. 
+ """ + + alias PhilomenaQuery.Batch + alias Philomena.Repo + require Logger + import Ecto.Query + import Elastix.HTTP + + alias Philomena.Comments.Comment + alias Philomena.Galleries.Gallery + alias Philomena.Images.Image + alias Philomena.Posts.Post + alias Philomena.Reports.Report + alias Philomena.Tags.Tag + alias Philomena.Filters.Filter + + alias Philomena.Comments.SearchIndex, as: CommentIndex + alias Philomena.Galleries.SearchIndex, as: GalleryIndex + alias Philomena.Images.SearchIndex, as: ImageIndex + alias Philomena.Posts.SearchIndex, as: PostIndex + alias Philomena.Reports.SearchIndex, as: ReportIndex + alias Philomena.Tags.SearchIndex, as: TagIndex + alias Philomena.Filters.SearchIndex, as: FilterIndex + + defp index_for(Comment), do: CommentIndex + defp index_for(Gallery), do: GalleryIndex + defp index_for(Image), do: ImageIndex + defp index_for(Post), do: PostIndex + defp index_for(Report), do: ReportIndex + defp index_for(Tag), do: TagIndex + defp index_for(Filter), do: FilterIndex + + defp opensearch_url do + Application.get_env(:philomena, :opensearch_url) + end + + @type index_module :: module() + @type queryable :: any() + @type query_body :: map() + + @type replacement :: %{ + path: [String.t()], + old: term(), + new: term() + } + + @type search_definition :: %{ + module: index_module(), + body: query_body(), + page_number: integer(), + page_size: integer() + } + + @type pagination_params :: %{ + optional(:page_number) => integer(), + optional(:page_size) => integer() + } + + @doc ~S""" + Create the index with the module's index name and mapping. + + `PUT /#{index_name}` + + You **must** use this function before indexing documents in order for the mapping to be created + correctly. If you index documents without a mapping created, the search engine will create a + mapping which does not contain the correct types for mapping fields, which will require + destroying and recreating the index. 
+ + ## Example + + iex> Search.create_index!(Image) + + """ + @spec create_index!(index_module()) :: any() + def create_index!(module) do + index = index_for(module) + + Elastix.Index.create( + opensearch_url(), + index.index_name(), + index.mapping() + ) + end + + @doc ~S""" + Delete the index with the module's index name. + + `DELETE /#{index_name}` + + This undoes the effect of `create_index!/1` and removes the index permanently, deleting + all indexed documents within. + + ## Example + + iex> Search.delete_index!(Image) + + """ + @spec delete_index!(index_module()) :: any() + def delete_index!(module) do + index = index_for(module) + + Elastix.Index.delete(opensearch_url(), index.index_name()) + end + + @doc ~S""" + Update the schema mapping for the module's index name. + + `PUT /#{index_name}/_mapping` + + This is used to add new fields to an existing search mapping. This cannot be used to + remove fields; removing fields requires recreating the index. + + ## Example + + iex> Search.update_mapping!(Image) + + """ + @spec update_mapping!(index_module()) :: any() + def update_mapping!(module) do + index = index_for(module) + + index_name = index.index_name() + mapping = index.mapping().mappings.properties + + Elastix.Mapping.put(opensearch_url(), index_name, "_doc", %{properties: mapping}, + include_type_name: true + ) + end + + @doc ~S""" + Add a single document to the index named by the module. + + `PUT /#{index_name}/_doc/#{id}` + + This allows the search engine to query the document. + + Note that indexing is near real-time and requires an index refresh before the document will + become visible. Unless changed in the mapping, this happens after 5 seconds have elapsed. 
+ + ## Example + + iex> Search.index_document(%Image{...}, Image) + + """ + @spec index_document(struct(), index_module()) :: any() + def index_document(doc, module) do + index = index_for(module) + data = index.as_json(doc) + + Elastix.Document.index( + opensearch_url(), + index.index_name(), + "_doc", + data.id, + data + ) + end + + @doc ~S""" + Remove a single document from the index named by the module. + + `DELETE /#{index_name}/_doc/#{id}` + + This undoes the effect of `index_document/2`; it instructs the search engine to discard + the document and no longer return it in queries. + + Note that indexing is near real-time and requires an index refresh before the document will + be removed. Unless changed in the mapping, this happens after 5 seconds have elapsed. + + ## Example + + iex> Search.delete_document(image.id, Image) + + """ + @spec delete_document(term(), index_module()) :: any() + def delete_document(id, module) do + index = index_for(module) + + Elastix.Document.delete( + opensearch_url(), + index.index_name(), + "_doc", + id + ) + end + + @doc """ + Efficiently index a batch of documents in the index named by the module. + + This function is substantially more efficient than running `index_document/2` for + each instance of a schema struct and can index with hundreds of times the throughput. + + The queryable should be a schema type with its indexing preloads included in + the query. The options are forwarded to `PhilomenaQuery.Batch.record_batches/3`. + + Note that indexing is near real-time and requires an index refresh before documents will + become visible. Unless changed in the mapping, this happens after 5 seconds have elapsed. 
+ + ## Example + + query = + from i in Image, + where: i.id < 100_000, + preload: ^Images.indexing_preloads() + + Search.reindex(query, Image, batch_size: 5000) + + """ + @spec reindex(queryable(), index_module(), Batch.batch_options()) :: [] + def reindex(queryable, module, opts \\ []) do + index = index_for(module) + + Batch.record_batches(queryable, opts, fn records -> + lines = + Enum.flat_map(records, fn record -> + doc = index.as_json(record) + + [ + %{index: %{_index: index.index_name(), _id: doc.id}}, + doc + ] + end) + + Elastix.Bulk.post( + opensearch_url(), + lines, + index: index.index_name(), + httpoison_options: [timeout: 30_000] + ) + end) + end + + @doc ~S""" + Asynchronously update all documents in the given index matching a query. + + `POST /#{index_name}/_update_by_query` + + This is used to replace values in documents on the fly without requiring a more-expensive + reindex operation from the database. + + `set_replacements` are used to rename values in fields which are conceptually sets (arrays). + `replacements` are used to rename values in fields which are standalone terms. + + Both `replacements` and `set_replacements` may be specified. Specifying neither will waste + the search engine's time evaluating the query and indexing the documents, so be sure to + specify at least one. + + This function does not wait for completion of the update. 
+ + ## Examples + + query_body = %{term: %{"namespaced_tags.name" => old_name}} + replacement = %{path: ["namespaced_tags", "name"], old: old_name, new: new_name} + Search.update_by_query(Image, query_body, [], [replacement]) + + query_body = %{term: %{author: old_name}} + set_replacement = %{path: ["author"], old: old_name, new: new_name} + Search.update_by_query(Post, query_body, [set_replacement], []) + + """ + @spec update_by_query(index_module(), query_body(), [replacement()], [replacement()]) :: any() + def update_by_query(module, query_body, set_replacements, replacements) do + index = index_for(module) + + url = + opensearch_url() + |> prepare_url([index.index_name(), "_update_by_query"]) + |> append_query_string(%{conflicts: "proceed", wait_for_completion: "false"}) + + # "Painless" scripting language + script = """ + // Replace values in "sets" (arrays in the source document) + for (int i = 0; i < params.set_replacements.length; ++i) { + def replacement = params.set_replacements[i]; + def path = replacement.path; + def old_value = replacement.old; + def new_value = replacement.new; + def reference = ctx._source; + + for (int j = 0; j < path.length; ++j) { + reference = reference[path[j]]; + } + + for (int j = 0; j < reference.length; ++j) { + if (reference[j].equals(old_value)) { + reference[j] = new_value; + } + } + } + + // Replace values in standalone fields + for (int i = 0; i < params.replacements.length; ++i) { + def replacement = params.replacements[i]; + def path = replacement.path; + def old_value = replacement.old; + def new_value = replacement.new; + def reference = ctx._source; + + // A little bit more complicated: go up to the last one before it + // so that the value can actually be replaced + + for (int j = 0; j < path.length - 1; ++j) { + reference = reference[path[j]]; + } + + if (reference[path[path.length - 1]] != null && reference[path[path.length - 1]].equals(old_value)) { + reference[path[path.length - 1]] = new_value; + } + } + """ 
+ + body = + Jason.encode!(%{ + script: %{ + source: script, + params: %{ + set_replacements: set_replacements, + replacements: replacements + } + }, + query: query_body + }) + + {:ok, %{status_code: 200}} = Elastix.HTTP.post(url, body) + end + + @doc ~S""" + Search the index named by the module. + + `GET /#{index_name}/_search` + + Given a query body, this returns the raw query results. + + ## Example + + iex> Search.search(Image, %{query: %{match_all: %{}}}) + %{ + "_shards" => %{"failed" => 0, "skipped" => 0, "successful" => 5, "total" => 5}, + "hits" => %{ + "hits" => [%{"_id" => "1", "_index" => "images", "_score" => 1.0, ...}, ...], + "max_score" => 1.0, + "total" => %{"relation" => "eq", "value" => 6} + }, + "timed_out" => false, + "took" => 1 + } + + """ + @spec search(index_module(), query_body()) :: map() + def search(module, query_body) do + index = index_for(module) + + {:ok, %{body: results, status_code: 200}} = + Elastix.Search.search( + opensearch_url(), + index.index_name(), + [], + query_body + ) + + results + end + + @doc ~S""" + Given maps of module and body, searches each index with the respective body. + + `GET /_all/_search` + + This is more efficient than performing a `search/2` for each index individually. + Like `search/2`, this returns the raw query results. 
+ + ## Example + + iex> Search.msearch([ + ...> %{module: Image, body: %{query: %{match_all: %{}}}}, + ...> %{module: Post, body: %{query: %{match_all: %{}}}} + ...> ]) + [ + %{"_shards" => ..., "hits" => ..., "timed_out" => false, "took" => 1}, + %{"_shards" => ..., "hits" => ..., "timed_out" => false, "took" => 2} + ] + + """ + @spec msearch([search_definition()]) :: [map()] + def msearch(definitions) do + msearch_body = + Enum.flat_map(definitions, fn def -> + [ + %{index: index_for(def.module).index_name()}, + def.body + ] + end) + + {:ok, %{body: results, status_code: 200}} = + Elastix.Search.search( + opensearch_url(), + "_all", + [], + msearch_body + ) + + results["responses"] + end + + @doc """ + Transforms an index module, query body, and pagination parameters into a query suitable + for submission to the search engine. + + Any of the following functions may be used for submission: + - `search_results/1` + - `msearch_results/1` + - `search_records/2` + - `msearch_records/2` + - `search_records_with_hits/2` + - `msearch_records_with_hits/2` + + ## Example + + iex> Search.search_definition(Image, %{query: %{match_all: %{}}}, %{page_number: 3, page_size: 50}) + %{ + module: Image, + body: %{ + size: 50, + query: %{match_all: %{}}, + from: 100, + _source: false, + track_total_hits: true + }, + page_size: 50, + page_number: 3 + } + + """ + @spec search_definition(index_module(), query_body(), pagination_params()) :: + search_definition() + def search_definition(module, search_query, pagination_params \\ %{}) do + page_number = pagination_params[:page_number] || 1 + page_size = pagination_params[:page_size] || 25 + + search_query = + Map.merge(search_query, %{ + from: (page_number - 1) * page_size, + size: page_size, + _source: false, + track_total_hits: true + }) + + %{ + module: module, + body: search_query, + page_number: page_number, + page_size: page_size + } + end + + defp process_results(results, definition) do + time = results["took"] + count = 
results["hits"]["total"]["value"] + entries = Enum.map(results["hits"]["hits"], &{String.to_integer(&1["_id"]), &1}) + + Logger.debug("[Search] Query took #{time}ms") + Logger.debug("[Search] #{Jason.encode!(definition.body)}") + + %Scrivener.Page{ + entries: entries, + page_number: definition.page_number, + page_size: definition.page_size, + total_entries: count, + total_pages: div(count + definition.page_size - 1, definition.page_size) + } + end + + @doc """ + Given a search definition generated by `search_definition/3`, submit the query and return + a `m:Scrivener.Page` of results. + + The `entries` in the page are a list of tuples of record IDs paired with the hit that generated + them. + + ## Example + + iex> Search.search_results(definition) + %Scrivener.Page{ + entries: [{1, %{"_id" => "1", ...}}, ...], + page_number: 1, + page_size: 25, + total_entries: 6, + total_pages: 1 + } + + """ + @spec search_results(search_definition()) :: Scrivener.Page.t() + def search_results(definition) do + process_results(search(definition.module, definition.body), definition) + end + + @doc """ + Given a list of search definitions, each generated by `search_definition/3`, submit the query + and return a corresponding list of `m:Scrivener.Page` for each query. + + The `entries` in the page are a list of tuples of record IDs paired with the hit that generated + them. 
+ + ## Example + + iex> Search.msearch_results([definition]) + [ + %Scrivener.Page{ + entries: [{1, %{"_id" => "1", ...}}, ...], + page_number: 1, + page_size: 25, + total_entries: 6, + total_pages: 1 + } + ] + + """ + @spec msearch_results([search_definition()]) :: [Scrivener.Page.t()] + def msearch_results(definitions) do + Enum.map(Enum.zip(msearch(definitions), definitions), fn {result, definition} -> + process_results(result, definition) + end) + end + + defp load_records_from_results(results, ecto_queries) do + Enum.map(Enum.zip(results, ecto_queries), fn {page, ecto_query} -> + {ids, hits} = Enum.unzip(page.entries) + + records = + ecto_query + |> where([m], m.id in ^ids) + |> Repo.all() + |> Enum.sort_by(&Enum.find_index(ids, fn el -> el == &1.id end)) + + %{page | entries: Enum.zip(records, hits)} + end) + end + + @doc """ + Given a search definition generated by `search_definition/3`, submit the query and return a + `m:Scrivener.Page` of results. + + The `entries` in the page are a list of tuples of schema structs paired with the hit that + generated them. + + ## Example + + iex> Search.search_records_with_hits(definition, preload(Image, :tags)) + %Scrivener.Page{ + entries: [{%Image{id: 1, ...}, %{"_id" => "1", ...}}, ...], + page_number: 1, + page_size: 25, + total_entries: 6, + total_pages: 1 + } + + """ + @spec search_records_with_hits(search_definition(), queryable()) :: Scrivener.Page.t() + def search_records_with_hits(definition, ecto_query) do + [page] = load_records_from_results([search_results(definition)], [ecto_query]) + + page + end + + @doc """ + Given a list of search definitions, each generated by `search_definition/3`, submit the query + and return a corresponding list of `m:Scrivener.Page` for each query. + + The `entries` in the page are a list of tuples of schema structs paired with the hit that + generated them. 
+ + ## Example + + iex> Search.msearch_records_with_hits([definition], [preload(Image, :tags)]) + [ + %Scrivener.Page{ + entries: [{%Image{id: 1, ...}, %{"_id" => "1", ...}}, ...], + page_number: 1, + page_size: 25, + total_entries: 6, + total_pages: 1 + } + ] + + """ + @spec msearch_records_with_hits([search_definition()], [queryable()]) :: [Scrivener.Page.t()] + def msearch_records_with_hits(definitions, ecto_queries) do + load_records_from_results(msearch_results(definitions), ecto_queries) + end + + @doc """ + Given a search definition generated by `search_definition/3`, submit the query and return a + `m:Scrivener.Page` of results. + + The `entries` in the page are a list of schema structs. + + ## Example + + iex> Search.search_records(definition, preload(Image, :tags)) + %Scrivener.Page{ + entries: [%Image{id: 1, ...}, ...], + page_number: 1, + page_size: 25, + total_entries: 6, + total_pages: 1 + } + + """ + @spec search_records(search_definition(), queryable()) :: Scrivener.Page.t() + def search_records(definition, ecto_query) do + page = search_records_with_hits(definition, ecto_query) + {records, _hits} = Enum.unzip(page.entries) + + %{page | entries: records} + end + + @doc """ + Given a list of search definitions, each generated by `search_definition/3`, submit the query + and return a corresponding list of `m:Scrivener.Page` for each query. + + The `entries` in the page are a list of schema structs. 
+ + ## Example + + iex> Search.msearch_records([definition], [preload(Image, :tags)]) + [ + %Scrivener.Page{ + entries: [%Image{id: 1, ...}, ...], + page_number: 1, + page_size: 25, + total_entries: 6, + total_pages: 1 + } + ] + + """ + @spec msearch_records([search_definition()], [queryable()]) :: [Scrivener.Page.t()] + def msearch_records(definitions, ecto_queries) do + Enum.map(load_records_from_results(msearch_results(definitions), ecto_queries), fn page -> + {records, _hits} = Enum.unzip(page.entries) + + %{page | entries: records} + end) + end +end diff --git a/lib/philomena/elasticsearch_index.ex b/lib/philomena_query/search_index.ex similarity index 66% rename from lib/philomena/elasticsearch_index.ex rename to lib/philomena_query/search_index.ex index 6810748e..3a4fe9da 100644 --- a/lib/philomena/elasticsearch_index.ex +++ b/lib/philomena_query/search_index.ex @@ -1,4 +1,4 @@ -defmodule Philomena.ElasticsearchIndex do +defmodule PhilomenaQuery.SearchIndex do # Returns the index name for the index. # This is usually a collection name like "images". @callback index_name() :: String.t() @@ -6,7 +6,6 @@ defmodule Philomena.ElasticsearchIndex do # Returns the mapping and settings for the index. @callback mapping() :: map() - # Returns the JSON representation of the given struct - # for indexing in Elasticsearch. + # Returns the JSON representation of the given struct for indexing in OpenSearch. 
@callback as_json(struct()) :: map() end diff --git a/lib/philomena_web/controllers/activity_controller.ex b/lib/philomena_web/controllers/activity_controller.ex index 9c7dc7ec..45356603 100644 --- a/lib/philomena_web/controllers/activity_controller.ex +++ b/lib/philomena_web/controllers/activity_controller.ex @@ -2,7 +2,7 @@ defmodule PhilomenaWeb.ActivityController do use PhilomenaWeb, :controller alias PhilomenaWeb.ImageLoader - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.{ Images.Image, @@ -36,7 +36,7 @@ defmodule PhilomenaWeb.ActivityController do ) comments = - Elasticsearch.search_definition( + Search.search_definition( Comment, %{ query: %{ @@ -144,7 +144,7 @@ defmodule PhilomenaWeb.ActivityController do defp multi_search(images, top_scoring, comments, nil) do responses = - Elasticsearch.msearch_records( + Search.msearch_records( [images, top_scoring, comments], [ preload(Image, [:sources, tags: :aliases]), @@ -157,7 +157,7 @@ defmodule PhilomenaWeb.ActivityController do end defp multi_search(images, top_scoring, comments, watched) do - Elasticsearch.msearch_records( + Search.msearch_records( [images, top_scoring, comments, watched], [ preload(Image, [:sources, tags: :aliases]), diff --git a/lib/philomena_web/controllers/admin/report_controller.ex b/lib/philomena_web/controllers/admin/report_controller.ex index 26ea7b5d..e6fc6a97 100644 --- a/lib/philomena_web/controllers/admin/report_controller.ex +++ b/lib/philomena_web/controllers/admin/report_controller.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.Admin.ReportController do use PhilomenaWeb, :controller - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias PhilomenaWeb.MarkdownRenderer alias Philomena.Reports.Report alias Philomena.Reports.Query @@ -94,14 +94,14 @@ defmodule PhilomenaWeb.Admin.ReportController do defp load_reports(conn, query) do reports = Report - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: query, sort: sorts() 
}, conn.assigns.pagination ) - |> Elasticsearch.search_records(preload(Report, [:admin, user: :linked_tags])) + |> Search.search_records(preload(Report, [:admin, user: :linked_tags])) entries = Polymorphic.load_polymorphic(reports, reportable: [reportable_id: :reportable_type]) diff --git a/lib/philomena_web/controllers/api/json/search/comment_controller.ex b/lib/philomena_web/controllers/api/json/search/comment_controller.ex index e96ba08d..5dbe5e4c 100644 --- a/lib/philomena_web/controllers/api/json/search/comment_controller.ex +++ b/lib/philomena_web/controllers/api/json/search/comment_controller.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.Api.Json.Search.CommentController do use PhilomenaWeb, :controller - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Comments.Comment alias Philomena.Comments.Query import Ecto.Query @@ -14,7 +14,7 @@ defmodule PhilomenaWeb.Api.Json.Search.CommentController do {:ok, query} -> comments = Comment - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: %{ bool: %{ @@ -31,7 +31,7 @@ defmodule PhilomenaWeb.Api.Json.Search.CommentController do }, conn.assigns.pagination ) - |> Elasticsearch.search_records(preload(Comment, [:image, :user])) + |> Search.search_records(preload(Comment, [:image, :user])) conn |> put_view(PhilomenaWeb.Api.Json.CommentView) diff --git a/lib/philomena_web/controllers/api/json/search/filter_controller.ex b/lib/philomena_web/controllers/api/json/search/filter_controller.ex index ab9df7fb..7b402065 100644 --- a/lib/philomena_web/controllers/api/json/search/filter_controller.ex +++ b/lib/philomena_web/controllers/api/json/search/filter_controller.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.Api.Json.Search.FilterController do use PhilomenaWeb, :controller - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Filters.Filter alias Philomena.Filters.Query import Ecto.Query @@ -13,7 +13,7 @@ defmodule 
PhilomenaWeb.Api.Json.Search.FilterController do {:ok, query} -> filters = Filter - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: %{ bool: %{ @@ -36,7 +36,7 @@ defmodule PhilomenaWeb.Api.Json.Search.FilterController do }, conn.assigns.pagination ) - |> Elasticsearch.search_records(preload(Filter, [:user])) + |> Search.search_records(preload(Filter, [:user])) conn |> put_view(PhilomenaWeb.Api.Json.FilterView) diff --git a/lib/philomena_web/controllers/api/json/search/gallery_controller.ex b/lib/philomena_web/controllers/api/json/search/gallery_controller.ex index d8b8b2ef..8b2f247b 100644 --- a/lib/philomena_web/controllers/api/json/search/gallery_controller.ex +++ b/lib/philomena_web/controllers/api/json/search/gallery_controller.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.Api.Json.Search.GalleryController do use PhilomenaWeb, :controller - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Galleries.Gallery alias Philomena.Galleries.Query import Ecto.Query @@ -11,14 +11,14 @@ defmodule PhilomenaWeb.Api.Json.Search.GalleryController do {:ok, query} -> galleries = Gallery - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: query, sort: %{created_at: :desc} }, conn.assigns.pagination ) - |> Elasticsearch.search_records(preload(Gallery, [:creator])) + |> Search.search_records(preload(Gallery, [:creator])) conn |> put_view(PhilomenaWeb.Api.Json.GalleryView) diff --git a/lib/philomena_web/controllers/api/json/search/image_controller.ex b/lib/philomena_web/controllers/api/json/search/image_controller.ex index b410dedc..109e7abe 100644 --- a/lib/philomena_web/controllers/api/json/search/image_controller.ex +++ b/lib/philomena_web/controllers/api/json/search/image_controller.ex @@ -2,7 +2,7 @@ defmodule PhilomenaWeb.Api.Json.Search.ImageController do use PhilomenaWeb, :controller alias PhilomenaWeb.ImageLoader - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias 
Philomena.Interactions alias Philomena.Images.Image import Ecto.Query @@ -13,7 +13,7 @@ defmodule PhilomenaWeb.Api.Json.Search.ImageController do case ImageLoader.search_string(conn, params["q"]) do {:ok, {images, _tags}} -> - images = Elasticsearch.search_records(images, queryable) + images = Search.search_records(images, queryable) interactions = Interactions.user_interactions(images, user) conn diff --git a/lib/philomena_web/controllers/api/json/search/post_controller.ex b/lib/philomena_web/controllers/api/json/search/post_controller.ex index 2b39501f..919a5b13 100644 --- a/lib/philomena_web/controllers/api/json/search/post_controller.ex +++ b/lib/philomena_web/controllers/api/json/search/post_controller.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.Api.Json.Search.PostController do use PhilomenaWeb, :controller - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Posts.Post alias Philomena.Posts.Query import Ecto.Query @@ -13,7 +13,7 @@ defmodule PhilomenaWeb.Api.Json.Search.PostController do {:ok, query} -> posts = Post - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: %{ bool: %{ @@ -28,7 +28,7 @@ defmodule PhilomenaWeb.Api.Json.Search.PostController do }, conn.assigns.pagination ) - |> Elasticsearch.search_records(preload(Post, [:user, :topic])) + |> Search.search_records(preload(Post, [:user, :topic])) conn |> put_view(PhilomenaWeb.Api.Json.Forum.Topic.PostView) diff --git a/lib/philomena_web/controllers/api/json/search/tag_controller.ex b/lib/philomena_web/controllers/api/json/search/tag_controller.ex index 1a765fb1..8cdaf7f4 100644 --- a/lib/philomena_web/controllers/api/json/search/tag_controller.ex +++ b/lib/philomena_web/controllers/api/json/search/tag_controller.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.Api.Json.Search.TagController do use PhilomenaWeb, :controller - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Tags.Tag alias Philomena.Tags.Query import Ecto.Query @@ 
-11,11 +11,11 @@ defmodule PhilomenaWeb.Api.Json.Search.TagController do {:ok, query} -> tags = Tag - |> Elasticsearch.search_definition( + |> Search.search_definition( %{query: query, sort: %{images: :desc}}, conn.assigns.pagination ) - |> Elasticsearch.search_records( + |> Search.search_records( preload(Tag, [:aliased_tag, :aliases, :implied_tags, :implied_by_tags, :dnp_entries]) ) diff --git a/lib/philomena_web/controllers/api/rss/watched_controller.ex b/lib/philomena_web/controllers/api/rss/watched_controller.ex index ba38218e..b2f00f0c 100644 --- a/lib/philomena_web/controllers/api/rss/watched_controller.ex +++ b/lib/philomena_web/controllers/api/rss/watched_controller.ex @@ -3,13 +3,13 @@ defmodule PhilomenaWeb.Api.Rss.WatchedController do alias PhilomenaWeb.ImageLoader alias Philomena.Images.Image - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search import Ecto.Query def index(conn, _params) do {:ok, {images, _tags}} = ImageLoader.search_string(conn, "my:watched") - images = Elasticsearch.search_records(images, preload(Image, [:sources, tags: :aliases])) + images = Search.search_records(images, preload(Image, [:sources, tags: :aliases])) # NB: this is RSS, but using the RSS format causes Phoenix not to # escape HTML diff --git a/lib/philomena_web/controllers/autocomplete/tag_controller.ex b/lib/philomena_web/controllers/autocomplete/tag_controller.ex index 9d43e470..7c06eb59 100644 --- a/lib/philomena_web/controllers/autocomplete/tag_controller.ex +++ b/lib/philomena_web/controllers/autocomplete/tag_controller.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.Autocomplete.TagController do use PhilomenaWeb, :controller - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Tags.Tag import Ecto.Query @@ -13,7 +13,7 @@ defmodule PhilomenaWeb.Autocomplete.TagController do term -> Tag - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: %{ bool: %{ @@ -27,7 +27,7 @@ defmodule 
PhilomenaWeb.Autocomplete.TagController do }, %{page_size: 10} ) - |> Elasticsearch.search_records(preload(Tag, :aliased_tag)) + |> Search.search_records(preload(Tag, :aliased_tag)) |> Enum.map(&(&1.aliased_tag || &1)) |> Enum.uniq_by(& &1.id) |> Enum.filter(&(&1.images_count > 0)) diff --git a/lib/philomena_web/controllers/comment_controller.ex b/lib/philomena_web/controllers/comment_controller.ex index ad802c07..99b14f25 100644 --- a/lib/philomena_web/controllers/comment_controller.ex +++ b/lib/philomena_web/controllers/comment_controller.ex @@ -2,7 +2,7 @@ defmodule PhilomenaWeb.CommentController do use PhilomenaWeb, :controller alias PhilomenaWeb.MarkdownRenderer - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.{Comments.Query, Comments.Comment} import Ecto.Query @@ -21,7 +21,7 @@ defmodule PhilomenaWeb.CommentController do defp render_index({:ok, query}, conn, user) do comments = Comment - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: %{ bool: %{ @@ -35,7 +35,7 @@ defmodule PhilomenaWeb.CommentController do }, conn.assigns.pagination ) - |> Elasticsearch.search_records( + |> Search.search_records( preload(Comment, [:deleted_by, image: [:sources, tags: :aliases], user: [awards: :badge]]) ) diff --git a/lib/philomena_web/controllers/filter_controller.ex b/lib/philomena_web/controllers/filter_controller.ex index 9f19415f..61469ffd 100644 --- a/lib/philomena_web/controllers/filter_controller.ex +++ b/lib/philomena_web/controllers/filter_controller.ex @@ -2,7 +2,7 @@ defmodule PhilomenaWeb.FilterController do use PhilomenaWeb, :controller alias Philomena.{Filters, Filters.Filter, Filters.Query, Tags.Tag} - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Schema.TagList alias Philomena.Repo import Ecto.Query @@ -47,7 +47,7 @@ defmodule PhilomenaWeb.FilterController do defp render_index({:ok, query}, conn, user) do filters = Filter - |> Elasticsearch.search_definition( + |> 
Search.search_definition( %{ query: %{ bool: %{ @@ -61,7 +61,7 @@ defmodule PhilomenaWeb.FilterController do }, conn.assigns.pagination ) - |> Elasticsearch.search_records(preload(Filter, [:user])) + |> Search.search_records(preload(Filter, [:user])) render(conn, "index.html", title: "Filters", filters: filters) end diff --git a/lib/philomena_web/controllers/gallery_controller.ex b/lib/philomena_web/controllers/gallery_controller.ex index 2b298d2c..64a020e0 100644 --- a/lib/philomena_web/controllers/gallery_controller.ex +++ b/lib/philomena_web/controllers/gallery_controller.ex @@ -3,7 +3,7 @@ defmodule PhilomenaWeb.GalleryController do alias PhilomenaWeb.ImageLoader alias PhilomenaWeb.NotificationCountPlug - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Interactions alias Philomena.Galleries.Gallery alias Philomena.Galleries @@ -21,7 +21,7 @@ defmodule PhilomenaWeb.GalleryController do def index(conn, params) do galleries = Gallery - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: %{ bool: %{ @@ -32,7 +32,7 @@ defmodule PhilomenaWeb.GalleryController do }, conn.assigns.pagination ) - |> Elasticsearch.search_records( + |> Search.search_records( preload(Gallery, [:creator, thumbnail: [:sources, tags: :aliases]]) ) @@ -62,7 +62,7 @@ defmodule PhilomenaWeb.GalleryController do {gallery_prev, gallery_next} = prev_next_page_images(conn, query) [images, gallery_prev, gallery_next] = - Elasticsearch.msearch_records_with_hits( + Search.msearch_records_with_hits( [images, gallery_prev, gallery_next], [ preload(Image, [:sources, tags: :aliases]), @@ -154,7 +154,7 @@ defmodule PhilomenaWeb.GalleryController do limit = conn.assigns.image_pagination.page_size offset = (conn.assigns.image_pagination.page_number - 1) * limit - # Inconsistency: Elasticsearch doesn't allow requesting offsets which are less than 0, + # Inconsistency: OpenSearch doesn't allow requesting offsets which are less than 0, # but it does allow 
requesting offsets which are beyond the total number of results. prev_image = gallery_image(offset - 1, conn, query) @@ -164,7 +164,7 @@ defmodule PhilomenaWeb.GalleryController do end defp gallery_image(offset, _conn, _query) when offset < 0 do - Elasticsearch.search_definition(Image, %{query: %{match_none: %{}}}) + Search.search_definition(Image, %{query: %{match_none: %{}}}) end defp gallery_image(offset, conn, query) do diff --git a/lib/philomena_web/controllers/image/navigate_controller.ex b/lib/philomena_web/controllers/image/navigate_controller.ex index c0827caa..9cb61d48 100644 --- a/lib/philomena_web/controllers/image/navigate_controller.ex +++ b/lib/philomena_web/controllers/image/navigate_controller.ex @@ -4,7 +4,7 @@ defmodule PhilomenaWeb.Image.NavigateController do alias PhilomenaWeb.ImageLoader alias PhilomenaWeb.ImageNavigator alias PhilomenaWeb.ImageScope - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Images.Image alias Philomena.Images.Query @@ -37,7 +37,7 @@ defmodule PhilomenaWeb.Image.NavigateController do body = %{range: %{id: %{gt: conn.assigns.image.id}}} {images, _tags} = ImageLoader.query(conn, body, pagination: pagination) - images = Elasticsearch.search_records(images, Image) + images = Search.search_records(images, Image) page_num = page_for_offset(pagination.page_size, images.total_entries) diff --git a/lib/philomena_web/controllers/image/random_controller.ex b/lib/philomena_web/controllers/image/random_controller.ex index 9bc293f7..e104ee40 100644 --- a/lib/philomena_web/controllers/image/random_controller.ex +++ b/lib/philomena_web/controllers/image/random_controller.ex @@ -4,7 +4,7 @@ defmodule PhilomenaWeb.Image.RandomController do alias PhilomenaWeb.ImageSorter alias PhilomenaWeb.ImageScope alias PhilomenaWeb.ImageLoader - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Images.Image def index(conn, params) do @@ -32,7 +32,7 @@ defmodule PhilomenaWeb.Image.RandomController 
do defp unwrap_random_result({:ok, {definition, _tags}}) do definition - |> Elasticsearch.search_records(Image) + |> Search.search_records(Image) |> Enum.to_list() |> unwrap() end diff --git a/lib/philomena_web/controllers/image/related_controller.ex b/lib/philomena_web/controllers/image/related_controller.ex index 2abcbe6b..138d97a9 100644 --- a/lib/philomena_web/controllers/image/related_controller.ex +++ b/lib/philomena_web/controllers/image/related_controller.ex @@ -4,7 +4,7 @@ defmodule PhilomenaWeb.Image.RelatedController do alias PhilomenaWeb.ImageLoader alias Philomena.Interactions alias Philomena.Images.Image - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search import Ecto.Query plug PhilomenaWeb.CanaryMapPlug, index: :show @@ -60,7 +60,7 @@ defmodule PhilomenaWeb.Image.RelatedController do pagination: %{conn.assigns.image_pagination | page_number: 1} ) - images = Elasticsearch.search_records(images, preload(Image, [:sources, tags: :aliases])) + images = Search.search_records(images, preload(Image, [:sources, tags: :aliases])) interactions = Interactions.user_interactions(images, user) diff --git a/lib/philomena_web/controllers/image_controller.ex b/lib/philomena_web/controllers/image_controller.ex index 30364cda..9cb0914a 100644 --- a/lib/philomena_web/controllers/image_controller.ex +++ b/lib/philomena_web/controllers/image_controller.ex @@ -14,7 +14,7 @@ defmodule PhilomenaWeb.ImageController do Galleries.Gallery } - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Interactions alias Philomena.Comments alias Philomena.Repo @@ -40,7 +40,7 @@ defmodule PhilomenaWeb.ImageController do {:ok, {images, _tags}} = ImageLoader.search_string(conn, "created_at.lte:3 minutes ago, -thumbnails_generated:false") - images = Elasticsearch.search_records(images, preload(Image, [:sources, tags: :aliases])) + images = Search.search_records(images, preload(Image, [:sources, tags: :aliases])) interactions = 
Interactions.user_interactions(images, conn.assigns.current_user) diff --git a/lib/philomena_web/controllers/post_controller.ex b/lib/philomena_web/controllers/post_controller.ex index cafcff23..17b8fcd5 100644 --- a/lib/philomena_web/controllers/post_controller.ex +++ b/lib/philomena_web/controllers/post_controller.ex @@ -2,7 +2,7 @@ defmodule PhilomenaWeb.PostController do use PhilomenaWeb, :controller alias PhilomenaWeb.MarkdownRenderer - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.{Posts.Query, Posts.Post} import Ecto.Query @@ -21,7 +21,7 @@ defmodule PhilomenaWeb.PostController do defp render_index({:ok, query}, conn, user) do posts = Post - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: %{ bool: %{ @@ -32,7 +32,7 @@ defmodule PhilomenaWeb.PostController do }, conn.assigns.pagination ) - |> Elasticsearch.search_records( + |> Search.search_records( preload(Post, [:deleted_by, topic: :forum, user: [awards: :badge]]) ) diff --git a/lib/philomena_web/controllers/profile_controller.ex b/lib/philomena_web/controllers/profile_controller.ex index c91ea8d2..b5f1020d 100644 --- a/lib/philomena_web/controllers/profile_controller.ex +++ b/lib/philomena_web/controllers/profile_controller.ex @@ -2,7 +2,7 @@ defmodule PhilomenaWeb.ProfileController do use PhilomenaWeb, :controller alias PhilomenaWeb.ImageLoader - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias PhilomenaWeb.MarkdownRenderer alias Philomena.UserStatistics.UserStatistic alias Philomena.Users.User @@ -79,7 +79,7 @@ defmodule PhilomenaWeb.ProfileController do recent_artwork = recent_artwork(conn, tags) recent_comments = - Elasticsearch.search_definition( + Search.search_definition( Comment, %{ query: %{ @@ -100,7 +100,7 @@ defmodule PhilomenaWeb.ProfileController do ) recent_posts = - Elasticsearch.search_definition( + Search.search_definition( Post, %{ query: %{ @@ -119,7 +119,7 @@ defmodule PhilomenaWeb.ProfileController do ) 
[recent_uploads, recent_faves, recent_artwork, recent_comments, recent_posts] = - Elasticsearch.msearch_records( + Search.msearch_records( [recent_uploads, recent_faves, recent_artwork, recent_comments, recent_posts], [ preload(Image, [:sources, tags: :aliases]), @@ -228,7 +228,7 @@ defmodule PhilomenaWeb.ProfileController do defp tags(links), do: Enum.map(links, & &1.tag) |> Enum.reject(&is_nil/1) defp recent_artwork(_conn, []) do - Elasticsearch.search_definition(Image, %{query: %{match_none: %{}}}) + Search.search_definition(Image, %{query: %{match_none: %{}}}) end defp recent_artwork(conn, tags) do diff --git a/lib/philomena_web/controllers/search_controller.ex b/lib/philomena_web/controllers/search_controller.ex index 969bc096..694a726e 100644 --- a/lib/philomena_web/controllers/search_controller.ex +++ b/lib/philomena_web/controllers/search_controller.ex @@ -3,7 +3,7 @@ defmodule PhilomenaWeb.SearchController do alias PhilomenaWeb.ImageLoader alias Philomena.Images.Image - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Interactions import Ecto.Query @@ -41,8 +41,8 @@ defmodule PhilomenaWeb.SearchController do end end - defp search_function(true), do: &Elasticsearch.search_records_with_hits/2 - defp search_function(_custom), do: &Elasticsearch.search_records/2 + defp search_function(true), do: &Search.search_records_with_hits/2 + defp search_function(_custom), do: &Search.search_records/2 defp custom_ordering?(%{params: %{"sf" => sf}}) when sf != "id", do: true defp custom_ordering?(_conn), do: false diff --git a/lib/philomena_web/controllers/tag_controller.ex b/lib/philomena_web/controllers/tag_controller.ex index 2c94d3e5..162393bb 100644 --- a/lib/philomena_web/controllers/tag_controller.ex +++ b/lib/philomena_web/controllers/tag_controller.ex @@ -2,7 +2,7 @@ defmodule PhilomenaWeb.TagController do use PhilomenaWeb, :controller alias PhilomenaWeb.ImageLoader - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias 
Philomena.{Tags, Tags.Tag} alias Philomena.{Images, Images.Image} alias PhilomenaWeb.MarkdownRenderer @@ -34,7 +34,7 @@ defmodule PhilomenaWeb.TagController do with {:ok, query} <- Tags.Query.compile(query_string) do tags = Tag - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: query, size: 250, @@ -42,7 +42,7 @@ defmodule PhilomenaWeb.TagController do }, %{conn.assigns.pagination | page_size: 250} ) - |> Elasticsearch.search_records(Tag) + |> Search.search_records(Tag) render(conn, "index.html", title: "Tags", tags: tags) else @@ -57,7 +57,7 @@ defmodule PhilomenaWeb.TagController do {images, _tags} = ImageLoader.query(conn, %{term: %{"namespaced_tags.name" => tag.name}}) - images = Elasticsearch.search_records(images, preload(Image, [:sources, tags: :aliases])) + images = Search.search_records(images, preload(Image, [:sources, tags: :aliases])) interactions = Interactions.user_interactions(images, user) diff --git a/lib/philomena_web/image_loader.ex b/lib/philomena_web/image_loader.ex index c6a33c2a..d2bc80ea 100644 --- a/lib/philomena_web/image_loader.ex +++ b/lib/philomena_web/image_loader.ex @@ -1,6 +1,6 @@ defmodule PhilomenaWeb.ImageLoader do alias PhilomenaWeb.ImageSorter - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Images.{Image, Query} alias PhilomenaWeb.MarkdownRenderer alias Philomena.Tags.Tag @@ -36,7 +36,7 @@ defmodule PhilomenaWeb.ImageLoader do %{query: query, sorts: sort} = sorts.(body) definition = - Elasticsearch.search_definition( + Search.search_definition( Image, %{ query: %{ diff --git a/lib/philomena_web/image_navigator.ex b/lib/philomena_web/image_navigator.ex index fce68944..547fb617 100644 --- a/lib/philomena_web/image_navigator.ex +++ b/lib/philomena_web/image_navigator.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.ImageNavigator do alias PhilomenaWeb.ImageSorter alias Philomena.Images.Image - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search @order_for_dir %{ "next" => 
%{"asc" => "asc", "desc" => "desc"}, @@ -54,8 +54,8 @@ defmodule PhilomenaWeb.ImageNavigator do defp maybe_search_after(module, body, options, queryable, true) do module - |> Elasticsearch.search_definition(body, options) - |> Elasticsearch.search_records_with_hits(queryable) + |> Search.search_definition(body, options) + |> Search.search_records_with_hits(queryable) end defp maybe_search_after(_module, _body, _options, _queryable, _false) do diff --git a/lib/philomena_web/plugs/filter_forced_users_plug.ex b/lib/philomena_web/plugs/filter_forced_users_plug.ex index 17d1886c..e28de969 100644 --- a/lib/philomena_web/plugs/filter_forced_users_plug.ex +++ b/lib/philomena_web/plugs/filter_forced_users_plug.ex @@ -6,8 +6,8 @@ defmodule PhilomenaWeb.FilterForcedUsersPlug do import Phoenix.Controller import Plug.Conn - alias Philomena.Search.String, as: SearchString - alias Philomena.Search.Evaluator + alias PhilomenaQuery.Parse.String, as: SearchString + alias PhilomenaQuery.Parse.Evaluator alias Philomena.Images.Query alias PhilomenaWeb.ImageView diff --git a/lib/philomena_web/plugs/image_filter_plug.ex b/lib/philomena_web/plugs/image_filter_plug.ex index 3281d0e8..c8138d68 100644 --- a/lib/philomena_web/plugs/image_filter_plug.ex +++ b/lib/philomena_web/plugs/image_filter_plug.ex @@ -1,6 +1,6 @@ defmodule PhilomenaWeb.ImageFilterPlug do import Plug.Conn - import Philomena.Search.String + import PhilomenaQuery.Parse.String alias Philomena.Images.Query diff --git a/lib/philomena_web/stats_updater.ex b/lib/philomena_web/stats_updater.ex index dc53324d..8eec95c5 100644 --- a/lib/philomena_web/stats_updater.ex +++ b/lib/philomena_web/stats_updater.ex @@ -1,6 +1,6 @@ defmodule PhilomenaWeb.StatsUpdater do alias Philomena.Config - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Images.Image alias Philomena.Comments.Comment alias Philomena.Topics.Topic @@ -68,8 +68,8 @@ defmodule PhilomenaWeb.StatsUpdater do data = Config.get(:aggregation) { - 
Elasticsearch.search(Image, data["images"]), - Elasticsearch.search(Comment, data["comments"]) + Search.search(Image, data["images"]), + Search.search(Comment, data["comments"]) } end diff --git a/lib/philomena_web/views/api/json/image_view.ex b/lib/philomena_web/views/api/json/image_view.ex index 924ae86b..d6c8b951 100644 --- a/lib/philomena_web/views/api/json/image_view.ex +++ b/lib/philomena_web/views/api/json/image_view.ex @@ -71,7 +71,7 @@ defmodule PhilomenaWeb.Api.Json.ImageView do tag_ids: Enum.map(image.tags, & &1.id), uploader: if(!!image.user and !image.anonymous, do: image.user.name), uploader_id: if(!!image.user and !image.anonymous, do: image.user.id), - wilson_score: Philomena.Images.ElasticsearchIndex.wilson_score(image), + wilson_score: Philomena.Images.SearchIndex.wilson_score(image), intensities: intensities(image), score: image.score, upvotes: image.upvotes_count, diff --git a/lib/philomena_web/views/image_view.ex b/lib/philomena_web/views/image_view.ex index dc9d9be8..4a69af5e 100644 --- a/lib/philomena_web/views/image_view.ex +++ b/lib/philomena_web/views/image_view.ex @@ -291,7 +291,7 @@ defmodule PhilomenaWeb.ImageView do } } - Philomena.Search.Evaluator.hits?(doc, query) + PhilomenaQuery.Parse.Evaluator.hits?(doc, query) end def image_source_icon(nil), do: "fa fa-link" diff --git a/lib/philomena_web/views/layout_view.ex b/lib/philomena_web/views/layout_view.ex index b9c2210b..b2cfd06f 100644 --- a/lib/philomena_web/views/layout_view.ex +++ b/lib/philomena_web/views/layout_view.ex @@ -43,9 +43,9 @@ defmodule PhilomenaWeb.LayoutView do data = [ filter_id: filter.id, hidden_tag_list: Jason.encode!(filter.hidden_tag_ids), - hidden_filter: Philomena.Search.String.normalize(filter.hidden_complex_str || ""), + hidden_filter: PhilomenaQuery.Parse.String.normalize(filter.hidden_complex_str || ""), spoilered_tag_list: Jason.encode!(filter.spoilered_tag_ids), - spoilered_filter: Philomena.Search.String.normalize(filter.spoilered_complex_str || ""), + 
spoilered_filter: PhilomenaQuery.Parse.String.normalize(filter.spoilered_complex_str || ""), user_id: if(user, do: user.id, else: nil), user_name: if(user, do: user.name, else: nil), user_slug: if(user, do: user.slug, else: nil), diff --git a/lib/philomena_web/views/tag_view.ex b/lib/philomena_web/views/tag_view.ex index bcbc1e22..ef860d71 100644 --- a/lib/philomena_web/views/tag_view.ex +++ b/lib/philomena_web/views/tag_view.ex @@ -3,7 +3,7 @@ defmodule PhilomenaWeb.TagView do # this is bad practice, don't copy this. alias Philomena.Config - alias Philomena.Elasticsearch + alias PhilomenaQuery.Search alias Philomena.Tags.Tag alias Philomena.Repo alias PhilomenaWeb.ImageScope @@ -143,7 +143,7 @@ defmodule PhilomenaWeb.TagView do defp implied_by_multitag(tag_names, ignore_tag_names) do Tag - |> Elasticsearch.search_definition( + |> Search.search_definition( %{ query: %{ bool: %{ @@ -155,7 +155,7 @@ defmodule PhilomenaWeb.TagView do }, %{page_size: 40} ) - |> Elasticsearch.search_records(preload(Tag, :implied_tags)) + |> Search.search_records(preload(Tag, :implied_tags)) end defp manages_links?(conn), diff --git a/priv/repo/seeds.exs b/priv/repo/seeds.exs index ad09bcea..1fe20dcd 100644 --- a/priv/repo/seeds.exs +++ b/priv/repo/seeds.exs @@ -26,17 +26,17 @@ alias Philomena.{ StaticPages.StaticPage } -alias Philomena.Elasticsearch +alias PhilomenaQuery.Search alias Philomena.Users alias Philomena.Tags alias Philomena.Filters import Ecto.Query -IO.puts("---- Creating Elasticsearch indices") +IO.puts("---- Creating search indices") for model <- [Image, Comment, Gallery, Tag, Post, Report, Filter] do - Elasticsearch.delete_index!(model) - Elasticsearch.create_index!(model) + Search.delete_index!(model) + Search.create_index!(model) end resources = @@ -112,6 +112,6 @@ for page_def <- resources["pages"] do end IO.puts("---- Indexing content") -Elasticsearch.reindex(Tag |> preload(^Tags.indexing_preloads()), Tag) +Search.reindex(Tag |> preload(^Tags.indexing_preloads()), 
Tag) IO.puts("---- Done.") From 9b204c908dd5cd2f1841b8a26a7c20626db6ebf0 Mon Sep 17 00:00:00 2001 From: Liam Date: Sat, 1 Jun 2024 23:03:34 -0400 Subject: [PATCH 2/2] More fully separate underlying search behavior from usage in application --- lib/philomena/search_policy.ex | 55 +++++++++++++++++++++ lib/philomena_query/search.ex | 87 ++++++++++++---------------------- 2 files changed, 86 insertions(+), 56 deletions(-) create mode 100644 lib/philomena/search_policy.ex diff --git a/lib/philomena/search_policy.ex b/lib/philomena/search_policy.ex new file mode 100644 index 00000000..d753026a --- /dev/null +++ b/lib/philomena/search_policy.ex @@ -0,0 +1,55 @@ +defmodule Philomena.SearchPolicy do + alias Philomena.Comments.Comment + alias Philomena.Galleries.Gallery + alias Philomena.Images.Image + alias Philomena.Posts.Post + alias Philomena.Reports.Report + alias Philomena.Tags.Tag + alias Philomena.Filters.Filter + + alias Philomena.Comments.SearchIndex, as: CommentIndex + alias Philomena.Galleries.SearchIndex, as: GalleryIndex + alias Philomena.Images.SearchIndex, as: ImageIndex + alias Philomena.Posts.SearchIndex, as: PostIndex + alias Philomena.Reports.SearchIndex, as: ReportIndex + alias Philomena.Tags.SearchIndex, as: TagIndex + alias Philomena.Filters.SearchIndex, as: FilterIndex + + @type schema_module :: Comment | Gallery | Image | Post | Report | Tag | Filter + + @doc """ + For a given schema module (e.g. `m:Philomena.Images.Image`), return the associated module + which implements the `SearchIndex` behaviour (e.g. `m:Philomena.Images.SearchIndex`). 
+ + ## Example + + iex> SearchPolicy.index_for(Gallery) + Philomena.Galleries.SearchIndex + + iex> SearchPolicy.index_for(:foo) + ** (FunctionClauseError) no function clause matching in Philomena.SearchPolicy.index_for/1 + + """ + @spec index_for(schema_module()) :: module() + def index_for(Comment), do: CommentIndex + def index_for(Gallery), do: GalleryIndex + def index_for(Image), do: ImageIndex + def index_for(Post), do: PostIndex + def index_for(Report), do: ReportIndex + def index_for(Tag), do: TagIndex + def index_for(Filter), do: FilterIndex + + @doc """ + Return the path used to interact with the search engine. + + ## Example + + iex> SearchPolicy.opensearch_url() + "http://localhost:9200" + + """ + @spec opensearch_url :: String.t() + def opensearch_url do + Application.get_env(:philomena, :opensearch_url) + end +end diff --git a/lib/philomena_query/search.ex b/lib/philomena_query/search.ex index 13a34f44..b4960657 100644 --- a/lib/philomena_query/search.ex +++ b/lib/philomena_query/search.ex @@ -15,35 +15,10 @@ defmodule PhilomenaQuery.Search do import Ecto.Query import Elastix.HTTP - alias Philomena.Comments.Comment - alias Philomena.Galleries.Gallery - alias Philomena.Images.Image - alias Philomena.Posts.Post - alias Philomena.Reports.Report - alias Philomena.Tags.Tag - alias Philomena.Filters.Filter + # todo: fetch through compile_env? 
+ @policy Philomena.SearchPolicy - alias Philomena.Comments.SearchIndex, as: CommentIndex - alias Philomena.Galleries.SearchIndex, as: GalleryIndex - alias Philomena.Images.SearchIndex, as: ImageIndex - alias Philomena.Posts.SearchIndex, as: PostIndex - alias Philomena.Reports.SearchIndex, as: ReportIndex - alias Philomena.Tags.SearchIndex, as: TagIndex - alias Philomena.Filters.SearchIndex, as: FilterIndex - - defp index_for(Comment), do: CommentIndex - defp index_for(Gallery), do: GalleryIndex - defp index_for(Image), do: ImageIndex - defp index_for(Post), do: PostIndex - defp index_for(Report), do: ReportIndex - defp index_for(Tag), do: TagIndex - defp index_for(Filter), do: FilterIndex - - defp opensearch_url do - Application.get_env(:philomena, :opensearch_url) - end - - @type index_module :: module() + @type schema_module :: @policy.schema_module() @type queryable :: any() @type query_body :: map() @@ -54,7 +29,7 @@ defmodule PhilomenaQuery.Search do } @type search_definition :: %{ - module: index_module(), + module: schema_module(), body: query_body(), page_number: integer(), page_size: integer() @@ -80,12 +55,12 @@ defmodule PhilomenaQuery.Search do iex> Search.create_index!(Image) """ - @spec create_index!(index_module()) :: any() + @spec create_index!(schema_module()) :: any() def create_index!(module) do - index = index_for(module) + index = @policy.index_for(module) Elastix.Index.create( - opensearch_url(), + @policy.opensearch_url(), index.index_name(), index.mapping() ) @@ -104,11 +79,11 @@ defmodule PhilomenaQuery.Search do iex> Search.delete_index!(Image) """ - @spec delete_index!(index_module()) :: any() + @spec delete_index!(schema_module()) :: any() def delete_index!(module) do - index = index_for(module) + index = @policy.index_for(module) - Elastix.Index.delete(opensearch_url(), index.index_name()) + Elastix.Index.delete(@policy.opensearch_url(), index.index_name()) end @doc ~S""" @@ -124,14 +99,14 @@ defmodule PhilomenaQuery.Search do iex> 
Search.update_mapping!(Image) """ - @spec update_mapping!(index_module()) :: any() + @spec update_mapping!(schema_module()) :: any() def update_mapping!(module) do - index = index_for(module) + index = @policy.index_for(module) index_name = index.index_name() mapping = index.mapping().mappings.properties - Elastix.Mapping.put(opensearch_url(), index_name, "_doc", %{properties: mapping}, + Elastix.Mapping.put(@policy.opensearch_url(), index_name, "_doc", %{properties: mapping}, include_type_name: true ) end @@ -151,13 +126,13 @@ defmodule PhilomenaQuery.Search do iex> Search.index_document(%Image{...}, Image) """ - @spec index_document(struct(), index_module()) :: any() + @spec index_document(struct(), schema_module()) :: any() def index_document(doc, module) do - index = index_for(module) + index = @policy.index_for(module) data = index.as_json(doc) Elastix.Document.index( - opensearch_url(), + @policy.opensearch_url(), index.index_name(), "_doc", data.id, @@ -181,12 +156,12 @@ defmodule PhilomenaQuery.Search do iex> Search.delete_document(image.id, Image) """ - @spec delete_document(term(), index_module()) :: any() + @spec delete_document(term(), schema_module()) :: any() def delete_document(id, module) do - index = index_for(module) + index = @policy.index_for(module) Elastix.Document.delete( - opensearch_url(), + @policy.opensearch_url(), index.index_name(), "_doc", id @@ -215,9 +190,9 @@ defmodule PhilomenaQuery.Search do Search.reindex(query, Image, batch_size: 5000) """ - @spec reindex(queryable(), index_module(), Batch.batch_options()) :: [] + @spec reindex(queryable(), schema_module(), Batch.batch_options()) :: [] def reindex(queryable, module, opts \\ []) do - index = index_for(module) + index = @policy.index_for(module) Batch.record_batches(queryable, opts, fn records -> lines = @@ -231,7 +206,7 @@ defmodule PhilomenaQuery.Search do end) Elastix.Bulk.post( - opensearch_url(), + @policy.opensearch_url(), lines, index: index.index_name(), httpoison_options: 
[timeout: 30_000] @@ -267,12 +242,12 @@ defmodule PhilomenaQuery.Search do Search.update_by_query(Post, query_body, [set_replacement], []) """ - @spec update_by_query(index_module(), query_body(), [replacement()], [replacement()]) :: any() + @spec update_by_query(schema_module(), query_body(), [replacement()], [replacement()]) :: any() def update_by_query(module, query_body, set_replacements, replacements) do - index = index_for(module) + index = @policy.index_for(module) url = - opensearch_url() + @policy.opensearch_url() |> prepare_url([index.index_name(), "_update_by_query"]) |> append_query_string(%{conflicts: "proceed", wait_for_completion: "false"}) @@ -355,13 +330,13 @@ defmodule PhilomenaQuery.Search do } """ - @spec search(index_module(), query_body()) :: map() + @spec search(schema_module(), query_body()) :: map() def search(module, query_body) do - index = index_for(module) + index = @policy.index_for(module) {:ok, %{body: results, status_code: 200}} = Elastix.Search.search( - opensearch_url(), + @policy.opensearch_url(), index.index_name(), [], query_body @@ -395,14 +370,14 @@ defmodule PhilomenaQuery.Search do msearch_body = Enum.flat_map(definitions, fn def -> [ - %{index: index_for(def.module).index_name()}, + %{index: @policy.index_for(def.module).index_name()}, def.body ] end) {:ok, %{body: results, status_code: 200}} = Elastix.Search.search( - opensearch_url(), + @policy.opensearch_url(), "_all", [], msearch_body @@ -440,7 +415,7 @@ defmodule PhilomenaQuery.Search do } """ - @spec search_definition(index_module(), query_body(), pagination_params()) :: + @spec search_definition(schema_module(), query_body(), pagination_params()) :: search_definition() def search_definition(module, search_query, pagination_params \\ %{}) do page_number = pagination_params[:page_number] || 1