From 28b1b34b00781f7c5e3a12f854820bbff1d22357 Mon Sep 17 00:00:00 2001
From: "Luna D."
Date: Sun, 30 Jun 2024 14:12:55 +0200
Subject: [PATCH] fix fast indexer

Make the search endpoint configurable through OPENSEARCH_URL in every
index makefile, replace the bash-only "<<<" here-strings with portable
"psql -c" invocations (so the "redirection unexpected" workaround in
filters.mk is no longer needed), and drop the trailing comma after
spoiler_tag_count in images.mk, which made the generated
jsonb_build_object() call invalid SQL.
---
 index/comments.mk  |  7 ++++---
 index/filters.mk   | 11 ++++-------
 index/galleries.mk |  7 ++++---
 index/images.mk    |  9 +++++----
 index/posts.mk     |  7 ++++---
 index/reports.mk   |  7 ++++---
 index/tags.mk      |  7 ++++---
 7 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/index/comments.mk b/index/comments.mk
index 5699ea35..9c7403da 100644
--- a/index/comments.mk
+++ b/index/comments.mk
@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 
 all: import_es
 
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=comments.jsonl --output=http://localhost:9200/ --output-index=comments --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=comments.jsonl --output=$(OPENSEARCH_URL) --output-index=comments --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 
 dump_jsonl: metadata authors tags
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_comments.jsonb_object_agg(object) from temp_comments.comment_search_json group by comment_id) to stdout;' > comments.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_comments cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_comments.jsonb_object_agg(object) from temp_comments.comment_search_json group by comment_id) to stdout;' > comments.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_comments cascade;'
 	sed -i comments.jsonl -e 's/\\\\/\\/g'
 
 metadata: comment_search_json
diff --git a/index/filters.mk b/index/filters.mk
index 7992bc2f..93d260cb 100644
--- a/index/filters.mk
+++ b/index/filters.mk
@@ -1,19 +1,16 @@
 DATABASE ?= philomena
-ELASTICSEARCH_URL ?= http://localhost:9200/
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
-# uncomment if getting "redirection unexpected" error on dump_jsonl
-#SHELL=/bin/bash
-
 .ONESHELL:
 
 all: import_es
 
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=filters.jsonl --output=$(ELASTICSEARCH_URL) --output-index=filters --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=filters.jsonl --output=$(OPENSEARCH_URL) --output-index=filters --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 
 dump_jsonl: metadata creators
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_filters.jsonb_object_agg(object) from temp_filters.filter_search_json group by filter_id) to stdout;' > filters.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_filters cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_filters.jsonb_object_agg(object) from temp_filters.filter_search_json group by filter_id) to stdout;' > filters.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_filters cascade;'
 	sed -i filters.jsonl -e 's/\\\\/\\/g'
 
 metadata: filter_search_json
diff --git a/index/galleries.mk b/index/galleries.mk
index 1447f1a9..0243b7e5 100644
--- a/index/galleries.mk
+++ b/index/galleries.mk
@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 
 all: import_es
 
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=galleries.jsonl --output=http://localhost:9200/ --output-index=galleries --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=galleries.jsonl --output=$(OPENSEARCH_URL) --output-index=galleries --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 
 dump_jsonl: metadata subscribers images
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_galleries.jsonb_object_agg(object) from temp_galleries.gallery_search_json group by gallery_id) to stdout;' > galleries.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_galleries cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_galleries.jsonb_object_agg(object) from temp_galleries.gallery_search_json group by gallery_id) to stdout;' > galleries.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_galleries cascade;'
 	sed -i galleries.jsonl -e 's/\\\\/\\/g'
 
 metadata: gallery_search_json
diff --git a/index/images.mk b/index/images.mk
index 8c843ee2..2ed13496 100644
--- a/index/images.mk
+++ b/index/images.mk
@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 
 all: import_es
 
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=images.jsonl --output=$(OPENSEARCH_URL) --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 
 dump_jsonl: metadata true_uploaders uploaders deleters galleries tags sources hides upvotes downvotes faves tag_names
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_images cascade;'
 	sed -i images.jsonl -e 's/\\\\/\\/g'
 
 metadata: image_search_json
@@ -84,7 +85,7 @@ tags: image_search_json
 	'body_type_tag_count', count(case when t.category = 'body-type' then t.category else null end),
 	'content_fanmade_tag_count', count(case when t.category = 'content-fanmade' then t.category else null end),
 	'content_official_tag_count', count(case when t.category = 'content-official' then t.category else null end),
-	'spoiler_tag_count', count(case when t.category = 'spoiler' then t.category else null end),
+	'spoiler_tag_count', count(case when t.category = 'spoiler' then t.category else null end)
 	) from image_taggings it inner join tags t on t.id = it.tag_id group by image_id;
 	SQL
 
diff --git a/index/posts.mk b/index/posts.mk
index 939066cb..4d530713 100644
--- a/index/posts.mk
+++ b/index/posts.mk
@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 
 all: import_es
 
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=posts.jsonl --output=http://localhost:9200/ --output-index=posts --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=posts.jsonl --output=$(OPENSEARCH_URL) --output-index=posts --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 
 dump_jsonl: metadata authors
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_posts.jsonb_object_agg(object) from temp_posts.post_search_json group by post_id) to stdout;' > posts.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_posts cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_posts.jsonb_object_agg(object) from temp_posts.post_search_json group by post_id) to stdout;' > posts.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_posts cascade;'
 	sed -i posts.jsonl -e 's/\\\\/\\/g'
 
 metadata: post_search_json
diff --git a/index/reports.mk b/index/reports.mk
index d8d810da..21b5189f 100644
--- a/index/reports.mk
+++ b/index/reports.mk
@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 
 all: import_es
 
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=reports.jsonl --output=http://localhost:9200/ --output-index=reports --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=reports.jsonl --output=$(OPENSEARCH_URL) --output-index=reports --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 
 dump_jsonl: metadata image_ids comment_image_ids
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_reports.jsonb_object_agg(object) from temp_reports.report_search_json group by report_id) to stdout;' > reports.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_reports cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_reports.jsonb_object_agg(object) from temp_reports.report_search_json group by report_id) to stdout;' > reports.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_reports cascade;'
 	sed -i reports.jsonl -e 's/\\\\/\\/g'
 
 metadata: report_search_json
diff --git a/index/tags.mk b/index/tags.mk
index 1b184310..49362f03 100644
--- a/index/tags.mk
+++ b/index/tags.mk
@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 
 all: import_es
 
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=tags.jsonl --output=http://localhost:9200/ --output-index=tags --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=tags.jsonl --output=$(OPENSEARCH_URL) --output-index=tags --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 
 dump_jsonl: metadata aliases implied_tags implied_by_tags
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_tags.jsonb_object_agg(object) from temp_tags.tag_search_json group by tag_id) to stdout;' > tags.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_tags cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_tags.jsonb_object_agg(object) from temp_tags.tag_search_json group by tag_id) to stdout;' > tags.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_tags cascade;'
 	sed -i tags.jsonl -e 's/\\\\/\\/g'
 
 metadata: tag_search_json
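
Usage sketch (assumptions, not taken from the patch: the makefiles are run
from the repository root, psql can reach the target database, and an
OpenSearch node answers at the given URL; opensearch.example is a
placeholder host):

	make -f index/images.mk DATABASE=philomena OPENSEARCH_URL=http://opensearch.example:9200/
	make -f index/tags.mk OPENSEARCH_URL=http://opensearch.example:9200/

Variables left unset fall back to their ?= defaults (philomena,
http://localhost:9200/, elasticdump), so a plain "make -f index/comments.mk"
keeps the previous localhost behaviour.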