fix fast indexer

Commit 28b1b34b00 (parent 200444bb91) by Luna D., 2024-06-30 14:12:55 +02:00.
GPG key ID: 4B1C63448394F688 (no known key found for this signature in the database).
7 changed files with 29 additions and 26 deletions.
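The seven files are the per-index import Makefiles for the comments, filters, galleries, images, posts, reports, and tags indices, and the commit makes three fixes across them: the OpenSearch endpoint becomes an overridable OPENSEARCH_URL variable instead of a hardcoded URL (replacing the older ELASTICSEARCH_URL name in the filters Makefile), the bash-only <<< herestrings fed to psql are replaced with the portable -c flag, and a stray trailing comma in the images tag-count SQL is dropped. Because the variable is assigned with ?=, which only takes effect when the name is still unset, the endpoint can now be overridden per invocation; a minimal sketch (the non-default host is hypothetical):

	# one-off override on the make command line (hypothetical host):
	make all OPENSEARCH_URL=http://opensearch.internal:9200/

	# or from the environment; ?= never clobbers a variable that is
	# already set, so the exported value wins over the default:
	OPENSEARCH_URL=http://opensearch.internal:9200/ make all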

Makefile for the comments index:

@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 all: import_es
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=comments.jsonl --output=http://localhost:9200/ --output-index=comments --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=comments.jsonl --output=$(OPENSEARCH_URL) --output-index=comments --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 dump_jsonl: metadata authors tags
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_comments.jsonb_object_agg(object) from temp_comments.comment_search_json group by comment_id) to stdout;' > comments.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_comments cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_comments.jsonb_object_agg(object) from temp_comments.comment_search_json group by comment_id) to stdout;' > comments.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_comments cascade;'
 	sed -i comments.jsonl -e 's/\\\\/\\/g'
 metadata: comment_search_json
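The sed line in each dump_jsonl recipe is unchanged but worth a note: copy ... to stdout uses PostgreSQL's text format, which escapes every backslash as \\, so JSON escapes inside the dumped documents come out doubled and the substitution collapses them back. A minimal sketch of the same transformation (the sample document is made up):

	# COPY's text format doubles backslashes, turning a JSON body like
	# "line1\nline2" into "line1\\nline2" on the wire; undo it:
	printf '%s\n' '{"body": "line1\\nline2"}' | sed -e 's/\\\\/\\/g'
	# -> {"body": "line1\nline2"}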

Makefile for the filters index:

@@ -1,19 +1,16 @@
 DATABASE ?= philomena
-ELASTICSEARCH_URL ?= http://localhost:9200/
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
-# uncomment if getting "redirection unexpected" error on dump_jsonl
-#SHELL=/bin/bash
 .ONESHELL:
 all: import_es
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=filters.jsonl --output=$(ELASTICSEARCH_URL) --output-index=filters --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=filters.jsonl --output=$(OPENSEARCH_URL) --output-index=filters --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 dump_jsonl: metadata creators
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_filters.jsonb_object_agg(object) from temp_filters.filter_search_json group by filter_id) to stdout;' > filters.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_filters cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_filters.jsonb_object_agg(object) from temp_filters.filter_search_json group by filter_id) to stdout;' > filters.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_filters cascade;'
 	sed -i filters.jsonl -e 's/\\\\/\\/g'
 metadata: filter_search_json
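The two deleted comment lines point at the portability bug behind the psql change: <<< herestrings are a bash extension, and make runs recipes with /bin/sh, which on Debian-style systems is dash and rejects the syntax with exactly the "redirection unexpected" error the comment mentions. Passing the statement with -c works under any POSIX shell, so the SHELL=/bin/bash workaround can go away. A minimal reproduction, assuming dash is /bin/sh:

	sh -c "psql philomena -v ON_ERROR_STOP=1 <<< 'select 1;'"
	# sh: 1: Syntax error: redirection unexpected

	sh -c "psql philomena -v ON_ERROR_STOP=1 -c 'select 1;'"
	# same statement, accepted by any POSIX shell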

Makefile for the galleries index:

@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 all: import_es
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=galleries.jsonl --output=http://localhost:9200/ --output-index=galleries --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=galleries.jsonl --output=$(OPENSEARCH_URL) --output-index=galleries --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 dump_jsonl: metadata subscribers images
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_galleries.jsonb_object_agg(object) from temp_galleries.gallery_search_json group by gallery_id) to stdout;' > galleries.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_galleries cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_galleries.jsonb_object_agg(object) from temp_galleries.gallery_search_json group by gallery_id) to stdout;' > galleries.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_galleries cascade;'
 	sed -i galleries.jsonl -e 's/\\\\/\\/g'
 metadata: gallery_search_json

Makefile for the images index:

@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 all: import_es
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=images.jsonl --output=$(OPENSEARCH_URL) --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 dump_jsonl: metadata true_uploaders uploaders deleters galleries tags sources hides upvotes downvotes faves tag_names
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_images cascade;'
 	sed -i images.jsonl -e 's/\\\\/\\/g'
 metadata: image_search_json

@@ -84,7 +85,7 @@ tags: image_search_json
 	'body_type_tag_count', count(case when t.category = 'body-type' then t.category else null end),
 	'content_fanmade_tag_count', count(case when t.category = 'content-fanmade' then t.category else null end),
 	'content_official_tag_count', count(case when t.category = 'content-official' then t.category else null end),
-	'spoiler_tag_count', count(case when t.category = 'spoiler' then t.category else null end),
+	'spoiler_tag_count', count(case when t.category = 'spoiler' then t.category else null end)
 	) from image_taggings it inner join tags t on t.id = it.tag_id group by image_id;
 	SQL
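This one-character hunk is likely the actual "fast indexer" breakage from the commit title: jsonb_build_object takes a flat list of alternating keys and values, and PostgreSQL treats a trailing comma before the closing parenthesis as a syntax error, so with ON_ERROR_STOP=1 the images dump aborted before writing any JSONL. A minimal reproduction:

	psql philomena -c "select jsonb_build_object('spoiler_tag_count', 0,);"
	# ERROR:  syntax error at or near ")"

	psql philomena -c "select jsonb_build_object('spoiler_tag_count', 0);"
	# -> {"spoiler_tag_count": 0}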

Makefile for the posts index:

@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 all: import_es
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=posts.jsonl --output=http://localhost:9200/ --output-index=posts --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=posts.jsonl --output=$(OPENSEARCH_URL) --output-index=posts --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 dump_jsonl: metadata authors
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_posts.jsonb_object_agg(object) from temp_posts.post_search_json group by post_id) to stdout;' > posts.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_posts cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_posts.jsonb_object_agg(object) from temp_posts.post_search_json group by post_id) to stdout;' > posts.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_posts cascade;'
 	sed -i posts.jsonl -e 's/\\\\/\\/g'
 metadata: post_search_json

Makefile for the reports index:

@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 all: import_es
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=reports.jsonl --output=http://localhost:9200/ --output-index=reports --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=reports.jsonl --output=$(OPENSEARCH_URL) --output-index=reports --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 dump_jsonl: metadata image_ids comment_image_ids
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_reports.jsonb_object_agg(object) from temp_reports.report_search_json group by report_id) to stdout;' > reports.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_reports cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_reports.jsonb_object_agg(object) from temp_reports.report_search_json group by report_id) to stdout;' > reports.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_reports cascade;'
 	sed -i reports.jsonl -e 's/\\\\/\\/g'
 metadata: report_search_json

Makefile for the tags index:

@@ -1,15 +1,16 @@
 DATABASE ?= philomena
+OPENSEARCH_URL ?= http://localhost:9200/
 ELASTICDUMP ?= elasticdump
 .ONESHELL:
 all: import_es
 import_es: dump_jsonl
-	$(ELASTICDUMP) --input=tags.jsonl --output=http://localhost:9200/ --output-index=tags --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
+	$(ELASTICDUMP) --input=tags.jsonl --output=$(OPENSEARCH_URL) --output-index=tags --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 dump_jsonl: metadata aliases implied_tags implied_by_tags
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_tags.jsonb_object_agg(object) from temp_tags.tag_search_json group by tag_id) to stdout;' > tags.jsonl
-	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_tags cascade;'
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_tags.jsonb_object_agg(object) from temp_tags.tag_search_json group by tag_id) to stdout;' > tags.jsonl
+	psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_tags cascade;'
 	sed -i tags.jsonl -e 's/\\\\/\\/g'
 metadata: tag_search_json