mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-27 13:47:58 +01:00
fix fast indexer
This commit is contained in:
parent
200444bb91
commit
28b1b34b00
7 changed files with 29 additions and 26 deletions
|
@ -1,15 +1,16 @@
|
||||||
DATABASE ?= philomena
|
DATABASE ?= philomena
|
||||||
|
OPENSEARCH_URL ?= http://localhost:9200/
|
||||||
ELASTICDUMP ?= elasticdump
|
ELASTICDUMP ?= elasticdump
|
||||||
.ONESHELL:
|
.ONESHELL:
|
||||||
|
|
||||||
all: import_es
|
all: import_es
|
||||||
|
|
||||||
import_es: dump_jsonl
|
import_es: dump_jsonl
|
||||||
$(ELASTICDUMP) --input=comments.jsonl --output=http://localhost:9200/ --output-index=comments --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
# Load comments.jsonl into the "comments" index at $(OPENSEARCH_URL); the reference must be parenthesised, a bare $O... is read by make as variable "O".
$(ELASTICDUMP) --input=comments.jsonl --output=$(OPENSEARCH_URL) --output-index=comments --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
||||||
|
|
||||||
dump_jsonl: metadata authors tags
|
dump_jsonl: metadata authors tags
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_comments.jsonb_object_agg(object) from temp_comments.comment_search_json group by comment_id) to stdout;' > comments.jsonl
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_comments.jsonb_object_agg(object) from temp_comments.comment_search_json group by comment_id) to stdout;' > comments.jsonl
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_comments cascade;'
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_comments cascade;'
|
||||||
sed -i comments.jsonl -e 's/\\\\/\\/g'
|
sed -i comments.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
metadata: comment_search_json
|
metadata: comment_search_json
|
||||||
|
|
|
@ -1,19 +1,16 @@
|
||||||
DATABASE ?= philomena
|
DATABASE ?= philomena
|
||||||
ELASTICSEARCH_URL ?= http://localhost:9200/
|
OPENSEARCH_URL ?= http://localhost:9200/
|
||||||
ELASTICDUMP ?= elasticdump
|
ELASTICDUMP ?= elasticdump
|
||||||
# uncomment if getting "redirection unexpected" error on dump_jsonl
|
|
||||||
#SHELL=/bin/bash
|
|
||||||
|
|
||||||
.ONESHELL:
|
.ONESHELL:
|
||||||
|
|
||||||
all: import_es
|
all: import_es
|
||||||
|
|
||||||
import_es: dump_jsonl
|
import_es: dump_jsonl
|
||||||
$(ELASTICDUMP) --input=filters.jsonl --output=$(ELASTICSEARCH_URL) --output-index=filters --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
# Load filters.jsonl into the "filters" index at $(OPENSEARCH_URL); the reference must be parenthesised, a bare $O... is read by make as variable "O".
$(ELASTICDUMP) --input=filters.jsonl --output=$(OPENSEARCH_URL) --output-index=filters --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
||||||
|
|
||||||
dump_jsonl: metadata creators
|
dump_jsonl: metadata creators
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_filters.jsonb_object_agg(object) from temp_filters.filter_search_json group by filter_id) to stdout;' > filters.jsonl
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_filters.jsonb_object_agg(object) from temp_filters.filter_search_json group by filter_id) to stdout;' > filters.jsonl
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_filters cascade;'
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_filters cascade;'
|
||||||
sed -i filters.jsonl -e 's/\\\\/\\/g'
|
sed -i filters.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
metadata: filter_search_json
|
metadata: filter_search_json
|
||||||
|
|
|
@ -1,15 +1,16 @@
|
||||||
DATABASE ?= philomena
|
DATABASE ?= philomena
|
||||||
|
OPENSEARCH_URL ?= http://localhost:9200/
|
||||||
ELASTICDUMP ?= elasticdump
|
ELASTICDUMP ?= elasticdump
|
||||||
.ONESHELL:
|
.ONESHELL:
|
||||||
|
|
||||||
all: import_es
|
all: import_es
|
||||||
|
|
||||||
import_es: dump_jsonl
|
import_es: dump_jsonl
|
||||||
$(ELASTICDUMP) --input=galleries.jsonl --output=http://localhost:9200/ --output-index=galleries --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
# Load galleries.jsonl into the "galleries" index at $(OPENSEARCH_URL); the reference must be parenthesised, a bare $O... is read by make as variable "O".
$(ELASTICDUMP) --input=galleries.jsonl --output=$(OPENSEARCH_URL) --output-index=galleries --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
||||||
|
|
||||||
dump_jsonl: metadata subscribers images
|
dump_jsonl: metadata subscribers images
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_galleries.jsonb_object_agg(object) from temp_galleries.gallery_search_json group by gallery_id) to stdout;' > galleries.jsonl
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_galleries.jsonb_object_agg(object) from temp_galleries.gallery_search_json group by gallery_id) to stdout;' > galleries.jsonl
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_galleries cascade;'
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_galleries cascade;'
|
||||||
sed -i galleries.jsonl -e 's/\\\\/\\/g'
|
sed -i galleries.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
metadata: gallery_search_json
|
metadata: gallery_search_json
|
||||||
|
|
|
@ -1,15 +1,16 @@
|
||||||
DATABASE ?= philomena
|
DATABASE ?= philomena
|
||||||
|
OPENSEARCH_URL ?= http://localhost:9200/
|
||||||
ELASTICDUMP ?= elasticdump
|
ELASTICDUMP ?= elasticdump
|
||||||
.ONESHELL:
|
.ONESHELL:
|
||||||
|
|
||||||
all: import_es
|
all: import_es
|
||||||
|
|
||||||
import_es: dump_jsonl
|
import_es: dump_jsonl
|
||||||
$(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
# Load images.jsonl into the "images" index at $(OPENSEARCH_URL); the reference must be parenthesised, a bare $O... is read by make as variable "O".
$(ELASTICDUMP) --input=images.jsonl --output=$(OPENSEARCH_URL) --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
||||||
|
|
||||||
dump_jsonl: metadata true_uploaders uploaders deleters galleries tags sources hides upvotes downvotes faves tag_names
|
dump_jsonl: metadata true_uploaders uploaders deleters galleries tags sources hides upvotes downvotes faves tag_names
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;'
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_images cascade;'
|
||||||
sed -i images.jsonl -e 's/\\\\/\\/g'
|
sed -i images.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
metadata: image_search_json
|
metadata: image_search_json
|
||||||
|
@ -84,7 +85,7 @@ tags: image_search_json
|
||||||
'body_type_tag_count', count(case when t.category = 'body-type' then t.category else null end),
|
'body_type_tag_count', count(case when t.category = 'body-type' then t.category else null end),
|
||||||
'content_fanmade_tag_count', count(case when t.category = 'content-fanmade' then t.category else null end),
|
'content_fanmade_tag_count', count(case when t.category = 'content-fanmade' then t.category else null end),
|
||||||
'content_official_tag_count', count(case when t.category = 'content-official' then t.category else null end),
|
'content_official_tag_count', count(case when t.category = 'content-official' then t.category else null end),
|
||||||
'spoiler_tag_count', count(case when t.category = 'spoiler' then t.category else null end),
|
'spoiler_tag_count', count(case when t.category = 'spoiler' then t.category else null end)
|
||||||
) from image_taggings it inner join tags t on t.id = it.tag_id group by image_id;
|
) from image_taggings it inner join tags t on t.id = it.tag_id group by image_id;
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,16 @@
|
||||||
DATABASE ?= philomena
|
DATABASE ?= philomena
|
||||||
|
OPENSEARCH_URL ?= http://localhost:9200/
|
||||||
ELASTICDUMP ?= elasticdump
|
ELASTICDUMP ?= elasticdump
|
||||||
.ONESHELL:
|
.ONESHELL:
|
||||||
|
|
||||||
all: import_es
|
all: import_es
|
||||||
|
|
||||||
import_es: dump_jsonl
|
import_es: dump_jsonl
|
||||||
$(ELASTICDUMP) --input=posts.jsonl --output=http://localhost:9200/ --output-index=posts --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
# Load posts.jsonl into the "posts" index at $(OPENSEARCH_URL); the reference must be parenthesised, a bare $O... is read by make as variable "O".
$(ELASTICDUMP) --input=posts.jsonl --output=$(OPENSEARCH_URL) --output-index=posts --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
||||||
|
|
||||||
dump_jsonl: metadata authors
|
dump_jsonl: metadata authors
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_posts.jsonb_object_agg(object) from temp_posts.post_search_json group by post_id) to stdout;' > posts.jsonl
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_posts.jsonb_object_agg(object) from temp_posts.post_search_json group by post_id) to stdout;' > posts.jsonl
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_posts cascade;'
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_posts cascade;'
|
||||||
sed -i posts.jsonl -e 's/\\\\/\\/g'
|
sed -i posts.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
metadata: post_search_json
|
metadata: post_search_json
|
||||||
|
|
|
@ -1,15 +1,16 @@
|
||||||
DATABASE ?= philomena
|
DATABASE ?= philomena
|
||||||
|
OPENSEARCH_URL ?= http://localhost:9200/
|
||||||
ELASTICDUMP ?= elasticdump
|
ELASTICDUMP ?= elasticdump
|
||||||
.ONESHELL:
|
.ONESHELL:
|
||||||
|
|
||||||
all: import_es
|
all: import_es
|
||||||
|
|
||||||
import_es: dump_jsonl
|
import_es: dump_jsonl
|
||||||
$(ELASTICDUMP) --input=reports.jsonl --output=http://localhost:9200/ --output-index=reports --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
# Load reports.jsonl into the "reports" index at $(OPENSEARCH_URL); the reference must be parenthesised, a bare $O... is read by make as variable "O".
$(ELASTICDUMP) --input=reports.jsonl --output=$(OPENSEARCH_URL) --output-index=reports --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
||||||
|
|
||||||
dump_jsonl: metadata image_ids comment_image_ids
|
dump_jsonl: metadata image_ids comment_image_ids
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_reports.jsonb_object_agg(object) from temp_reports.report_search_json group by report_id) to stdout;' > reports.jsonl
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_reports.jsonb_object_agg(object) from temp_reports.report_search_json group by report_id) to stdout;' > reports.jsonl
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_reports cascade;'
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_reports cascade;'
|
||||||
sed -i reports.jsonl -e 's/\\\\/\\/g'
|
sed -i reports.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
metadata: report_search_json
|
metadata: report_search_json
|
||||||
|
|
|
@ -1,15 +1,16 @@
|
||||||
DATABASE ?= philomena
|
DATABASE ?= philomena
|
||||||
|
OPENSEARCH_URL ?= http://localhost:9200/
|
||||||
ELASTICDUMP ?= elasticdump
|
ELASTICDUMP ?= elasticdump
|
||||||
.ONESHELL:
|
.ONESHELL:
|
||||||
|
|
||||||
all: import_es
|
all: import_es
|
||||||
|
|
||||||
import_es: dump_jsonl
|
import_es: dump_jsonl
|
||||||
$(ELASTICDUMP) --input=tags.jsonl --output=http://localhost:9200/ --output-index=tags --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
# Load tags.jsonl into the "tags" index at $(OPENSEARCH_URL); the reference must be parenthesised, a bare $O... is read by make as variable "O".
$(ELASTICDUMP) --input=tags.jsonl --output=$(OPENSEARCH_URL) --output-index=tags --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
|
||||||
|
|
||||||
dump_jsonl: metadata aliases implied_tags implied_by_tags
|
dump_jsonl: metadata aliases implied_tags implied_by_tags
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_tags.jsonb_object_agg(object) from temp_tags.tag_search_json group by tag_id) to stdout;' > tags.jsonl
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'copy (select temp_tags.jsonb_object_agg(object) from temp_tags.tag_search_json group by tag_id) to stdout;' > tags.jsonl
|
||||||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_tags cascade;'
|
psql $(DATABASE) -v ON_ERROR_STOP=1 -c 'drop schema temp_tags cascade;'
|
||||||
sed -i tags.jsonl -e 's/\\\\/\\/g'
|
sed -i tags.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
metadata: tag_search_json
|
metadata: tag_search_json
|
||||||
|
|
Loading…
Reference in a new issue