mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-27 13:47:58 +01:00
add fast reindexer
This commit is contained in:
parent
4e70124f36
commit
0ecfa6c6c0
9 changed files with 402 additions and 1 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -41,3 +41,6 @@ npm-debug.log
|
||||||
|
|
||||||
# Intellij IDEA
|
# Intellij IDEA
|
||||||
.idea
|
.idea
|
||||||
|
|
||||||
|
# Index dumps
|
||||||
|
*.jsonl
|
||||||
|
|
22
index/all.mk
Normal file
22
index/all.mk
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
all: comments galleries images posts reports tags
|
||||||
|
|
||||||
|
comments:
|
||||||
|
$(MAKE) -f comments.mk
|
||||||
|
|
||||||
|
galleries:
|
||||||
|
$(MAKE) -f galleries.mk
|
||||||
|
|
||||||
|
images:
|
||||||
|
$(MAKE) -f images.mk
|
||||||
|
|
||||||
|
posts:
|
||||||
|
$(MAKE) -f posts.mk
|
||||||
|
|
||||||
|
reports:
|
||||||
|
$(MAKE) -f reports.mk
|
||||||
|
|
||||||
|
tags:
|
||||||
|
$(MAKE) -f tags.mk
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f ./*.jsonl
|
48
index/comments.mk
Normal file
48
index/comments.mk
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
DATABASE ?= philomena
|
||||||
|
ELASTICDUMP ?= elasticdump
|
||||||
|
.ONESHELL:
|
||||||
|
|
||||||
|
all: import_es
|
||||||
|
|
||||||
|
import_es: dump_jsonl
|
||||||
|
$(ELASTICDUMP) --input=comments.jsonl --output=http://localhost:9200/ --output-index=comments --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)"
|
||||||
|
|
||||||
|
dump_jsonl: metadata authors tags
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_comments.jsonb_object_agg(object) from temp_comments.comment_search_json group by comment_id) to stdout;' > comments.jsonl
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_comments cascade;'
|
||||||
|
sed -i comments.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
|
metadata: comment_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_comments.comment_search_json (comment_id, object) select c.id, jsonb_build_object(
|
||||||
|
'id', c.id,
|
||||||
|
'posted_at', c.created_at,
|
||||||
|
'ip', c.ip,
|
||||||
|
'fingerprint', c.fingerprint,
|
||||||
|
'image_id', c.image_id,
|
||||||
|
'user_id', c.user_id,
|
||||||
|
'anonymous', c.anonymous,
|
||||||
|
'body', c.body,
|
||||||
|
'hidden_from_users', (c.hidden_from_users or i.hidden_from_users)
|
||||||
|
) from comments c inner join images i on c.image_id=i.id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
authors: comment_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_comments.comment_search_json (comment_id, object) select c.id, jsonb_build_object('author', (case when c.anonymous='t' then null else u.name end)) from comments c left join users u on c.user_id=u.id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
tags: comment_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
create unlogged table temp_comments.image_tags (image_id bigint not null, tags jsonb not null);
|
||||||
|
insert into temp_comments.image_tags (image_id, tags) select it.image_id, jsonb_agg(it.tag_id) from image_taggings it group by it.image_id;
|
||||||
|
insert into temp_comments.comment_search_json (comment_id, object) select c.id, jsonb_build_object('image_tag_ids', it.tags) from comments c inner join temp_comments.image_tags it on c.image_id=it.image_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
comment_search_json:
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
drop schema if exists temp_comments cascade;
|
||||||
|
create schema temp_comments;
|
||||||
|
create unlogged table temp_comments.comment_search_json (comment_id bigint not null, object jsonb not null);
|
||||||
|
create or replace aggregate temp_comments.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}');
|
||||||
|
SQL
|
44
index/galleries.mk
Normal file
44
index/galleries.mk
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
DATABASE ?= philomena
|
||||||
|
ELASTICDUMP ?= elasticdump
|
||||||
|
.ONESHELL:
|
||||||
|
|
||||||
|
all: import_es
|
||||||
|
|
||||||
|
import_es: dump_jsonl
|
||||||
|
$(ELASTICDUMP) --input=galleries.jsonl --output=http://localhost:9200/ --output-index=galleries --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)"
|
||||||
|
|
||||||
|
dump_jsonl: metadata subscribers images
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_galleries.jsonb_object_agg(object) from temp_galleries.gallery_search_json group by gallery_id) to stdout;' > galleries.jsonl
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_galleries cascade;'
|
||||||
|
sed -i galleries.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
|
metadata: gallery_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_galleries.gallery_search_json (gallery_id, object) select g.id, jsonb_build_object(
|
||||||
|
'id', g.id,
|
||||||
|
'image_count', g.image_count,
|
||||||
|
'updated_at', g.updated_at,
|
||||||
|
'created_at', g.created_at,
|
||||||
|
'title', lower(g.title),
|
||||||
|
'creator', lower(u.name),
|
||||||
|
'description', g.description
|
||||||
|
) from galleries g left join users u on g.creator_id=u.id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
subscribers: gallery_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_galleries.gallery_search_json (gallery_id, object) select gallery_id, json_build_object('watcher_ids', jsonb_agg(user_id), 'watcher_count', count(*)) from gallery_subscriptions group by gallery_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
images: gallery_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_galleries.gallery_search_json (gallery_id, object) select gallery_id, json_build_object('image_ids', jsonb_agg(image_id)) from gallery_interactions group by gallery_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
gallery_search_json:
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
drop schema if exists temp_galleries cascade;
|
||||||
|
create schema temp_galleries;
|
||||||
|
create unlogged table temp_galleries.gallery_search_json (gallery_id bigint not null, object jsonb not null);
|
||||||
|
create or replace aggregate temp_galleries.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}');
|
||||||
|
SQL
|
132
index/images.mk
Normal file
132
index/images.mk
Normal file
|
@ -0,0 +1,132 @@
|
||||||
|
DATABASE ?= philomena
|
||||||
|
ELASTICDUMP ?= elasticdump
|
||||||
|
.ONESHELL:
|
||||||
|
|
||||||
|
all: import_es
|
||||||
|
|
||||||
|
import_es: dump_jsonl
|
||||||
|
$(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)"
|
||||||
|
|
||||||
|
dump_jsonl: metadata true_uploaders uploaders deleters galleries tags hides upvotes downvotes faves tag_names
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;'
|
||||||
|
sed -i images.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
|
metadata: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select id, jsonb_build_object(
|
||||||
|
'anonymous', anonymous,
|
||||||
|
'aspect_ratio', nullif(image_aspect_ratio, 'NaN'::float8),
|
||||||
|
'comment_count', comments_count,
|
||||||
|
'created_at', created_at,
|
||||||
|
'deletion_reason', deletion_reason,
|
||||||
|
'description', description,
|
||||||
|
'downvotes', downvotes_count,
|
||||||
|
'duplicate_id', duplicate_id,
|
||||||
|
'faves', faves_count,
|
||||||
|
'file_name', image_name,
|
||||||
|
'fingerprint', fingerprint,
|
||||||
|
'first_seen_at', first_seen_at,
|
||||||
|
'height', image_height,
|
||||||
|
'hidden_from_users', hidden_from_users,
|
||||||
|
'id', id,
|
||||||
|
'ip', ip,
|
||||||
|
'mime_type', image_mime_type,
|
||||||
|
'orig_sha512_hash', image_orig_sha512_hash,
|
||||||
|
'original_format', image_format,
|
||||||
|
'pixels', cast(image_width as bigint)*cast(image_height as bigint),
|
||||||
|
'score', score,
|
||||||
|
'size', image_size,
|
||||||
|
'sha512_hash', image_sha512_hash,
|
||||||
|
'source_url', lower(source_url),
|
||||||
|
'updated_at', updated_at,
|
||||||
|
'upvotes', upvotes_count,
|
||||||
|
'width', image_width,
|
||||||
|
'wilson_score', temp_images.wilson_995(upvotes_count, downvotes_count)
|
||||||
|
) from images;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
true_uploaders: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select i.id, jsonb_build_object('true_uploader_id', u.id, 'true_uploader', u.name) from images i left join users u on u.id = i.user_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
uploaders: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select i.id, jsonb_build_object('uploader_id', (case when i.anonymous = 't' then null else u.id end), 'uploader', (case when i.anonymous = 't' then null else lower(u.name) end)) from images i left join users u on u.id = i.user_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
deleters: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select i.id, jsonb_build_object('deleted_by_user_id', u.id, 'deleted_by_user', lower(u.name)) from images i left join users u on u.id = i.deleted_by_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
galleries: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_interactions', jsonb_agg(jsonb_build_object('gallery_id', gi.gallery_id, 'position', gi.position))) from gallery_interactions gi group by image_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
tags: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select it.image_id, jsonb_build_object('tag_ids', jsonb_agg(it.tag_id), 'tag_count', count(*)) from image_taggings it group by image_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
hides: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select ih.image_id, jsonb_build_object('hidden_by_ids', jsonb_agg(ih.user_id), 'hidden_by', jsonb_agg(lower(u.name))) from image_hides ih inner join users u on u.id = ih.user_id group by image_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
downvotes: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('downvoted_by_ids', jsonb_agg(iv.user_id), 'downvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = false group by image_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
upvotes: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('upvoted_by_ids', jsonb_agg(iv.user_id), 'upvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = true group by image_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
faves: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select if.image_id, jsonb_build_object('faved_by_ids', jsonb_agg(if.user_id), 'faved_by', jsonb_agg(lower(u.name))) from image_faves if inner join users u on u.id = if.user_id group by image_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
tag_names: tags_with_aliases
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_images.image_search_json (image_id, object) select image_id, jsonb_build_object('namespaced_tags', jsonb_build_object('name', jsonb_agg(lower(tag_name)))) from temp_images.tags_with_aliases group by image_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
tags_with_aliases: image_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
create unlogged table if not exists temp_images.tags_with_aliases (image_id bigint not null, tag_name text not null);
|
||||||
|
truncate temp_images.tags_with_aliases;
|
||||||
|
insert into temp_images.tags_with_aliases (image_id, tag_name) select it.image_id, t.name from image_taggings it inner join tags t on t.id = it.tag_id;
|
||||||
|
insert into temp_images.tags_with_aliases (image_id, tag_name) select it.image_id, t.name from image_taggings it left outer join tags t on t.aliased_tag_id = it.tag_id where t.name is not null;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
image_search_json:
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
drop schema if exists temp_images cascade;
|
||||||
|
create schema temp_images;
|
||||||
|
create unlogged table temp_images.image_search_json (image_id bigint not null, object jsonb not null);
|
||||||
|
create function temp_images.wilson_995(succ bigint, fail bigint) returns double precision as '
|
||||||
|
declare
|
||||||
|
n double precision;
|
||||||
|
p_hat double precision;
|
||||||
|
z double precision;
|
||||||
|
z2 double precision;
|
||||||
|
begin
|
||||||
|
if succ <= 0 then
|
||||||
|
return 0;
|
||||||
|
end if;
|
||||||
|
|
||||||
|
n := succ + fail;
|
||||||
|
p_hat := succ / n;
|
||||||
|
z := 2.57583;
|
||||||
|
z2 := 6.634900189;
|
||||||
|
|
||||||
|
return (p_hat + z2 / (2 * n) - z * sqrt((p_hat * (1 - p_hat) + z2 / (4 * n)) / n)) / (1 + z2 / n);
|
||||||
|
end
|
||||||
|
' language plpgsql;
|
||||||
|
create aggregate temp_images.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}');
|
||||||
|
SQL
|
49
index/posts.mk
Normal file
49
index/posts.mk
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
DATABASE ?= philomena
|
||||||
|
ELASTICDUMP ?= elasticdump
|
||||||
|
.ONESHELL:
|
||||||
|
|
||||||
|
all: import_es
|
||||||
|
|
||||||
|
import_es: dump_jsonl
|
||||||
|
$(ELASTICDUMP) --input=posts.jsonl --output=http://localhost:9200/ --output-index=posts --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)"
|
||||||
|
|
||||||
|
dump_jsonl: metadata authors
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_posts.jsonb_object_agg(object) from temp_posts.post_search_json group by post_id) to stdout;' > posts.jsonl
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_posts cascade;'
|
||||||
|
sed -i posts.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
|
metadata: post_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_posts.post_search_json (post_id, object) select p.id, jsonb_build_object(
|
||||||
|
'id', p.id,
|
||||||
|
'topic_id', p.topic_id,
|
||||||
|
'body', p.body,
|
||||||
|
'subject', t.title,
|
||||||
|
'ip', p.ip,
|
||||||
|
'user_agent', p.user_agent,
|
||||||
|
'referrer', p.referrer,
|
||||||
|
'fingerprint', p.fingerprint,
|
||||||
|
'topic_position', p.topic_position,
|
||||||
|
'forum_id', t.forum_id,
|
||||||
|
'user_id', p.user_id,
|
||||||
|
'anonymous', p.anonymous,
|
||||||
|
'created_at', p.created_at,
|
||||||
|
'updated_at', p.updated_at,
|
||||||
|
'deleted', p.hidden_from_users,
|
||||||
|
'destroyed_content', p.destroyed_content,
|
||||||
|
'access_level', f.access_level
|
||||||
|
) from posts p inner join topics t on t.id=p.topic_id inner join forums f on f.id=t.forum_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
authors: post_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_posts.post_search_json (post_id, object) select p.id, jsonb_build_object('author', (case when p.anonymous='t' then null else u.name end)) from posts p left join users u on p.user_id=u.id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
post_search_json:
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
drop schema if exists temp_posts cascade;
|
||||||
|
create schema temp_posts;
|
||||||
|
create unlogged table temp_posts.post_search_json (post_id bigint not null, object jsonb not null);
|
||||||
|
create or replace aggregate temp_posts.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}');
|
||||||
|
SQL
|
50
index/reports.mk
Normal file
50
index/reports.mk
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
DATABASE ?= philomena
|
||||||
|
ELASTICDUMP ?= elasticdump
|
||||||
|
.ONESHELL:
|
||||||
|
|
||||||
|
all: import_es
|
||||||
|
|
||||||
|
import_es: dump_jsonl
|
||||||
|
$(ELASTICDUMP) --input=reports.jsonl --output=http://localhost:9200/ --output-index=reports --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)"
|
||||||
|
|
||||||
|
dump_jsonl: metadata image_ids comment_image_ids
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_reports.jsonb_object_agg(object) from temp_reports.report_search_json group by report_id) to stdout;' > reports.jsonl
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_reports cascade;'
|
||||||
|
sed -i reports.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
|
metadata: report_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_reports.report_search_json (report_id, object) select r.id, jsonb_build_object(
|
||||||
|
'id', r.id,
|
||||||
|
'created_at', r.created_at,
|
||||||
|
'ip', r.ip,
|
||||||
|
'state', r.state,
|
||||||
|
'user', lower(u.name),
|
||||||
|
'user_id', r.user_id,
|
||||||
|
'admin', lower(a.name),
|
||||||
|
'admin_id', r.admin_id,
|
||||||
|
'reportable_type', r.reportable_type,
|
||||||
|
'reportable_id', r.reportable_id,
|
||||||
|
'fingerprint', r.fingerprint,
|
||||||
|
'open', r.open,
|
||||||
|
'reason', r.reason
|
||||||
|
) from reports r left join users u on r.user_id=u.id left join users a on r.admin_id=a.id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
image_ids: report_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_reports.report_search_json (report_id, object) select r.id, jsonb_build_object('image_id', r.reportable_id) from reports r where r.reportable_type = 'Image';
|
||||||
|
SQL
|
||||||
|
|
||||||
|
comment_image_ids: report_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_reports.report_search_json (report_id, object) select r.id, jsonb_build_object('image_id', c.image_id) from reports r inner join comments c on c.id = r.reportable_id where r.reportable_type = 'Comment';
|
||||||
|
SQL
|
||||||
|
|
||||||
|
report_search_json:
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
drop schema if exists temp_reports cascade;
|
||||||
|
create schema temp_reports;
|
||||||
|
create unlogged table temp_reports.report_search_json (report_id bigint not null, object jsonb not null);
|
||||||
|
create or replace aggregate temp_reports.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}');
|
||||||
|
SQL
|
53
index/tags.mk
Normal file
53
index/tags.mk
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
DATABASE ?= philomena
|
||||||
|
ELASTICDUMP ?= elasticdump
|
||||||
|
.ONESHELL:
|
||||||
|
|
||||||
|
all: import_es
|
||||||
|
|
||||||
|
import_es: dump_jsonl
|
||||||
|
$(ELASTICDUMP) --input=tags.jsonl --output=http://localhost:9200/ --output-index=tags --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)"
|
||||||
|
|
||||||
|
dump_jsonl: metadata aliases implied_tags implied_by_tags
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_tags.jsonb_object_agg(object) from temp_tags.tag_search_json group by tag_id) to stdout;' > tags.jsonl
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_tags cascade;'
|
||||||
|
sed -i tags.jsonl -e 's/\\\\/\\/g'
|
||||||
|
|
||||||
|
metadata: tag_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_tags.tag_search_json (tag_id, object) select t.id, jsonb_build_object(
|
||||||
|
'id', t.id,
|
||||||
|
'slug', t.slug,
|
||||||
|
'name', t.name,
|
||||||
|
'name_in_namespace', t.name_in_namespace,
|
||||||
|
'namespace', t.namespace,
|
||||||
|
'analyzed_name', t.name,
|
||||||
|
'aliased_tag', at.name,
|
||||||
|
'category', t.category,
|
||||||
|
'aliased', (t.aliased_tag_id is not null),
|
||||||
|
'description', t.description,
|
||||||
|
'short_description', t.short_description
|
||||||
|
) from tags t left join tags at on t.aliased_tag_id=at.id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
aliases: tag_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_tags.tag_search_json (tag_id, object) select t.aliased_tag_id, jsonb_build_object('aliases', jsonb_agg(t.name)) from tags t inner join tags at on t.aliased_tag_id=t.id group by t.aliased_tag_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
implied_tags: tag_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_tags.tag_search_json (tag_id, object) select it.tag_id, jsonb_build_object('implied_tag_ids', jsonb_agg(it.implied_tag_id), 'implied_tags', jsonb_agg(t.name)) from tags_implied_tags it inner join tags t on t.id=it.implied_tag_id group by it.tag_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
implied_by_tags: tag_search_json
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
insert into temp_tags.tag_search_json (tag_id, object) select it.implied_tag_id, jsonb_build_object('implied_by_tags', jsonb_agg(t.name)) from tags_implied_tags it inner join tags t on t.id=it.tag_id group by it.implied_tag_id;
|
||||||
|
SQL
|
||||||
|
|
||||||
|
tag_search_json:
|
||||||
|
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
|
||||||
|
drop schema if exists temp_tags cascade;
|
||||||
|
create schema temp_tags;
|
||||||
|
create unlogged table temp_tags.tag_search_json (tag_id bigint not null, object jsonb not null);
|
||||||
|
create or replace aggregate temp_tags.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}');
|
||||||
|
SQL
|
|
@ -19,7 +19,7 @@ defmodule Mix.Tasks.ReindexAll do
|
||||||
|
|
||||||
@shortdoc "Destroys and recreates all Elasticsearch indices."
|
@shortdoc "Destroys and recreates all Elasticsearch indices."
|
||||||
def run(_) do
|
def run(_) do
|
||||||
if Mix.env() == :prod do
|
if Mix.env() == :prod and not Enum.member?(System.argv(), "--i-know-what-im-doing") do
|
||||||
raise "do not run this task in production"
|
raise "do not run this task in production"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue