Fix images fast indexer

This commit is contained in:
Liam 2024-04-21 20:44:08 -04:00
parent 7ba9579078
commit 74daa662ac

View file

@ -7,7 +7,7 @@ all: import_es
import_es: dump_jsonl import_es: dump_jsonl
$(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id" $(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
dump_jsonl: metadata true_uploaders uploaders deleters galleries tags hides upvotes downvotes faves tag_names dump_jsonl: metadata true_uploaders uploaders deleters galleries tags sources hides upvotes downvotes faves tag_names
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;' psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;'
sed -i images.jsonl -e 's/\\\\/\\/g' sed -i images.jsonl -e 's/\\\\/\\/g'
@ -15,6 +15,8 @@ dump_jsonl: metadata true_uploaders uploaders deleters galleries tags hides upvo
metadata: image_search_json metadata: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select id, jsonb_build_object( insert into temp_images.image_search_json (image_id, object) select id, jsonb_build_object(
'approved', approved,
'animated', is_animated,
'anonymous', anonymous, 'anonymous', anonymous,
'aspect_ratio', nullif(image_aspect_ratio, 'NaN'::float8), 'aspect_ratio', nullif(image_aspect_ratio, 'NaN'::float8),
'comment_count', comments_count, 'comment_count', comments_count,
@ -23,6 +25,7 @@ metadata: image_search_json
'description', description, 'description', description,
'downvotes', downvotes_count, 'downvotes', downvotes_count,
'duplicate_id', duplicate_id, 'duplicate_id', duplicate_id,
'duration', (case when is_animated then image_duration else 0::float end),
'faves', faves_count, 'faves', faves_count,
'file_name', image_name, 'file_name', image_name,
'fingerprint', fingerprint, 'fingerprint', fingerprint,
@ -35,10 +38,11 @@ metadata: image_search_json
'orig_sha512_hash', image_orig_sha512_hash, 'orig_sha512_hash', image_orig_sha512_hash,
'original_format', image_format, 'original_format', image_format,
'pixels', cast(image_width as bigint)*cast(image_height as bigint), 'pixels', cast(image_width as bigint)*cast(image_height as bigint),
'processed', processed,
'score', score, 'score', score,
'size', image_size, 'size', image_size,
'sha512_hash', image_sha512_hash, 'sha512_hash', image_sha512_hash,
'source_url', lower(source_url), 'thumbnails_generated', thumbnails_generated,
'updated_at', updated_at, 'updated_at', updated_at,
'upvotes', upvotes_count, 'upvotes', upvotes_count,
'width', image_width, 'width', image_width,
@ -64,8 +68,6 @@ deleters: image_search_json
galleries: image_search_json galleries: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_interactions', jsonb_agg(jsonb_build_object('id', gi.gallery_id, 'position', gi.position))) from gallery_interactions gi group by image_id; insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_interactions', jsonb_agg(jsonb_build_object('id', gi.gallery_id, 'position', gi.position))) from gallery_interactions gi group by image_id;
insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_id', jsonb_agg(gi.gallery_id)) from gallery_interactions gi group by image_id;
insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_position', jsonb_object_agg(gi.gallery_id, gi.position)) from gallery_interactions gi group by image_id;
SQL SQL
tags: image_search_json tags: image_search_json
@ -73,24 +75,29 @@ tags: image_search_json
insert into temp_images.image_search_json (image_id, object) select it.image_id, jsonb_build_object('tag_ids', jsonb_agg(it.tag_id), 'tag_count', count(*)) from image_taggings it group by image_id; insert into temp_images.image_search_json (image_id, object) select it.image_id, jsonb_build_object('tag_ids', jsonb_agg(it.tag_id), 'tag_count', count(*)) from image_taggings it group by image_id;
SQL SQL
sources: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select s.image_id, jsonb_build_object('source_url', jsonb_agg(lower(s.source)), 'source_count', count(*)) from image_sources s group by image_id;
SQL
hides: image_search_json hides: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select ih.image_id, jsonb_build_object('hidden_by_ids', jsonb_agg(ih.user_id), 'hidden_by', jsonb_agg(lower(u.name))) from image_hides ih inner join users u on u.id = ih.user_id group by image_id; insert into temp_images.image_search_json (image_id, object) select ih.image_id, jsonb_build_object('hidden_by_user_ids', jsonb_agg(ih.user_id), 'hidden_by_users', jsonb_agg(lower(u.name))) from image_hides ih inner join users u on u.id = ih.user_id group by image_id;
SQL SQL
downvotes: image_search_json downvotes: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('downvoted_by_ids', jsonb_agg(iv.user_id), 'downvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = false group by image_id; insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('downvoter_ids', jsonb_agg(iv.user_id), 'downvoters', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = false group by image_id;
SQL SQL
upvotes: image_search_json upvotes: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('upvoted_by_ids', jsonb_agg(iv.user_id), 'upvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = true group by image_id; insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('upvoter_ids', jsonb_agg(iv.user_id), 'upvoters', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = true group by image_id;
SQL SQL
faves: image_search_json faves: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select if.image_id, jsonb_build_object('faved_by_ids', jsonb_agg(if.user_id), 'faved_by', jsonb_agg(lower(u.name))) from image_faves if inner join users u on u.id = if.user_id group by image_id; insert into temp_images.image_search_json (image_id, object) select if.image_id, jsonb_build_object('favourited_by_user_ids', jsonb_agg(if.user_id), 'favourited_by_users', jsonb_agg(lower(u.name))) from image_faves if inner join users u on u.id = if.user_id group by image_id;
SQL SQL
tag_names: tags_with_aliases tag_names: tags_with_aliases