ServiceWorker streaming zip download

byte[] 2021-10-25 23:42:29 -04:00
parent d43ae04c1c
commit 55e4e582f1
13 changed files with 383 additions and 5 deletions

@@ -259,3 +259,8 @@ overrides:
      - '*.js'
    rules:
      '@typescript-eslint/explicit-module-boundary-types': 0
  - files:
      - '*.ts'
    rules:
      'no-undef': 0
      'no-constant-condition': 0

@@ -101,6 +101,11 @@ function loadBooruData() {
  // CSRF
  window.booru.csrfToken = $('meta[name="csrf-token"]').content;

  // ServiceWorker
  if ('serviceWorker' in navigator && window.booru.workerPath) {
    navigator.serviceWorker.register(window.booru.workerPath);
  }
}

function BooruOnRails() {

@@ -10,4 +10,24 @@ function arraysEqual(array1, array2) {
   return true;
 }
 
-export { moveElement, arraysEqual };
+/**
+ * @template T
+ * @param {T[]} array
+ * @param {number} numBins
+ * @returns {T[][]}
+ */
+function evenlyDivide(array, numBins) {
+  const bins = [];
+
+  for (let i = 0; i < numBins; i++) {
+    bins[i] = [];
+  }
+
+  for (let i = 0; i < array.length; i++) {
+    bins[i % numBins].push(array[i]);
+  }
+
+  return bins;
+}
+
+export { moveElement, arraysEqual, evenlyDivide };
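
A quick sketch of the round-robin split (values hypothetical); the worker below uses this to spread each page of results across concurrent fetchers:

  evenlyDivide([1, 2, 3, 4, 5], 2); // => [[1, 3, 5], [2, 4]]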

assets/js/utils/binary.ts Normal file

@@ -0,0 +1,45 @@
// https://stackoverflow.com/q/21001659
export function crc32(buf: ArrayBuffer): number {
  const view = new DataView(buf);
  let crc = 0 ^ -1;

  for (let i = 0; i < view.byteLength; i++) {
    crc ^= view.getUint8(i);

    for (let j = 0; j < 8; j++) {
      crc = (crc >>> 1) ^ (0xedb88320 & -(crc & 1));
    }
  }

  return ~crc;
}

// https://caniuse.com/textencoder
export function asciiEncode(s: string): ArrayBuffer {
  const buf = new ArrayBuffer(s.length);
  const view = new DataView(buf);

  for (let i = 0; i < s.length; i++) {
    view.setUint8(i, s.charCodeAt(i) & 0xff);
  }

  return buf;
}

export type LEInt = [1 | 2 | 4 | 8, number];

export function serialize(values: LEInt[]): ArrayBuffer {
  const bufSize = values.reduce((acc, int) => acc + int[0], 0);
  const buf = new ArrayBuffer(bufSize);
  const view = new DataView(buf);
  let offset = 0;

  for (const [size, value] of values) {
    if (size === 1) view.setUint8(offset, value);
    if (size === 2) view.setUint16(offset, value, true);
    if (size === 4) view.setUint32(offset, value, true);
    if (size === 8) view.setBigUint64(offset, BigInt(value), true);
    offset += size;
  }

  return buf;
}
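
For reference, a minimal sketch of how these helpers compose (values illustrative):

  const name = asciiEncode('a.txt');   // 5-byte buffer of ASCII codes
  const header = serialize([
    [4, 0x04034b50],                   // little-endian bytes 50 4b 03 04
    [2, name.byteLength]               // little-endian bytes 05 00
  ]);                                  // => 6-byte ArrayBuffer
  const checksum = crc32(name);        // CRC-32 of the name bytes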

@@ -38,4 +38,27 @@ function handleError(response) {
   return response;
 }
 
-export { fetchJson, fetchHtml, handleError };
+/** @returns {Promise<Response>} */
+function fetchBackoff(...fetchArgs) {
+  /**
+   * @param {number} timeout
+   * @returns {Promise<Response>}
+   */
+  function fetchBackoffTimeout(timeout) {
+    // Adjust timeout
+    const newTimeout = Math.min(timeout * 2, 300000);
+
+    // Try to fetch the thing
+    return fetch(...fetchArgs)
+      .then(handleError)
+      .catch(() =>
+        new Promise(resolve =>
+          setTimeout(() => resolve(fetchBackoffTimeout(newTimeout)), timeout)
+        )
+      );
+  }
+
+  return fetchBackoffTimeout(5000);
+}
+
+export { fetchJson, fetchHtml, fetchBackoff, handleError };
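
The retry delay doubles on each failed attempt (5 s, 10 s, 20 s, ...) and is capped at five minutes. A usage sketch (endpoint illustrative):

  fetchBackoff('/search/download?q=safe')
    .then(resp => resp.json())
    .then(page => console.log(page.total));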

assets/js/utils/zip.ts Normal file

@@ -0,0 +1,136 @@
import { crc32, asciiEncode, serialize } from './binary';

interface FileInfo {
  headerOffset: number;
  byteLength: number;
  crc32: number;
  name: ArrayBuffer;
}

// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
// for full details of the ZIP format.
export class Zip {
  fileInfo: { [key: string]: FileInfo };
  offset: number;

  constructor() {
    this.fileInfo = {};
    this.offset = 0;
  }

  storeFile(name: string, file: ArrayBuffer): Blob {
    const crc = crc32(file);
    const ns = asciiEncode(name);

    this.fileInfo[name] = {
      headerOffset: this.offset,
      byteLength: file.byteLength,
      crc32: crc,
      name: ns
    };

    const localField = serialize([
      [2, 0x0001],               /* zip64 local field */
      [2, 0x0010],               /* local field length (excl. header) */
      [8, file.byteLength],      /* compressed size */
      [8, file.byteLength]       /* uncompressed size */
    ]);

    const header = serialize([
      [4, 0x04034b50],           /* local header signature */
      [2, 0x002d],               /* version = zip64 */
      [2, 0x0000],               /* flags = none */
      [2, 0x0000],               /* compression = store */
      [2, 0x0000],               /* time = 00:00 */
      [2, 0x0000],               /* date = 1980-01-01 */
      [4, crc],                  /* file crc32 */
      [4, 0xffffffff],           /* zip64 compressed size */
      [4, 0xffffffff],           /* zip64 uncompressed size */
      [2, ns.byteLength],        /* length of name */
      [2, localField.byteLength] /* length of local field */
    ]);

    this.offset += header.byteLength + ns.byteLength + localField.byteLength + file.byteLength;

    return new Blob([header, ns, localField, file]);
  }

  finalize(): Blob {
    const segments = [];
    const cdOff = this.offset;
    let numFiles = 0;

    for (const name in this.fileInfo) {
      const info = this.fileInfo[name];

      const cdField = serialize([
        [2, 0x0001],               /* zip64 central field */
        [2, 0x0018],               /* central field length (excl. header) */
        [8, info.byteLength],      /* compressed size */
        [8, info.byteLength],      /* uncompressed size */
        [8, info.headerOffset]     /* local header offset */
      ]);

      const cdEntry = serialize([
        [4, 0x02014b50],           /* CD entry signature */
        [2, 0x002d],               /* created with zip64 */
        [2, 0x002d],               /* extract with zip64 */
        [2, 0x0000],               /* flags = none */
        [2, 0x0000],               /* compression = store */
        [2, 0x0000],               /* time = 00:00 */
        [2, 0x0000],               /* date = 1980-01-01 */
        [4, info.crc32],           /* file crc32 */
        [4, 0xffffffff],           /* zip64 compressed size */
        [4, 0xffffffff],           /* zip64 uncompressed size */
        [2, info.name.byteLength], /* length of name */
        [2, cdField.byteLength],   /* length of central field */
        [2, 0x0000],               /* comment length */
        [2, 0x0000],               /* disk number */
        [2, 0x0000],               /* internal attributes */
        [4, 0x00000000],           /* external attributes */
        [4, 0xffffffff]            /* zip64 local header offset */
      ]);

      this.offset += cdEntry.byteLength + info.name.byteLength + cdField.byteLength;
      segments.push(cdEntry, info.name, cdField);
      numFiles++;
    }

    const endCdOff = this.offset;

    const endCd64 = serialize([
      [4, 0x06064b50],           /* zip64 end of CD signature */
      [8, 44],                   /* size of end of CD */
      [2, 0x002d],               /* created with zip64 */
      [2, 0x002d],               /* extract with zip64 */
      [4, 0x00000000],           /* this disk number */
      [4, 0x00000000],           /* starting disk number */
      [8, numFiles],             /* number of files on this disk */
      [8, numFiles],             /* total number of files */
      [8, endCdOff - cdOff],     /* size of CD */
      [8, cdOff]                 /* location of CD */
    ]);

    const endLoc64 = serialize([
      [4, 0x07064b50],           /* zip64 end of CD locator */
      [4, 0x00000000],           /* disk number of CD */
      [8, endCdOff],             /* location of end of CD */
      [4, 1]                     /* number of disks */
    ]);

    const endCd = serialize([
      [4, 0x06054b50],           /* end of CD */
      [2, 0x0000],               /* this disk number */
      [2, 0x0000],               /* starting disk number */
      [2, numFiles],             /* number of files on this disk */
      [2, numFiles],             /* total number of files */
      [4, endCdOff - cdOff],     /* size of CD */
      [4, 0xffffffff],           /* zip64 location of CD */
      [2, 0x0000]                /* comment length */
    ]);

    this.offset += endCd64.byteLength + endLoc64.byteLength + endCd.byteLength;
    segments.push(endCd64, endLoc64, endCd);

    return new Blob(segments);
  }
}
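
A minimal sketch of how the class is driven (file name and contents hypothetical, using asciiEncode from utils/binary for the contents); each returned Blob is self-contained, which is what lets the worker below stream entries into the response as they are stored:

  const zipper = new Zip();
  const entry = zipper.storeFile('hello.txt', asciiEncode('hello')); // local header + file data
  const archive = new Blob([entry, zipper.finalize()]);              // plus the central directory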

assets/js/worker.ts Normal file

@@ -0,0 +1,106 @@
/// <reference lib="WebWorker" />
import { evenlyDivide } from 'utils/array';
import { fetchBackoff } from 'utils/requests';
import { Zip } from 'utils/zip';

declare const self: ServiceWorkerGlobalScope;

const wait = (ms: number): Promise<void> => new Promise(resolve => setTimeout(resolve, ms));
const buffer = (blob: Blob) => blob.arrayBuffer().then(buf => new Uint8Array(buf));
const json = (resp: Response) => resp.json();
const blob = (resp: Response) => resp.blob();

interface Image {
  id: number;
  name: string;
  view_url: string; // eslint-disable-line camelcase
}

interface PageResult {
  images: Image[];
  total: number;
}

function handleStream(event: FetchEvent, url: URL): void {
  const concurrency = parseInt(url.searchParams.get('concurrency') || '1', 10);
  const queryString = url.searchParams.get('q');
  const failures: string[] = [];
  const zipper = new Zip();

  if (!queryString) {
    return event.respondWith(new Response('No query specified', { status: 400 }));
  }

  // Maximum ID to fetch -- start with largest possible ID
  let maxId = (2 ** 31) - 1;

  const stream = new ReadableStream({
    pull(controller) {
      // Path to fetch next
      const nextQuery = encodeURIComponent(`(${queryString}),id.lte:${maxId}`);

      return fetchBackoff(`/search/download?q=${nextQuery}`)
        .then(json)
        .then(({ images, total }: PageResult): Promise<void> => {
          if (total === 0) {
            // Done, no results left
            // Finalize zip and close stream to prevent any further pulls
            return buffer(zipper.finalize())
              .then(buf => {
                controller.enqueue(buf);
                controller.close();
              });
          }

          // Decrease maximum ID for next round below current minimum
          maxId = images[images.length - 1].id - 1;

          // Set up concurrent fetches
          const imageBins = evenlyDivide(images, concurrency);
          const fetchers = imageBins.map(downloadIntoZip);

          // Run all concurrent fetches
          return Promise
            .all(fetchers)
            .then(() => wait(5000));
        });

      // Function to fetch each image and push it into the zip stream
      function downloadIntoZip(images: Image[]): Promise<void> {
        let promise = Promise.resolve();

        // eslint-disable-next-line camelcase
        for (const { name, view_url } of images) {
          promise = promise
            .then(() => fetchBackoff(view_url)).then(blob).then(buffer)
            .then(file => zipper.storeFile(name, file.buffer)).then(buffer)
            .then(entry => controller.enqueue(entry))
            .catch(() => { failures.push(view_url); });
        }

        return promise;
      }
    }
  });

  event.respondWith(new Response(stream, {
    headers: {
      'content-type': 'application/x-zip',
      'content-disposition': 'attachment; filename="image_export.zip"'
    }
  }));
}

self.addEventListener('fetch', event => {
  const url = new URL(event.request.url);

  // Streaming path
  if (url.pathname === '/js/stream') return handleStream(event, url);

  // Otherwise, not destined for us
  return event.respondWith(fetch(event.request));
});

export default null;
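
Because the worker is registered from /js/worker.js, its default scope is /js/, which is what lets it intercept requests to /js/stream. Once it controls the page, a download starts with an ordinary navigation; a hedged sketch of what a click handler for the .js-download link might do (the handler itself is not shown here):

  const query = document.querySelector('.js-download').dataset.query || '';
  window.location.href = `/js/stream?q=${encodeURIComponent(query)}&concurrency=4`;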

@@ -60,6 +60,7 @@ module.exports = {
   mode: isDevelopment ? 'development' : 'production',
   entry: {
     'js/app.js': './js/app.js',
+    'js/worker.js': './js/worker.ts',
     ...themes
   },
   output: {
@@ -92,7 +93,7 @@ module.exports = {
       },
     },
     {
-      test: /app\.js/,
+      test: /(app\.js|worker\.ts)/,
       use: [
         {
           loader: 'webpack-rollup-loader',

@@ -0,0 +1,31 @@
defmodule PhilomenaWeb.Search.DownloadController do
  use PhilomenaWeb, :controller

  alias PhilomenaWeb.ImageLoader
  alias Philomena.Elasticsearch
  alias Philomena.Images.Image
  import Ecto.Query

  def index(conn, params) do
    options = [pagination: %{page_number: 1, page_size: 50}]
    queryable = Image |> preload([:user, :intensity, tags: :aliases])

    case ImageLoader.search_string(conn, params["q"], options) do
      {:ok, {images, _tags}} ->
        images = Elasticsearch.search_records(images, queryable)

        conn
        |> put_view(PhilomenaWeb.Api.Json.ImageView)
        |> render("index.json",
          images: images,
          total: images.total_entries,
          interactions: []
        )

      {:error, msg} ->
        conn
        |> Plug.Conn.put_status(:bad_request)
        |> json(%{error: msg})
    end
  end
end
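
For reference, this action renders the standard API image view, so the PageResult shape the worker consumes corresponds to a response along these lines (fields abridged, values illustrative):

  {"images": [{"id": 42, "name": "42__safe.png", "view_url": "https://..."}], "total": 1337, "interactions": []}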

@@ -462,6 +462,7 @@ defmodule PhilomenaWeb.Router do
     scope "/search", Search, as: :search do
       resources "/reverse", ReverseController, only: [:index, :create]
+      resources "/download", DownloadController, only: [:index]
     end
 
     resources "/search", SearchController, only: [:index]

@@ -17,6 +17,10 @@ elixir:
   .page__pagination = pagination
 
   .flex__right.page__info
+    a.js-download href="#" data-query=@conn.params["q"] title="Download"
+      i.fa.fa-download>
+      span.hide-mobile.hide-limited-desktop Download
+
     = random_button @conn, params
     = hidden_toggle @conn, route, params
     = deleted_toggle @conn, route, params

@@ -118,7 +118,7 @@ defmodule PhilomenaWeb.ImageView do
     "#{root}/#{view}/#{year}/#{month}/#{day}/#{filename}.#{format}"
   end
 
-  defp verbose_file_name(image) do
+  def verbose_file_name(image) do
     # Truncate filename to 150 characters, making room for the path + filename on Windows
     # https://stackoverflow.com/questions/265769/maximum-filename-length-in-ntfs-windows-xp-and-windows-vista
     file_name_slug_fragment =

@@ -52,7 +52,8 @@ defmodule PhilomenaWeb.LayoutView do
       fancy_tag_upload: if(user, do: user.fancy_tag_field_on_upload, else: true),
       interactions: Jason.encode!(interactions),
       ignored_tag_list: Jason.encode!(ignored_tag_list(conn.assigns[:tags])),
-      hide_staff_tools: conn.cookies["hide_staff_tools"]
+      hide_staff_tools: conn.cookies["hide_staff_tools"],
+      worker_path: Routes.static_path(conn, "/js/worker.js")
     ]
 
     data = Keyword.merge(data, extra)