From 55e4e582f19682768ee78a8547288bf2da7186c9 Mon Sep 17 00:00:00 2001
From: "byte[]"
Date: Mon, 25 Oct 2021 23:42:29 -0400
Subject: [PATCH] ServiceWorker streaming zip download

---
 assets/.eslintrc.yml                          |   5 +
 assets/js/booru.js                            |   5 +
 assets/js/utils/array.js                      |  22 ++-
 assets/js/utils/binary.ts                     |  45 ++++++
 assets/js/utils/requests.js                   |  25 +++-
 assets/js/utils/zip.ts                        | 136 ++++++++++++++++++
 assets/js/worker.ts                           | 106 ++++++++++++++
 assets/webpack.config.js                      |   3 +-
 .../controllers/search/download_controller.ex |  31 ++++
 lib/philomena_web/router.ex                   |   1 +
 .../templates/image/index.html.slime          |   4 +
 lib/philomena_web/views/image_view.ex         |   2 +-
 lib/philomena_web/views/layout_view.ex        |   3 +-
 13 files changed, 383 insertions(+), 5 deletions(-)
 create mode 100644 assets/js/utils/binary.ts
 create mode 100644 assets/js/utils/zip.ts
 create mode 100644 assets/js/worker.ts
 create mode 100644 lib/philomena_web/controllers/search/download_controller.ex

diff --git a/assets/.eslintrc.yml b/assets/.eslintrc.yml
index a0022660..21ef3b77 100644
--- a/assets/.eslintrc.yml
+++ b/assets/.eslintrc.yml
@@ -259,3 +259,8 @@ overrides:
       - '*.js'
     rules:
       '@typescript-eslint/explicit-module-boundary-types': 0
+  - files:
+      - '*.ts'
+    rules:
+      'no-undef': 0
+      'no-constant-condition': 0
diff --git a/assets/js/booru.js b/assets/js/booru.js
index 3197f353..8ab0ba10 100644
--- a/assets/js/booru.js
+++ b/assets/js/booru.js
@@ -101,6 +101,11 @@ function loadBooruData() {
 
   // CSRF
   window.booru.csrfToken = $('meta[name="csrf-token"]').content;
+
+  // ServiceWorker
+  if ('serviceWorker' in navigator && window.booru.workerPath) {
+    navigator.serviceWorker.register(window.booru.workerPath);
+  }
 }
 
 function BooruOnRails() {
diff --git a/assets/js/utils/array.js b/assets/js/utils/array.js
index 61f1edf1..e0c89986 100644
--- a/assets/js/utils/array.js
+++ b/assets/js/utils/array.js
@@ -10,4 +10,24 @@ function arraysEqual(array1, array2) {
   return true;
 }
 
-export { moveElement, arraysEqual };
+/**
+ * @template T
+ * @param {T[]} array
+ * @param {number} numBins
+ * @returns {T[][]}
+ */
+function evenlyDivide(array, numBins) {
+  const bins = [];
+
+  for (let i = 0; i < numBins; i++) {
+    bins[i] = [];
+  }
+
+  for (let i = 0; i < array.length; i++) {
+    bins[i % numBins].push(array[i]);
+  }
+
+  return bins;
+}
+
+export { moveElement, arraysEqual, evenlyDivide };
diff --git a/assets/js/utils/binary.ts b/assets/js/utils/binary.ts
new file mode 100644
index 00000000..2a5cf967
--- /dev/null
+++ b/assets/js/utils/binary.ts
@@ -0,0 +1,45 @@
+// https://stackoverflow.com/q/21001659
+export function crc32(buf: ArrayBuffer): number {
+  const view = new DataView(buf);
+  let crc = 0 ^ -1;
+
+  for (let i = 0; i < view.byteLength; i++) {
+    crc ^= view.getUint8(i);
+    for (let j = 0; j < 8; j++) {
+      crc = (crc >>> 1) ^ (0xedb88320 & -(crc & 1));
+    }
+  }
+
+  return ~crc;
+}
+
+// https://caniuse.com/textencoder
+export function asciiEncode(s: string): ArrayBuffer {
+  const buf = new ArrayBuffer(s.length);
+  const view = new DataView(buf);
+
+  for (let i = 0; i < s.length; i++) {
+    view.setUint8(i, s.charCodeAt(i) & 0xff);
+  }
+
+  return buf;
+}
+
+export type LEInt = [1 | 2 | 4 | 8, number];
+export function serialize(values: LEInt[]): ArrayBuffer {
+  const bufSize = values.reduce((acc, int) => acc + int[0], 0);
+  const buf = new ArrayBuffer(bufSize);
+  const view = new DataView(buf);
+  let offset = 0;
+
+  for (const [size, value] of values) {
+    if (size === 1) view.setUint8(offset, value);
+    if (size === 2) view.setUint16(offset, value, true);
+    if (size === 4) view.setUint32(offset, value, true);
+    if (size === 8) view.setBigUint64(offset, BigInt(value), true);
+
+    offset += size;
+  }
+
+  return buf;
+}
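For reference, a minimal usage sketch of the helpers in `binary.ts` (the relative import path is an assumption; in the bundle they resolve as `utils/binary`). The expected values in the comments follow from the little-endian layout and the standard CRC-32 polynomial:

```ts
import { crc32, asciiEncode, serialize } from './binary';

// Pack a 4-byte signature followed by a 2-byte version field, little-endian.
const buf = serialize([
  [4, 0x04034b50], // bytes 50 4b 03 04, i.e. "PK\x03\x04"
  [2, 0x002d]      // version 4.5 (zip64)
]);

console.log(buf.byteLength);                         // 6
console.log(new DataView(buf).getUint8(0) === 0x50); // true: 'P' comes first
console.log(crc32(asciiEncode('hello')));            // 907060870 (0x3610a686)
```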
diff --git a/assets/js/utils/requests.js b/assets/js/utils/requests.js
index da0a1524..6442f72f 100644
--- a/assets/js/utils/requests.js
+++ b/assets/js/utils/requests.js
@@ -38,4 +38,27 @@ function handleError(response) {
   return response;
 }
 
-export { fetchJson, fetchHtml, handleError };
+/** @returns {Promise<Response>} */
+function fetchBackoff(...fetchArgs) {
+  /**
+   * @param {number} timeout
+   * @returns {Promise<Response>}
+   */
+  function fetchBackoffTimeout(timeout) {
+    // Adjust timeout
+    const newTimeout = Math.min(timeout * 2, 300000);
+
+    // Try to fetch the thing
+    return fetch(...fetchArgs)
+      .then(handleError)
+      .catch(() =>
+        new Promise(resolve =>
+          setTimeout(() => resolve(fetchBackoffTimeout(newTimeout)), timeout)
+        )
+      );
+  }
+
+  return fetchBackoffTimeout(5000);
+}
+
+export { fetchJson, fetchHtml, fetchBackoff, handleError };
diff --git a/assets/js/utils/zip.ts b/assets/js/utils/zip.ts
new file mode 100644
index 00000000..88fc5774
--- /dev/null
+++ b/assets/js/utils/zip.ts
@@ -0,0 +1,136 @@
+import { crc32, asciiEncode, serialize } from './binary';
+
+interface FileInfo {
+  headerOffset: number;
+  byteLength: number;
+  crc32: number;
+  name: ArrayBuffer;
+}
+
+// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+// for full details of the ZIP format.
+export class Zip {
+  fileInfo: { [key: string]: FileInfo };
+  offset: number;
+
+  constructor() {
+    this.fileInfo = {};
+    this.offset = 0;
+  }
+
+  storeFile(name: string, file: ArrayBuffer): Blob {
+    const crc = crc32(file);
+    const ns = asciiEncode(name);
+
+    this.fileInfo[name] = {
+      headerOffset: this.offset,
+      byteLength: file.byteLength,
+      crc32: crc,
+      name: ns
+    };
+
+    const localField = serialize([
+      [2, 0x0001],               /* zip64 local field */
+      [2, 0x0010],               /* local field length (excl. header) */
+      [8, file.byteLength],      /* compressed size */
+      [8, file.byteLength]       /* uncompressed size */
+    ]);
+
+    const header = serialize([
+      [4, 0x04034b50],           /* local header signature */
+      [2, 0x002d],               /* version = zip64 */
+      [2, 0x0000],               /* flags = none */
+      [2, 0x0000],               /* compression = store */
+      [2, 0x0000],               /* time = 00:00 */
+      [2, 0x0000],               /* date = 1980-01-01 */
+      [4, crc],                  /* file crc32 */
+      [4, 0xffffffff],           /* zip64 compressed size */
+      [4, 0xffffffff],           /* zip64 uncompressed size */
+      [2, ns.byteLength],        /* length of name */
+      [2, localField.byteLength] /* length of local field */
+    ]);
+
+    this.offset += header.byteLength + ns.byteLength + localField.byteLength + file.byteLength;
+    return new Blob([header, ns, localField, file]);
+  }
+
+  finalize(): Blob {
+    const segments = [];
+    const cdOff = this.offset;
+    let numFiles = 0;
+
+    for (const name in this.fileInfo) {
+      const info = this.fileInfo[name];
+
+      const cdField = serialize([
+        [2, 0x0001],             /* zip64 central field */
+        [2, 0x0018],             /* central field length (excl. header) */
+        [8, info.byteLength],    /* compressed size */
+        [8, info.byteLength],    /* uncompressed size */
+        [8, info.headerOffset]   /* local header offset */
+      ]);
+
+      const cdEntry = serialize([
+        [4, 0x02014b50],         /* CD entry signature */
+        [2, 0x002d],             /* created with zip64 */
+        [2, 0x002d],             /* extract with zip64 */
+        [2, 0x0000],             /* flags = none */
+        [2, 0x0000],             /* compression = store */
+        [2, 0x0000],             /* time = 00:00 */
+        [2, 0x0000],             /* date = 1980-01-01 */
+        [4, info.crc32],         /* file crc32 */
+        [4, 0xffffffff],         /* zip64 compressed size */
+        [4, 0xffffffff],         /* zip64 uncompressed size */
+        [2, info.name.byteLength], /* length of name */
+        [2, cdField.byteLength], /* length of central field */
+        [2, 0x0000],             /* comment length */
+        [2, 0x0000],             /* disk number */
+        [2, 0x0000],             /* internal attributes */
+        [4, 0x00000000],         /* external attributes */
+        [4, 0xffffffff],         /* zip64 local header offset */
+      ]);
+
+      this.offset += cdEntry.byteLength + info.name.byteLength + cdField.byteLength;
+      segments.push(cdEntry, info.name, cdField);
+
+      numFiles++;
+    }
+
+    const endCdOff = this.offset;
+    const endCd64 = serialize([
+      [4, 0x06064b50],           /* zip64 end of CD signature */
+      [8, 44],                   /* size of end of CD */
+      [2, 0x002d],               /* created with zip64 */
+      [2, 0x002d],               /* extract with zip64 */
+      [4, 0x00000000],           /* this disk number */
+      [4, 0x00000000],           /* starting disk number */
+      [8, numFiles],             /* number of files on this disk */
+      [8, numFiles],             /* total number of files */
+      [8, endCdOff - cdOff],     /* size of CD */
+      [8, cdOff]                 /* location of CD */
+    ]);
+
+    const endLoc64 = serialize([
+      [4, 0x07064b50],           /* zip64 end of CD locator */
+      [4, 0x00000000],           /* disk number of CD */
+      [8, endCdOff],             /* location of end of CD */
+      [4, 1]                     /* number of disks */
+    ]);
+
+    const endCd = serialize([
+      [4, 0x06054b50],           /* end of CD */
+      [2, 0x0000],               /* this disk number */
+      [2, 0x0000],               /* starting disk number */
+      [2, numFiles],             /* number of files on this disk */
+      [2, numFiles],             /* total number of files */
+      [4, endCdOff - cdOff],     /* size of CD */
+      [4, 0xffffffff],           /* zip64 location of CD */
+      [2, 0x0000]                /* comment length */
+    ]);
+
+    this.offset += endCd64.byteLength + endLoc64.byteLength + endCd.byteLength;
+    segments.push(endCd64, endLoc64, endCd);
+
+    return new Blob(segments);
+  }
+}
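`Zip.storeFile()` returns the bytes of one complete local entry (header, name, zip64 extra field, stored data) while the instance tracks running offsets for the central directory, which `finalize()` then emits. A minimal sketch of assembling an archive from the pieces (import paths assumed):

```ts
import { Zip } from './zip';
import { asciiEncode } from './binary';

const zip = new Zip();

// Entries must be concatenated in the order they were stored, since
// storeFile() records this.offset as each entry's local header offset.
const entry1 = zip.storeFile('hello.txt', asciiEncode('hello world'));
const entry2 = zip.storeFile('bye.txt', asciiEncode('goodbye'));
const centralDirectory = zip.finalize();

const archive = new Blob([entry1, entry2, centralDirectory], { type: 'application/zip' });
```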
diff --git a/assets/js/worker.ts b/assets/js/worker.ts
new file mode 100644
index 00000000..8ed02008
--- /dev/null
+++ b/assets/js/worker.ts
@@ -0,0 +1,106 @@
+/// <reference lib="webworker" />
+
+import { evenlyDivide } from 'utils/array';
+import { fetchBackoff } from 'utils/requests';
+import { Zip } from 'utils/zip';
+
+declare const self: ServiceWorkerGlobalScope;
+
+const wait = (ms: number): Promise<void> => new Promise(resolve => setTimeout(resolve, ms));
+const buffer = (blob: Blob) => blob.arrayBuffer().then(buf => new Uint8Array(buf));
+const json = (resp: Response) => resp.json();
+const blob = (resp: Response) => resp.blob();
+
+interface Image {
+  id: number;
+  name: string;
+  view_url: string; // eslint-disable-line camelcase
+}
+
+interface PageResult {
+  images: Image[];
+  total: number;
+}
+
+function handleStream(event: FetchEvent, url: URL): void {
+  const concurrency = parseInt(url.searchParams.get('concurrency') || '1', 10);
+  const queryString = url.searchParams.get('q');
+  const failures: string[] = [];
+  const zipper = new Zip();
+
+  if (!queryString) {
+    return event.respondWith(new Response('No query specified', { status: 400 }));
+  }
+
+  // Maximum ID to fetch -- start with largest possible ID
+  let maxId = (2 ** 31) - 1;
+
+  const stream = new ReadableStream({
+    pull(controller) {
+      // Path to fetch next
+      const nextQuery = encodeURIComponent(`(${queryString}),id.lte:${maxId}`);
+
+      return fetchBackoff(`/search/download?q=${nextQuery}`)
+        .then(json)
+        .then(({ images, total }: PageResult): Promise<void> => {
+          if (total === 0) {
+            // Done, no results left
+            // Finalize zip and close stream to prevent any further pulls
+            return buffer(zipper.finalize())
+              .then(buf => {
+                controller.enqueue(buf);
+                controller.close();
+              });
+          }
+
+          // Decrease maximum ID for next round below current minimum
+          maxId = images[images.length - 1].id - 1;
+
+          // Set up concurrent fetches
+          const imageBins = evenlyDivide(images, concurrency);
+          const fetchers = imageBins.map(downloadIntoZip);
+
+          // Run all concurrent fetches
+          return Promise
+            .all(fetchers)
+            .then(() => wait(5000));
+        });
+
+
+      // Function to fetch each image and push it into the zip stream
+      function downloadIntoZip(images: Image[]): Promise<void> {
+        let promise = Promise.resolve();
+
+        // eslint-disable-next-line camelcase
+        for (const { name, view_url } of images) {
+          promise = promise
+            .then(() => fetchBackoff(view_url)).then(blob).then(buffer)
+            .then(file => zipper.storeFile(name, file.buffer)).then(buffer)
+            .then(entry => controller.enqueue(entry))
+            .catch(() => { failures.push(view_url); });
+        }
+
+        return promise;
+      }
+    }
+  });
+
+  event.respondWith(new Response(stream, {
+    headers: {
+      'content-type': 'application/x-zip',
+      'content-disposition': 'attachment; filename="image_export.zip"'
+    }
+  }));
+}
+
+self.addEventListener('fetch', event => {
+  const url = new URL(event.request.url);
+
+  // Streaming path
+  if (url.pathname === '/js/stream') return handleStream(event, url);
+
+  // Otherwise, not destined for us
+  return event.respondWith(fetch(event.request));
+});
+
+export default null;
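Because a `ReadableStream` only calls `pull()` again once the consumer drains the previous chunk, page fetches are naturally throttled by the client's download speed. The click handler that kicks this off is not part of this patch; a hypothetical wiring for the `.js-download` link added further down might look like:

```ts
// Hypothetical wiring (not in this patch): the request only reaches the
// worker once the page is controlled, and /js/stream exists nowhere else.
const link = document.querySelector<HTMLAnchorElement>('.js-download');

link?.addEventListener('click', event => {
  event.preventDefault();
  const query = link.dataset.query || '*';
  window.location.href = `/js/stream?q=${encodeURIComponent(query)}&concurrency=4`;
});
```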
diff --git a/assets/webpack.config.js b/assets/webpack.config.js
index 54a254a6..f03eb2fa 100644
--- a/assets/webpack.config.js
+++ b/assets/webpack.config.js
@@ -60,6 +60,7 @@ module.exports = {
   mode: isDevelopment ? 'development' : 'production',
   entry: {
     'js/app.js': './js/app.js',
+    'js/worker.js': './js/worker.ts',
     ...themes
   },
   output: {
@@ -92,7 +93,7 @@ module.exports = {
       },
     },
     {
-      test: /app\.js/,
+      test: /(app\.js|worker\.ts)/,
       use: [
         {
          loader: 'webpack-rollup-loader',
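The output name matters beyond bundling: a service worker's default scope is the directory its script is served from, so emitting the worker at /js/worker.js gives it the /js/ scope it needs to intercept the synthetic /js/stream request. A quick sketch to confirm this (the logged URL is illustrative):

```ts
// Registration mirrors the booru.js change above; the resolved scope
// determines which fetches the worker is allowed to intercept.
navigator.serviceWorker.register('/js/worker.js').then(registration => {
  console.log(registration.scope); // e.g. "https://example.com/js/"
});
```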
diff --git a/lib/philomena_web/controllers/search/download_controller.ex b/lib/philomena_web/controllers/search/download_controller.ex
new file mode 100644
index 00000000..89b1fad4
--- /dev/null
+++ b/lib/philomena_web/controllers/search/download_controller.ex
@@ -0,0 +1,31 @@
+defmodule PhilomenaWeb.Search.DownloadController do
+  use PhilomenaWeb, :controller
+
+  alias PhilomenaWeb.ImageLoader
+  alias Philomena.Elasticsearch
+  alias Philomena.Images.Image
+  import Ecto.Query
+
+  def index(conn, params) do
+    options = [pagination: %{page_number: 1, page_size: 50}]
+    queryable = Image |> preload([:user, :intensity, tags: :aliases])
+
+    case ImageLoader.search_string(conn, params["q"], options) do
+      {:ok, {images, _tags}} ->
+        images = Elasticsearch.search_records(images, queryable)
+
+        conn
+        |> put_view(PhilomenaWeb.Api.Json.ImageView)
+        |> render("index.json",
+          images: images,
+          total: images.total_entries,
+          interactions: []
+        )
+
+      {:error, msg} ->
+        conn
+        |> Plug.Conn.put_status(:bad_request)
+        |> json(%{error: msg})
+    end
+  end
+end
diff --git a/lib/philomena_web/router.ex b/lib/philomena_web/router.ex
index 2479a2d1..090691b7 100644
--- a/lib/philomena_web/router.ex
+++ b/lib/philomena_web/router.ex
@@ -462,6 +462,7 @@ defmodule PhilomenaWeb.Router do
 
     scope "/search", Search, as: :search do
       resources "/reverse", ReverseController, only: [:index, :create]
+      resources "/download", DownloadController, only: [:index]
     end
 
     resources "/search", SearchController, only: [:index]
diff --git a/lib/philomena_web/templates/image/index.html.slime b/lib/philomena_web/templates/image/index.html.slime
index a61acf49..2fbadb27 100644
--- a/lib/philomena_web/templates/image/index.html.slime
+++ b/lib/philomena_web/templates/image/index.html.slime
@@ -17,6 +17,10 @@ elixir:
   .page__pagination = pagination
 
   .flex__right.page__info
+    a.js-download href="#" data-query=@conn.params["q"] title="Download"
+      i.fa.fa-download>
+      span.hide-mobile.hide-limited-desktop Download
+
     = random_button @conn, params
     = hidden_toggle @conn, route, params
     = deleted_toggle @conn, route, params
diff --git a/lib/philomena_web/views/image_view.ex b/lib/philomena_web/views/image_view.ex
index 071e3b8b..77b5a54b 100644
--- a/lib/philomena_web/views/image_view.ex
+++ b/lib/philomena_web/views/image_view.ex
@@ -118,7 +118,7 @@ defmodule PhilomenaWeb.ImageView do
     "#{root}/#{view}/#{year}/#{month}/#{day}/#{filename}.#{format}"
   end
 
-  defp verbose_file_name(image) do
+  def verbose_file_name(image) do
     # Truncate filename to 150 characters, making room for the path + filename on Windows
     # https://stackoverflow.com/questions/265769/maximum-filename-length-in-ntfs-windows-xp-and-windows-vista
     file_name_slug_fragment =
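The controller returns plain API JSON in pages of 50, and the worker pages through them by keyset rather than by offset. A sketch of that contract in isolation (assumption: results come back in descending id order, so the last image of each page carries the smallest id seen so far):

```ts
interface Image { id: number; name: string; view_url: string; }
interface PageResult { images: Image[]; total: number; }

// Keyset pagination: each request constrains ids to one less than the
// smallest id already seen, so consecutive pages never overlap.
async function* pages(query: string): AsyncGenerator<Image[]> {
  let maxId = 2 ** 31 - 1;

  for (;;) {
    const q = encodeURIComponent(`(${query}),id.lte:${maxId}`);
    const page: PageResult = await fetch(`/search/download?q=${q}`).then(r => r.json());

    if (page.total === 0) return;

    maxId = page.images[page.images.length - 1].id - 1;
    yield page.images;
  }
}
```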
diff --git a/lib/philomena_web/views/layout_view.ex b/lib/philomena_web/views/layout_view.ex
index f53db5d5..51367216 100644
--- a/lib/philomena_web/views/layout_view.ex
+++ b/lib/philomena_web/views/layout_view.ex
@@ -52,7 +52,8 @@ defmodule PhilomenaWeb.LayoutView do
       fancy_tag_upload: if(user, do: user.fancy_tag_field_on_upload, else: true),
       interactions: Jason.encode!(interactions),
       ignored_tag_list: Jason.encode!(ignored_tag_list(conn.assigns[:tags])),
-      hide_staff_tools: conn.cookies["hide_staff_tools"]
+      hide_staff_tools: conn.cookies["hide_staff_tools"],
+      worker_path: Routes.static_path(conn, "/js/worker.js")
     ]
 
     data = Keyword.merge(data, extra)
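One caveat the patch leaves implicit: registration is asynchronous, and a freshly registered worker does not control the page until the next navigation, so on a first visit a /js/stream request would fall through to the server, where no such route exists. A hedged sketch of a guard the UI could use (not in this patch):

```ts
// Resolve true only once the page is actually controlled by a worker,
// i.e. once the /js/stream fetch handler is guaranteed to run.
async function canStreamDownloads(): Promise<boolean> {
  if (!('serviceWorker' in navigator)) return false;

  await navigator.serviceWorker.ready;
  return navigator.serviceWorker.controller !== null;
}
```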