diff --git a/fimfarchive/tasks/update.py b/fimfarchive/tasks/update.py
new file mode 100644
index 0000000..ed9d7b4
--- /dev/null
+++ b/fimfarchive/tasks/update.py
@@ -0,0 +1,213 @@
+"""
+Update task.
+"""
+
+
+#
+# Fimfarchive, preserves stories from Fimfiction.
+# Copyright (C) 2015 Joakim Soderlund
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+
+import os
+import time
+from typing import Optional
+
+from fimfarchive.exceptions import InvalidStoryError
+from fimfarchive.fetchers import Fetcher
+from fimfarchive.flavors import DataFormat, UpdateStatus
+from fimfarchive.mappers import StoryPathMapper
+from fimfarchive.selectors import UpdateSelector
+from fimfarchive.signals import Signal, SignalSender
+from fimfarchive.stories import Story
+from fimfarchive.utils import PersistedDict
+from fimfarchive.writers import DirectoryWriter
+
+
+DEFAULT_WORKDIR = 'worktree/update'
+DEFAULT_RETRIES = 10
+DEFAULT_SKIPS = 500
+
+
+#
+# Lowering request delays could jeopardize the future of the archive.
+#
+# These timings are here so that the updater does not flood Fimfiction
+# with requests. One spammy synchronous client would not take down the
+# site. It might however make the site owners want to prevent archiving
+# in the future. Consider downloading a Fimfarchive release instead.
+#
+# http://www.fimfarchive.net/
+#
+
+SUCCESS_DELAY = 10
+SKIPPED_DELAY = 2
+FAILURE_DELAY = 300
+
+
+class UpdateTask(SignalSender):
+    """
+    Updates Fimfarchive.
+    """
+    on_attempt = Signal('key', 'skips', 'retries')
+    on_success = Signal('key', 'story')
+    on_skipped = Signal('key', 'story')
+    on_failure = Signal('key', 'error')
+
+    state_file = 'state.json'
+    state_vars = {'key': 0}
+
+    def __init__(
+            self,
+            fimfarchive: Fetcher,
+            fimfiction: Fetcher,
+            workdir: str = DEFAULT_WORKDIR,
+            retries: int = DEFAULT_RETRIES,
+            skips: int = DEFAULT_SKIPS,
+    ) -> None:
+        """
+        Constructor.
+
+        Args:
+            fimfarchive: Fetcher for the old release.
+            fimfiction: Fetcher for the new release.
+            workdir: Path for storage of state and stories.
+            retries: Number of consecutive failures before giving up.
+            skips: Number of consecutive skips before giving up.
+        """
+        super().__init__()
+
+        self.fimfarchive = fimfarchive
+        self.fimfiction = fimfiction
+        self.workdir = workdir
+        self.retries = retries
+        self.skips = skips
+
+        os.makedirs(self.workdir, exist_ok=True)
+        state_path = os.path.join(self.workdir, self.state_file)
+        self.state = PersistedDict(state_path, self.state_vars)
+
+        self.select = UpdateSelector()
+
+        meta_mapper = self.get_mapper('meta')
+        skip_mapper = self.get_mapper('skip')
+        epub_mapper = self.get_mapper('epub')
+        html_mapper = self.get_mapper('html')
+
+        self.skip_writer = DirectoryWriter(skip_mapper)
+        self.epub_writer = DirectoryWriter(meta_mapper, epub_mapper)
+        self.html_writer = DirectoryWriter(meta_mapper, html_mapper)
+
+    def get_mapper(self, subdir: str) -> StoryPathMapper:
+        """
+        Creates a mapper to the specified subdirectory.
+
+        Args:
+            subdir: Subdirectory for the mapper.
+        """
+        directory = os.path.join(self.workdir, subdir)
+        return StoryPathMapper(directory)
+
+    def fetch(self, fetcher: Fetcher, key: int) -> Optional[Story]:
+        """
+        Fetches a story, or returns `None` if the key is invalid.
+
+        Args:
+            fetcher: Source for the story.
+            key: Primary key of the story.
+
+        Raises:
+            StorySourceError: If the fetcher fails.
+        """
+        try:
+            return fetcher.fetch(key)
+        except InvalidStoryError:
+            return None
+
+    def write(self, story: Story) -> None:
+        """
+        Passes the story to the appropriate writer.
+
+        Args:
+            story: Object to write.
+
+        Raises:
+            ValueError: If story flavor is unsupported.
+        """
+        if DataFormat.HTML in story.flavors:
+            self.html_writer.write(story)
+        elif DataFormat.EPUB in story.flavors:
+            self.epub_writer.write(story)
+        else:
+            raise ValueError("Unsupported story flavor.")
+
+    def update(self, key: int) -> Optional[Story]:
+        """
+        Updates the specified story.
+
+        Args:
+            key: Primary key of the story to update.
+
+        Raises:
+            StorySourceError: If any fetcher fails.
+        """
+        old = self.fetch(self.fimfarchive, key)
+        new = self.fetch(self.fimfiction, key)
+        selected = self.select(old, new)
+
+        if selected and UpdateStatus.REVIVED in selected.flavors:
+            selected = selected.merge(meta=new.meta)
+
+        if selected:
+            self.write(selected)
+        elif new:
+            self.skip_writer.write(new)
+        elif old:
+            self.skip_writer.write(old)
+
+        return selected
+
+    def run(self) -> None:
+        """
+        Runs the updater until the skip or retry limit is reached.
+        """
+        retried = 0
+        skipped = 0
+
+        while skipped < self.skips and retried < self.retries:
+            key = self.state['key']
+
+            self.on_attempt(key, skipped, retried)
+
+            try:
+                story = self.update(key)
+            except Exception as e:
+                retried += 1
+                self.on_failure(key, e)
+                time.sleep(FAILURE_DELAY)
+            else:
+                retried = 0  # only consecutive failures count
+                self.state['key'] += 1
+                self.state.save()
+
+                if story:
+                    skipped = 0  # only consecutive skips count
+                    self.on_success(key, story)
+                    time.sleep(SUCCESS_DELAY)
+                else:
+                    skipped += 1
+                    self.on_skipped(key, story)
+                    time.sleep(SKIPPED_DELAY)
diff --git a/tests/tasks/test_update.py b/tests/tasks/test_update.py
new file mode 100644
index 0000000..8939968
--- /dev/null
+++ b/tests/tasks/test_update.py
@@ -0,0 +1,269 @@
+"""
+Update task tests.
+"""
+
+
+#
+# Fimfarchive, preserves stories from Fimfiction.
+# Copyright (C) 2015 Joakim Soderlund
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+
+from unittest.mock import MagicMock, call, patch
+
+import pytest
+
+from fimfarchive.exceptions import InvalidStoryError, StorySourceError
+from fimfarchive.fetchers import Fetcher
+from fimfarchive.flavors import DataFormat, UpdateStatus
+from fimfarchive.stories import Story
+from fimfarchive.tasks.update import (
+    UpdateTask, SUCCESS_DELAY, SKIPPED_DELAY, FAILURE_DELAY,
+)
+
+
+class DummyFetcher(Fetcher):
+    """
+    Fetcher with local instance storage.
+    """
+
+    def __init__(self):
+        """
+        Constructor.
+        """
+        self.stories = dict()
+
+    def add(self, key, date, flavors=()):
+        """
+        Adds a story to the fetcher and returns it.
+        """
+        story = Story(
+            key=key,
+            flavors=flavors,
+            data=f'Story {key}',
+            meta={
+                'id': key,
+                'date_modified': date,
+                'chapters': [
+                    {'id': key},
+                ],
+            },
+        )
+
+        self.stories[key] = story
+
+        return story
+
+    def fetch(self, key):
+        """
+        Returns a stored story or raises `InvalidStoryError`.
+        """
+        try:
+            return self.stories[key]
+        except KeyError:
+            raise InvalidStoryError()
+
+
+class TestUpdateTask:
+    """
+    Tests update task.
+    """
+
+    @pytest.fixture
+    def fimfiction(self):
+        """
+        Returns a `Fetcher` simulating Fimfiction.
+        """
+        return DummyFetcher()
+
+    @pytest.fixture
+    def fimfarchive(self):
+        """
+        Returns a `Fetcher` simulating Fimfarchive.
+        """
+        return DummyFetcher()
+
+    @pytest.fixture
+    def task(self, fimfarchive, fimfiction, tmpdir):
+        """
+        Returns an `UpdateTask` instance.
+        """
+        return UpdateTask(
+            fimfiction=fimfiction,
+            fimfarchive=fimfarchive,
+            workdir=str(tmpdir),
+            retries=2,
+            skips=2,
+        )
+
+    def verify_run(self, task, delays):
+        """
+        Runs the task and verifies delays.
+        """
+        calls = [call(delay) for delay in delays]
+
+        with patch('time.sleep') as m:
+            task.run()
+            m.assert_has_calls(calls)
+
+    def verify_fetch(self, task, target, status):
+        """
+        Runs the task and verifies a regular fetch.
+        """
+        task.write = MagicMock(side_effect=lambda story: story)
+
+        delays = (
+            SKIPPED_DELAY,  # key 0: no story stored under this key
+            SUCCESS_DELAY,  # key 1: the target story
+            SKIPPED_DELAY,  # key 2: no story stored under this key
+            SKIPPED_DELAY,  # key 3: second skip in a row ends the run
+        )
+
+        self.verify_run(task, delays)
+        task.write.assert_called_once_with(target)
+        assert status in target.flavors
+        assert task.state['key'] == 4
+
+    def verify_empty(self, task, fetcher):
+        """
+        Runs the task and verifies an empty fetch.
+        """
+        task.skip_writer.write = MagicMock()
+
+        target = fetcher.add(key=1, date=1)
+        target.meta['chapters'].clear()
+
+        delays = (
+            SKIPPED_DELAY,
+            SKIPPED_DELAY,
+        )
+
+        self.verify_run(task, delays)
+        task.skip_writer.write.assert_called_once_with(target)
+
+    def verify_failure(self, task, fetcher):
+        """
+        Runs the task and verifies a failed fetch.
+        """
+        task.write = MagicMock(side_effect=lambda story: story)
+        fetcher.fetch = MagicMock(side_effect=StorySourceError)
+
+        delays = (
+            FAILURE_DELAY,  # first failed attempt
+            FAILURE_DELAY,  # second failure reaches `retries=2`
+        )
+
+        self.verify_run(task, delays)
+        task.write.assert_not_called()
+
+    def test_created_story(self, task, fimfiction):
+        """
+        Tests updating for a created story.
+        """
+        target = fimfiction.add(key=1, date=1)
+
+        self.verify_fetch(task, target, UpdateStatus.CREATED)
+
+    def test_revived_story(self, task, fimfarchive, fimfiction):
+        """
+        Tests updating for a revived story.
+        """
+        target = fimfarchive.add(key=1, date=1)
+        other = fimfiction.add(key=1, date=1)
+
+        target.merge = MagicMock(return_value=target)
+        self.verify_fetch(task, target, UpdateStatus.REVIVED)
+        target.merge.assert_called_once_with(meta=other.meta)
+
+    def test_updated_story(self, task, fimfarchive, fimfiction):
+        """
+        Tests updating for an updated story.
+        """
+        fimfarchive.add(key=1, date=0)
+        target = fimfiction.add(key=1, date=1)
+
+        self.verify_fetch(task, target, UpdateStatus.UPDATED)
+
+    def test_deleted_story(self, task, fimfarchive):
+        """
+        Tests updating for a deleted story.
+        """
+        target = fimfarchive.add(key=1, date=1)
+
+        self.verify_fetch(task, target, UpdateStatus.DELETED)
+
+    def test_cleared_story(self, task, fimfarchive, fimfiction):
+        """
+        Tests updating for a cleared story.
+        """
+        target = fimfarchive.add(key=1, date=0)
+        other = fimfiction.add(key=1, date=1)
+        other.meta['chapters'].clear()
+
+        self.verify_fetch(task, target, UpdateStatus.DELETED)
+
+    def test_empty_fimfiction_story(self, task, fimfiction):
+        """
+        Tests updating for an empty story from fimfiction.
+        """
+        self.verify_empty(task, fimfiction)
+
+    def test_empty_fimfarchive_story(self, task, fimfarchive):
+        """
+        Tests updating for an empty story from fimfarchive.
+        """
+        self.verify_empty(task, fimfarchive)
+
+    def test_fimfarchive_failure(self, task, fimfarchive):
+        """
+        Tests handling of a failure in Fimfarchive.
+        """
+        self.verify_failure(task, fimfarchive)
+
+    def test_fimfiction_failure(self, task, fimfiction):
+        """
+        Tests handling of a failure in Fimfiction.
+        """
+        self.verify_failure(task, fimfiction)
+
+    def test_write_epub(self, task, story):
+        """
+        Tests writing of a story in EPUB format.
+        """
+        story = story.merge(flavors=[DataFormat.EPUB])
+        task.epub_writer.write = MagicMock()
+
+        task.write(story)
+        task.epub_writer.write.assert_called_once_with(story)
+
+    def test_write_html(self, task, story):
+        """
+        Tests writing of a story in HTML format.
+        """
+        story = story.merge(flavors=[DataFormat.HTML])
+        task.html_writer.write = MagicMock()
+
+        task.write(story)
+        task.html_writer.write.assert_called_once_with(story)
+
+    def test_write_unsupported(self, task, story):
+        """
+        Tests `ValueError` is raised for unknown data formats.
+        """
+        story = story.merge(flavors=[DataFormat.FPUB])
+
+        with pytest.raises(ValueError):
+            task.write(story)