Add update task

This commit is contained in:
Joakim Soderlund 2017-08-01 22:20:45 +02:00
parent a2473d31ae
commit c41e7be45d
2 changed files with 482 additions and 0 deletions

213
fimfarchive/tasks/update.py Normal file
View file

@ -0,0 +1,213 @@
"""
Update task.
"""
#
# Fimfarchive, preserves stories from Fimfiction.
# Copyright (C) 2015 Joakim Soderlund
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import time
from typing import Optional
from fimfarchive.exceptions import InvalidStoryError
from fimfarchive.fetchers import Fetcher
from fimfarchive.flavors import DataFormat, UpdateStatus
from fimfarchive.mappers import StoryPathMapper
from fimfarchive.selectors import UpdateSelector
from fimfarchive.signals import Signal, SignalSender
from fimfarchive.stories import Story
from fimfarchive.utils import PersistedDict
from fimfarchive.writers import DirectoryWriter
# Default value for the `workdir` argument of `UpdateTask`, the
# directory where the task keeps its state file and written stories.
DEFAULT_WORKDIR = 'worktree/update'
# Default value for `retries`: the run loop stops once this many
# attempts in a row have raised (the counter resets on any success).
DEFAULT_RETRIES = 10
# Default value for `skips`: the run loop stops once this many keys
# in a row have produced no selected story.
DEFAULT_SKIPS = 500
#
# Lowering request delays could jeopardize the future of the archive.
#
# These timings are here so that the updater does not flood Fimfiction
# with requests. One spammy synchronous client would not take down the
# site. It might however make the site owners want to prevent archiving
# in the future. Consider downloading a Fimfarchive release instead.
#
# http://www.fimfarchive.net/
#
# Each value below is passed to `time.sleep`, so the unit is seconds.
# Slept after a key produced a selected story that was written.
SUCCESS_DELAY = 10
# Slept after a key produced no selected story.
SKIPPED_DELAY = 2
# Slept after an update attempt raised an exception.
FAILURE_DELAY = 300
class UpdateTask(SignalSender):
    """
    Task which steps through story keys and updates the archive.

    For every key, the story is requested from both the old release
    and Fimfiction, a selector decides which version (if any) to keep,
    and the current key is stored in a state file inside the workdir.
    """

    on_attempt = Signal('key', 'skips', 'retries')
    on_success = Signal('key', 'story')
    on_skipped = Signal('key', 'story')
    on_failure = Signal('key', 'error')

    state_file = 'state.json'
    state_vars = {'key': 0}

    def __init__(
            self,
            fimfarchive: Fetcher,
            fimfiction: Fetcher,
            workdir: str = DEFAULT_WORKDIR,
            retries: int = DEFAULT_RETRIES,
            skips: int = DEFAULT_SKIPS,
    ) -> None:
        """
        Prepares the working directory, state, selector, and writers.

        Args:
            fimfarchive: Fetcher for the old release.
            fimfiction: Fetcher for the new release.
            workdir: Path for storage of state and stories.
            retries: Number of retries before giving up.
            skips: Number of skips before giving up.
        """
        super().__init__()

        self.fimfarchive = fimfarchive
        self.fimfiction = fimfiction
        self.workdir = workdir
        self.retries = retries
        self.skips = skips

        # The directory must exist before the state file is opened in it.
        os.makedirs(self.workdir, exist_ok=True)
        self.state = PersistedDict(
            os.path.join(self.workdir, self.state_file),
            self.state_vars,
        )

        self.select = UpdateSelector()

        # One mapper per subdirectory of the working directory.
        mappers = {
            name: self.get_mapper(name)
            for name in ('meta', 'skip', 'epub', 'html')
        }

        self.skip_writer = DirectoryWriter(mappers['skip'])
        self.epub_writer = DirectoryWriter(mappers['meta'], mappers['epub'])
        self.html_writer = DirectoryWriter(mappers['meta'], mappers['html'])

    def get_mapper(self, subdir: str) -> StoryPathMapper:
        """
        Builds a path mapper rooted in a subdirectory of the workdir.

        Args:
            subdir: Name of the subdirectory, relative to the workdir.

        Returns:
            A mapper for the joined directory path.
        """
        return StoryPathMapper(os.path.join(self.workdir, subdir))

    def fetch(self, fetcher: Fetcher, key: int) -> Optional[Story]:
        """
        Requests a story, treating an invalid story as missing.

        Args:
            fetcher: Source for the story.
            key: Primary key of the story.

        Returns:
            The fetched story, or `None` if the story is invalid.

        Raises:
            StorySourceError: If the fetcher fails.
        """
        try:
            story = fetcher.fetch(key)
        except InvalidStoryError:
            return None

        return story

    def write(self, story: Story) -> None:
        """
        Hands the story to the writer matching its data format.

        HTML is checked before EPUB, so a story carrying both
        flavors ends up in the HTML writer.

        Args:
            story: Object to write.

        Raises:
            ValueError: If story flavor is unsupported.
        """
        if DataFormat.HTML in story.flavors:
            self.html_writer.write(story)
            return

        if DataFormat.EPUB in story.flavors:
            self.epub_writer.write(story)
            return

        raise ValueError("Unsupported story flavor.")

    def update(self, key: int) -> Optional[Story]:
        """
        Fetches both versions of a story and stores the selected one.

        Args:
            key: Primary key of the story to update.

        Returns:
            The selected story, or a falsy value if nothing was selected.

        Raises:
            StorySourceError: If any fetcher fails.
        """
        old = self.fetch(self.fimfarchive, key)
        new = self.fetch(self.fimfiction, key)
        chosen = self.select(old, new)

        # A revived story keeps its selected content but takes new meta.
        if chosen and UpdateStatus.REVIVED in chosen.flavors:
            chosen = chosen.merge(meta=new.meta)

        if chosen:
            self.write(chosen)
            return chosen

        # Nothing selected; record whichever version exists, new first.
        for candidate in (new, old):
            if candidate:
                self.skip_writer.write(candidate)
                break

        return chosen

    def run(self) -> None:
        """
        Processes keys until too many skips or failures pile up.

        The loop ends when the count of consecutive skips reaches
        `skips`, or the count of consecutive failures reaches `retries`.
        """
        failures = 0
        misses = 0

        while misses < self.skips and failures < self.retries:
            key = self.state['key']
            self.on_attempt(key, misses, failures)

            try:
                story = self.update(key)
            except Exception as error:
                # The key is not advanced, so the same story is retried.
                failures += 1
                self.on_failure(key, error)
                time.sleep(FAILURE_DELAY)
                continue

            # The attempt went through; move on to the next key.
            failures = 0
            self.state['key'] += 1
            self.state.save()

            if not story:
                misses += 1
                self.on_skipped(key, story)
                time.sleep(SKIPPED_DELAY)
                continue

            misses = 0
            self.on_success(key, story)
            time.sleep(SUCCESS_DELAY)

269
tests/tasks/test_update.py Normal file
View file

@ -0,0 +1,269 @@
"""
Update task tests.
"""
#
# Fimfarchive, preserves stories from Fimfiction.
# Copyright (C) 2015 Joakim Soderlund
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from unittest.mock import MagicMock, call, patch
import pytest
from fimfarchive.exceptions import InvalidStoryError, StorySourceError
from fimfarchive.fetchers import Fetcher
from fimfarchive.flavors import DataFormat, UpdateStatus
from fimfarchive.stories import Story
from fimfarchive.tasks.update import (
UpdateTask, SUCCESS_DELAY, SKIPPED_DELAY, FAILURE_DELAY,
)
class DummyFetcher(Fetcher):
    """
    In-memory fetcher which serves stories registered through `add`.
    """

    def __init__(self):
        """
        Starts out with no stored stories.
        """
        self.stories = {}

    def add(self, key, date, flavors=()):
        """
        Creates a story, stores it under its key, and returns it.

        Args:
            key: Primary key, also used as the story and chapter ID.
            date: Value for the `date_modified` meta field.
            flavors: Flavors to give the story.
        """
        meta = {
            'id': key,
            'date_modified': date,
            'chapters': [
                {'id': key},
            ],
        }

        created = Story(
            key=key,
            flavors=flavors,
            data=f'Story {key}',
            meta=meta,
        )

        self.stories[key] = created

        return created

    def fetch(self, key):
        """
        Looks up a stored story.

        Raises:
            InvalidStoryError: If no story was added for the key.
        """
        try:
            found = self.stories[key]
        except KeyError:
            raise InvalidStoryError()

        return found
class TestUpdateTask:
    """
    Test suite for the update task.
    """

    @pytest.fixture
    def fimfiction(self):
        """
        Provides a `DummyFetcher` standing in for Fimfiction.
        """
        return DummyFetcher()

    @pytest.fixture
    def fimfarchive(self):
        """
        Provides a `DummyFetcher` standing in for Fimfarchive.
        """
        return DummyFetcher()

    @pytest.fixture
    def task(self, fimfarchive, fimfiction, tmpdir):
        """
        Provides an `UpdateTask` using a temporary workdir and low limits.
        """
        options = {
            'fimfiction': fimfiction,
            'fimfarchive': fimfarchive,
            'workdir': str(tmpdir),
            'retries': 2,
            'skips': 2,
        }

        return UpdateTask(**options)

    def verify_run(self, task, delays):
        """
        Executes the task with sleeping patched out, then checks delays.
        """
        expected = list(map(call, delays))

        with patch('time.sleep') as sleep:
            task.run()
            sleep.assert_has_calls(expected)

    def verify_fetch(self, task, target, status):
        """
        Executes the task and checks that exactly the target was written.
        """
        task.write = MagicMock(side_effect=lambda story: story)

        # Key 0 is absent, key 1 succeeds, keys 2 and 3 hit the skip limit.
        delays = (SKIPPED_DELAY, SUCCESS_DELAY) + (SKIPPED_DELAY,) * 2

        self.verify_run(task, delays)
        task.write.assert_called_once_with(target)

        assert status in target.flavors
        assert task.state['key'] == 4

    def verify_empty(self, task, fetcher):
        """
        Executes the task against a story which has no chapters.
        """
        target = fetcher.add(key=1, date=1)
        target.meta['chapters'].clear()
        task.skip_writer.write = MagicMock()

        self.verify_run(task, (SKIPPED_DELAY,) * 2)
        task.skip_writer.write.assert_called_once_with(target)

    def verify_failure(self, task, fetcher):
        """
        Executes the task against a fetcher which always fails.
        """
        fetcher.fetch = MagicMock(side_effect=StorySourceError)
        task.write = MagicMock(side_effect=lambda story: story)

        self.verify_run(task, (FAILURE_DELAY,) * 2)
        task.write.assert_not_called()

    def test_created_story(self, task, fimfiction):
        """
        Checks a story which only exists on Fimfiction.
        """
        created = fimfiction.add(key=1, date=1)

        self.verify_fetch(task, created, UpdateStatus.CREATED)

    def test_revived_story(self, task, fimfarchive, fimfiction):
        """
        Checks a story which is unchanged between both sources.
        """
        archived = fimfarchive.add(key=1, date=1)
        current = fimfiction.add(key=1, date=1)
        archived.merge = MagicMock(return_value=archived)

        self.verify_fetch(task, archived, UpdateStatus.REVIVED)
        archived.merge.assert_called_once_with(meta=current.meta)

    def test_updated_story(self, task, fimfarchive, fimfiction):
        """
        Checks a story which is newer on Fimfiction.
        """
        fimfarchive.add(key=1, date=0)
        updated = fimfiction.add(key=1, date=1)

        self.verify_fetch(task, updated, UpdateStatus.UPDATED)

    def test_deleted_story(self, task, fimfarchive):
        """
        Checks a story which only exists in Fimfarchive.
        """
        deleted = fimfarchive.add(key=1, date=1)

        self.verify_fetch(task, deleted, UpdateStatus.DELETED)

    def test_cleared_story(self, task, fimfarchive, fimfiction):
        """
        Checks a story whose chapters were removed on Fimfiction.
        """
        archived = fimfarchive.add(key=1, date=0)
        fimfiction.add(key=1, date=1).meta['chapters'].clear()

        self.verify_fetch(task, archived, UpdateStatus.DELETED)

    def test_empty_fimfiction_story(self, task, fimfiction):
        """
        Checks a chapterless story coming from Fimfiction.
        """
        self.verify_empty(task, fimfiction)

    def test_empty_fimfarchive_story(self, task, fimfarchive):
        """
        Checks a chapterless story coming from Fimfarchive.
        """
        self.verify_empty(task, fimfarchive)

    def test_fimfarchive_failure(self, task, fimfarchive):
        """
        Checks handling of a failing Fimfarchive fetcher.
        """
        self.verify_failure(task, fimfarchive)

    def test_fimfiction_failure(self, task, fimfiction):
        """
        Checks handling of a failing Fimfiction fetcher.
        """
        self.verify_failure(task, fimfiction)

    def test_write_epub(self, task, story):
        """
        Checks that EPUB stories go to the EPUB writer.
        """
        epub = story.merge(flavors=[DataFormat.EPUB])
        task.epub_writer.write = MagicMock()

        task.write(epub)

        task.epub_writer.write.assert_called_once_with(epub)

    def test_write_html(self, task, story):
        """
        Checks that HTML stories go to the HTML writer.
        """
        html = story.merge(flavors=[DataFormat.HTML])
        task.html_writer.write = MagicMock()

        task.write(html)

        task.html_writer.write.assert_called_once_with(html)

    def test_write_unsupported(self, task, story):
        """
        Checks that `ValueError` is raised for unknown data formats.
        """
        fpub = story.merge(flavors=[DataFormat.FPUB])

        with pytest.raises(ValueError):
            task.write(fpub)