mirror of
https://github.com/JockeTF/fimfarchive.git
synced 2024-11-29 16:18:00 +01:00
Add update task
This commit is contained in:
parent
a2473d31ae
commit
c41e7be45d
2 changed files with 482 additions and 0 deletions
213
fimfarchive/tasks/update.py
Normal file
213
fimfarchive/tasks/update.py
Normal file
|
@ -0,0 +1,213 @@
|
||||||
|
"""
|
||||||
|
Update task.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Fimfarchive, preserves stories from Fimfiction.
|
||||||
|
# Copyright (C) 2015 Joakim Soderlund
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fimfarchive.exceptions import InvalidStoryError
|
||||||
|
from fimfarchive.fetchers import Fetcher
|
||||||
|
from fimfarchive.flavors import DataFormat, UpdateStatus
|
||||||
|
from fimfarchive.mappers import StoryPathMapper
|
||||||
|
from fimfarchive.selectors import UpdateSelector
|
||||||
|
from fimfarchive.signals import Signal, SignalSender
|
||||||
|
from fimfarchive.stories import Story
|
||||||
|
from fimfarchive.utils import PersistedDict
|
||||||
|
from fimfarchive.writers import DirectoryWriter
|
||||||
|
|
||||||
|
|
||||||
|
# Default path for storage of state and stories.
DEFAULT_WORKDIR = 'worktree/update'
# Default number of retries before the task gives up.
DEFAULT_RETRIES = 10
# Default number of skips before the task gives up.
DEFAULT_SKIPS = 500
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Lowering request delays could jeopardize the future of the archive.
|
||||||
|
#
|
||||||
|
# These timings are here so that the updater does not flood Fimfiction
|
||||||
|
# with requests. One spammy synchronous client would not take down the
|
||||||
|
# site. It might however make the site owners want to prevent archiving
|
||||||
|
# in the future. Consider downloading a Fimfarchive release instead.
|
||||||
|
#
|
||||||
|
# http://www.fimfarchive.net/
|
||||||
|
#
|
||||||
|
|
||||||
|
# Seconds to sleep after a story has been updated successfully.
SUCCESS_DELAY = 10
# Seconds to sleep after a key has been skipped.
SKIPPED_DELAY = 2
# Seconds to sleep after an update attempt raised an exception.
FAILURE_DELAY = 300
|
||||||
|
|
||||||
|
|
||||||
|
class UpdateTask(SignalSender):
    """
    Task which brings Fimfarchive up to date.

    Walks story keys in increasing order, compares the archived story
    with the one currently available, and writes whatever was selected.
    """
    on_attempt = Signal('key', 'skips', 'retries')
    on_success = Signal('key', 'story')
    on_skipped = Signal('key', 'story')
    on_failure = Signal('key', 'error')

    state_file = 'state.json'
    state_vars = {'key': 0}

    def __init__(
            self,
            fimfarchive: Fetcher,
            fimfiction: Fetcher,
            workdir: str = DEFAULT_WORKDIR,
            retries: int = DEFAULT_RETRIES,
            skips: int = DEFAULT_SKIPS,
            ) -> None:
        """
        Constructor.

        Args:
            fimfarchive: Fetcher for the old release.
            fimfiction: Fetcher for the new release.
            workdir: Path for storage of state and stories.
            retries: Number of retries before giving up.
            skips: Number of skips before giving up.
        """
        super().__init__()

        self.fimfarchive = fimfarchive
        self.fimfiction = fimfiction
        self.workdir = workdir
        self.retries = retries
        self.skips = skips

        # The state file lives inside the working directory.
        os.makedirs(self.workdir, exist_ok=True)
        self.state = PersistedDict(
            os.path.join(self.workdir, self.state_file),
            self.state_vars,
        )

        self.select = UpdateSelector()

        # Both story writers share the same meta mapper.
        meta = self.get_mapper('meta')
        skip = self.get_mapper('skip')
        epub = self.get_mapper('epub')
        html = self.get_mapper('html')

        self.skip_writer = DirectoryWriter(skip)
        self.epub_writer = DirectoryWriter(meta, epub)
        self.html_writer = DirectoryWriter(meta, html)

    def get_mapper(self, subdir: str) -> StoryPathMapper:
        """
        Builds a mapper for a subdirectory of the working directory.

        Args:
            subdir: Subdirectory for the mapper.

        Returns:
            A mapper rooted at the subdirectory.
        """
        return StoryPathMapper(os.path.join(self.workdir, subdir))

    def fetch(self, fetcher: Fetcher, key: int) -> Optional[Story]:
        """
        Fetches a story, treating an invalid story as missing.

        Args:
            fetcher: Source for the story.
            key: Primary key of the story.

        Returns:
            The fetched story, or `None` if the story is invalid.

        Raises:
            StorySourceError: If the fetcher fails.
        """
        try:
            story = fetcher.fetch(key)
        except InvalidStoryError:
            story = None

        return story

    def write(self, story: Story) -> None:
        """
        Hands the story over to the writer matching its data format.

        Args:
            story: Object to write.

        Raises:
            ValueError: If story flavor is unsupported.
        """
        flavors = story.flavors

        # HTML takes precedence over EPUB when both are present.
        if DataFormat.HTML in flavors:
            writer = self.html_writer
        elif DataFormat.EPUB in flavors:
            writer = self.epub_writer
        else:
            raise ValueError("Unsupported story flavor.")

        writer.write(story)

    def update(self, key: int) -> Optional[Story]:
        """
        Updates the specified story.

        Args:
            key: Primary key of the story to update.

        Returns:
            The story chosen by the selector, or a false value if
            nothing was selected.

        Raises:
            StorySourceError: If any fetcher fails.
        """
        old = self.fetch(self.fimfarchive, key)
        new = self.fetch(self.fimfiction, key)
        selected = self.select(old, new)

        revived = bool(selected) and UpdateStatus.REVIVED in selected.flavors

        if revived:
            # A revived story takes its meta from the new version.
            selected = selected.merge(meta=new.meta)

        if selected:
            self.write(selected)
        else:
            # Nothing selected, so record whichever version exists.
            fallback = new if new else old

            if fallback:
                self.skip_writer.write(fallback)

        return selected

    def run(self) -> None:
        """
        Runs the updater task.

        Keeps going until either the number of consecutive skips or
        the number of consecutive failures reaches its limit.
        """
        retried = skipped = 0

        while skipped < self.skips and retried < self.retries:
            key = self.state['key']

            self.on_attempt(key, skipped, retried)

            try:
                story = self.update(key)
            except Exception as e:
                # The key is not advanced, so it is attempted again.
                retried += 1
                self.on_failure(key, e)
                time.sleep(FAILURE_DELAY)
                continue

            retried = 0
            self.state['key'] += 1
            self.state.save()

            if story:
                skipped = 0
                notify, delay = self.on_success, SUCCESS_DELAY
            else:
                skipped += 1
                notify, delay = self.on_skipped, SKIPPED_DELAY

            notify(key, story)
            time.sleep(delay)
|
269
tests/tasks/test_update.py
Normal file
269
tests/tasks/test_update.py
Normal file
|
@ -0,0 +1,269 @@
|
||||||
|
"""
|
||||||
|
Update task tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Fimfarchive, preserves stories from Fimfiction.
|
||||||
|
# Copyright (C) 2015 Joakim Soderlund
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, call, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from fimfarchive.exceptions import InvalidStoryError, StorySourceError
|
||||||
|
from fimfarchive.fetchers import Fetcher
|
||||||
|
from fimfarchive.flavors import DataFormat, UpdateStatus
|
||||||
|
from fimfarchive.stories import Story
|
||||||
|
from fimfarchive.tasks.update import (
|
||||||
|
UpdateTask, SUCCESS_DELAY, SKIPPED_DELAY, FAILURE_DELAY,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DummyFetcher(Fetcher):
    """
    Fetcher serving stories from an in-memory dictionary.
    """

    def __init__(self):
        """
        Constructor.
        """
        self.stories = {}

    def add(self, key, date, flavors=()):
        """
        Creates a story, stores it in the fetcher, and returns it.
        """
        meta = {
            'id': key,
            'date_modified': date,
            'chapters': [
                {'id': key},
            ],
        }

        story = Story(
            key=key,
            flavors=flavors,
            data=f'Story {key}',
            meta=meta,
        )

        self.stories[key] = story

        return story

    def fetch(self, key):
        """
        Returns the story stored under the key.

        Raises `InvalidStoryError` when no such story has been added.
        """
        try:
            story = self.stories[key]
        except KeyError:
            raise InvalidStoryError()

        return story
|
||||||
|
|
||||||
|
|
||||||
|
class TestUpdateTask:
    """
    Tests for the update task.
    """

    @pytest.fixture
    def fimfiction(self):
        """
        Returns a `Fetcher` standing in for Fimfiction.
        """
        return DummyFetcher()

    @pytest.fixture
    def fimfarchive(self):
        """
        Returns a `Fetcher` standing in for Fimfarchive.
        """
        return DummyFetcher()

    @pytest.fixture
    def task(self, fimfarchive, fimfiction, tmpdir):
        """
        Returns an `UpdateTask` with low retry and skip limits.
        """
        return UpdateTask(
            fimfiction=fimfiction,
            fimfarchive=fimfarchive,
            workdir=str(tmpdir),
            retries=2,
            skips=2,
        )

    def verify_run(self, task, delays):
        """
        Runs the task with sleeping patched out and checks the delays.
        """
        expected = [call(seconds) for seconds in delays]

        with patch('time.sleep') as sleep:
            task.run()

        sleep.assert_has_calls(expected)

    def verify_fetch(self, task, target, status):
        """
        Runs the task and checks that the target was written once.
        """
        task.write = MagicMock(side_effect=lambda story: story)

        # Key 0 is skipped, key 1 succeeds, keys 2 and 3 are skipped.
        self.verify_run(task, (
            SKIPPED_DELAY,
            SUCCESS_DELAY,
            SKIPPED_DELAY,
            SKIPPED_DELAY,
        ))

        task.write.assert_called_once_with(target)
        assert status in target.flavors
        assert task.state['key'] == 4

    def verify_empty(self, task, fetcher):
        """
        Runs the task and checks handling of a story without chapters.
        """
        task.skip_writer.write = MagicMock()

        target = fetcher.add(key=1, date=1)
        target.meta['chapters'].clear()

        self.verify_run(task, (
            SKIPPED_DELAY,
            SKIPPED_DELAY,
        ))

        task.skip_writer.write.assert_called_once_with(target)

    def verify_failure(self, task, fetcher):
        """
        Runs the task and checks handling of a failing fetcher.
        """
        task.write = MagicMock(side_effect=lambda story: story)
        fetcher.fetch = MagicMock(side_effect=StorySourceError)

        self.verify_run(task, (
            FAILURE_DELAY,
            FAILURE_DELAY,
        ))

        task.write.assert_not_called()

    def test_created_story(self, task, fimfiction):
        """
        Tests updating for a created story.
        """
        target = fimfiction.add(key=1, date=1)

        self.verify_fetch(task, target, UpdateStatus.CREATED)

    def test_revived_story(self, task, fimfarchive, fimfiction):
        """
        Tests updating for a revived story.
        """
        target = fimfarchive.add(key=1, date=1)
        other = fimfiction.add(key=1, date=1)

        target.merge = MagicMock(return_value=target)
        self.verify_fetch(task, target, UpdateStatus.REVIVED)
        target.merge.assert_called_once_with(meta=other.meta)

    def test_updated_story(self, task, fimfarchive, fimfiction):
        """
        Tests updating for an updated story.
        """
        fimfarchive.add(key=1, date=0)
        target = fimfiction.add(key=1, date=1)

        self.verify_fetch(task, target, UpdateStatus.UPDATED)

    def test_deleted_story(self, task, fimfarchive):
        """
        Tests updating for a deleted story.
        """
        target = fimfarchive.add(key=1, date=1)

        self.verify_fetch(task, target, UpdateStatus.DELETED)

    def test_cleared_story(self, task, fimfarchive, fimfiction):
        """
        Tests updating for a cleared story.
        """
        target = fimfarchive.add(key=1, date=0)
        other = fimfiction.add(key=1, date=1)
        other.meta['chapters'].clear()

        self.verify_fetch(task, target, UpdateStatus.DELETED)

    def test_empty_fimfiction_story(self, task, fimfiction):
        """
        Tests updating for an empty story from fimfiction.
        """
        self.verify_empty(task, fimfiction)

    def test_empty_fimfarchive_story(self, task, fimfarchive):
        """
        Tests updating for an empty story from fimfarchive.
        """
        self.verify_empty(task, fimfarchive)

    def test_fimfarchive_failure(self, task, fimfarchive):
        """
        Tests handling of a failure in Fimfarchive.
        """
        self.verify_failure(task, fimfarchive)

    def test_fimfiction_failure(self, task, fimfiction):
        """
        Tests handling of a failure in Fimfiction.
        """
        self.verify_failure(task, fimfiction)

    def test_write_epub(self, task, story):
        """
        Tests writing of a story in EPUB format.
        """
        story = story.merge(flavors=[DataFormat.EPUB])
        task.epub_writer.write = MagicMock()

        task.write(story)

        task.epub_writer.write.assert_called_once_with(story)

    def test_write_html(self, task, story):
        """
        Tests writing of a story in HTML format.
        """
        story = story.merge(flavors=[DataFormat.HTML])
        task.html_writer.write = MagicMock()

        task.write(story)

        task.html_writer.write.assert_called_once_with(story)

    def test_write_unsupported(self, task, story):
        """
        Tests `ValueError` is raised for unknown data formats.
        """
        story = story.merge(flavors=[DataFormat.FPUB])

        with pytest.raises(ValueError):
            task.write(story)
|
Loading…
Reference in a new issue