From 51f1d9f901dceae6ec1301a8ee35186429510fba Mon Sep 17 00:00:00 2001 From: Joakim Soderlund Date: Sat, 21 Oct 2017 13:16:53 +0200 Subject: [PATCH] Split fetchers module into package --- fimfarchive/fetchers.py | 344 ---------------------------- fimfarchive/fetchers/__init__.py | 34 +++ fimfarchive/fetchers/base.py | 127 ++++++++++ fimfarchive/fetchers/fimfarchive.py | 161 +++++++++++++ fimfarchive/fetchers/fimfiction.py | 120 ++++++++++ 5 files changed, 442 insertions(+), 344 deletions(-) delete mode 100644 fimfarchive/fetchers.py create mode 100644 fimfarchive/fetchers/__init__.py create mode 100644 fimfarchive/fetchers/base.py create mode 100644 fimfarchive/fetchers/fimfarchive.py create mode 100644 fimfarchive/fetchers/fimfiction.py diff --git a/fimfarchive/fetchers.py b/fimfarchive/fetchers.py deleted file mode 100644 index ab6ef8d..0000000 --- a/fimfarchive/fetchers.py +++ /dev/null @@ -1,344 +0,0 @@ -""" -Fetchers for Fimfarchive. -""" - - -# -# Fimfarchive, preserves stories from Fimfiction. -# Copyright (C) 2015 Joakim Soderlund -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# - - -import codecs -import gc -import json -from copy import deepcopy -from io import BytesIO -from typing import FrozenSet -from zipfile import ZipFile, BadZipFile - -import requests - -from fimfarchive.exceptions import InvalidStoryError, StorySourceError -from fimfarchive.flavors import Flavor, StorySource, DataFormat, MetaPurity -from fimfarchive.stories import Story - - -__all__ = ( - 'Fetcher', - 'FimfictionFetcher', - 'FimfarchiveFetcher', -) - - -StreamReader = codecs.getreader('utf-8') - - -class Fetcher: - """ - Abstract base class for story fetchers. - """ - prefetch_meta = False - prefetch_data = False - - flavors: FrozenSet[Flavor] = frozenset() - - def __enter__(self): - """ - Returns self for use in with statements. - """ - return self - - def __exit__(self, exc_type, exc_value, traceback): - """ - Closes the fetcher on exit in with statements. - """ - self.close() - - def close(self): - """ - Closes file descriptors and frees memory. - """ - pass - - def fetch(self, key, prefetch_meta=None, prefetch_data=None): - """ - Fetches story information. - - Args: - key: Primary key of the story. - prefetch_meta: Force prefetching of meta. - prefetch_data: Force prefetching of data. - - Returns: - Story: A new `Story` object. - - Raises: - InvalidStoryError: If a valid story is not found. - StorySourceError: If source does not return any data. - """ - if prefetch_meta is None: - prefetch_meta = self.prefetch_meta - - if prefetch_meta: - meta = self.fetch_meta(key) - else: - meta = None - - if prefetch_data is None: - prefetch_data = self.prefetch_data - - if prefetch_data: - data = self.fetch_data(key) - else: - data = None - - return Story(key, self, meta, data, self.flavors) - - def fetch_data(self, key): - """ - Fetches story content data. - - Args: - key: Primary key of the story. - - Returns: - bytes: Story content data. - - Raises: - InvalidStoryError: If a valid story is not found. - StorySourceError: If source does not return any data. - """ - raise NotImplementedError() - - def fetch_meta(self, key): - """ - Fetches story meta information. - - Args: - key: Primary key of the story. - - Returns: - dict: Story meta information. - - Raises: - InvalidStoryError: If a valid story is not found. - StorySourceError: If source does not return any data. - """ - raise NotImplementedError() - - -class FimfictionFetcher(Fetcher): - """ - Fetcher for Fimfiction. - """ - prefetch_meta = True - prefetch_data = False - - data_path = 'https://www.fimfiction.net/story/download/{}/html' - meta_path = 'https://www.fimfiction.net/api/story.php?story={}' - - flavors = frozenset(( - StorySource.FIMFICTION, - DataFormat.HTML, - MetaPurity.DIRTY, - )) - - def get(self, url): - """ - Performs an HTTP GET request. - - Args: - url: Target of the HTTP request. - - Returns: - Response: A new `Response` object. - - Raises: - InvalidStoryError: If access to the resource was denied. - StorySourceError: If the request fails for any other reason. - """ - try: - response = requests.get(url, timeout=60) - except OSError as e: - raise StorySourceError("Could not read from server.") from e - - if response.status_code == 403: - raise InvalidStoryError("Access to resource was denied.") - - if not response.ok: - raise StorySourceError( - "Server responded with HTTP {} {}." - .format(response.status_code, response.reason) - ) - - return response - - def fetch_data(self, key): - url = self.data_path.format(key) - response = self.get(url) - data = response.content - - if len(data) == 0: - raise InvalidStoryError("Server returned empty response body.") - - if b'

' not in data: - raise InvalidStoryError("Server did not return any chapters.") - - if not data.endswith(b''): - raise StorySourceError("Server returned incomplete response.") - - return data - - def fetch_meta(self, key): - url = self.meta_path.format(key) - response = self.get(url) - - try: - meta = response.json() - except ValueError as e: - raise StorySourceError("Server did not return valid JSON.") from e - - if 'error' in meta: - message = meta['error'] - - if message == 'Invalid story id': - raise InvalidStoryError("Story does not exist.") - else: - raise StorySourceError(message) - - if 'story' not in meta: - raise StorySourceError("Server did not return a story object.") - - return meta['story'] - - -class FimfarchiveFetcher(Fetcher): - """ - Fetcher for Fimfarchive. - """ - prefetch_meta = True - prefetch_data = True - - flavors = frozenset(( - StorySource.FIMFARCHIVE, - DataFormat.EPUB, - MetaPurity.CLEAN, - )) - - def __init__(self, file): - """ - Initializes a `FimfarchiveFetcher` instance. - - Args: - file: Path or file-like object for a Fimfarchive release. - - Raises: - StorySourceError: If no valid Fimfarchive release can be loaded. - """ - self.is_open = False - self.archive = None - self.index = None - - try: - self._init(file) - except: - self.close() - raise - else: - self.is_open = True - - def _init(self, file): - """ - Internal initialization method. - """ - try: - self.archive = ZipFile(file) - except IOError as e: - raise StorySourceError("Could not read from file.") from e - except BadZipFile as e: - raise StorySourceError("Archive is not a valid ZIP-file.") from e - - try: - with self.archive.open('index.json') as fobj: - self.index = json.load(StreamReader(fobj)) - except KeyError as e: - raise StorySourceError("Archive is missing the index.") from e - except ValueError as e: - raise StorySourceError("Index is not valid JSON.") from e - except UnicodeDecodeError as e: - raise StorySourceError("Index is incorrectly encoded.") from e - except BadZipFile as e: - raise StorySourceError("Archive is corrupt.") from e - - gc.collect() - - def close(self): - self.is_open = False - self.index = None - - if self.archive is not None: - self.archive.close() - self.archive = None - - gc.collect() - - def lookup(self, key): - """ - Finds meta for a story in the index. - - Args: - key: Primary key of the story. - - Returns: - dict: A reference to the story's meta. - - Raises: - InvalidStoryError: If story does not exist. - StorySourceError: If archive is closed. - """ - if not self.is_open: - raise StorySourceError("Fetcher is closed.") - - key = str(key) - - if key not in self.index: - raise InvalidStoryError("Story does not exist.") - - return self.index[key] - - def fetch_data(self, key): - meta = self.lookup(key) - - if 'path' not in meta: - raise StorySourceError("Index is missing a path value.") - - try: - data = self.archive.read(meta['path']) - except ValueError as e: - raise StorySourceError("Archive is missing a file.") from e - except BadZipFile as e: - raise StorySourceError("Archive is corrupt.") from e - - with ZipFile(BytesIO(data)) as story: - if story.testzip() is not None: - raise StorySourceError("Story is corrupt.") - - return data - - def fetch_meta(self, key): - meta = self.lookup(key) - return deepcopy(meta) diff --git a/fimfarchive/fetchers/__init__.py b/fimfarchive/fetchers/__init__.py new file mode 100644 index 0000000..e3b022d --- /dev/null +++ b/fimfarchive/fetchers/__init__.py @@ -0,0 +1,34 @@ +""" +Story fetchers. +""" + + +# +# Fimfarchive, preserves stories from Fimfiction. +# Copyright (C) 2015 Joakim Soderlund +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +from .base import Fetcher +from .fimfarchive import FimfarchiveFetcher +from .fimfiction import FimfictionFetcher + + +__all__ = ( + 'Fetcher', + 'FimfarchiveFetcher', + 'FimfictionFetcher', +) diff --git a/fimfarchive/fetchers/base.py b/fimfarchive/fetchers/base.py new file mode 100644 index 0000000..cfc6b03 --- /dev/null +++ b/fimfarchive/fetchers/base.py @@ -0,0 +1,127 @@ +""" +Base fetcher. +""" + + +# +# Fimfarchive, preserves stories from Fimfiction. +# Copyright (C) 2015 Joakim Soderlund +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +from typing import FrozenSet + +from fimfarchive.flavors import Flavor +from fimfarchive.stories import Story + + +__all__ = ( + 'Fetcher', +) + + +class Fetcher: + """ + Abstract base class for story fetchers. + """ + prefetch_meta = False + prefetch_data = False + + flavors: FrozenSet[Flavor] = frozenset() + + def __enter__(self): + """ + Returns self for use in with statements. + """ + return self + + def __exit__(self, exc_type, exc_value, traceback): + """ + Closes the fetcher on exit in with statements. + """ + self.close() + + def close(self): + """ + Closes file descriptors and frees memory. + """ + pass + + def fetch(self, key, prefetch_meta=None, prefetch_data=None): + """ + Fetches story information. + + Args: + key: Primary key of the story. + prefetch_meta: Force prefetching of meta. + prefetch_data: Force prefetching of data. + + Returns: + Story: A new `Story` object. + + Raises: + InvalidStoryError: If a valid story is not found. + StorySourceError: If source does not return any data. + """ + if prefetch_meta is None: + prefetch_meta = self.prefetch_meta + + if prefetch_meta: + meta = self.fetch_meta(key) + else: + meta = None + + if prefetch_data is None: + prefetch_data = self.prefetch_data + + if prefetch_data: + data = self.fetch_data(key) + else: + data = None + + return Story(key, self, meta, data, self.flavors) + + def fetch_data(self, key): + """ + Fetches story content data. + + Args: + key: Primary key of the story. + + Returns: + bytes: Story content data. + + Raises: + InvalidStoryError: If a valid story is not found. + StorySourceError: If source does not return any data. + """ + raise NotImplementedError() + + def fetch_meta(self, key): + """ + Fetches story meta information. + + Args: + key: Primary key of the story. + + Returns: + dict: Story meta information. + + Raises: + InvalidStoryError: If a valid story is not found. + StorySourceError: If source does not return any data. + """ + raise NotImplementedError() diff --git a/fimfarchive/fetchers/fimfarchive.py b/fimfarchive/fetchers/fimfarchive.py new file mode 100644 index 0000000..929f9b7 --- /dev/null +++ b/fimfarchive/fetchers/fimfarchive.py @@ -0,0 +1,161 @@ +""" +Fimfarchive fetcher. +""" + + +# +# Fimfarchive, preserves stories from Fimfiction. +# Copyright (C) 2015 Joakim Soderlund +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +import codecs +import gc +import json +from copy import deepcopy +from io import BytesIO +from zipfile import ZipFile, BadZipFile + +from fimfarchive.exceptions import InvalidStoryError, StorySourceError +from fimfarchive.flavors import StorySource, DataFormat, MetaPurity + +from .base import Fetcher + + +__all__ = ( + 'FimfarchiveFetcher', +) + + +StreamReader = codecs.getreader('utf-8') + + +class FimfarchiveFetcher(Fetcher): + """ + Fetcher for Fimfarchive. + """ + prefetch_meta = True + prefetch_data = True + + flavors = frozenset(( + StorySource.FIMFARCHIVE, + DataFormat.EPUB, + MetaPurity.CLEAN, + )) + + def __init__(self, file): + """ + Initializes a `FimfarchiveFetcher` instance. + + Args: + file: Path or file-like object for a Fimfarchive release. + + Raises: + StorySourceError: If no valid Fimfarchive release can be loaded. + """ + self.is_open = False + self.archive = None + self.index = None + + try: + self._init(file) + except: + self.close() + raise + else: + self.is_open = True + + def _init(self, file): + """ + Internal initialization method. + """ + try: + self.archive = ZipFile(file) + except IOError as e: + raise StorySourceError("Could not read from file.") from e + except BadZipFile as e: + raise StorySourceError("Archive is not a valid ZIP-file.") from e + + try: + with self.archive.open('index.json') as fobj: + self.index = json.load(StreamReader(fobj)) + except KeyError as e: + raise StorySourceError("Archive is missing the index.") from e + except ValueError as e: + raise StorySourceError("Index is not valid JSON.") from e + except UnicodeDecodeError as e: + raise StorySourceError("Index is incorrectly encoded.") from e + except BadZipFile as e: + raise StorySourceError("Archive is corrupt.") from e + + gc.collect() + + def close(self): + self.is_open = False + self.index = None + + if self.archive is not None: + self.archive.close() + self.archive = None + + gc.collect() + + def lookup(self, key): + """ + Finds meta for a story in the index. + + Args: + key: Primary key of the story. + + Returns: + dict: A reference to the story's meta. + + Raises: + InvalidStoryError: If story does not exist. + StorySourceError: If archive is closed. + """ + if not self.is_open: + raise StorySourceError("Fetcher is closed.") + + key = str(key) + + if key not in self.index: + raise InvalidStoryError("Story does not exist.") + + return self.index[key] + + def fetch_data(self, key): + meta = self.lookup(key) + + if 'path' not in meta: + raise StorySourceError("Index is missing a path value.") + + try: + data = self.archive.read(meta['path']) + except ValueError as e: + raise StorySourceError("Archive is missing a file.") from e + except BadZipFile as e: + raise StorySourceError("Archive is corrupt.") from e + + with ZipFile(BytesIO(data)) as story: + if story.testzip() is not None: + raise StorySourceError("Story is corrupt.") + + return data + + def fetch_meta(self, key): + meta = self.lookup(key) + return deepcopy(meta) diff --git a/fimfarchive/fetchers/fimfiction.py b/fimfarchive/fetchers/fimfiction.py new file mode 100644 index 0000000..591258a --- /dev/null +++ b/fimfarchive/fetchers/fimfiction.py @@ -0,0 +1,120 @@ +""" +Fimfiction fetcher. +""" + + +# +# Fimfarchive, preserves stories from Fimfiction. +# Copyright (C) 2015 Joakim Soderlund +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +import requests + +from fimfarchive.exceptions import InvalidStoryError, StorySourceError +from fimfarchive.flavors import StorySource, DataFormat, MetaPurity + +from .base import Fetcher + + +__all__ = ( + 'FimfictionFetcher', +) + + +class FimfictionFetcher(Fetcher): + """ + Fetcher for Fimfiction. + """ + prefetch_meta = True + prefetch_data = False + + data_path = 'https://www.fimfiction.net/story/download/{}/html' + meta_path = 'https://www.fimfiction.net/api/story.php?story={}' + + flavors = frozenset(( + StorySource.FIMFICTION, + DataFormat.HTML, + MetaPurity.DIRTY, + )) + + def get(self, url): + """ + Performs an HTTP GET request. + + Args: + url: Target of the HTTP request. + + Returns: + Response: A new `Response` object. + + Raises: + InvalidStoryError: If access to the resource was denied. + StorySourceError: If the request fails for any other reason. + """ + try: + response = requests.get(url, timeout=60) + except OSError as e: + raise StorySourceError("Could not read from server.") from e + + if response.status_code == 403: + raise InvalidStoryError("Access to resource was denied.") + + if not response.ok: + raise StorySourceError( + "Server responded with HTTP {} {}." + .format(response.status_code, response.reason) + ) + + return response + + def fetch_data(self, key): + url = self.data_path.format(key) + response = self.get(url) + data = response.content + + if len(data) == 0: + raise InvalidStoryError("Server returned empty response body.") + + if b'

' not in data: + raise InvalidStoryError("Server did not return any chapters.") + + if not data.endswith(b''): + raise StorySourceError("Server returned incomplete response.") + + return data + + def fetch_meta(self, key): + url = self.meta_path.format(key) + response = self.get(url) + + try: + meta = response.json() + except ValueError as e: + raise StorySourceError("Server did not return valid JSON.") from e + + if 'error' in meta: + message = meta['error'] + + if message == 'Invalid story id': + raise InvalidStoryError("Story does not exist.") + else: + raise StorySourceError(message) + + if 'story' not in meta: + raise StorySourceError("Server did not return a story object.") + + return meta['story']