Add fetcher for Fimfarchive

This commit is contained in:
Joakim Soderlund 2015-10-10 00:58:16 +02:00
parent 279168bdba
commit 70718593fd

View file

@ -22,7 +22,12 @@ Fetchers for Fimfarchive.
# #
from copy import deepcopy
import gc
from io import BytesIO
import json
import requests import requests
from zipfile import ZipFile, BadZipFile
from fimfarchive.exceptions import InvalidStoryError, StorySourceError from fimfarchive.exceptions import InvalidStoryError, StorySourceError
@ -171,3 +176,125 @@ class FimfictionFetcher(Fetcher):
raise StorySourceError("Server did not return a story object.") raise StorySourceError("Server did not return a story object.")
return meta['story'] return meta['story']
class FimfarchiveFetcher(Fetcher):
    """
    Fetcher for Fimfarchive.

    Reads stories from a Fimfarchive release: a ZIP archive holding an
    `index.json` file, whose entries point at the story files stored in
    the same archive through their `path` values.
    """

    def __init__(self, file):
        """
        Initializes a `FimfarchiveFetcher` instance.

        Args:
            file: Path or file-like object for a Fimfarchive release.

        Raises:
            StorySourceError: If no valid Fimfarchive release can be loaded.
        """
        self.is_open = False
        self.archive = None
        self.index = None

        try:
            self._init(file)
        except:
            # Deliberately bare: whatever interrupted the load, release
            # the archive handle first and then let the error propagate.
            self.close()
            raise
        else:
            self.is_open = True

    def _init(self, file):
        """
        Internal initialization method.

        Opens the archive and loads its index into memory.

        Args:
            file: Path or file-like object for a Fimfarchive release.

        Raises:
            StorySourceError: If the archive or its index is unusable.
        """
        try:
            self.archive = ZipFile(file)
        except IOError as e:
            raise StorySourceError("Could not read from file.") from e
        except BadZipFile as e:
            raise StorySourceError("Archive is not a valid ZIP-file.") from e

        try:
            byte_index = self.archive.read('index.json')
        except KeyError as e:
            raise StorySourceError("Archive is missing the index.") from e
        except BadZipFile as e:
            raise StorySourceError("Archive is corrupt.") from e

        try:
            text_index = byte_index.decode()
        except UnicodeDecodeError as e:
            raise StorySourceError("Index is incorrectly encoded.") from e

        # Drop each intermediate copy of the index as soon as the next
        # representation exists, so they are not all held at once.
        del byte_index
        gc.collect()

        try:
            self.index = json.loads(text_index)
        except ValueError as e:
            raise StorySourceError("Index is not valid JSON.") from e

        del text_index
        gc.collect()

    def close(self):
        """
        Closes the archive and releases the index.

        Safe to call more than once, and on a partially initialized
        instance; the fetcher can not be used for lookups afterwards.
        """
        self.is_open = False
        self.index = None

        if self.archive is not None:
            self.archive.close()
            self.archive = None

        gc.collect()

    def lookup(self, pk):
        """
        Finds meta for a story in the index.

        Args:
            pk: Primary key of the story.

        Returns:
            dict: A reference to the story's meta.

        Raises:
            InvalidStoryError: If story does not exist.
            StorySourceError: If archive is closed.
        """
        if not self.is_open:
            raise StorySourceError("Fetcher is closed.")

        # The index is keyed by strings, so accept any pk that
        # converts to the matching string.
        pk = str(pk)

        if pk not in self.index:
            raise InvalidStoryError("Story does not exist.")

        return self.index[pk]

    def fetch_data(self, pk):
        """
        Fetches the raw story file from the archive.

        Args:
            pk: Primary key of the story.

        Returns:
            bytes: Content of the story's ZIP-file.

        Raises:
            InvalidStoryError: If story does not exist or has no chapters.
            StorySourceError: If the archive is closed, the index entry has
                no path, or the story file is missing or corrupt.
        """
        meta = self.lookup(pk)

        if 'path' not in meta:
            raise StorySourceError("Index is missing a path value.")

        try:
            data = self.archive.read(meta['path'])
        except (KeyError, ValueError) as e:
            # ZipFile.read raises KeyError for a name that is not in the
            # archive; ValueError is kept for backward compatibility.
            raise StorySourceError("Archive is missing a file.") from e
        except BadZipFile as e:
            raise StorySourceError("Archive is corrupt.") from e

        # Opening may fail outright if the stored bytes are not a ZIP-file;
        # report that the same way as a failed integrity check below.
        try:
            story = ZipFile(BytesIO(data))
        except BadZipFile as e:
            raise StorySourceError("Story is corrupt.") from e

        with story:
            if story.testzip() is not None:
                raise StorySourceError("Story is corrupt.")

            if 'Chapter1.html' not in story.namelist():
                raise InvalidStoryError("Story contains no chapters.")

        return data

    def fetch_meta(self, pk):
        """
        Fetches meta for a story from the index.

        Args:
            pk: Primary key of the story.

        Returns:
            dict: A deep copy of the story's meta, so callers may modify
                it without affecting the index.

        Raises:
            InvalidStoryError: If story does not exist.
            StorySourceError: If archive is closed.
        """
        meta = self.lookup(pk)
        return deepcopy(meta)