Add fetcher for Fimfiction APIv2

This commit is contained in:
Joakim Soderlund 2018-07-15 20:01:45 +02:00
parent 0ee02b971a
commit aaa309d3a8
6 changed files with 901 additions and 0 deletions

View file

@ -26,6 +26,7 @@ from .base import Fetcher
from .directory import DirectoryFetcher
from .fimfarchive import FimfarchiveFetcher
from .fimfiction import FimfictionFetcher
from .fimfiction2 import Fimfiction2Fetcher
__all__ = (
@ -33,4 +34,5 @@ __all__ = (
'DirectoryFetcher',
'FimfarchiveFetcher',
'FimfictionFetcher',
'Fimfiction2Fetcher',
)

View file

@ -0,0 +1,738 @@
"""
Fimfiction APIv2 fetcher.
"""
#
# Fimfarchive, preserves stories from Fimfiction.
# Copyright (C) 2015 Joakim Soderlund
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import json
from abc import ABC, abstractmethod
from collections import OrderedDict, defaultdict
from copy import deepcopy
from typing import Any, Dict, Iterator, Optional, Set
from urllib.parse import urlencode
from jsonapi_client import Filter, Session
from jsonapi_client.document import Document
from jsonapi_client.exceptions import DocumentError
from jsonapi_client.resourceobject import ResourceObject
from fimfarchive import __version__ as VERSION
from fimfarchive.flavors import DataFormat, MetaFormat, MetaPurity, StorySource
from fimfarchive.exceptions import (
FimfarchiveError,
InvalidStoryError,
StorySourceError,
)
from .base import Fetcher
__all__ = (
'BetaFormatVerifier',
'Fimfiction2Fetcher',
)
QueryParams = Dict[str, Set[str]]
ROOT = 'root'
AUTHOR = 'author'
CHAPTERS = 'chapters'
PREQUEL = 'prequel'
TAGS = 'tags'
DATA_PARAMS: QueryParams = {
'include': {
'chapters',
},
'fields[chapter]': {
'authors_note_html',
'authors_note_position',
'chapter_number',
'content_html',
'title',
},
'fields[story]': {
'chapters',
},
}
META_PARAMS: QueryParams = {
'include': {
'author',
'chapters',
'tags',
},
'fields[chapter]': {
'chapter_number',
'date_modified',
'date_published',
'num_views',
'num_words',
'published',
'title',
},
'fields[story]': {
'author',
'chapters',
'color',
'completion_status',
'content_rating',
'cover_image',
'date_modified',
'date_published',
'date_updated',
'description_html',
'num_chapters',
'num_comments',
'num_dislikes',
'num_likes',
'num_views',
'num_words',
'prequel',
'published',
'rating',
'short_description',
'status',
'submitted',
'tags',
'title',
'total_num_views',
},
'fields[story_tag]': {
'name',
'type',
},
'fields[user]': {
'avatar',
'bio_html',
'date_joined',
'name',
'num_blog_posts',
'num_followers',
'num_stories',
},
}
class ApiClient:
"""
Performs API requests.
"""
def __init__(self, token: str) -> None:
"""
Constructor.
Args:
token: Fimfiction authorization bearer.
"""
self.session = self.create_session(token)
def create_session(self, token: str) -> Session:
"""
Creates a jsonapi session with authorization.
Args:
token: Fimfiction authorization bearer.
Returns:
A jsonapi session containing the token.
"""
headers = {
'Accept-Encoding': 'gzip, deflate',
'Authorization': f'Bearer {token}',
'User-Agent': f'fimfarchive/{VERSION}',
}
return Session(
server_url='https://www.fimfiction.net/api/v2/',
request_kwargs={'headers': headers},
)
def create_filter(self, params: QueryParams) -> Filter:
"""
Creates a jsonapi filter from query parameters.
Args:
params: Parameters to create a filter for.
Returns:
A jsonapi filter matching the parameters.
"""
joined: Dict[str, str] = OrderedDict()
for key, value in sorted(params.items()):
joined[key] = ','.join(sorted(value))
return Filter(urlencode(joined))
def get(self, path: str, params: QueryParams = dict()) -> Document:
"""
Performs a jsonapi request.
Args:
resource: Path to the resource.
params: Parameters for the request.
Returns:
A jsonapi response document.
"""
query = self.create_filter(params)
return self.session.get(path, query)
class Requester(ABC):
"""
Performs Fimfiction APIv2 requests.
"""
@abstractmethod
def get_meta(self, key: int) -> ResourceObject:
"""
Performs an API request for story meta.
Args:
key: Primary key of the story.
Returns:
A resource object containing story meta.
Raises:
InvalidStoryError: If a valid story is not found.
StorySourceError: If source does not return valid data.
"""
@abstractmethod
def get_data(self, key: int) -> Iterator[ResourceObject]:
"""
Performs an API request for story data.
Args:
key: Primary key of the story.
Returns:
Resource objects containing story chapters.
Raises:
InvalidStoryError: If a valid story is not found.
StorySourceError: If source does not return valid data.
"""
class SingleRequester(Requester):
"""
Requests stories one by one.
"""
def __init__(self, client: ApiClient) -> None:
"""
Constructor.
Args:
client: Client to use for queries.
"""
self.client = client
def error(self, key: int, status: int) -> FimfarchiveError:
"""
Creates an exception for the status.
Args:
key: Primary key of the story.
status: Status code of the response.
Returns:
A fimfarchive exception instance.
"""
if status == 403:
return InvalidStoryError(f"Private story: {key}")
elif status == 404:
return InvalidStoryError(f"Missing story: {key}")
else:
return StorySourceError(f"Bad HTTP status for {key}: {status}")
def get(self, key: int, path: str, params: QueryParams) -> Document:
"""
Performs a Fimfiction APIv2 request.
Args:
key: Primary key of the story.
path: Resource to query.
params: Query parameters.
Raises:
InvalidStoryError: If a valid story is not found.
StorySourceError: If source does not return valid data.
"""
try:
return self.client.get(path, params)
except DocumentError as e:
raise self.error(key, e.response.status_code) from e
except Exception as e:
raise StorySourceError("Unknown error for {key}.") from e
def get_meta(self, key: int) -> ResourceObject:
path = f'stories/{key}'
response = self.get(key, path, META_PARAMS)
return response.resource
def get_data(self, key: int) -> Iterator[ResourceObject]:
path = f'stories/{key}/chapters'
response = self.get(key, path, DATA_PARAMS)
return response.resources
class BulkRequester(Requester):
"""
Requests stories in bulk.
"""
response: Optional[Document]
resources: Dict[int, Optional[ResourceObject]]
def __init__(
self,
client: ApiClient,
bulk_meta: bool = True,
bulk_data: bool = True,
bulk_size: int = 16,
) -> None:
"""
Constructor.
Args:
client: Client to use for queries.
bulk_meta: Toggles bulk fetching of meta.
bulk_data: Toggles bulk fetching of data.
bulk_size: Number of items to request per batch.
"""
self.client = client
self.bulk_meta = bulk_meta
self.bulk_data = bulk_data
self.bulk_size = bulk_size
def __setattr__(self, name: str, value: Any) -> None:
"""
Resets the requester when necessary.
"""
try:
super().__setattr__(name, value)
finally:
if name in ('bulk_meta', 'bulk_data'):
self.reset()
def reset(self) -> None:
"""
Drops the currently cached story batch.
"""
self.response = None
self.resources = dict()
def create_params(self) -> QueryParams:
"""
Creates general query parameters for a request.
"""
params: QueryParams = defaultdict(set)
if self.bulk_meta:
for key, value in META_PARAMS.items():
params[key].update(value)
if self.bulk_data:
for key, value in DATA_PARAMS.items():
params[key].update(value)
return dict(params)
def cache(self, key: int) -> None:
"""
Caches a story batch from Fimfiction.
Args:
key: Primary key of the story.
"""
count = int(self.bulk_size)
lower = key - (key % count)
upper = lower + count
keys = range(lower, upper)
params = self.create_params()
params['page[size]'] = {str(len(keys) + 4)}
params['filter[ids]'] = {str(i) for i in keys}
self.response = self.client.get('stories', params)
self.resources = {key: None for key in keys}
for resource in self.response.resources:
self.resources[int(resource.id)] = resource
def fetch(self, key: int) -> ResourceObject:
"""
Fetches a resource from either cache or Fimfiction.
Args:
key: Primary key of the story.
Returns:
A resource object containing the story.
Raises:
InvalidStoryError: If a valid story is not found.
StorySourceError: If source does not return valid data.
"""
if key not in self.resources:
try:
self.cache(key)
except Exception as e:
self.reset()
raise StorySourceError("Unable to fetch.") from e
resource = self.resources[key]
if resource is None:
raise InvalidStoryError("Invalid story ID.")
return resource
def get_meta(self, key: int) -> ResourceObject:
if not self.bulk_meta:
raise StorySourceError("Bulk meta not enabled.")
return self.fetch(key)
def get_data(self, key: int) -> Iterator[ResourceObject]:
if not self.bulk_data:
raise StorySourceError("Bulk data not enabled.")
return self.fetch(key).chapters
class RoutedRequester(Requester):
"""
Routes between single and bulk requesters.
"""
def __init__(
self,
client: ApiClient,
bulk_meta: bool,
bulk_data: bool,
) -> None:
"""
Constructor.
Args:
client: Client to use for queries.
bulk_meta: Toggles bulk fetching of meta.
bulk_data: Toggles bulk fetching of data.
"""
self.single = SingleRequester(client)
self.bulk = BulkRequester(client, bulk_meta, bulk_data)
def get_meta(self, key: int) -> ResourceObject:
if self.bulk.bulk_meta:
return self.bulk.get_meta(key)
else:
return self.single.get_meta(key)
def get_data(self, key: int) -> Iterator[ResourceObject]:
if self.bulk.bulk_data:
return self.bulk.get_data(key)
else:
return self.single.get_data(key)
class Documentifier:
"""
Converts a resource into a dictionary.
"""
def merge(self, target: Dict, source: Dict) -> None:
"""
Copies items from source into target.
Args:
target: Dictionary to copy to.
source: Dictionary to copy from.
"""
for key, value in deepcopy(source).items():
assert key not in target
target[key] = value
def flatten(self, resource: ResourceObject) -> Dict[str, Any]:
"""
Flattens the resource into a dictionary.
Args:
resource: Resource to flatten.
Returns:
A dictionary representation.
"""
document: Dict[str, Any] = {
'id': int(resource.id),
}
self.merge(document, resource.json['attributes'])
self.merge(document, resource.meta.meta)
return document
def __call__(self, resource: ResourceObject) -> Dict[str, Any]:
"""
Applies the documentifier.
Args:
resource: Resource to documentify.
Returns:
A dictionary representation.
"""
return self.flatten(resource)
class MetaDocumentifier(Documentifier):
"""
Converts a resource into a story meta dictionary.
"""
fill = (
'cover_image',
'date_published',
)
remove = (
'content_html',
'authors_note_html',
'authors_note_position',
)
def fill_keys(self, meta: Dict[str, Any]) -> None:
"""
Fills keys that may be left out by Fimfiction.
Args:
meta: Dictionary to fill.
"""
for key in self.fill:
if key not in meta:
meta[key] = None
def remove_data(self, meta: Dict[str, Any]) -> None:
"""
Removes keys that may be left in by the bulk fetcher.
Args:
meta: Dictionary to clean.
"""
for chapter in meta['chapters']:
for key in self.remove:
if key in chapter:
del chapter[key]
def __call__(self, resource: ResourceObject) -> Dict[str, Any]:
meta = self.flatten(resource)
assert AUTHOR not in meta
meta[AUTHOR] = self.flatten(resource.author)
assert CHAPTERS not in meta
chapters = [self.flatten(chapter) for chapter in resource.chapters]
chapters.sort(key=lambda chapter: chapter['chapter_number'])
meta[CHAPTERS] = chapters
assert PREQUEL not in meta
prequel = getattr(resource.relationships, PREQUEL, None)
if prequel:
value = prequel._resource_identifier.id
meta[PREQUEL] = int(value)
else:
meta[PREQUEL] = None
assert TAGS not in meta
tags = [self.flatten(tag) for tag in resource.tags]
tags.sort(key=lambda tag: (tag['type'], tag['name']))
meta[TAGS] = tags
self.fill_keys(meta)
self.remove_data(meta)
return meta
class BetaFormatVerifier:
"""
Verifies that required keys are present in a dictionary.
"""
def __init__(self, requirements: Dict[str, Set[str]]) -> None:
"""
Constructor.
Args:
requirements: Specifies the required keys.
"""
self.requirements: Dict[str, Set[str]] = requirements
@classmethod
def from_params(
cls,
params: QueryParams,
mapping: Dict[str, str],
) -> 'BetaFormatVerifier':
"""
Constructor, using query parameters.
Args:
params: Query parameters to base the requirements on.
mapping: Mapping from document keys to resource types.
"""
requirements = dict()
for key, resource in mapping.items():
param = f'fields[{resource}]'
fields = deepcopy(params[param])
fields.update(('id', 'url'))
requirements[key] = fields
return cls(requirements)
@classmethod
def from_meta_params(cls) -> 'BetaFormatVerifier':
"""
Constructor, for creating a meta verifier.
"""
return cls.from_params(META_PARAMS, {
ROOT: 'story',
AUTHOR: 'user',
CHAPTERS: 'chapter',
TAGS: 'story_tag',
})
@classmethod
def from_data_params(cls) -> 'BetaFormatVerifier':
"""
Constructor, for creating a chapter verifier.
"""
return cls.from_params(DATA_PARAMS, {
ROOT: 'chapter',
})
def check(self, key: str, required: Set[str], data: Any) -> None:
"""
Checks dictionaries against a set of required keys.
Args:
key: Document key being checked.
required: Set of required keys.
data: Dictionaries to check.
Raises:
StorySourceError: If a dictionary is invalid.
"""
if isinstance(data, dict):
data = (data,)
for obj in data:
if obj.keys() < required:
missing = ", ".join(required - obj.keys())
message = f"Missing from {key}: {missing}"
raise StorySourceError(message)
def __call__(self, data: Dict[str, Any]) -> None:
"""
Applies the verifier to a dictionary.
Args:
data: Dictionary to check.
Raises:
StorySourceError: If a dictionary is invalid.
"""
for key, required in self.requirements.items():
if key == ROOT:
self.check(key, required, data)
else:
self.check(key, required, data[key])
class Fimfiction2Fetcher(Fetcher):
"""
Fetcher for Fimfiction APIv2.
"""
prefetch_meta = True
prefetch_data = False
flavors = frozenset((
StorySource.FIMFICTION,
DataFormat.JSON,
MetaFormat.BETA,
MetaPurity.DIRTY,
))
def __init__(self, token: str, bulk_meta=False, bulk_data=False) -> None:
"""
Constructor.
Args:
token: Authentication token for Fimfiction.
bulk_meta: Toggles bulk fetching of story meta.
bulk_data: Toggles bulk fetching of story data.
"""
client = ApiClient(token)
self.extract_meta = MetaDocumentifier()
self.extract_chapter = Documentifier()
self.verify_meta = BetaFormatVerifier.from_meta_params()
self.verify_chapter = BetaFormatVerifier.from_data_params()
self.requester = RoutedRequester(client, bulk_meta, bulk_data)
def fetch_meta(self, key: int) -> Dict[str, Any]:
resource = self.requester.get_meta(int(key))
meta = self.extract_meta(resource)
self.verify_meta(meta)
return meta
def fetch_data(self, key: int) -> bytes:
resource = self.requester.get_data(int(key))
chapters = [self.extract_chapter(chapter) for chapter in resource]
if not chapters:
raise InvalidStoryError("Missing chapters.")
for chapter in chapters:
self.verify_chapter(chapter)
chapters.sort(key=lambda chapter: chapter['chapter_number'])
data = json.dumps(
chapters,
indent=4,
ensure_ascii=False,
sort_keys=True
)
return data.encode()

View file

@ -3,6 +3,7 @@ bbcode
blinker
boltons
flake8
git+https://github.com/qvantel/jsonapi-client.git
jmespath
mypy
pytest

View file

@ -90,6 +90,7 @@ setup(
'blinker',
'boltons',
'jmespath',
'jsonapi-client',
'requests',
'tqdm',
),

View file

@ -0,0 +1,158 @@
"""
Fimfiction APIv2 fetcher tests.
"""
#
# Fimfarchive, preserves stories from Fimfiction.
# Copyright (C) 2015 Joakim Soderlund
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import json
import os
import pytest
from fimfarchive.exceptions import InvalidStoryError
from fimfarchive.fetchers import Fimfiction2Fetcher
VALID_STORY_KEY = 9
AVATAR_STORY_KEY = 5764
COVER_STORY_KEY = 444
PUBLISHED_STORY_KEY = 25739
INVALID_STORY_KEY = 7
EMPTY_STORY_KEY = 199462
HIDDEN_STORY_KEY = 8
PROTECTED_STORY_KEY = 208799
AVATAR_PLACEHOLDER = {
'32': 'https://static.fimfiction.net/images/none_32.png',
'64': 'https://static.fimfiction.net/images/none_64.png',
}
BULK_COMBINATIONS = [
(False, False),
(False, True),
(True, False),
(True, True),
]
class TestFimfiction2Fetcher:
"""
Fimfarchive2Fetcher tests.
"""
@pytest.fixture(params=BULK_COMBINATIONS)
def fetcher(self, request):
"""
Returns a Fimfarchive2Fetcher instance.
"""
bulk_meta, bulk_data = request.param
token = os.environ['FIMFICTION_ACCESS_TOKEN']
fetcher = Fimfiction2Fetcher(token, bulk_meta, bulk_data)
fetcher.prefetch_meta = False
fetcher.prefetch_data = False
yield fetcher
def fetch_valid(self, fetcher, key):
"""
Fetches a valid story.
"""
story = fetcher.fetch(key)
assert story.meta['id'] == key
assert json.loads(story.data.decode())
return story
def fetch_invalid(self, fetcher, key):
"""
Fetches an invalid story.
"""
story = fetcher.fetch(key)
with pytest.raises(InvalidStoryError):
story.meta
with pytest.raises(InvalidStoryError):
story.data
return story
def test_valid(self, fetcher):
"""
Tests fetching a valid story.
"""
self.fetch_valid(fetcher, VALID_STORY_KEY)
def test_valid_missing_cover(self, fetcher):
"""
Tests fetching a valid story without cover.
"""
story = self.fetch_valid(fetcher, COVER_STORY_KEY)
assert story.meta['cover_image'] is None
def test_valid_missing_avatar(self, fetcher):
"""
Tests fetching a valid story without avatar.
"""
story = self.fetch_valid(fetcher, AVATAR_STORY_KEY)
avatar = story.meta['author']['avatar']
assert AVATAR_PLACEHOLDER['32'] == avatar['32']
assert AVATAR_PLACEHOLDER['64'] == avatar['64']
assert AVATAR_PLACEHOLDER['64'] == avatar['96']
def test_valid_missing_published_date(self, fetcher):
"""
Tests fetching a valid story without published date.
"""
story = self.fetch_valid(fetcher, PUBLISHED_STORY_KEY)
assert story.meta['date_published'] is None
def test_empty_story(self, fetcher):
"""
Test fetching a story without chapters.
"""
story = fetcher.fetch(EMPTY_STORY_KEY)
assert story.meta['id'] == EMPTY_STORY_KEY
with pytest.raises(InvalidStoryError):
story.data
def test_invalid_story(self, fetcher):
"""
Tests fetching an invalid story.
"""
self.fetch_invalid(fetcher, INVALID_STORY_KEY)
def test_hidden_story(self, fetcher):
"""
Tests fetching a hidden story.
"""
self.fetch_invalid(fetcher, HIDDEN_STORY_KEY)
def test_protected_story(self, fetcher):
"""
Tests fetching a password-protected story.
"""
self.fetch_invalid(fetcher, PROTECTED_STORY_KEY)

View file

@ -17,6 +17,7 @@ commands =
addopts =
--ignore tests/fetchers/test_fimfarchive.py
--ignore tests/fetchers/test_fimfiction.py
--ignore tests/fetchers/test_fimfiction2.py
tests
[flake8]