Add fetcher for Fimfiction APIv2

This commit is contained in:
Joakim Soderlund 2018-07-15 20:01:45 +02:00
parent 0ee02b971a
commit aaa309d3a8
6 changed files with 901 additions and 0 deletions

View file

@ -26,6 +26,7 @@ from .base import Fetcher
from .directory import DirectoryFetcher from .directory import DirectoryFetcher
from .fimfarchive import FimfarchiveFetcher from .fimfarchive import FimfarchiveFetcher
from .fimfiction import FimfictionFetcher from .fimfiction import FimfictionFetcher
from .fimfiction2 import Fimfiction2Fetcher
__all__ = ( __all__ = (
@ -33,4 +34,5 @@ __all__ = (
'DirectoryFetcher', 'DirectoryFetcher',
'FimfarchiveFetcher', 'FimfarchiveFetcher',
'FimfictionFetcher', 'FimfictionFetcher',
'Fimfiction2Fetcher',
) )

View file

@ -0,0 +1,738 @@
"""
Fimfiction APIv2 fetcher.
"""
#
# Fimfarchive, preserves stories from Fimfiction.
# Copyright (C) 2015 Joakim Soderlund
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import json
from abc import ABC, abstractmethod
from collections import OrderedDict, defaultdict
from copy import deepcopy
from typing import Any, Dict, Iterator, Optional, Set
from urllib.parse import urlencode
from jsonapi_client import Filter, Session
from jsonapi_client.document import Document
from jsonapi_client.exceptions import DocumentError
from jsonapi_client.resourceobject import ResourceObject
from fimfarchive import __version__ as VERSION
from fimfarchive.flavors import DataFormat, MetaFormat, MetaPurity, StorySource
from fimfarchive.exceptions import (
FimfarchiveError,
InvalidStoryError,
StorySourceError,
)
from .base import Fetcher
# Names exported by this module.
__all__ = (
'BetaFormatVerifier',
'Fimfiction2Fetcher',
)
# Query parameters, mapping each parameter name to its set of values.
# The values are joined with commas when the query string is built.
QueryParams = Dict[str, Set[str]]
# Keys used in flattened story documents and in verifier requirements.
# ROOT does not name a nested key; the verifier uses it to refer to the
# top-level dictionary itself.
ROOT = 'root'
AUTHOR = 'author'
CHAPTERS = 'chapters'
PREQUEL = 'prequel'
TAGS = 'tags'
# Query parameters for story data requests. Used as-is by the single
# requester for chapter listings, and merged into the bulk query when
# bulk fetching of data is enabled.
DATA_PARAMS: QueryParams = {
'include': {
'chapters',
},
# Chapter fields needed to store chapter contents.
'fields[chapter]': {
'authors_note_html',
'authors_note_position',
'chapter_number',
'content_html',
'title',
},
'fields[story]': {
'chapters',
},
}
# Query parameters for story meta requests. Used as-is by the single
# requester, and merged into the bulk query when bulk fetching of meta
# is enabled. The field sets are also the basis for the requirements
# of the meta verifier.
META_PARAMS: QueryParams = {
'include': {
'author',
'chapters',
'tags',
},
# Chapter fields stored as part of story meta (no chapter contents).
'fields[chapter]': {
'chapter_number',
'date_modified',
'date_published',
'num_views',
'num_words',
'published',
'title',
},
'fields[story]': {
'author',
'chapters',
'color',
'completion_status',
'content_rating',
'cover_image',
'date_modified',
'date_published',
'date_updated',
'description_html',
'num_chapters',
'num_comments',
'num_dislikes',
'num_likes',
'num_views',
'num_words',
'prequel',
'published',
'rating',
'short_description',
'status',
'submitted',
'tags',
'title',
'total_num_views',
},
'fields[story_tag]': {
'name',
'type',
},
'fields[user]': {
'avatar',
'bio_html',
'date_joined',
'name',
'num_blog_posts',
'num_followers',
'num_stories',
},
}
class ApiClient:
    """
    Performs API requests.
    """

    def __init__(self, token: str) -> None:
        """
        Constructor.

        Args:
            token: Fimfiction authorization bearer.
        """
        self.session = self.create_session(token)

    def create_session(self, token: str) -> Session:
        """
        Creates a jsonapi session with authorization.

        Args:
            token: Fimfiction authorization bearer.

        Returns:
            A jsonapi session containing the token.
        """
        headers = {
            'Accept-Encoding': 'gzip, deflate',
            'Authorization': f'Bearer {token}',
            'User-Agent': f'fimfarchive/{VERSION}',
        }

        return Session(
            server_url='https://www.fimfiction.net/api/v2/',
            request_kwargs={'headers': headers},
        )

    def create_filter(self, params: QueryParams) -> Filter:
        """
        Creates a jsonapi filter from query parameters.

        Both the parameter names and the values of each parameter are
        sorted, so equal parameters always yield the same query string.

        Args:
            params: Parameters to create a filter for.

        Returns:
            A jsonapi filter matching the parameters.
        """
        joined: Dict[str, str] = OrderedDict()

        for key, value in sorted(params.items()):
            joined[key] = ','.join(sorted(value))

        return Filter(urlencode(joined))

    def get(
            self,
            path: str,
            params: Optional[QueryParams] = None,
    ) -> Document:
        """
        Performs a jsonapi request.

        Args:
            path: Path to the resource.
            params: Parameters for the request, or None for no parameters.

        Returns:
            A jsonapi response document.
        """
        # A fresh dictionary per call avoids sharing one mutable default
        # object between every invocation of this method.
        if params is None:
            params = {}

        query = self.create_filter(params)

        return self.session.get(path, query)
class Requester(ABC):
    """
    Interface for objects that request stories from Fimfiction APIv2.
    """

    @abstractmethod
    def get_meta(self, key: int) -> ResourceObject:
        """
        Requests the meta of a single story.

        Args:
            key: Primary key of the story.

        Returns:
            The resource object holding the story meta.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """

    @abstractmethod
    def get_data(self, key: int) -> Iterator[ResourceObject]:
        """
        Requests the chapters of a single story.

        Args:
            key: Primary key of the story.

        Returns:
            The resource objects holding the story chapters.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """
class SingleRequester(Requester):
    """
    Requests stories one by one.
    """

    def __init__(self, client: ApiClient) -> None:
        """
        Constructor.

        Args:
            client: Client to use for queries.
        """
        self.client = client

    def error(self, key: int, status: int) -> FimfarchiveError:
        """
        Creates an exception for the status.

        The exception is returned, not raised, so that the caller can
        chain it to the original error.

        Args:
            key: Primary key of the story.
            status: Status code of the response.

        Returns:
            A fimfarchive exception instance.
        """
        if status == 403:
            return InvalidStoryError(f"Private story: {key}")

        if status == 404:
            return InvalidStoryError(f"Missing story: {key}")

        return StorySourceError(f"Bad HTTP status for {key}: {status}")

    def get(self, key: int, path: str, params: QueryParams) -> Document:
        """
        Performs a Fimfiction APIv2 request.

        Args:
            key: Primary key of the story.
            path: Resource to query.
            params: Query parameters.

        Returns:
            A jsonapi response document.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """
        try:
            return self.client.get(path, params)
        except DocumentError as e:
            raise self.error(key, e.response.status_code) from e
        except Exception as e:
            # The f-prefix is required for the key to be interpolated.
            raise StorySourceError(f"Unknown error for {key}.") from e

    def get_meta(self, key: int) -> ResourceObject:
        path = f'stories/{key}'
        response = self.get(key, path, META_PARAMS)

        return response.resource

    def get_data(self, key: int) -> Iterator[ResourceObject]:
        path = f'stories/{key}/chapters'
        response = self.get(key, path, DATA_PARAMS)

        return response.resources
class BulkRequester(Requester):
    """
    Requests stories in batches, keeping the latest batch cached.
    """

    # Response document of the currently cached batch, if any.
    response: Optional[Document]
    # Cached stories by primary key; None marks a key without a story.
    resources: Dict[int, Optional[ResourceObject]]

    def __init__(
            self,
            client: ApiClient,
            bulk_meta: bool = True,
            bulk_data: bool = True,
            bulk_size: int = 16,
    ) -> None:
        """
        Constructor.

        Args:
            client: Client to use for queries.
            bulk_meta: Toggles bulk fetching of meta.
            bulk_data: Toggles bulk fetching of data.
            bulk_size: Number of items to request per batch.
        """
        self.client = client
        # Assigning either toggle also initializes the empty cache.
        self.bulk_meta = bulk_meta
        self.bulk_data = bulk_data
        self.bulk_size = bulk_size

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Sets an attribute, dropping the cache if a bulk toggle is set.
        """
        clears_cache = name in ('bulk_meta', 'bulk_data')

        try:
            super().__setattr__(name, value)
        finally:
            # The toggles decide which fields are requested, so a batch
            # cached under other toggles must not be served afterwards.
            if clears_cache:
                self.reset()

    def reset(self) -> None:
        """
        Forgets the cached batch, if there is one.
        """
        self.response = None
        self.resources = {}

    def create_params(self) -> QueryParams:
        """
        Builds query parameters covering every enabled bulk mode.

        Returns:
            The union of the meta and data parameters that are enabled.
        """
        merged: QueryParams = defaultdict(set)

        sources = (
            (self.bulk_meta, META_PARAMS),
            (self.bulk_data, DATA_PARAMS),
        )

        for enabled, source in sources:
            if not enabled:
                continue

            for name, values in source.items():
                merged[name].update(values)

        return dict(merged)

    def cache(self, key: int) -> None:
        """
        Requests and caches the batch that contains the story.

        Batches are aligned to multiples of the bulk size, so any key
        within the same batch results in the same request.

        Args:
            key: Primary key of the story.
        """
        size = int(self.bulk_size)
        start = key - (key % size)
        batch = range(start, start + size)

        params = self.create_params()
        params['page[size]'] = {str(len(batch) + 4)}
        params['filter[ids]'] = {str(story_id) for story_id in batch}

        self.response = self.client.get('stories', params)
        # Every requested key starts out as missing, and is replaced
        # below if the response contains a story for it.
        self.resources = dict.fromkeys(batch)

        for resource in self.response.resources:
            self.resources[int(resource.id)] = resource

    def fetch(self, key: int) -> ResourceObject:
        """
        Returns a story from the cache, requesting its batch if needed.

        Args:
            key: Primary key of the story.

        Returns:
            A resource object containing the story.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """
        cached = key in self.resources

        if not cached:
            try:
                self.cache(key)
            except Exception as e:
                # Do not keep a partially populated batch around.
                self.reset()
                raise StorySourceError("Unable to fetch.") from e

        story = self.resources[key]

        if story is not None:
            return story

        raise InvalidStoryError("Invalid story ID.")

    def get_meta(self, key: int) -> ResourceObject:
        if self.bulk_meta:
            return self.fetch(key)

        raise StorySourceError("Bulk meta not enabled.")

    def get_data(self, key: int) -> Iterator[ResourceObject]:
        if self.bulk_data:
            return self.fetch(key).chapters

        raise StorySourceError("Bulk data not enabled.")
class RoutedRequester(Requester):
    """
    Delegates each request to either a single or a bulk requester.
    """

    def __init__(
            self,
            client: ApiClient,
            bulk_meta: bool,
            bulk_data: bool,
    ) -> None:
        """
        Constructor.

        Args:
            client: Client to use for queries.
            bulk_meta: Toggles bulk fetching of meta.
            bulk_data: Toggles bulk fetching of data.
        """
        self.single = SingleRequester(client)
        self.bulk = BulkRequester(client, bulk_meta, bulk_data)

    def get_meta(self, key: int) -> ResourceObject:
        # The bulk requester holds the toggles, and is asked every time.
        requester = self.bulk if self.bulk.bulk_meta else self.single

        return requester.get_meta(key)

    def get_data(self, key: int) -> Iterator[ResourceObject]:
        requester = self.bulk if self.bulk.bulk_data else self.single

        return requester.get_data(key)
class Documentifier:
    """
    Turns a resource object into a plain dictionary.
    """

    def merge(self, target: Dict, source: Dict) -> None:
        """
        Adds deep copies of all source items to the target.

        Keys already present in the target trip an assertion.

        Args:
            target: Dictionary to copy to.
            source: Dictionary to copy from.
        """
        copied = deepcopy(source)

        for key in copied:
            assert key not in target
            target[key] = copied[key]

    def flatten(self, resource: ResourceObject) -> Dict[str, Any]:
        """
        Combines the ID, attributes, and meta of a resource.

        Args:
            resource: Resource to flatten.

        Returns:
            A dictionary representation.
        """
        flattened: Dict[str, Any] = {'id': int(resource.id)}

        for source in (resource.json['attributes'], resource.meta.meta):
            self.merge(flattened, source)

        return flattened

    def __call__(self, resource: ResourceObject) -> Dict[str, Any]:
        """
        Converts the resource by flattening it.

        Args:
            resource: Resource to documentify.

        Returns:
            A dictionary representation.
        """
        return self.flatten(resource)
class MetaDocumentifier(Documentifier):
    """
    Turns a story resource into a story meta dictionary.
    """

    # Story keys set to None when absent from the flattened meta.
    fill = (
        'cover_image',
        'date_published',
    )

    # Chapter keys that belong to story data, not to story meta.
    remove = (
        'content_html',
        'authors_note_html',
        'authors_note_position',
    )

    def fill_keys(self, meta: Dict[str, Any]) -> None:
        """
        Adds the keys that Fimfiction may have left out, as None.

        Args:
            meta: Dictionary to fill.
        """
        for key in self.fill:
            meta.setdefault(key, None)

    def remove_data(self, meta: Dict[str, Any]) -> None:
        """
        Strips data keys the bulk fetcher may have left in chapters.

        Args:
            meta: Dictionary to clean.
        """
        for chapter in meta['chapters']:
            for key in self.remove:
                chapter.pop(key, None)

    def __call__(self, resource: ResourceObject) -> Dict[str, Any]:
        meta = self.flatten(resource)

        assert AUTHOR not in meta
        meta[AUTHOR] = self.flatten(resource.author)

        assert CHAPTERS not in meta
        meta[CHAPTERS] = sorted(
            (self.flatten(chapter) for chapter in resource.chapters),
            key=lambda chapter: chapter['chapter_number'],
        )

        # The prequel is stored as a bare story ID, or None if absent.
        assert PREQUEL not in meta
        relation = getattr(resource.relationships, PREQUEL, None)
        meta[PREQUEL] = (
            int(relation._resource_identifier.id) if relation else None
        )

        assert TAGS not in meta
        meta[TAGS] = sorted(
            (self.flatten(tag) for tag in resource.tags),
            key=lambda tag: (tag['type'], tag['name']),
        )

        self.fill_keys(meta)
        self.remove_data(meta)

        return meta
class BetaFormatVerifier:
    """
    Verifies that required keys are present in a dictionary.
    """

    def __init__(self, requirements: Dict[str, Set[str]]) -> None:
        """
        Constructor.

        Args:
            requirements: Specifies the required keys. Maps each document
                key to the keys required in the dictionaries stored under
                it, where the root key refers to the document itself.
        """
        self.requirements: Dict[str, Set[str]] = requirements

    @classmethod
    def from_params(
            cls,
            params: QueryParams,
            mapping: Dict[str, str],
    ) -> 'BetaFormatVerifier':
        """
        Constructor, using query parameters.

        Args:
            params: Query parameters to base the requirements on.
            mapping: Mapping from document keys to resource types.

        Returns:
            A verifier requiring the requested fields, plus ID and URL.
        """
        requirements: Dict[str, Set[str]] = {}

        for key, resource in mapping.items():
            # Build a new set so that the shared parameters stay intact.
            fields = set(params[f'fields[{resource}]'])
            fields.update(('id', 'url'))
            requirements[key] = fields

        return cls(requirements)

    @classmethod
    def from_meta_params(cls) -> 'BetaFormatVerifier':
        """
        Constructor, for creating a meta verifier.
        """
        return cls.from_params(META_PARAMS, {
            ROOT: 'story',
            AUTHOR: 'user',
            CHAPTERS: 'chapter',
            TAGS: 'story_tag',
        })

    @classmethod
    def from_data_params(cls) -> 'BetaFormatVerifier':
        """
        Constructor, for creating a chapter verifier.
        """
        return cls.from_params(DATA_PARAMS, {
            ROOT: 'chapter',
        })

    def check(self, key: str, required: Set[str], data: Any) -> None:
        """
        Checks dictionaries against a set of required keys.

        Args:
            key: Document key being checked.
            required: Set of required keys.
            data: Dictionaries to check; either one dictionary,
                or an iterable of dictionaries.

        Raises:
            StorySourceError: If a dictionary is invalid.
        """
        if isinstance(data, dict):
            data = (data,)

        for obj in data:
            # Compute the missing keys directly. A strict subset test on
            # the present keys would let any dictionary with an extra
            # key pass, even when required keys are absent.
            missing = required - obj.keys()

            if missing:
                names = ", ".join(sorted(missing))
                raise StorySourceError(f"Missing from {key}: {names}")

    def __call__(self, data: Dict[str, Any]) -> None:
        """
        Applies the verifier to a dictionary.

        Args:
            data: Dictionary to check.

        Raises:
            StorySourceError: If a dictionary is invalid.
        """
        for key, required in self.requirements.items():
            if key == ROOT:
                self.check(key, required, data)
            else:
                self.check(key, required, data[key])
class Fimfiction2Fetcher(Fetcher):
    """
    Fetches stories through Fimfiction APIv2.
    """

    prefetch_meta = True
    prefetch_data = False

    flavors = frozenset((
        StorySource.FIMFICTION,
        DataFormat.JSON,
        MetaFormat.BETA,
        MetaPurity.DIRTY,
    ))

    def __init__(self, token: str, bulk_meta=False, bulk_data=False) -> None:
        """
        Constructor.

        Args:
            token: Authentication token for Fimfiction.
            bulk_meta: Toggles bulk fetching of story meta.
            bulk_data: Toggles bulk fetching of story data.
        """
        client = ApiClient(token)

        # Converters from API resources into plain dictionaries.
        self.extract_meta = MetaDocumentifier()
        self.extract_chapter = Documentifier()

        # Verifiers for the converted dictionaries.
        self.verify_meta = BetaFormatVerifier.from_meta_params()
        self.verify_chapter = BetaFormatVerifier.from_data_params()

        self.requester = RoutedRequester(client, bulk_meta, bulk_data)

    def fetch_meta(self, key: int) -> Dict[str, Any]:
        story = self.requester.get_meta(int(key))
        document = self.extract_meta(story)
        self.verify_meta(document)

        return document

    def fetch_data(self, key: int) -> bytes:
        resources = self.requester.get_data(int(key))
        chapters = [self.extract_chapter(item) for item in resources]

        # A story without chapters is treated as invalid.
        if not chapters:
            raise InvalidStoryError("Missing chapters.")

        for chapter in chapters:
            self.verify_chapter(chapter)

        ordered = sorted(
            chapters,
            key=lambda chapter: chapter['chapter_number'],
        )

        serialized = json.dumps(
            ordered,
            indent=4,
            ensure_ascii=False,
            sort_keys=True,
        )

        return serialized.encode()

View file

@ -3,6 +3,7 @@ bbcode
blinker blinker
boltons boltons
flake8 flake8
git+https://github.com/qvantel/jsonapi-client.git
jmespath jmespath
mypy mypy
pytest pytest

View file

@ -90,6 +90,7 @@ setup(
'blinker', 'blinker',
'boltons', 'boltons',
'jmespath', 'jmespath',
'jsonapi-client',
'requests', 'requests',
'tqdm', 'tqdm',
), ),

View file

@ -0,0 +1,158 @@
"""
Fimfiction APIv2 fetcher tests.
"""
#
# Fimfarchive, preserves stories from Fimfiction.
# Copyright (C) 2015 Joakim Soderlund
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import json
import os
import pytest
from fimfarchive.exceptions import InvalidStoryError
from fimfarchive.fetchers import Fimfiction2Fetcher
# Keys of stories that the tests expect to be fetchable.
VALID_STORY_KEY = 9
AVATAR_STORY_KEY = 5764
COVER_STORY_KEY = 444
PUBLISHED_STORY_KEY = 25739
# Keys of stories for which the tests expect InvalidStoryError,
# for the data only in the case of the empty story.
INVALID_STORY_KEY = 7
EMPTY_STORY_KEY = 199462
HIDDEN_STORY_KEY = 8
PROTECTED_STORY_KEY = 208799
# Avatar URLs expected for the author of the story without avatar.
AVATAR_PLACEHOLDER = {
'32': 'https://static.fimfiction.net/images/none_32.png',
'64': 'https://static.fimfiction.net/images/none_64.png',
}
# Every combination of the (bulk_meta, bulk_data) fetcher toggles.
BULK_COMBINATIONS = [
(False, False),
(False, True),
(True, False),
(True, True),
]
class TestFimfiction2Fetcher:
    """
    Fimfiction2Fetcher tests.
    """

    @pytest.fixture(params=BULK_COMBINATIONS)
    def fetcher(self, request):
        """
        Returns a Fimfiction2Fetcher instance without prefetching.

        The access token is read from the FIMFICTION_ACCESS_TOKEN
        environment variable, and each test runs once per combination
        of the bulk toggles.
        """
        use_bulk_meta, use_bulk_data = request.param
        access_token = os.environ['FIMFICTION_ACCESS_TOKEN']

        instance = Fimfiction2Fetcher(
            access_token,
            use_bulk_meta,
            use_bulk_data,
        )

        instance.prefetch_meta = False
        instance.prefetch_data = False

        yield instance

    def fetch_valid(self, fetcher, key):
        """
        Fetches a story, and checks that its meta and data are usable.
        """
        story = fetcher.fetch(key)

        assert story.meta['id'] == key
        assert json.loads(story.data.decode())

        return story

    def fetch_invalid(self, fetcher, key):
        """
        Fetches a story, and checks that meta and data are both invalid.
        """
        story = fetcher.fetch(key)

        with pytest.raises(InvalidStoryError):
            story.meta

        with pytest.raises(InvalidStoryError):
            story.data

        return story

    def test_valid(self, fetcher):
        """
        Tests that a valid story can be fetched.
        """
        self.fetch_valid(fetcher, VALID_STORY_KEY)

    def test_valid_missing_cover(self, fetcher):
        """
        Tests that a story without cover gets a None cover image.
        """
        story = self.fetch_valid(fetcher, COVER_STORY_KEY)

        assert story.meta['cover_image'] is None

    def test_valid_missing_avatar(self, fetcher):
        """
        Tests that an author without avatar gets placeholder images.
        """
        story = self.fetch_valid(fetcher, AVATAR_STORY_KEY)
        avatar = story.meta['author']['avatar']

        assert AVATAR_PLACEHOLDER['32'] == avatar['32']
        assert AVATAR_PLACEHOLDER['64'] == avatar['64']
        # The 96 entry is compared against the 64 placeholder.
        # NOTE(review): presumably no larger placeholder exists; confirm.
        assert AVATAR_PLACEHOLDER['64'] == avatar['96']

    def test_valid_missing_published_date(self, fetcher):
        """
        Tests that a story without published date gets a None date.
        """
        story = self.fetch_valid(fetcher, PUBLISHED_STORY_KEY)

        assert story.meta['date_published'] is None

    def test_empty_story(self, fetcher):
        """
        Tests that a story without chapters has meta, but invalid data.
        """
        story = fetcher.fetch(EMPTY_STORY_KEY)

        assert story.meta['id'] == EMPTY_STORY_KEY

        with pytest.raises(InvalidStoryError):
            story.data

    def test_invalid_story(self, fetcher):
        """
        Tests that an invalid story is reported as invalid.
        """
        self.fetch_invalid(fetcher, INVALID_STORY_KEY)

    def test_hidden_story(self, fetcher):
        """
        Tests that a hidden story is reported as invalid.
        """
        self.fetch_invalid(fetcher, HIDDEN_STORY_KEY)

    def test_protected_story(self, fetcher):
        """
        Tests that a password-protected story is reported as invalid.
        """
        self.fetch_invalid(fetcher, PROTECTED_STORY_KEY)

View file

@ -17,6 +17,7 @@ commands =
addopts = addopts =
--ignore tests/fetchers/test_fimfarchive.py --ignore tests/fetchers/test_fimfarchive.py
--ignore tests/fetchers/test_fimfiction.py --ignore tests/fetchers/test_fimfiction.py
--ignore tests/fetchers/test_fimfiction2.py
tests tests
[flake8] [flake8]