mirror of
https://github.com/JockeTF/fimfarchive.git
synced 2024-11-22 05:17:59 +01:00
Add fetcher for Fimfiction APIv2
This commit is contained in:
parent
0ee02b971a
commit
aaa309d3a8
6 changed files with 901 additions and 0 deletions
|
@ -26,6 +26,7 @@ from .base import Fetcher
|
||||||
from .directory import DirectoryFetcher
|
from .directory import DirectoryFetcher
|
||||||
from .fimfarchive import FimfarchiveFetcher
|
from .fimfarchive import FimfarchiveFetcher
|
||||||
from .fimfiction import FimfictionFetcher
|
from .fimfiction import FimfictionFetcher
|
||||||
|
from .fimfiction2 import Fimfiction2Fetcher
|
||||||
|
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
|
@ -33,4 +34,5 @@ __all__ = (
|
||||||
'DirectoryFetcher',
|
'DirectoryFetcher',
|
||||||
'FimfarchiveFetcher',
|
'FimfarchiveFetcher',
|
||||||
'FimfictionFetcher',
|
'FimfictionFetcher',
|
||||||
|
'Fimfiction2Fetcher',
|
||||||
)
|
)
|
||||||
|
|
738
fimfarchive/fetchers/fimfiction2.py
Normal file
738
fimfarchive/fetchers/fimfiction2.py
Normal file
|
@ -0,0 +1,738 @@
|
||||||
|
"""
|
||||||
|
Fimfiction APIv2 fetcher.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Fimfarchive, preserves stories from Fimfiction.
|
||||||
|
# Copyright (C) 2015 Joakim Soderlund
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
import json
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from collections import OrderedDict, defaultdict
|
||||||
|
from copy import deepcopy
|
||||||
|
from typing import Any, Dict, Iterator, Optional, Set
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
from jsonapi_client import Filter, Session
|
||||||
|
from jsonapi_client.document import Document
|
||||||
|
from jsonapi_client.exceptions import DocumentError
|
||||||
|
from jsonapi_client.resourceobject import ResourceObject
|
||||||
|
|
||||||
|
from fimfarchive import __version__ as VERSION
|
||||||
|
from fimfarchive.flavors import DataFormat, MetaFormat, MetaPurity, StorySource
|
||||||
|
|
||||||
|
from fimfarchive.exceptions import (
|
||||||
|
FimfarchiveError,
|
||||||
|
InvalidStoryError,
|
||||||
|
StorySourceError,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .base import Fetcher
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = (
|
||||||
|
'BetaFormatVerifier',
|
||||||
|
'Fimfiction2Fetcher',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Maps a query parameter name onto the set of values sent for it.
QueryParams = Dict[str, Set[str]]


# Names of the sections found in a flattened story document.
ROOT = 'root'
AUTHOR = 'author'
CHAPTERS = 'chapters'
PREQUEL = 'prequel'
TAGS = 'tags'


# Parameters used when requesting story data, i.e. chapter contents.
DATA_PARAMS: QueryParams = {
    'include': {'chapters'},
    'fields[chapter]': {
        'authors_note_html', 'authors_note_position',
        'chapter_number', 'content_html', 'title',
    },
    'fields[story]': {'chapters'},
}


# Parameters used when requesting story meta.
META_PARAMS: QueryParams = {
    'include': {'author', 'chapters', 'tags'},
    'fields[chapter]': {
        'chapter_number', 'date_modified', 'date_published',
        'num_views', 'num_words', 'published', 'title',
    },
    'fields[story]': {
        'author', 'chapters', 'color', 'completion_status',
        'content_rating', 'cover_image', 'date_modified',
        'date_published', 'date_updated', 'description_html',
        'num_chapters', 'num_comments', 'num_dislikes', 'num_likes',
        'num_views', 'num_words', 'prequel', 'published', 'rating',
        'short_description', 'status', 'submitted', 'tags', 'title',
        'total_num_views',
    },
    'fields[story_tag]': {'name', 'type'},
    'fields[user]': {
        'avatar', 'bio_html', 'date_joined', 'name',
        'num_blog_posts', 'num_followers', 'num_stories',
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
class ApiClient:
    """
    Performs API requests.
    """

    def __init__(self, token: str) -> None:
        """
        Constructor.

        Args:
            token: Fimfiction authorization bearer.
        """
        self.session = self.create_session(token)

    def create_session(self, token: str) -> Session:
        """
        Creates a jsonapi session with authorization.

        Args:
            token: Fimfiction authorization bearer.

        Returns:
            A jsonapi session containing the token.
        """
        headers = {
            'Accept-Encoding': 'gzip, deflate',
            'Authorization': f'Bearer {token}',
            'User-Agent': f'fimfarchive/{VERSION}',
        }

        return Session(
            server_url='https://www.fimfiction.net/api/v2/',
            request_kwargs={'headers': headers},
        )

    def create_filter(self, params: QueryParams) -> Filter:
        """
        Creates a jsonapi filter from query parameters.

        Args:
            params: Parameters to create a filter for.

        Returns:
            A jsonapi filter matching the parameters.
        """
        joined: Dict[str, str] = OrderedDict()

        # Both names and values are sorted, so that equal parameters
        # always result in the exact same query string.
        for key, value in sorted(params.items()):
            joined[key] = ','.join(sorted(value))

        return Filter(urlencode(joined))

    def get(
            self,
            path: str,
            params: Optional[QueryParams] = None,
    ) -> Document:
        """
        Performs a jsonapi request.

        Args:
            path: Path to the resource.
            params: Parameters for the request, or None for no parameters.

        Returns:
            A jsonapi response document.
        """
        # A fresh dictionary is created per call, instead of sharing one
        # mutable default instance between all calls.
        if params is None:
            params = dict()

        query = self.create_filter(params)

        return self.session.get(path, query)
|
||||||
|
|
||||||
|
|
||||||
|
class Requester(ABC):
    """
    Interface for objects performing Fimfiction APIv2 requests.
    """

    @abstractmethod
    def get_meta(self, key: int) -> ResourceObject:
        """
        Requests the meta of a single story from the API.

        Args:
            key: Primary key of the story.

        Returns:
            The resource object holding the meta of the story.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """

    @abstractmethod
    def get_data(self, key: int) -> Iterator[ResourceObject]:
        """
        Requests the data of a single story from the API.

        Args:
            key: Primary key of the story.

        Returns:
            The resource objects holding the chapters of the story.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """
|
||||||
|
|
||||||
|
|
||||||
|
class SingleRequester(Requester):
    """
    Requests stories one by one.
    """

    def __init__(self, client: ApiClient) -> None:
        """
        Constructor.

        Args:
            client: Client to use for queries.
        """
        self.client = client

    def error(self, key: int, status: int) -> FimfarchiveError:
        """
        Creates an exception for the status.

        The exception is returned, not raised, so that the caller can
        chain it to the original error.

        Args:
            key: Primary key of the story.
            status: Status code of the response.

        Returns:
            A fimfarchive exception instance.
        """
        if status == 403:
            return InvalidStoryError(f"Private story: {key}")
        elif status == 404:
            return InvalidStoryError(f"Missing story: {key}")
        else:
            return StorySourceError(f"Bad HTTP status for {key}: {status}")

    def get(self, key: int, path: str, params: QueryParams) -> Document:
        """
        Performs a Fimfiction APIv2 request.

        Args:
            key: Primary key of the story.
            path: Resource to query.
            params: Query parameters.

        Returns:
            A jsonapi response document.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """
        try:
            return self.client.get(path, params)
        except DocumentError as e:
            # The HTTP status decides which kind of error this is.
            raise self.error(key, e.response.status_code) from e
        except Exception as e:
            # The message must be an f-string for the key to show up.
            raise StorySourceError(f"Unknown error for {key}.") from e

    def get_meta(self, key: int) -> ResourceObject:
        """
        Requests the story itself, using the meta query parameters.

        Args:
            key: Primary key of the story.

        Returns:
            The single resource of the response document.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """
        path = f'stories/{key}'
        response = self.get(key, path, META_PARAMS)

        return response.resource

    def get_data(self, key: int) -> Iterator[ResourceObject]:
        """
        Requests the chapters of a story, using the data query parameters.

        Args:
            key: Primary key of the story.

        Returns:
            The resources of the response document.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """
        path = f'stories/{key}/chapters'
        response = self.get(key, path, DATA_PARAMS)

        return response.resources
|
||||||
|
|
||||||
|
|
||||||
|
class BulkRequester(Requester):
    """
    Requests stories in bulk.
    """

    # Most recent response document, or None when nothing is cached.
    response: Optional[Document]

    # Cached batch, where None marks keys absent from the response.
    resources: Dict[int, Optional[ResourceObject]]

    def __init__(
            self,
            client: ApiClient,
            bulk_meta: bool = True,
            bulk_data: bool = True,
            bulk_size: int = 16,
    ) -> None:
        """
        Constructor.

        Args:
            client: Client to use for queries.
            bulk_meta: Toggles bulk fetching of meta.
            bulk_data: Toggles bulk fetching of data.
            bulk_size: Number of items to request per batch.
        """
        self.client = client

        # Assigning either toggle also initializes the cache, since
        # attribute assignment is intercepted by this class.
        self.bulk_meta = bulk_meta
        self.bulk_data = bulk_data
        self.bulk_size = bulk_size

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Assigns the attribute, dropping the cache if a toggle was set.
        """
        invalidates = name in ('bulk_meta', 'bulk_data')

        try:
            super().__setattr__(name, value)
        finally:
            if invalidates:
                self.reset()

    def reset(self) -> None:
        """
        Forgets the cached response and its resources.
        """
        self.response = None
        self.resources = {}

    def create_params(self) -> QueryParams:
        """
        Combines the query parameters of all enabled toggles.

        Returns:
            Parameters merged from the meta and data parameters.
        """
        sources = []

        if self.bulk_meta:
            sources.append(META_PARAMS)

        if self.bulk_data:
            sources.append(DATA_PARAMS)

        merged: QueryParams = defaultdict(set)

        for source in sources:
            for name, values in source.items():
                merged[name].update(values)

        return dict(merged)

    def cache(self, key: int) -> None:
        """
        Requests and stores the batch which the story belongs to.

        Args:
            key: Primary key of the story.
        """
        size = int(self.bulk_size)

        # Batches are aligned to multiples of the batch size.
        first = key - (key % size)
        batch = range(first, first + size)

        query = self.create_params()
        query['page[size]'] = {str(len(batch) + 4)}
        query['filter[ids]'] = {str(item) for item in batch}

        self.response = self.client.get('stories', query)

        # Every requested key starts out as missing.
        self.resources = dict.fromkeys(batch)

        for story in self.response.resources:
            self.resources[int(story.id)] = story

    def fetch(self, key: int) -> ResourceObject:
        """
        Returns the story from cache, requesting its batch if needed.

        Args:
            key: Primary key of the story.

        Returns:
            A resource object containing the story.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If source does not return valid data.
        """
        cached = key in self.resources

        if not cached:
            try:
                self.cache(key)
            except Exception as e:
                self.reset()
                raise StorySourceError("Unable to fetch.") from e

        story = self.resources[key]

        if story is not None:
            return story

        raise InvalidStoryError("Invalid story ID.")

    def get_meta(self, key: int) -> ResourceObject:
        """
        Returns the story resource, if bulk meta is enabled.

        Args:
            key: Primary key of the story.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If disabled, or on invalid source data.
        """
        if self.bulk_meta:
            return self.fetch(key)

        raise StorySourceError("Bulk meta not enabled.")

    def get_data(self, key: int) -> Iterator[ResourceObject]:
        """
        Returns the chapters of the story, if bulk data is enabled.

        Args:
            key: Primary key of the story.

        Raises:
            InvalidStoryError: If a valid story is not found.
            StorySourceError: If disabled, or on invalid source data.
        """
        if self.bulk_data:
            return self.fetch(key).chapters

        raise StorySourceError("Bulk data not enabled.")
|
||||||
|
|
||||||
|
|
||||||
|
class RoutedRequester(Requester):
    """
    Delegates each request to a single or a bulk requester.
    """

    def __init__(
            self,
            client: ApiClient,
            bulk_meta: bool,
            bulk_data: bool,
    ) -> None:
        """
        Constructor.

        Args:
            client: Client to use for queries.
            bulk_meta: Toggles bulk fetching of meta.
            bulk_data: Toggles bulk fetching of data.
        """
        self.single = SingleRequester(client)
        self.bulk = BulkRequester(client, bulk_meta, bulk_data)

    def get_meta(self, key: int) -> ResourceObject:
        """
        Requests meta in bulk if enabled, or one by one otherwise.

        Args:
            key: Primary key of the story.
        """
        # The toggle is read on each call from the bulk requester.
        requester = self.bulk if self.bulk.bulk_meta else self.single

        return requester.get_meta(key)

    def get_data(self, key: int) -> Iterator[ResourceObject]:
        """
        Requests data in bulk if enabled, or one by one otherwise.

        Args:
            key: Primary key of the story.
        """
        requester = self.bulk if self.bulk.bulk_data else self.single

        return requester.get_data(key)
|
||||||
|
|
||||||
|
|
||||||
|
class Documentifier:
    """
    Turns a resource object into a plain dictionary.
    """

    def merge(self, target: Dict, source: Dict) -> None:
        """
        Adds deep copies of all source items to the target.

        Keys already present in the target trip an assertion.

        Args:
            target: Dictionary to copy to.
            source: Dictionary to copy from.
        """
        copied = deepcopy(source)

        for key in copied:
            assert key not in target
            target[key] = copied[key]

    def flatten(self, resource: ResourceObject) -> Dict[str, Any]:
        """
        Collects the ID, attributes, and meta of a resource.

        Args:
            resource: Resource to flatten.

        Returns:
            A dictionary representation.
        """
        flat: Dict[str, Any] = dict(id=int(resource.id))

        attributes = resource.json['attributes']
        self.merge(flat, attributes)

        extras = resource.meta.meta
        self.merge(flat, extras)

        return flat

    def __call__(self, resource: ResourceObject) -> Dict[str, Any]:
        """
        Flattens the resource.

        Args:
            resource: Resource to documentify.

        Returns:
            A dictionary representation.
        """
        return self.flatten(resource)
|
||||||
|
|
||||||
|
|
||||||
|
class MetaDocumentifier(Documentifier):
    """
    Turns a story resource into a story meta dictionary.
    """

    # Story keys which are set to None when absent.
    fill = (
        'cover_image',
        'date_published',
    )

    # Chapter keys which are stripped from the story meta.
    remove = (
        'content_html',
        'authors_note_html',
        'authors_note_position',
    )

    def fill_keys(self, meta: Dict[str, Any]) -> None:
        """
        Sets keys that may be left out by Fimfiction to None.

        Args:
            meta: Dictionary to fill.
        """
        for name in self.fill:
            meta.setdefault(name, None)

    def remove_data(self, meta: Dict[str, Any]) -> None:
        """
        Strips chapter keys that may be left in by the bulk fetcher.

        Args:
            meta: Dictionary to clean.
        """
        for chapter in meta['chapters']:
            for name in self.remove:
                chapter.pop(name, None)

    def __call__(self, resource: ResourceObject) -> Dict[str, Any]:
        """
        Flattens the story along with its related resources.

        Args:
            resource: Story resource to documentify.

        Returns:
            Story meta with author, chapters, prequel, and tags.
        """
        meta = self.flatten(resource)

        assert AUTHOR not in meta
        meta[AUTHOR] = self.flatten(resource.author)

        assert CHAPTERS not in meta
        meta[CHAPTERS] = sorted(
            (self.flatten(chapter) for chapter in resource.chapters),
            key=lambda chapter: chapter['chapter_number'],
        )

        assert PREQUEL not in meta
        prequel = getattr(resource.relationships, PREQUEL, None)

        # Only the ID of the prequel is stored, not the prequel itself.
        meta[PREQUEL] = (
            int(prequel._resource_identifier.id) if prequel else None
        )

        assert TAGS not in meta
        meta[TAGS] = sorted(
            (self.flatten(tag) for tag in resource.tags),
            key=lambda tag: (tag['type'], tag['name']),
        )

        self.fill_keys(meta)
        self.remove_data(meta)

        return meta
|
||||||
|
|
||||||
|
|
||||||
|
class BetaFormatVerifier:
    """
    Verifies that required keys are present in a dictionary.
    """

    def __init__(self, requirements: Dict[str, Set[str]]) -> None:
        """
        Constructor.

        Args:
            requirements: Specifies the required keys.
        """
        self.requirements: Dict[str, Set[str]] = requirements

    @classmethod
    def from_params(
            cls,
            params: QueryParams,
            mapping: Dict[str, str],
    ) -> 'BetaFormatVerifier':
        """
        Constructor, using query parameters.

        Args:
            params: Query parameters to base the requirements on.
            mapping: Mapping from document keys to resource types.
        """
        requirements: Dict[str, Set[str]] = dict()

        for key, resource in mapping.items():
            param = f'fields[{resource}]'

            # Copied so that the passed parameters stay unmodified.
            fields = deepcopy(params[param])
            fields.update(('id', 'url'))
            requirements[key] = fields

        return cls(requirements)

    @classmethod
    def from_meta_params(cls) -> 'BetaFormatVerifier':
        """
        Constructor, for creating a meta verifier.
        """
        return cls.from_params(META_PARAMS, {
            ROOT: 'story',
            AUTHOR: 'user',
            CHAPTERS: 'chapter',
            TAGS: 'story_tag',
        })

    @classmethod
    def from_data_params(cls) -> 'BetaFormatVerifier':
        """
        Constructor, for creating a chapter verifier.
        """
        return cls.from_params(DATA_PARAMS, {
            ROOT: 'chapter',
        })

    def check(self, key: str, required: Set[str], data: Any) -> None:
        """
        Checks dictionaries against a set of required keys.

        Args:
            key: Document key being checked.
            required: Set of required keys.
            data: Dictionaries to check.

        Raises:
            StorySourceError: If a dictionary is invalid.
        """
        if isinstance(data, dict):
            data = (data,)

        for obj in data:
            # A plain difference is used since a subset comparison
            # would let through objects that carry any extra key.
            missing = set(required) - set(obj.keys())

            if missing:
                names = ", ".join(sorted(missing))
                message = f"Missing from {key}: {names}"
                raise StorySourceError(message)

    def __call__(self, data: Dict[str, Any]) -> None:
        """
        Applies the verifier to a dictionary.

        Args:
            data: Dictionary to check.

        Raises:
            StorySourceError: If a dictionary is invalid.
        """
        for key, required in self.requirements.items():
            if key == ROOT:
                section = data
            elif key in data:
                section = data[key]
            else:
                # An absent section is reported like any other missing
                # key, instead of surfacing as a KeyError.
                raise StorySourceError(f"Missing from {ROOT}: {key}")

            self.check(key, required, section)
|
||||||
|
|
||||||
|
|
||||||
|
class Fimfiction2Fetcher(Fetcher):
    """
    Fetches stories using Fimfiction APIv2.
    """

    prefetch_meta = True
    prefetch_data = False

    flavors = frozenset((
        StorySource.FIMFICTION,
        DataFormat.JSON,
        MetaFormat.BETA,
        MetaPurity.DIRTY,
    ))

    def __init__(self, token: str, bulk_meta=False, bulk_data=False) -> None:
        """
        Constructor.

        Args:
            token: Authentication token for Fimfiction.
            bulk_meta: Toggles bulk fetching of story meta.
            bulk_data: Toggles bulk fetching of story data.
        """
        client = ApiClient(token)

        # Converters from resource objects to dictionaries.
        self.extract_meta = MetaDocumentifier()
        self.extract_chapter = Documentifier()

        # Verifiers for the converted dictionaries.
        self.verify_meta = BetaFormatVerifier.from_meta_params()
        self.verify_chapter = BetaFormatVerifier.from_data_params()

        self.requester = RoutedRequester(client, bulk_meta, bulk_data)

    def fetch_meta(self, key: int) -> Dict[str, Any]:
        """
        Requests, converts, and verifies the meta of a story.

        Args:
            key: Primary key of the story.

        Returns:
            The verified story meta dictionary.
        """
        story = self.requester.get_meta(int(key))
        document = self.extract_meta(story)
        self.verify_meta(document)

        return document

    def fetch_data(self, key: int) -> bytes:
        """
        Requests the chapters of a story and serializes them as JSON.

        Args:
            key: Primary key of the story.

        Returns:
            Encoded JSON with the chapters ordered by chapter number.

        Raises:
            InvalidStoryError: If the story has no chapters.
        """
        documents = []

        for chapter in self.requester.get_data(int(key)):
            documents.append(self.extract_chapter(chapter))

        if not documents:
            raise InvalidStoryError("Missing chapters.")

        for document in documents:
            self.verify_chapter(document)

        documents.sort(key=lambda document: document['chapter_number'])

        serialized = json.dumps(
            documents,
            indent=4,
            ensure_ascii=False,
            sort_keys=True
        )

        return serialized.encode()
|
|
@ -3,6 +3,7 @@ bbcode
|
||||||
blinker
|
blinker
|
||||||
boltons
|
boltons
|
||||||
flake8
|
flake8
|
||||||
|
git+https://github.com/qvantel/jsonapi-client.git
|
||||||
jmespath
|
jmespath
|
||||||
mypy
|
mypy
|
||||||
pytest
|
pytest
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -90,6 +90,7 @@ setup(
|
||||||
'blinker',
|
'blinker',
|
||||||
'boltons',
|
'boltons',
|
||||||
'jmespath',
|
'jmespath',
|
||||||
|
'jsonapi-client',
|
||||||
'requests',
|
'requests',
|
||||||
'tqdm',
|
'tqdm',
|
||||||
),
|
),
|
||||||
|
|
158
tests/fetchers/test_fimfiction2.py
Normal file
158
tests/fetchers/test_fimfiction2.py
Normal file
|
@ -0,0 +1,158 @@
|
||||||
|
"""
|
||||||
|
Fimfiction APIv2 fetcher tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Fimfarchive, preserves stories from Fimfiction.
|
||||||
|
# Copyright (C) 2015 Joakim Soderlund
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from fimfarchive.exceptions import InvalidStoryError
|
||||||
|
from fimfarchive.fetchers import Fimfiction2Fetcher
|
||||||
|
|
||||||
|
|
||||||
|
# Keys of stories which are expected to be fetchable.
VALID_STORY_KEY = 9
AVATAR_STORY_KEY = 5764
COVER_STORY_KEY = 444
PUBLISHED_STORY_KEY = 25739

# Keys of stories which are expected to fail in some way.
INVALID_STORY_KEY = 7
EMPTY_STORY_KEY = 199462
HIDDEN_STORY_KEY = 8
PROTECTED_STORY_KEY = 208799

# Avatar images expected for authors without an avatar.
AVATAR_PLACEHOLDER = {
    '32': 'https://static.fimfiction.net/images/none_32.png',
    '64': 'https://static.fimfiction.net/images/none_64.png',
}

# Every combination of the bulk meta and bulk data toggles.
BULK_COMBINATIONS = [
    (bulk_meta, bulk_data)
    for bulk_meta in (False, True)
    for bulk_data in (False, True)
]
|
||||||
|
|
||||||
|
|
||||||
|
class TestFimfiction2Fetcher:
    """
    Fimfiction2Fetcher tests.
    """

    @pytest.fixture(params=BULK_COMBINATIONS)
    def fetcher(self, request):
        """
        Returns a Fimfiction2Fetcher instance.
        """
        access_token = os.environ['FIMFICTION_ACCESS_TOKEN']
        instance = Fimfiction2Fetcher(access_token, *request.param)

        # Stories are only fetched when the tests access them.
        instance.prefetch_meta = False
        instance.prefetch_data = False

        yield instance

    def fetch_valid(self, fetcher, key):
        """
        Fetches a story, checking that meta and data are usable.
        """
        story = fetcher.fetch(key)
        chapters = json.loads(story.data.decode())

        assert story.meta['id'] == key
        assert chapters

        return story

    def fetch_invalid(self, fetcher, key):
        """
        Fetches a story, checking that meta and data are unavailable.
        """
        story = fetcher.fetch(key)

        with pytest.raises(InvalidStoryError):
            story.meta

        with pytest.raises(InvalidStoryError):
            story.data

        return story

    def test_valid(self, fetcher):
        """
        Tests fetching a valid story.
        """
        self.fetch_valid(fetcher, VALID_STORY_KEY)

    def test_valid_missing_cover(self, fetcher):
        """
        Tests fetching a valid story without cover.
        """
        meta = self.fetch_valid(fetcher, COVER_STORY_KEY).meta

        assert meta['cover_image'] is None

    def test_valid_missing_avatar(self, fetcher):
        """
        Tests fetching a valid story without avatar.
        """
        meta = self.fetch_valid(fetcher, AVATAR_STORY_KEY).meta
        avatar = meta['author']['avatar']

        assert avatar['32'] == AVATAR_PLACEHOLDER['32']
        assert avatar['64'] == AVATAR_PLACEHOLDER['64']
        assert avatar['96'] == AVATAR_PLACEHOLDER['64']

    def test_valid_missing_published_date(self, fetcher):
        """
        Tests fetching a valid story without published date.
        """
        meta = self.fetch_valid(fetcher, PUBLISHED_STORY_KEY).meta

        assert meta['date_published'] is None

    def test_empty_story(self, fetcher):
        """
        Tests fetching a story without chapters.
        """
        story = fetcher.fetch(EMPTY_STORY_KEY)

        # Meta is available, even though the data is not.
        assert story.meta['id'] == EMPTY_STORY_KEY

        with pytest.raises(InvalidStoryError):
            story.data

    def test_invalid_story(self, fetcher):
        """
        Tests fetching an invalid story.
        """
        self.fetch_invalid(fetcher, INVALID_STORY_KEY)

    def test_hidden_story(self, fetcher):
        """
        Tests fetching a hidden story.
        """
        self.fetch_invalid(fetcher, HIDDEN_STORY_KEY)

    def test_protected_story(self, fetcher):
        """
        Tests fetching a password-protected story.
        """
        self.fetch_invalid(fetcher, PROTECTED_STORY_KEY)
|
1
tox.ini
1
tox.ini
|
@ -17,6 +17,7 @@ commands =
|
||||||
addopts =
|
addopts =
|
||||||
--ignore tests/fetchers/test_fimfarchive.py
|
--ignore tests/fetchers/test_fimfarchive.py
|
||||||
--ignore tests/fetchers/test_fimfiction.py
|
--ignore tests/fetchers/test_fimfiction.py
|
||||||
|
--ignore tests/fetchers/test_fimfiction2.py
|
||||||
tests
|
tests
|
||||||
|
|
||||||
[flake8]
|
[flake8]
|
||||||
|
|
Loading…
Reference in a new issue