From a4c1fe5a2d5efb9d63245d1aa0dc4ef299806226 Mon Sep 17 00:00:00 2001 From: Joakim Soderlund Date: Sun, 12 Nov 2017 18:22:17 +0100 Subject: [PATCH] Add alpha to beta converter --- fimfarchive/converters/__init__.py | 2 + fimfarchive/converters/alpha_beta.py | 468 ++++++++++++++++++++++++++ requirements.txt | 1 + tests/converters/test_alpha_beta.json | 164 +++++++++ tests/converters/test_alpha_beta.py | 148 ++++++++ 5 files changed, 783 insertions(+) create mode 100644 fimfarchive/converters/alpha_beta.py create mode 100644 tests/converters/test_alpha_beta.json create mode 100644 tests/converters/test_alpha_beta.py diff --git a/fimfarchive/converters/__init__.py b/fimfarchive/converters/__init__.py index 54b534d..93a6d7d 100644 --- a/fimfarchive/converters/__init__.py +++ b/fimfarchive/converters/__init__.py @@ -23,8 +23,10 @@ Converter module. from .base import Converter +from .alpha_beta import AlphaBetaConverter __all__ = ( 'Converter', + 'AlphaBetaConverter', ) diff --git a/fimfarchive/converters/alpha_beta.py b/fimfarchive/converters/alpha_beta.py new file mode 100644 index 0000000..eb2fa5d --- /dev/null +++ b/fimfarchive/converters/alpha_beta.py @@ -0,0 +1,468 @@ +""" +Alpha to beta converter for story meta. +""" + + +# +# Fimfarchive, preserves stories from Fimfiction. +# Copyright (C) 2015 Joakim Soderlund +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + + +from copy import deepcopy +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple +from urllib.parse import quote_plus as urlquote + +import arrow +import bbcode +from jmespath import compile as jmes +from jmespath.parser import ParsedResult + +from fimfarchive.flavors import MetaFormat +from fimfarchive.stories import Story + +from .base import Converter + + +__all__ = ( + 'AlphaBetaConverter', +) + + +HOST = 'https://www.fimfiction.net' +EPOCH = arrow.get(0).isoformat() + + +TAGS = { + '2nd Person': { + 'id': 225, + 'name': 'Second Person', + 'old_id': 'g:second_person', + 'type': 'content', + 'url': 'https://www.fimfiction.net/tag/second-person', + }, + 'Adventure': { + 'id': 226, + 'name': 'Adventure', + 'old_id': 'g:adventure', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/adventure', + }, + 'Alternate Universe': { + 'id': 240, + 'name': 'Alternate Universe', + 'old_id': 'g:alternate_universe', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/alternate-universe', + }, + 'Anthro': { + 'id': 227, + 'name': 'Anthro', + 'old_id': 'g:anthro', + 'type': 'content', + 'url': 'https://www.fimfiction.net/tag/anthro', + }, + 'Comedy': { + 'id': 228, + 'name': 'Comedy', + 'old_id': 'g:comedy', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/comedy', + }, + 'Crossover': { + 'id': 229, + 'name': 'Crossover', + 'old_id': 'g:crossover', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/crossover', + }, + 'Dark': { + 'id': 122, + 'name': 'Dark', + 'old_id': 'g:dark', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/dark', + }, + 'Drama': { + 'id': 230, + 'name': 'Drama', + 'old_id': 'g:drama', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/drama', + }, + 'Equestria Girls': { + 'id': 123, + 'name': 'Equestria Girls', + 'old_id': 'g:equestria_girls', + 'type': 'series', + 'url': 'https://www.fimfiction.net/tag/equestria-girls', + }, + 'Horror': { + 'id': 231, + 'name': 'Horror', + 
'old_id': 'g:horror', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/horror', + }, + 'Human': { + 'id': 232, + 'name': 'Human', + 'old_id': 'g:human', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/human', + }, + 'Mystery': { + 'id': 233, + 'name': 'Mystery', + 'old_id': 'g:mystery', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/mystery', + }, + 'Random': { + 'id': 234, + 'name': 'Random', + 'old_id': 'g:random', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/random', + }, + 'Romance': { + 'id': 120, + 'name': 'Romance', + 'old_id': 'g:romance', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/romance', + }, + 'Sad': { + 'id': 235, + 'name': 'Sad', + 'old_id': 'g:sad', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/sad', + }, + 'Sci-Fi': { + 'id': 236, + 'name': 'Science Fiction', + 'old_id': 'g:scifi', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/scifi', + }, + 'Slice of Life': { + 'id': 237, + 'name': 'Slice of Life', + 'old_id': 'g:slice_of_life', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/slice-of-life', + }, + 'Thriller': { + 'id': 238, + 'name': 'Thriller', + 'old_id': 'g:thriller', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/thriller', + }, + 'Tragedy': { + 'id': 239, + 'name': 'Tragedy', + 'old_id': 'g:tragedy', + 'type': 'genre', + 'url': 'https://www.fimfiction.net/tag/tragedy', + }, +} + + +class Handler(Iterable[Tuple[str, Any]]): + """ + Maps story meta to another style. + """ + attrs: Iterable[str] = tuple() + static: Dict[str, Any] = dict() + paths: Dict[str, ParsedResult] = dict() + + def __init__(self, meta: Dict[str, Any]) -> None: + """ + Constructor. + + Args: + meta: The story meta to map. + """ + self.meta = meta + + def __getattr__(self, key: str) -> Any: + """ + Returns values from indirect sources. 
+ """ + if key in self.static: + return self.static[key] + + if key in self.paths: + meta = self.meta + path = self.paths[key] + return path.search(meta) + + return self.meta.get(key) + + def __iter__(self) -> Iterator[Tuple[str, Any]]: + """ + Yields all story meta items. + """ + for attr in self.attrs: + value = getattr(self, attr) + yield attr, value + + +class ArchiveHandler(Handler): + """ + Maps an archive meta dict from root. + """ + attrs = ( + 'date_checked', + 'date_created', + 'date_fetched', + 'date_updated', + 'path', + ) + + paths = { + 'date_checked': jmes('archive.date_checked'), + 'date_created': jmes('archive.date_created'), + 'date_fetched': jmes('archive.date_fetched'), + 'date_updated': jmes('archive.date_updated'), + 'path': jmes('archive.path || path'), + } + + +class AuthorHandler(Handler): + """ + Maps an author meta dict. + """ + attrs = ( + 'avatar', + 'bio_html', + 'date_joined', + 'id', + 'name', + 'num_blog_posts', + 'num_followers', + 'num_stories', + 'url', + ) + + @property + def url(self): + uid = int(self.id) + name = urlquote(str(self.name)) + return f'{HOST}/user/{uid}/{name}' + + +class ChapterHandler(Handler): + """ + Maps a chapter meta dict. + """ + attrs = ( + 'chapter_number', + 'date_modified', + 'date_published', + 'id', + 'num_views', + 'num_words', + 'published', + 'title', + 'url', + ) + + static = { + 'published': True, + } + + paths = { + 'url': jmes('link'), + 'num_views': jmes('views'), + 'num_words': jmes('words'), + } + + def __init__(self, meta: Dict[str, Any], index: int) -> None: + """ + Constructor. + + Args: + meta: The chapter meta to map. + index: The current chapter index. + """ + self.meta = meta + self.chapter_number = int(index) + 1 + + @property + def date_modified(self) -> Optional[str]: + timestamp = self.meta.get('date_modified') + + if timestamp is None: + return None + + return arrow.get(timestamp).isoformat() + + +class RootHandler(Handler): + """ + Maps a root meta dict. 
+ """ + attrs = ( + 'archive', + 'author', + 'chapters', + 'color', + 'completion_status', + 'content_rating', + 'cover_image', + 'date_modified', + 'date_published', + 'date_updated', + 'description_html', + 'id', + 'num_chapters', + 'num_comments', + 'num_dislikes', + 'num_likes', + 'num_views', + 'num_words', + 'prequel', + 'published', + 'rating', + 'short_description', + 'status', + 'submitted', + 'tags', + 'title', + 'total_num_views', + 'url', + ) + + static = { + 'date_modified': EPOCH, + 'published': True, + 'status': 'visible', + 'submitted': True, + } + + paths = { + 'num_chapters': jmes('chapter_count'), + 'num_comments': jmes('comments'), + 'num_dislikes': jmes('dislikes'), + 'num_likes': jmes('likes'), + 'num_views': jmes('views'), + 'num_words': jmes('words'), + 'total_num_views': jmes('total_views'), + } + + @property + def archive(self) -> Dict[str, Any]: + handler = ArchiveHandler(self.meta) + return dict(iter(handler)) + + @property + def author(self) -> Dict[str, Any]: + author = self.meta.get('author') or dict() + handler = AuthorHandler(author) + return dict(iter(handler)) + + @property + def chapters(self) -> List[Dict[str, Any]]: + items = enumerate(self.meta.get('chapters') or list()) + handlers = (ChapterHandler(c, i) for i, c in items) + return [dict(iter(handler)) for handler in handlers] + + @property + def completion_status(self) -> Optional[str]: + status = self.meta.get('status') + return status and status.strip().lower() + + @property + def content_rating(self) -> Optional[str]: + rating = self.meta.get('content_rating_text') + return rating and rating.strip().lower() + + @property + def cover_image(self) -> Dict[str, Any]: + image = self.meta.get('image') + + if image is None: + return None + + base = image.rsplit("-", 1)[0] + assert base.startswith('http') + + return { + 'full': f'{base}-full', + 'large': f'{base}-large', + 'medium': f'{base}-medium', + 'thumbnail': f'{base}-tiny', + } + + @property + def date_updated(self) -> 
Optional[str]: + timestamp = self.meta.get('date_modified') + + if timestamp is None: + return None + + return arrow.get(timestamp).isoformat() + + @property + def description_html(self) -> Optional[str]: + desc = self.meta.get('description') + + if desc is None: + return None + + html = bbcode.render_html(desc) + return f'

<p>{html.strip()}</p>

' + + @property + def rating(self) -> int: + likes = self.num_likes + dislikes = self.num_dislikes + + if None in (likes, dislikes): + return None + + try: + return round(likes / (likes + dislikes) * 100) + except ZeroDivisionError: + return 50 + + @property + def tags(self) -> List[Dict[str, Any]]: + cats = self.meta.get('categories') or dict() + tags = [TAGS[k] for k, v in cats.items() if v] + return deepcopy(tags) + + +class AlphaBetaConverter(Converter): + """ + Converts story meta from alpha to beta format. + """ + + def __call__(self, story: Story) -> Story: + if MetaFormat.ALPHA not in story.flavors: + raise ValueError(f"Missing flavor: {MetaFormat.ALPHA}") + + handler = RootHandler(story.meta) + meta = dict(iter(handler)) + + flavors = set(story.flavors) + flavors.remove(MetaFormat.ALPHA) + flavors.add(MetaFormat.BETA) + + return story.merge(meta=meta, flavors=flavors) diff --git a/requirements.txt b/requirements.txt index 0bc4ef1..175e8de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ arrow +bbcode blinker jmespath requests diff --git a/tests/converters/test_alpha_beta.json b/tests/converters/test_alpha_beta.json new file mode 100644 index 0000000..5bb4fc8 --- /dev/null +++ b/tests/converters/test_alpha_beta.json @@ -0,0 +1,164 @@ +{ + "pairs": [ + { + "alpha": { + "author": { + "id": 18, + "name": "Sethisto" + }, + "categories": { + "2nd Person": false, + "Adventure": false, + "Alternate Universe": false, + "Anthro": false, + "Comedy": false, + "Crossover": false, + "Dark": false, + "Drama": false, + "Equestria Girls": false, + "Horror": false, + "Human": false, + "Mystery": false, + "Random": true, + "Romance": true, + "Sad": false, + "Sci-Fi": false, + "Slice of Life": false, + "Thriller": false, + "Tragedy": false + }, + "chapter_count": 1, + "chapters": [ + { + "date_modified": 1390908352, + "id": 10, + "link": "https://www.fimfiction.net/story/9/1/the-greatest-equine-who-has-ever-lived/chapter-1", + "title": "Chapter 1", + 
"views": 9943, + "words": 321 + } + ], + "comments": 223, + "content_rating": 0, + "content_rating_text": "Everyone", + "date_modified": 1309035953, + "description": "REDACTED", + "dislikes": 51, + "full_image": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-full", + "id": 9, + "image": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-medium", + "likes": 365, + "short_description": "", + "status": "Incomplete", + "title": "The Greatest Equine Who has Ever Lived!", + "total_views": 9943, + "url": "https://www.fimfiction.net/story/9/the-greatest-equine-who-has-ever-lived", + "views": 9943, + "words": 321 + }, + "beta": { + "author": { + "avatar": { + "128": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-128", + "16": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-16", + "192": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-192", + "256": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-256", + "32": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-32", + "384": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-384", + "48": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-48", + "512": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-512", + "64": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-64", + "96": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-96" + }, + "bio_html": "", + "date_joined": "2011-06-25T16:53:48-04:00", + "id": 18, + "name": "Sethisto", + "num_blog_posts": 0, + "num_followers": 137, + "num_stories": 1, + "url": "https://www.fimfiction.net/user/18/Sethisto" + }, + "chapters": [ + { + "chapter_number": 1, + "date_modified": "2014-01-28T06:25:52-05:00", + "date_published": "2011-07-08T14:04:11-04:00", + "id": 10, + "num_views": 9943, + "num_words": 321, + "published": true, + "title": "Chapter 1", + "url": "https://www.fimfiction.net/story/9/1/the-greatest-equine-who-has-ever-lived/chapter-1" + } + ], + "color": { + "hex": "3e3e7e", + "rgb": [ + 62, + 62, 
+ 126 + ] + }, + "completion_status": "incomplete", + "content_rating": "everyone", + "cover_image": { + "full": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-full", + "large": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-large", + "medium": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-medium", + "thumbnail": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-tiny" + }, + "date_modified": "1969-12-31T19:00:00-05:00", + "date_published": "2011-07-08T14:04:11-04:00", + "date_updated": "2011-06-25T17:05:53-04:00", + "description_html": "

<p>REDACTED</p>

", + "id": 9, + "num_chapters": 1, + "num_comments": 223, + "num_dislikes": 51, + "num_likes": 365, + "num_views": 9943, + "num_words": 321, + "prequel": null, + "published": true, + "rating": 88, + "short_description": "", + "status": "visible", + "submitted": true, + "tags": [ + { + "id": 20, + "name": "Trixie", + "old_id": "c:21", + "type": "character", + "url": "https://www.fimfiction.net/tag/trixie" + }, + { + "id": 6, + "name": "Twilight Sparkle", + "old_id": "c:7", + "type": "character", + "url": "https://www.fimfiction.net/tag/twilight-sparkle" + }, + { + "id": 234, + "name": "Random", + "old_id": "g:random", + "type": "genre", + "url": "https://www.fimfiction.net/tag/random" + }, + { + "id": 120, + "name": "Romance", + "old_id": "g:romance", + "type": "genre", + "url": "https://www.fimfiction.net/tag/romance" + } + ], + "title": "The Greatest Equine Who has Ever Lived!", + "total_num_views": 9943, + "url": "https://www.fimfiction.net/story/9/the-greatest-equine-who-has-ever-lived" + } + } + ] +} \ No newline at end of file diff --git a/tests/converters/test_alpha_beta.py b/tests/converters/test_alpha_beta.py new file mode 100644 index 0000000..f28e204 --- /dev/null +++ b/tests/converters/test_alpha_beta.py @@ -0,0 +1,148 @@ +""" +Alpha to beta converter tests. +""" + + +# +# Fimfarchive, preserves stories from Fimfiction. +# Copyright (C) 2015 Joakim Soderlund +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see . +# + + +import json +from copy import deepcopy +from typing import Any, Dict + +import arrow +import pytest + +from fimfarchive.converters import AlphaBetaConverter +from fimfarchive.flavors import MetaFormat + + +def to_null(data: Dict[str, Any], *keys: str) -> None: + """ + Nulls the requested keys. + """ + for key in keys: + data[key] = None + + +def to_utc(data: Dict[str, Any], *keys: str) -> None: + """ + Converts the requested keys to UTC time strings. + """ + for key in keys: + value = data.get(key) + + if value is None: + continue + + time = arrow.get(value).to('utc') + data[key] = time.isoformat() + + +@pytest.fixture(scope='module') +def data(): + """ + Returns test data from JSON. + """ + path = f'{__file__[:-3]}.json' + + with open(path, 'rt') as fobj: + return json.load(fobj) + + +class TestAlphaBetaConverter: + """ + AlphaBetaConverter tests. + """ + + @pytest.fixture + def converter(self): + """ + Returns an alpha beta converter instance. + """ + return AlphaBetaConverter() + + @pytest.fixture(params=range(1)) + def pair(self, request, data): + """ + Returns meta test data pairs. + """ + return data['pairs'][request.param] + + @pytest.fixture + def alpha(self, pair): + """ + Returns meta in alpha format. + """ + return deepcopy(pair['alpha']) + + @pytest.fixture + def beta(self, pair): + """ + Returns meta in beta format. + """ + return deepcopy(pair['beta']) + + @pytest.fixture + def expected(self, beta): + """ + Returns the expected meta result. 
+ """ + data = deepcopy(beta) + + data['archive'] = { + 'date_checked': None, + 'date_created': None, + 'date_fetched': None, + 'date_updated': None, + 'path': None, + } + + to_null(data, 'color', 'date_published') + to_utc(data, 'date_modified', 'date_updated') + + to_null(data['author'], *( + 'avatar', + 'bio_html', + 'date_joined', + 'num_blog_posts', + 'num_followers', + 'num_stories', + )) + + for chapter in data['chapters']: + to_null(chapter, 'date_published') + to_utc(chapter, 'date_modified') + + data['tags'] = [ + tag for tag in data['tags'] + if tag['type'] in {'content', 'genre', 'series'} + ] + + return data + + def test_conversion(self, converter, story, expected, alpha): + """ + Tests conversion of story meta from alpha to beta format. + """ + story = story.merge(flavors=[MetaFormat.ALPHA], meta=alpha) + converted = converter(story) + + assert MetaFormat.BETA in converted.flavors + assert expected == converted.meta