From eca26dd2c34f7d62dae8ce97bb670602f3a04fef Mon Sep 17 00:00:00 2001 From: Joakim Soderlund Date: Sun, 10 Mar 2019 15:11:13 +0100 Subject: [PATCH] Add JSON to FPUB converter --- fimfarchive/converters/__init__.py | 4 +- fimfarchive/converters/json_fpub/__init__.py | 209 ++++++++++++++++ fimfarchive/converters/json_fpub/book.ncx | 28 +++ fimfarchive/converters/json_fpub/book.opf | 30 +++ fimfarchive/converters/json_fpub/chapter.html | 24 ++ .../converters/json_fpub/container.xml | 6 + requirements.txt | 1 + setup.py | 1 + tests/converters/test_json_fpub.json | 151 ++++++++++++ tests/converters/test_json_fpub.py | 232 ++++++++++++++++++ 10 files changed, 685 insertions(+), 1 deletion(-) create mode 100644 fimfarchive/converters/json_fpub/__init__.py create mode 100644 fimfarchive/converters/json_fpub/book.ncx create mode 100644 fimfarchive/converters/json_fpub/book.opf create mode 100644 fimfarchive/converters/json_fpub/chapter.html create mode 100644 fimfarchive/converters/json_fpub/container.xml create mode 100644 tests/converters/test_json_fpub.json create mode 100644 tests/converters/test_json_fpub.py diff --git a/fimfarchive/converters/__init__.py b/fimfarchive/converters/__init__.py index 93a6d7d..a072a63 100644 --- a/fimfarchive/converters/__init__.py +++ b/fimfarchive/converters/__init__.py @@ -5,7 +5,7 @@ Converter module. # # Fimfarchive, preserves stories from Fimfiction. -# Copyright (C) 2015 Joakim Soderlund +# Copyright (C) 2019 Joakim Soderlund # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,9 +24,11 @@ Converter module. from .base import Converter from .alpha_beta import AlphaBetaConverter +from .json_fpub import JsonFpubConverter __all__ = ( 'Converter', 'AlphaBetaConverter', + 'JsonFpubConverter', ) diff --git a/fimfarchive/converters/json_fpub/__init__.py b/fimfarchive/converters/json_fpub/__init__.py new file mode 100644 index 0000000..212ea78 --- /dev/null +++ b/fimfarchive/converters/json_fpub/__init__.py @@ -0,0 +1,209 @@ +""" +JSON to FPUB converter for story data. +""" + + +# +# Fimfarchive, preserves stories from Fimfiction. +# Copyright (C) 2019 Joakim Soderlund +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +import json +from copy import deepcopy +from io import BytesIO +from typing import Any, Dict, Iterator, Optional, Tuple +from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED + +import arrow +from jinja2 import Environment, PackageLoader + +from fimfarchive.flavors import DataFormat, MetaFormat +from fimfarchive.stories import Story +from fimfarchive.utils import JayWalker + +from fimfarchive.fetchers.fimfiction2 import BetaFormatVerifier + +from ..base import Converter + + +__all__ = ( + 'JsonFpubConverter', +) + + +MIMETYPE = 'application/epub+zip' +PACKAGE = __package__.rsplit('.', 1) + + +class DateNormalizer(JayWalker): + """ + Normalizes timezones of date values to UTC. + """ + + def handle(self, data, key, value) -> None: + if str(key).startswith('date_'): + data[key] = self.normalize(value) + else: + self.walk(value) + + def normalize(self, value: Optional[str]) -> Optional[str]: + """ + Normalizes a single date value. + """ + parsed = arrow.get(value or 0) + + if parsed.timestamp == 0: + return None + + return parsed.to('utc').isoformat() + + +class StoryRenderer: + """ + Renders story data. + """ + + def __init__(self) -> None: + env = Environment( + autoescape=True, + keep_trailing_newline=True, + loader=PackageLoader(*PACKAGE), + ) + + self.container_xml = env.get_template('container.xml') + self.chapter_html = env.get_template('chapter.html') + self.book_opf = env.get_template('book.opf') + self.book_ncx = env.get_template('book.ncx') + + self.date_normalizer = DateNormalizer() + self.verify_meta = BetaFormatVerifier.from_meta_params() + self.verify_data = BetaFormatVerifier.from_data_params() + + def fix_authors_note_position(self, data: Dict[str, Any]) -> None: + """ + Clears author's note position if author's note is missing. + """ + authors_note = data['authors_note_html'] + + if not authors_note or len(authors_note.strip()) < 10: + data['authors_note_position'] = None + + def fix_local_href_attributes(self, data: Dict[str, Any]) -> None: + """ + Replaces local href attributes with global ones. + """ + source = ' href="/' + target = ' href="https://www.fimfiction.net/' + + for key in ('authors_note_html', 'content_html'): + data[key] = data[key].replace(source, target) + + def fix_local_src_attributes(self, data: Dict[str, Any]) -> None: + """ + Replaces local src attributes with global ones. + """ + source = ' src="/' + target = ' src="https://www.fimfiction.net/' + + for key in ('authors_note_html', 'content_html'): + data[key] = data[key].replace(source, target) + + def verify_index(self, expected, actual): + """ + Raises an exception if the index values differ. + """ + if expected != actual: + raise ValueError(f"Expected index {expected}, was {actual}.") + + def iter_chapters(self, story: Story) -> Iterator[Dict[str, Any]]: + """ + Yields chapter meta and data, combined into one. + """ + self.verify_meta(story.meta) + + meta_chapters = story.meta['chapters'] + data_chapters = json.loads(story.data.decode()) + zipped = zip(meta_chapters, data_chapters) + + for index, chapter in enumerate(zipped, 1): + meta, data = chapter + + self.verify_data(data) + self.verify_index(index, meta['chapter_number']) + self.verify_index(index, data['chapter_number']) + + yield {**meta, **data} + + def iter_content(self, story: Story) -> Iterator[Tuple[str, str]]: + """ + Yields EPUB file paths and content for a story. + """ + yield 'META-INF/container.xml', self.container_xml.render() + + for chapter in self.iter_chapters(story): + number = chapter['chapter_number'] + path = f"Chapter{number}.html" + + self.fix_authors_note_position(chapter) + self.fix_local_href_attributes(chapter) + self.fix_local_src_attributes(chapter) + + yield path, self.chapter_html.render(chapter) + + meta = deepcopy(story.meta) + self.date_normalizer.walk(meta) + + yield 'book.opf', self.book_opf.render(meta) + yield 'book.ncx', self.book_ncx.render(meta) + + def __call__(self, story: Story) -> bytes: + """ + Renders the EPUB file contents as bytes. + """ + fobj = BytesIO() + + with ZipFile(fobj, 'w') as archive: + archive.writestr('mimetype', MIMETYPE, ZIP_STORED) + + for path, data in self.iter_content(story): + archive.writestr(path, data, ZIP_DEFLATED) + + return fobj.getvalue() + + +class JsonFpubConverter(Converter): + """ + Converts story data from JSON to FPUB format. + """ + + def __init__(self) -> None: + self.render = StoryRenderer() + + def __call__(self, story: Story) -> Story: + if DataFormat.JSON not in story.flavors: + raise ValueError(f"Missing flavor: {DataFormat.JSON}") + + if MetaFormat.BETA not in story.flavors: + raise ValueError(f"Missing flavor: {MetaFormat.BETA}") + + data = self.render(story) + + flavors = set(story.flavors) + flavors.remove(DataFormat.JSON) + flavors.add(DataFormat.FPUB) + + return story.merge(data=data, flavors=flavors) diff --git a/fimfarchive/converters/json_fpub/book.ncx b/fimfarchive/converters/json_fpub/book.ncx new file mode 100644 index 0000000..af87a6d --- /dev/null +++ b/fimfarchive/converters/json_fpub/book.ncx @@ -0,0 +1,28 @@ + + + + + + + + + + + + {{ title }} + + + {{ author.name }} + + + {%- for chapter in chapters %} + {%- set cnum = chapter.chapter_number %} + + + {{ chapter.title }} + + + + {%- endfor %} + + diff --git a/fimfarchive/converters/json_fpub/book.opf b/fimfarchive/converters/json_fpub/book.opf new file mode 100644 index 0000000..9d531fd --- /dev/null +++ b/fimfarchive/converters/json_fpub/book.opf @@ -0,0 +1,30 @@ + + + + {{ title }} + en + {{ url }} + {{ short_description }} + Fimfiction + http://www.fimfiction.net + {{ author.name }} + {{ date_updated }} + {{ url }} + + + + {%- for chapter in chapters %} + {%- set cnum = chapter.chapter_number %} + + {%- endfor %} + + + {%- for chapter in chapters %} + + {%- endfor %} + + diff --git a/fimfarchive/converters/json_fpub/chapter.html b/fimfarchive/converters/json_fpub/chapter.html new file mode 100644 index 0000000..1133926 --- /dev/null +++ b/fimfarchive/converters/json_fpub/chapter.html @@ -0,0 +1,24 @@ + + + + + + {{ title }} + + + {%- autoescape false %} + {%- if authors_note_position == 'top' %} +

{{ "Author's Note" | e }}

+
{{ authors_note_html }}
+
+ {%- endif %} +

{{ title | e }}

+
{{ content_html }}
+ {%- if authors_note_position == 'bottom' %} +
+

{{ "Author's Note" | e }}

+
{{ authors_note_html }}
+ {%- endif %} + {%- endautoescape %} + + diff --git a/fimfarchive/converters/json_fpub/container.xml b/fimfarchive/converters/json_fpub/container.xml new file mode 100644 index 0000000..247b592 --- /dev/null +++ b/fimfarchive/converters/json_fpub/container.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/requirements.txt b/requirements.txt index 21a7083..4aa5c99 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ blinker flake8 jsonapi-client importlib_resources +jinja2 jmespath mypy pytest diff --git a/setup.py b/setup.py index dac08a9..e508244 100755 --- a/setup.py +++ b/setup.py @@ -89,6 +89,7 @@ setup( 'bbcode', 'blinker', 'importlib_resources', + 'jinja2', 'jmespath', 'jsonapi-client', 'requests', diff --git a/tests/converters/test_json_fpub.json b/tests/converters/test_json_fpub.json new file mode 100644 index 0000000..cf9e96f --- /dev/null +++ b/tests/converters/test_json_fpub.json @@ -0,0 +1,151 @@ +{ + "pairs": [ + { + "fpub": [ + { + "name": "mimetype", + "text": "application/epub+zip" + }, + { + "name": "META-INF/container.xml", + "text": "\n\n \n \n \n\n" + }, + { + "name": "Chapter1.html", + "text": "\n\n\n \n \n Chapter 1\n \n \n

Chapter 1

\n

REDACTED

\n
\n

Author's Note

\n

REDACTED

\n \n\n" + }, + { + "name": "book.opf", + "text": "\n\n \n The Greatest Equine Who has Ever Lived!\n en\n https://www.fimfiction.net/story/9/the-greatest-equine-who-has-ever-lived\n REDACTED\n Fimfiction\n http://www.fimfiction.net\n Sethisto\n 2011-06-25T21:05:53+00:00\n https://www.fimfiction.net/story/9/the-greatest-equine-who-has-ever-lived\n \n \n \n \n \n \n \n \n\n" + }, + { + "name": "book.ncx", + "text": "\n\n\n \n \n \n \n \n \n \n \n The Greatest Equine Who has Ever Lived!\n \n \n Sethisto\n \n \n \n \n Chapter 1\n \n \n \n \n\n" + } + ], + "json": [ + { + "authors_note_html": "

REDACTED

", + "authors_note_position": "bottom", + "chapter_number": 1, + "content_html": "

REDACTED

", + "id": 10, + "title": "Chapter 1", + "url": "https://www.fimfiction.net/story/9/1/the-greatest-equine-who-has-ever-lived/chapter-1" + } + ], + "key": 9, + "meta": { + "author": { + "avatar": { + "128": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-128", + "160": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-160", + "192": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-192", + "256": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-256", + "32": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-32", + "320": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-320", + "384": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-384", + "48": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-48", + "512": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-512", + "64": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-64", + "96": "https://cdn-img.fimfiction.net/user/t74v-1431818459-18-96" + }, + "bio_html": "

REDACTED

", + "date_joined": "2011-06-25T16:53:48-04:00", + "id": 18, + "name": "Sethisto", + "num_blog_posts": 0, + "num_followers": 146, + "num_stories": 1, + "url": "https://www.fimfiction.net/user/18/Sethisto" + }, + "chapters": [ + { + "chapter_number": 1, + "date_modified": "2014-01-28T06:25:52-05:00", + "date_published": "2011-07-08T14:04:11-04:00", + "id": 10, + "num_views": 10549, + "num_words": 321, + "published": true, + "title": "Chapter 1", + "url": "https://www.fimfiction.net/story/9/1/the-greatest-equine-who-has-ever-lived/chapter-1" + } + ], + "color": { + "hex": "3e3e7e", + "rgb": [ + 62, + 62, + 126 + ] + }, + "completion_status": "incomplete", + "content_rating": "everyone", + "cover_image": { + "full": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-full", + "large": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-large", + "medium": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-medium", + "thumbnail": "https://cdn-img.fimfiction.net/story/vr3n-1432418803-9-tiny" + }, + "date_modified": "1970-01-01T00:00:00+00:00", + "date_published": "2011-07-08T18:04:11+00:00", + "date_updated": "2011-06-25T21:05:53+00:00", + "description_html": "

REDACTED

", + "id": 9, + "num_chapters": 1, + "num_comments": 228, + "num_dislikes": 52, + "num_likes": 399, + "num_views": 10549, + "num_words": 321, + "prequel": null, + "published": true, + "rating": 88, + "short_description": "REDACTED", + "status": "visible", + "submitted": true, + "tags": [ + { + "id": 20, + "name": "Trixie", + "old_id": "c:21", + "type": "character", + "url": "https://www.fimfiction.net/tag/trixie" + }, + { + "id": 6, + "name": "Twilight Sparkle", + "old_id": "c:7", + "type": "character", + "url": "https://www.fimfiction.net/tag/twilight-sparkle" + }, + { + "id": 234, + "name": "Random", + "old_id": "g:random", + "type": "genre", + "url": "https://www.fimfiction.net/tag/random" + }, + { + "id": 120, + "name": "Romance", + "old_id": "g:romance", + "type": "genre", + "url": "https://www.fimfiction.net/tag/romance" + }, + { + "id": 4, + "name": "My Little Pony: Friendship is Magic", + "old_id": "", + "type": "series", + "url": "https://www.fimfiction.net/tag/mlp-fim" + } + ], + "title": "The Greatest Equine Who has Ever Lived!", + "total_num_views": 10549, + "url": "https://www.fimfiction.net/story/9/the-greatest-equine-who-has-ever-lived" + } + } + ] +} diff --git a/tests/converters/test_json_fpub.py b/tests/converters/test_json_fpub.py new file mode 100644 index 0000000..86b365c --- /dev/null +++ b/tests/converters/test_json_fpub.py @@ -0,0 +1,232 @@ +""" +JSON to FPUB converter tests. +""" + + +# +# Fimfarchive, preserves stories from Fimfiction. +# Copyright (C) 2019 Joakim Soderlund +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +import json +from copy import deepcopy +from io import BytesIO +from typing import Any, Dict, Iterator, List +from zipfile import ZipFile + +import pytest + +from fimfarchive.converters import JsonFpubConverter +from fimfarchive.fetchers import Fimfiction2Fetcher +from fimfarchive.flavors import DataFormat, MetaFormat +from fimfarchive.stories import Story +from fimfarchive.utils import JayWalker + + +@pytest.fixture(scope='module') +def data(): + """ + Returns test data from JSON. + """ + path = f'{__file__[:-3]}.json' + + with open(path, 'rt') as fobj: + return json.load(fobj) + + +class Redactor(JayWalker): + """ + Redacts samples. + """ + + def handle(self, data, key, value) -> None: + if str(key).endswith('_html'): + data[key] = '

REDACTED

' + elif key == 'short_description': + data[key] = "REDACTED" + else: + self.walk(value) + + +class JsonFpubConverterSampler: + """ + Generates sample conversions for tests. + + Samples must be manually inspected for correctness. + """ + + def __init__(self, token: str, *keys: int) -> None: + """ + Constructor. + + Args: + token: Fimfiction APIv2 access token. + keys: Stories to generate samples for. + """ + self.keys = sorted(int(key) for key in keys) + self.fetcher = Fimfiction2Fetcher(token) + self.convert = JsonFpubConverter() + self.redactor = Redactor() + + def sample(self, key: int) -> Dict[str, Any]: + """ + Generates a sample conversion. + """ + story = self.fetcher.fetch(key) + redacted = self.redact(story) + converted = self.convert(redacted) + + return { + 'key': int(key), + 'meta': redacted.meta, + 'json': json.loads(redacted.data.decode()), + 'fpub': self.extract(converted.data), + } + + def redact(self, story: Story) -> Story: + """ + Redacts a story. + """ + meta = deepcopy(story.meta) + data = json.loads(story.data.decode()) + + self.redactor.walk(meta) + self.redactor.walk(data) + + raw_data = json.dumps(data).encode() + + return story.merge(meta=meta, data=raw_data) + + def extract(self, data: bytes) -> List[Dict[str, Any]]: + """ + Lists the contents of a ZIP-file. + """ + output: List[Dict[str, Any]] = [] + zobj = ZipFile(BytesIO(data)) + + for info in zobj.infolist(): + output.append({ + 'name': info.filename, + 'text': zobj.read(info).decode(), + }) + + return output + + def __iter__(self) -> Iterator[Dict[str, Any]]: + """ + Yields all samples. + """ + for key in self.keys: + yield self.sample(key) + + def __str__(self) -> str: + """ + Serializes all samples. + """ + return json.dumps( + obj={'pairs': list(self)}, + ensure_ascii=False, + sort_keys=True, + indent=4, + ) + + +class TestJsonFpubConverter: + """ + JsonFpubConverter tests. + """ + + @pytest.fixture + def converter(self): + """ + Returns a JSON to FPUB converter instance. + """ + return JsonFpubConverter() + + @pytest.fixture(params=range(1)) + def pair(self, request, data): + """ + Returns test data pairs. + """ + return data['pairs'][request.param] + + @pytest.fixture + def json_story(self, pair): + """ + Returns a story in the JSON data format. + """ + return Story( + key=pair['key'], + meta=deepcopy(pair['meta']), + data=json.dumps(pair['json']).encode(), + flavors={MetaFormat.BETA, DataFormat.JSON}, + ) + + @pytest.fixture + def fpub_story(self, pair): + """ + Returns a story in the FPUB data format. + """ + stream = BytesIO() + + with ZipFile(stream, 'w') as zobj: + for info in pair['fpub']: + zobj.writestr(info['name'], info['text']) + + return Story( + key=pair['key'], + meta=deepcopy(pair['meta']), + data=stream.getvalue(), + flavors={MetaFormat.BETA, DataFormat.FPUB}, + ) + + def test_conversion(self, converter, json_story, fpub_story): + """ + Tests conversion of story data from JSON to FPUB format. + """ + converted = converter(json_story) + + exp = ZipFile(BytesIO(fpub_story.data)) + act = ZipFile(BytesIO(converted.data)) + + for einfo, ainfo in zip(exp.infolist(), act.infolist()): + assert einfo.filename == ainfo.filename + assert exp.read(einfo) == act.read(ainfo) + + def test_mimetype(self, converter, json_story): + """ + Tests mimetype is included correctly. + """ + converted = converter(json_story) + + zobj = ZipFile(BytesIO(converted.data)) + info = zobj.infolist()[0] + read = zobj.read(info).decode() + + assert 0 == info.compress_type + assert 'mimetype' == info.filename + assert 'application/epub+zip' == read + + def test_immutablilty(self, converter, json_story): + """ + Tests converter doesn't modify original. + """ + clone = deepcopy(json_story) + converter(json_story) + + for attr in ('key', 'fetcher', 'meta', 'data', 'flavors'): + assert getattr(clone, attr) == getattr(json_story, attr)