From 1a34c8112b0f7c62f4d41cfc61e8366393147e8c Mon Sep 17 00:00:00 2001 From: Joakim Soderlund Date: Sat, 8 Jun 2019 17:21:04 +0200 Subject: [PATCH] Add data format mapper --- fimfarchive/mappers.py | 51 +++++++++++++++++-- tests/test_mappers.py | 110 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 5 deletions(-) diff --git a/fimfarchive/mappers.py b/fimfarchive/mappers.py index 2f8020b..eb4ab5b 100644 --- a/fimfarchive/mappers.py +++ b/fimfarchive/mappers.py @@ -25,8 +25,10 @@ Mappers for Fimfarchive. import string from abc import abstractmethod from html import unescape +from io import BytesIO from pathlib import Path from typing import Dict, Generic, Optional, Set, TypeVar, Union +from zipfile import ZipFile from arrow import api as arrow, Arrow @@ -39,6 +41,8 @@ from fimfarchive.utils import find_flavor __all__ = ( 'Mapper', 'StaticMapper', + 'DataFormatMapper', + 'MetaFormatMapper', 'StoryDateMapper', 'StoryPathMapper', 'StorySlugMapper', @@ -282,14 +286,55 @@ class MetaFormatMapper(Mapper[Optional[MetaFormat]]): def __call__(self, story: Story) -> Optional[MetaFormat]: flavor = find_flavor(story, MetaFormat) - if flavor: + if flavor is not None: return flavor items = self.spec.items() meta = set(story.meta.keys()) - matches = {fmt for fmt, spec in items if spec & meta} + matches = [fmt for fmt, spec in items if spec & meta] if len(matches) == 1: - return next(iter(matches)) + return matches[0] + else: + return None + + +class DataFormatMapper(Mapper[Optional[DataFormat]]): + """ + Guesses the data format of stories. + """ + spec: Dict[DataFormat, Set[str]] = { + DataFormat.EPUB: {'content.opf', 'mimetype', 'toc.ncx'}, + DataFormat.FPUB: {'book.ncx', 'book.opf', 'mimetype'}, + } + + zip_magic: Set[bytes] = { + b'PK\x03\x04', + b'PK\x05\x06', + b'PK\x07\x08', + } + + def __call__(self, story: Story) -> Optional[DataFormat]: + flavor = find_flavor(story, DataFormat) + + if flavor is not None: + return flavor + + data = story.data.rstrip() + + if data and data[0] == 123 and data[-1] == 125: + return DataFormat.JSON + + if data[:4] not in self.zip_magic: + return None + + with ZipFile(BytesIO(data)) as zobj: + names = set(zobj.namelist()) + + items = self.spec.items() + matches = [fmt for fmt, spec in items if spec <= names] + + if len(matches) == 1: + return matches[0] else: return None diff --git a/tests/test_mappers.py b/tests/test_mappers.py index d8180f0..2a1caa9 100644 --- a/tests/test_mappers.py +++ b/tests/test_mappers.py @@ -23,17 +23,19 @@ Mapper tests. import os +from io import BytesIO from pathlib import Path from typing import Any, Dict from unittest.mock import patch, MagicMock, PropertyMock +from zipfile import ZipFile import pytest from fimfarchive.exceptions import InvalidStoryError from fimfarchive.flavors import DataFormat, MetaFormat from fimfarchive.mappers import ( - MetaFormatMapper, StaticMapper, StoryDateMapper, - StoryPathMapper, StorySlugMapper + DataFormatMapper, MetaFormatMapper, StaticMapper, + StoryDateMapper, StoryPathMapper, StorySlugMapper, ) from fimfarchive.stories import Story @@ -460,3 +462,107 @@ class TestMetaFormatMapper: story = self.merge(story, beta, 'misc') assert mapper(story) is MetaFormat.ALPHA + + +class TestDataFormatMapper: + """ + DataFormatMapper tests. + """ + + @pytest.fixture + def mapper(self): + """ + Returns a data format mapper instance. + """ + return DataFormatMapper() + + def zip(self, names) -> bytes: + """ + Returns a populated ZIP-file as bytes. + """ + data = BytesIO() + + with ZipFile(data, 'w') as zobj: + for name in names: + zobj.writestr(name, name) + + return data.getvalue() + + @pytest.mark.parametrize('data', [ + b'{}', + b'{"id": 42}', + b'{"id": 42}\n', + ]) + def test_json_mapping(self, mapper, story, data): + """ + Tests detection of JSON data format. + """ + story = story.merge(data=data, flavors=[]) + + assert DataFormat.JSON is mapper(story) + + @pytest.mark.parametrize('files', [ + ['mimetype', 'book.ncx', 'book.opf'], + ['mimetype', 'book.opf', 'book.ncx', 'Chapter1.html'], + ]) + def test_fpub_mapping(self, mapper, story, files): + """ + Tests detection of FPUB data format. + """ + story = story.merge(data=self.zip(files), flavors=[]) + + assert DataFormat.FPUB is mapper(story) + + @pytest.mark.parametrize('files', [ + ['mimetype', 'content.opf', 'toc.ncx'], + ['mimetype', 'toc.ncx', 'content.opf', 'Chapter1.html'], + ]) + def test_epub_mapping(self, mapper, story, files): + """ + Tests detection of EPUB data format. + """ + story = story.merge(data=self.zip(files), flavors=[]) + + assert DataFormat.EPUB is mapper(story) + + @pytest.mark.parametrize('fmt', [ + DataFormat.EPUB, + DataFormat.JSON, + ]) + def test_included_mapping(self, mapper, story, fmt): + """ + Tests detection of included flavor. + """ + story = story.merge(flavors=[fmt]) + + assert fmt is mapper(story) + + @pytest.mark.parametrize('data', [ + b'', + b'P', + b'PK', + b'PK\x03', + b'PK\x03\x03', + ]) + def test_unknown_raw_mapping(self, mapper, story, data): + """ + Tests unknown raw data returns no flavor. + """ + story = story.merge(data=data, flavors=[]) + + assert None is mapper(story) + + @pytest.mark.parametrize('files', [ + [], + ['alpaca.jpg'], + ['book.opf', 'book.ncx'], + ['mimetype', 'book.ncx'], + ['content.opf', 'tox.ncx', 'Chapter1.html'], + ]) + def test_unknown_zip_mapping(self, mapper, story, files): + """ + Tests unknown ZIP data returns no flavor. + """ + story = story.merge(data=self.zip(files), flavors=[]) + + assert None is mapper(story)