Add data format mapper

This commit is contained in:
Joakim Soderlund 2019-06-08 17:21:04 +02:00
parent 1e91a64a36
commit 1a34c8112b
2 changed files with 156 additions and 5 deletions

View file

@ -25,8 +25,10 @@ Mappers for Fimfarchive.
import string import string
from abc import abstractmethod from abc import abstractmethod
from html import unescape from html import unescape
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Dict, Generic, Optional, Set, TypeVar, Union from typing import Dict, Generic, Optional, Set, TypeVar, Union
from zipfile import ZipFile
from arrow import api as arrow, Arrow from arrow import api as arrow, Arrow
@ -39,6 +41,8 @@ from fimfarchive.utils import find_flavor
__all__ = ( __all__ = (
'Mapper', 'Mapper',
'StaticMapper', 'StaticMapper',
'DataFormatMapper',
'MetaFormatMapper',
'StoryDateMapper', 'StoryDateMapper',
'StoryPathMapper', 'StoryPathMapper',
'StorySlugMapper', 'StorySlugMapper',
@ -282,14 +286,55 @@ class MetaFormatMapper(Mapper[Optional[MetaFormat]]):
def __call__(self, story: Story) -> Optional[MetaFormat]:
    """
    Guesses the meta format of a story.

    Args:
        story: The story whose meta format should be determined.

    Returns:
        The meta format flavor already present on the story if
        there is one. Otherwise the single format whose spec shares
        at least one key with the story meta, or None when zero or
        several formats match.
    """
    known = find_flavor(story, MetaFormat)

    if known is not None:
        return known

    specs = self.spec.items()
    keys = set(story.meta.keys())
    candidates = [fmt for fmt, spec in specs if spec & keys]

    # Only an unambiguous match counts; anything else is unknown.
    if len(candidates) != 1:
        return None

    return candidates[0]
class DataFormatMapper(Mapper[Optional[DataFormat]]):
    """
    Guesses the data format of stories.

    A data format flavor already present on the story is returned as is.
    Otherwise the raw story data is inspected: data wrapped in curly
    braces is treated as JSON, and ZIP archives are classified by the
    file names they contain.
    """

    # File names which must all be present in a ZIP archive for the
    # archive to be recognized as the corresponding format.
    spec: Dict[DataFormat, Set[str]] = {
        DataFormat.EPUB: {'content.opf', 'mimetype', 'toc.ncx'},
        DataFormat.FPUB: {'book.ncx', 'book.opf', 'mimetype'},
    }

    # Four byte signatures accepted as the start of ZIP data.
    zip_magic: Set[bytes] = {
        b'PK\x03\x04',
        b'PK\x05\x06',
        b'PK\x07\x08',
    }

    def __call__(self, story: Story) -> Optional[DataFormat]:
        """
        Guesses the data format of a story.

        Args:
            story: The story whose data format should be determined.

        Returns:
            The data format flavor already present on the story if
            there is one. Otherwise JSON if the data, ignoring trailing
            whitespace, starts with `{` and ends with `}`, or the single
            format whose spec files are all present in the ZIP archive.
            None is returned for anything else, including data which
            starts with a ZIP signature but cannot be read as an archive.
        """
        # Imported locally so that this class does not depend on any
        # change to the module level imports.
        from zipfile import BadZipFile

        flavor = find_flavor(story, DataFormat)

        if flavor is not None:
            return flavor

        data = story.data.rstrip()

        # Slices instead of indices so that empty data is handled too.
        if data[:1] == b'{' and data[-1:] == b'}':
            return DataFormat.JSON

        if data[:4] not in self.zip_magic:
            return None

        try:
            with ZipFile(BytesIO(data)) as zobj:
                names = set(zobj.namelist())
        except BadZipFile:
            # A matching signature does not guarantee a readable archive,
            # truncated or corrupt data is simply an unknown format.
            return None

        items = self.spec.items()
        matches = [fmt for fmt, spec in items if spec <= names]

        if len(matches) == 1:
            return matches[0]
        else:
            return None

View file

@ -23,17 +23,19 @@ Mapper tests.
import os import os
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Any, Dict from typing import Any, Dict
from unittest.mock import patch, MagicMock, PropertyMock from unittest.mock import patch, MagicMock, PropertyMock
from zipfile import ZipFile
import pytest import pytest
from fimfarchive.exceptions import InvalidStoryError from fimfarchive.exceptions import InvalidStoryError
from fimfarchive.flavors import DataFormat, MetaFormat from fimfarchive.flavors import DataFormat, MetaFormat
from fimfarchive.mappers import ( from fimfarchive.mappers import (
MetaFormatMapper, StaticMapper, StoryDateMapper, DataFormatMapper, MetaFormatMapper, StaticMapper,
StoryPathMapper, StorySlugMapper StoryDateMapper, StoryPathMapper, StorySlugMapper,
) )
from fimfarchive.stories import Story from fimfarchive.stories import Story
@ -460,3 +462,107 @@ class TestMetaFormatMapper:
story = self.merge(story, beta, 'misc') story = self.merge(story, beta, 'misc')
assert mapper(story) is MetaFormat.ALPHA assert mapper(story) is MetaFormat.ALPHA
class TestDataFormatMapper:
    """
    Tests for the data format mapper.
    """

    @pytest.fixture
    def mapper(self):
        """
        Provides a new data format mapper.
        """
        return DataFormatMapper()

    def zip(self, names) -> bytes:
        """
        Builds an in-memory ZIP archive and returns its raw bytes.

        Args:
            names: File names to add, each file contains its own name.
        """
        buffer = BytesIO()

        with ZipFile(buffer, 'w') as archive:
            for member in names:
                archive.writestr(member, member)

        return buffer.getvalue()

    @pytest.mark.parametrize('data', [
        b'{}',
        b'{"id": 42}',
        b'{"id": 42}\n',
    ])
    def test_json_mapping(self, mapper, story, data):
        """
        Data wrapped in curly braces is detected as JSON.
        """
        unflavored = story.merge(data=data, flavors=[])
        result = mapper(unflavored)

        assert result is DataFormat.JSON

    @pytest.mark.parametrize('files', [
        ['mimetype', 'book.ncx', 'book.opf'],
        ['mimetype', 'book.opf', 'book.ncx', 'Chapter1.html'],
    ])
    def test_fpub_mapping(self, mapper, story, files):
        """
        Archives containing the FPUB files are detected as FPUB.
        """
        unflavored = story.merge(data=self.zip(files), flavors=[])
        result = mapper(unflavored)

        assert result is DataFormat.FPUB

    @pytest.mark.parametrize('files', [
        ['mimetype', 'content.opf', 'toc.ncx'],
        ['mimetype', 'toc.ncx', 'content.opf', 'Chapter1.html'],
    ])
    def test_epub_mapping(self, mapper, story, files):
        """
        Archives containing the EPUB files are detected as EPUB.
        """
        unflavored = story.merge(data=self.zip(files), flavors=[])
        result = mapper(unflavored)

        assert result is DataFormat.EPUB

    @pytest.mark.parametrize('fmt', [
        DataFormat.EPUB,
        DataFormat.JSON,
    ])
    def test_included_mapping(self, mapper, story, fmt):
        """
        A data format flavor already on the story is returned.
        """
        flavored = story.merge(flavors=[fmt])
        result = mapper(flavored)

        assert result is fmt

    @pytest.mark.parametrize('data', [
        b'',
        b'P',
        b'PK',
        b'PK\x03',
        b'PK\x03\x03',
    ])
    def test_unknown_raw_mapping(self, mapper, story, data):
        """
        Raw data without a known signature yields no flavor.
        """
        unflavored = story.merge(data=data, flavors=[])
        result = mapper(unflavored)

        assert result is None

    @pytest.mark.parametrize('files', [
        [],
        ['alpaca.jpg'],
        ['book.opf', 'book.ncx'],
        ['mimetype', 'book.ncx'],
        ['content.opf', 'tox.ncx', 'Chapter1.html'],
    ])
    def test_unknown_zip_mapping(self, mapper, story, files):
        """
        Archives lacking a complete set of spec files yield no flavor.
        """
        unflavored = story.merge(data=self.zip(files), flavors=[])
        result = mapper(unflavored)

        assert result is None