From 0f9492bc0fd31744028d341ab8cef77db0f5061a Mon Sep 17 00:00:00 2001
From: Joakim Soderlund
Date: Sun, 1 Sep 2019 15:10:14 +0200
Subject: [PATCH] Add optional SQLite cache to Fimfarchive fetcher

---
 fimfarchive/fetchers/fimfarchive.py | 96 ++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 17 deletions(-)

diff --git a/fimfarchive/fetchers/fimfarchive.py b/fimfarchive/fetchers/fimfarchive.py
index 5ff62bf..67a3836 100644
--- a/fimfarchive/fetchers/fimfarchive.py
+++ b/fimfarchive/fetchers/fimfarchive.py
@@ -24,8 +24,10 @@ Fimfarchive fetcher.
 
 import json
 import marshal
+import sqlite3
 
 from io import BufferedReader
 from multiprocessing import Pool
+from pathlib import Path
 from typing import (
     cast, Any, Callable, Dict, IO, Iterable, Iterator, Mapping, Optional, Sized, Tuple, Union,
@@ -66,6 +68,12 @@ class Index(Mapping[int, Dict[str, Any]]):
         Closes the index, if necessary.
         """
 
+    def iteritems(self) -> Iterator[Tuple[int, Dict[str, Any]]]:
+        """
+        Special items iterator, for performance.
+        """
+        yield from self.items()
+
     def load(self, source: IO[bytes]) -> Iterator[Tuple[int, bytes]]:
         """
         Yields index items from a byte stream.
@@ -140,10 +148,58 @@ class MemoryIndex(Index):
     def __len__(self) -> int:
         return len(self.data)
 
+    def iteritems(self) -> Iterator[Tuple[int, Dict[str, Any]]]:
+        for key, value in self.data.items():
+            yield key, deserialize(decompress(value))
+
     def close(self):
         self.data.clear()
 
 
+class SqliteIndex(Index):
+    """
+    Cached mapping from key to story meta.
+    """
+
+    CREATE = 'CREATE TABLE "cache" (key INT PRIMARY KEY, value BLOB)'
+    INSERT = 'INSERT INTO cache VALUES (?, ?)'
+    SELECT = 'SELECT value FROM cache WHERE key = ?'
+    LIST_KEYS = 'SELECT key FROM cache ORDER BY key'
+    LIST_ITEMS = 'SELECT key, value FROM cache ORDER BY key'
+
+    def __init__(self, name: str, stream: IO[bytes]) -> None:
+        if Path(name).exists():
+            self.db = sqlite3.connect(name)
+        else:
+            self.db = sqlite3.connect(name)
+            self.db.execute(self.CREATE)
+            self.db.executemany(self.INSERT, self.load(stream))
+            self.db.commit()
+
+        keys = self.db.execute(self.LIST_KEYS)
+        self._keys = set(row[0] for row in keys)
+
+    def __getitem__(self, key: int) -> Dict[str, Any]:
+        value = self.db.execute(self.SELECT, (key,))
+        return marshal.loads(value.fetchone()[0])
+
+    def __contains__(self, item) -> bool:
+        return item in self._keys
+
+    def __iter__(self) -> Iterator[int]:
+        return iter(sorted(self._keys))
+
+    def __len__(self) -> int:
+        return len(self._keys)
+
+    def iteritems(self) -> Iterator[Tuple[int, Dict[str, Any]]]:
+        items = self.db.execute(self.LIST_ITEMS)
+        return ((k, deserialize(v)) for k, v in items)
+
+    def close(self) -> None:
+        self.db.close()
+
+
 class FimfarchiveFetcher(Iterable[Story], Sized, Fetcher):
     """
     Fetcher for Fimfarchive.
@@ -216,10 +272,12 @@ class FimfarchiveFetcher(Iterable[Story], Sized, Fetcher):
         """
         Yields all stories in the archive, ordered by ID.
""" - for key in sorted(self.index.keys()): - yield self.fetch(key) + for key, meta in self.index.iteritems(): + key = self.validate_key(key) + meta = self.validate_meta(key, meta) + yield Story(key, self, meta, None, self.flavors) - def validate(self, key: int) -> int: + def validate_key(self, key: int) -> int: """ Ensures that the key matches a valid story @@ -243,6 +301,20 @@ class FimfarchiveFetcher(Iterable[Story], Sized, Fetcher): return key + def validate_meta(self, key: int, meta: Dict[str, Any]) -> Dict[str, Any]: + actual = meta.get('id') + + if key != actual: + raise StorySourceError(f"Invalid ID for {key}: {actual}") + + try: + archive = meta.get('archive', meta) + self.paths[key] = archive['path'] + except KeyError: + pass + + return meta + def fetch_path(self, key: int) -> Optional[str]: """ Fetches the archive path of a story. @@ -257,7 +329,7 @@ class FimfarchiveFetcher(Iterable[Story], Sized, Fetcher): InvalidStoryError: If a valid story is not found. StorySourceError: If the fetcher is closed. """ - key = self.validate(key) + key = self.validate_key(key) path = self.paths.get(key) if path is not None: @@ -284,23 +356,13 @@ class FimfarchiveFetcher(Iterable[Story], Sized, Fetcher): self.paths.clear() def fetch_meta(self, key: int) -> Dict[str, Any]: - key = self.validate(key) - meta = self.index[key] - actual = meta.get('id') - - if key != actual: - raise StorySourceError(f"Invalid ID for {key}: {actual}") - - try: - archive = meta.get('archive', meta) - self.paths[key] = archive['path'] - except KeyError: - pass + key = self.validate_key(key) + meta = self.validate_meta(key, self.index[key]) return meta def fetch_data(self, key: int) -> bytes: - key = self.validate(key) + key = self.validate_key(key) path = self.fetch_path(key) if not path: