Save on RAM by interning author meta

This commit is contained in:
Joakim Soderlund 2024-05-29 17:44:40 +02:00
parent 314b388c35
commit 6901aa3d66
3 changed files with 48 additions and 19 deletions

View file

@ -15,6 +15,8 @@ use zip::result::ZipError;
use super::parser::parse;
use super::story::Story;
use crate::archive::AUTHORS;
use crate::archive::TAGS;
use crate::error::Error;
use crate::error::Result;
@ -63,7 +65,13 @@ impl<T: Read + Seek> Fetcher<T> {
_ => Error::archive("Could not open story index"),
})?;
parse(BufReader::with_capacity(1048576, file)).map_err(Error::index)
let reader = BufReader::with_capacity(1048576, file);
let result = parse(reader).map_err(Error::index);
AUTHORS.clear();
TAGS.clear();
result
}
pub fn fetch(&self, key: i64) -> Option<&Story> {

View file

@ -1,35 +1,43 @@
//! Interner module.
use std::collections::HashSet;
use std::hash::Hash;
use std::sync::Arc;
use std::sync::RwLock;
pub struct Interner<T: 'static>(RwLock<HashSet<&'static T>>);
pub struct Interner<T>(RwLock<HashSet<Arc<T>>>);
impl<T> Interner<T>
where
T: Eq + std::hash::Hash,
T: Eq + Hash,
{
/// Creates an interner that holds no values yet.
pub fn new() -> Self {
    let store = HashSet::new();
    Self(RwLock::new(store))
}
fn get(&self, value: &T) -> Option<&'static T> {
fn get(&self, value: &T) -> Option<Arc<T>> {
let store = self.0.read().unwrap();
store.get(value).copied()
store.get(value).cloned()
}
fn set(&self, value: T) -> &'static T {
let boxed: Box<T> = Box::new(value);
let leaked: &'static T = Box::leak(boxed);
fn set(&self, value: T) -> Arc<T> {
let mut store = self.0.write().unwrap();
store.insert(leaked);
let arc = Arc::new(value);
store.insert(arc.clone());
leaked
arc
}
pub fn intern(&self, value: T) -> &'static T {
pub fn clear(&self) {
let mut store = self.0.write().unwrap();
store.clear();
store.shrink_to_fit();
}
/// Returns the shared handle for `value`, storing it first when no equal
/// value has been interned yet.
///
/// # Panics
///
/// Panics if the lock was poisoned by a panic in another thread.
pub fn intern(&self, value: T) -> Arc<T> {
    // Fast path: only a read lock is needed when an equal value is present.
    if let Some(existing) = self.get(&value) {
        return existing;
    }

    let mut store = self.0.write().unwrap();

    // Look again now that the write lock is held. Another thread may have
    // interned an equal value after the read lock above was released, and
    // inserting a duplicate would leave the set untouched while handing
    // this caller an `Arc` that is not the stored one.
    if let Some(existing) = store.get(&value) {
        return Arc::clone(existing);
    }

    let arc = Arc::new(value);
    store.insert(Arc::clone(&arc));
    arc
}
}

View file

@ -1,5 +1,7 @@
//! Story meta.
use std::sync::Arc;
use chrono::prelude::*;
use lazy_static::lazy_static;
use serde::de::Error;
@ -10,14 +12,16 @@ use serde_json::Value;
use super::interner::Interner;
lazy_static! {
static ref TAGS: Interner<Tag> = Interner::new();
pub(crate) static ref AUTHORS: Interner<Author> = Interner::new();
pub(crate) static ref TAGS: Interner<Tag> = Interner::new();
}
#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Story {
pub archive: Archive,
pub author: Author,
#[serde(deserialize_with = "author_as_static")]
pub author: Arc<Author>,
pub chapters: Vec<Chapter>,
pub color: Option<Color>,
pub completion_status: CompletionStatus,
@ -43,7 +47,7 @@ pub struct Story {
pub status: Status,
pub submitted: bool,
#[serde(deserialize_with = "tags_as_static")]
pub tags: Vec<&'static Tag>,
pub tags: Vec<Arc<Tag>>,
#[serde(deserialize_with = "null_to_text")]
pub title: String,
pub total_num_views: i32,
@ -60,7 +64,7 @@ pub struct Archive {
pub path: String,
}
#[derive(Clone, Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
#[serde(deny_unknown_fields)]
pub struct Author {
pub avatar: Option<Avatar>,
@ -75,7 +79,7 @@ pub struct Author {
pub url: String,
}
#[derive(Clone, Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
#[serde(deny_unknown_fields)]
pub struct Avatar {
#[serde(rename = "16")]
@ -209,13 +213,22 @@ where
}
}
fn tags_as_static<'de, D>(d: D) -> Result<Vec<&'static Tag>, D::Error>
fn author_as_static<'de, D>(d: D) -> Result<Arc<Author>, D::Error>
where
D: Deserializer<'de>,
{
let tags: Vec<Tag> = Vec::deserialize(d)?;
Author::deserialize(d).map(|author| AUTHORS.intern(author))
}
Ok(tags.into_iter().map(|tag| TAGS.intern(tag)).collect())
/// Deserializes a sequence of tags and passes each one through the `TAGS`
/// interner, yielding the handles it returns in their original order.
///
/// Any error from deserializing the sequence is returned unchanged.
fn tags_as_static<'de, D>(d: D) -> Result<Vec<Arc<Tag>>, D::Error>
where
    D: Deserializer<'de>,
{
    let parsed: Vec<Tag> = Vec::deserialize(d)?;
    let mut interned = Vec::with_capacity(parsed.len());
    for tag in parsed {
        interned.push(TAGS.intern(tag));
    }
    Ok(interned)
}
impl<'de> Deserialize<'de> for Color {