From 6901aa3d66c363bd69b4666e87ebf54ae4274838 Mon Sep 17 00:00:00 2001 From: Joakim Soderlund Date: Wed, 29 May 2024 17:44:40 +0200 Subject: [PATCH] Save on RAM by interning author meta --- src/archive/fetcher.rs | 10 +++++++++- src/archive/interner.rs | 28 ++++++++++++++++++---------- src/archive/story.rs | 29 +++++++++++++++++++++-------- 3 files changed, 48 insertions(+), 19 deletions(-) diff --git a/src/archive/fetcher.rs b/src/archive/fetcher.rs index 508143e..b27a03a 100644 --- a/src/archive/fetcher.rs +++ b/src/archive/fetcher.rs @@ -15,6 +15,8 @@ use zip::result::ZipError; use super::parser::parse; use super::story::Story; +use crate::archive::AUTHORS; +use crate::archive::TAGS; use crate::error::Error; use crate::error::Result; @@ -63,7 +65,13 @@ impl Fetcher { _ => Error::archive("Could not open story index"), })?; - parse(BufReader::with_capacity(1048576, file)).map_err(Error::index) + let reader = BufReader::with_capacity(1048576, file); + let result = parse(reader).map_err(Error::index); + + AUTHORS.clear(); + TAGS.clear(); + + result } pub fn fetch(&self, key: i64) -> Option<&Story> { diff --git a/src/archive/interner.rs b/src/archive/interner.rs index 3bc0424..76c24e3 100644 --- a/src/archive/interner.rs +++ b/src/archive/interner.rs @@ -1,35 +1,43 @@ //! Interner module. use std::collections::HashSet; +use std::hash::Hash; +use std::sync::Arc; use std::sync::RwLock; -pub struct Interner(RwLock>); +pub struct Interner(RwLock>>); impl Interner where - T: Eq + std::hash::Hash, + T: Eq + Hash, { pub fn new() -> Self { Self(RwLock::new(HashSet::new())) } - fn get(&self, value: &T) -> Option<&'static T> { + fn get(&self, value: &T) -> Option> { let store = self.0.read().unwrap(); - store.get(value).copied() + store.get(value).cloned() } - fn set(&self, value: T) -> &'static T { - let boxed: Box = Box::new(value); - let leaked: &'static T = Box::leak(boxed); + fn set(&self, value: T) -> Arc { let mut store = self.0.write().unwrap(); - store.insert(leaked); + let arc = Arc::new(value); + store.insert(arc.clone()); - leaked + arc } - pub fn intern(&self, value: T) -> &'static T { + pub fn clear(&self) { + let mut store = self.0.write().unwrap(); + + store.clear(); + store.shrink_to_fit(); + } + + pub fn intern(&self, value: T) -> Arc { self.get(&value).unwrap_or_else(|| self.set(value)) } } diff --git a/src/archive/story.rs b/src/archive/story.rs index 171915a..e759d5c 100644 --- a/src/archive/story.rs +++ b/src/archive/story.rs @@ -1,5 +1,7 @@ //! Story meta. +use std::sync::Arc; + use chrono::prelude::*; use lazy_static::lazy_static; use serde::de::Error; @@ -10,14 +12,16 @@ use serde_json::Value; use super::interner::Interner; lazy_static! { - static ref TAGS: Interner = Interner::new(); + pub(crate) static ref AUTHORS: Interner = Interner::new(); + pub(crate) static ref TAGS: Interner = Interner::new(); } #[derive(Clone, Debug, Deserialize)] #[serde(deny_unknown_fields)] pub struct Story { pub archive: Archive, - pub author: Author, + #[serde(deserialize_with = "author_as_static")] + pub author: Arc, pub chapters: Vec, pub color: Option, pub completion_status: CompletionStatus, @@ -43,7 +47,7 @@ pub struct Story { pub status: Status, pub submitted: bool, #[serde(deserialize_with = "tags_as_static")] - pub tags: Vec<&'static Tag>, + pub tags: Vec>, #[serde(deserialize_with = "null_to_text")] pub title: String, pub total_num_views: i32, @@ -60,7 +64,7 @@ pub struct Archive { pub path: String, } -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] #[serde(deny_unknown_fields)] pub struct Author { pub avatar: Option, @@ -75,7 +79,7 @@ pub struct Author { pub url: String, } -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] #[serde(deny_unknown_fields)] pub struct Avatar { #[serde(rename = "16")] @@ -209,13 +213,22 @@ where } } -fn tags_as_static<'de, D>(d: D) -> Result, D::Error> +fn author_as_static<'de, D>(d: D) -> Result, D::Error> where D: Deserializer<'de>, { - let tags: Vec = Vec::deserialize(d)?; + Author::deserialize(d).map(|author| AUTHORS.intern(author)) +} - Ok(tags.into_iter().map(|tag| TAGS.intern(tag)).collect()) +fn tags_as_static<'de, D>(d: D) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + Vec::deserialize(d)? + .into_iter() + .map(|tag| TAGS.intern(tag)) + .map(Result::Ok) + .collect() } impl<'de> Deserialize<'de> for Color {