mirror of
https://github.com/JockeTF/fimfareader.git
synced 2024-11-30 08:57:59 +01:00
Add ineffective deduplication experiment
This commit is contained in:
parent
2547cf8346
commit
af2ce29d01
3 changed files with 50 additions and 38 deletions
|
@ -35,7 +35,7 @@ version = "*"
|
|||
|
||||
[dependencies.serde]
|
||||
version = "*"
|
||||
features = ["derive"]
|
||||
features = ["derive", "rc"]
|
||||
|
||||
[dependencies.serde_json]
|
||||
version = "*"
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
//! Index parser.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::io::BufRead;
|
||||
use std::sync::mpsc::{channel, Receiver};
|
||||
use std::sync::Arc;
|
||||
use std::thread::spawn;
|
||||
|
||||
use rayon::prelude::*;
|
||||
|
@ -9,7 +11,7 @@ use serde::de::Error;
|
|||
use serde_json::error::Result;
|
||||
use serde_json::from_str;
|
||||
|
||||
use super::story::Story;
|
||||
use super::story::{Author, Story, Tag};
|
||||
|
||||
const TRIM: &[char] = &['"', ',', ' ', '\t', '\n', '\r'];
|
||||
|
||||
|
@ -46,9 +48,48 @@ pub fn parse(reader: impl BufRead) -> Result<Vec<Story>> {
|
|||
return Err(Error::custom("Invalid file structure"));
|
||||
}
|
||||
|
||||
rx.recv().map_err(|e| match e {
|
||||
let result = rx.recv().map_err(|e| match e {
|
||||
_ => Error::custom("Missing parser result"),
|
||||
})?
|
||||
})?;
|
||||
|
||||
Ok(dedup(result?))
|
||||
}
|
||||
|
||||
fn dedup(mut stories: Vec<Story>) -> Vec<Story> {
|
||||
let mut authors: HashSet<Arc<Author>> = HashSet::new();
|
||||
let mut tags: HashSet<Arc<Tag>> = HashSet::new();
|
||||
|
||||
for story in stories.iter_mut() {
|
||||
if let Some(author) = authors.get(&story.author) {
|
||||
story.author = author.clone();
|
||||
} else {
|
||||
authors.insert(story.author.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for story in stories.iter_mut() {
|
||||
let unseen = story
|
||||
.tags
|
||||
.iter()
|
||||
.filter(|tag| !tags.contains(*tag))
|
||||
.map(|tag| tag.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
tags.extend(unseen);
|
||||
|
||||
story.tags = story
|
||||
.tags
|
||||
.iter()
|
||||
.filter_map(|tag| tags.get(tag))
|
||||
.map(|tag| tag.clone())
|
||||
.collect();
|
||||
}
|
||||
|
||||
for tag in tags.iter() {
|
||||
println!("{}: {}", tag.name, Arc::strong_count(tag));
|
||||
}
|
||||
|
||||
stories
|
||||
}
|
||||
|
||||
fn spawn_parser(stream: Receiver<String>) -> Receiver<Result<Vec<Story>>> {
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
//! Story meta.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::prelude::*;
|
||||
|
||||
|
@ -9,16 +8,10 @@ use serde::de::Error;
|
|||
use serde::{Deserialize, Deserializer};
|
||||
use serde_json::Value;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
lazy_static! {
|
||||
static ref TAGS: Mutex<HashSet<&'static Tag>> = Mutex::new(HashSet::new());
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct Story {
|
||||
pub archive: Archive,
|
||||
pub author: Author,
|
||||
pub author: Arc<Author>,
|
||||
pub chapters: Vec<Chapter>,
|
||||
pub color: Option<Color>,
|
||||
pub completion_status: CompletionStatus,
|
||||
|
@ -43,8 +36,7 @@ pub struct Story {
|
|||
pub short_description: String,
|
||||
pub status: Status,
|
||||
pub submitted: bool,
|
||||
#[serde(deserialize_with = "interned_tag")]
|
||||
pub tags: Vec<&'static Tag>,
|
||||
pub tags: Vec<Arc<Tag>>,
|
||||
#[serde(deserialize_with = "null_to_text")]
|
||||
pub title: String,
|
||||
pub total_num_views: i32,
|
||||
|
@ -60,7 +52,7 @@ pub struct Archive {
|
|||
pub path: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Clone, Debug, Hash, PartialEq, Eq, Deserialize)]
|
||||
pub struct Author {
|
||||
pub avatar: Option<Avatar>,
|
||||
pub bio_html: Option<String>,
|
||||
|
@ -74,7 +66,7 @@ pub struct Author {
|
|||
pub url: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Clone, Debug, Hash, PartialEq, Eq, Deserialize)]
|
||||
pub struct Avatar {
|
||||
#[serde(rename = "16")]
|
||||
pub x16: Option<String>,
|
||||
|
@ -200,27 +192,6 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
fn interned_tag<'de, D>(d: D) -> Result<Vec<&'static Tag>, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let tags = Vec::<Tag>::deserialize(d)?;
|
||||
let mut store = TAGS.lock().unwrap();
|
||||
|
||||
Ok(tags
|
||||
.into_iter()
|
||||
.map(|tag| match store.get(&tag) {
|
||||
Some(tag) => tag,
|
||||
None => {
|
||||
let boxed: Box<Tag> = Box::new(tag);
|
||||
let leaked: &'static Tag = Box::leak(boxed);
|
||||
store.insert(leaked);
|
||||
leaked
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Color {
|
||||
fn deserialize<D>(d: D) -> Result<Color, D::Error>
|
||||
where
|
||||
|
|
Loading…
Reference in a new issue