mirror of
https://github.com/JockeTF/fimfareader.git
synced 2024-11-30 08:57:59 +01:00
Add ineffective deduplication experiment
This commit is contained in:
parent
2547cf8346
commit
af2ce29d01
3 changed files with 50 additions and 38 deletions
|
@ -35,7 +35,7 @@ version = "*"
|
||||||
|
|
||||||
[dependencies.serde]
|
[dependencies.serde]
|
||||||
version = "*"
|
version = "*"
|
||||||
features = ["derive"]
|
features = ["derive", "rc"]
|
||||||
|
|
||||||
[dependencies.serde_json]
|
[dependencies.serde_json]
|
||||||
version = "*"
|
version = "*"
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
//! Index parser.
|
//! Index parser.
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::io::BufRead;
|
use std::io::BufRead;
|
||||||
use std::sync::mpsc::{channel, Receiver};
|
use std::sync::mpsc::{channel, Receiver};
|
||||||
|
use std::sync::Arc;
|
||||||
use std::thread::spawn;
|
use std::thread::spawn;
|
||||||
|
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
@ -9,7 +11,7 @@ use serde::de::Error;
|
||||||
use serde_json::error::Result;
|
use serde_json::error::Result;
|
||||||
use serde_json::from_str;
|
use serde_json::from_str;
|
||||||
|
|
||||||
use super::story::Story;
|
use super::story::{Author, Story, Tag};
|
||||||
|
|
||||||
const TRIM: &[char] = &['"', ',', ' ', '\t', '\n', '\r'];
|
const TRIM: &[char] = &['"', ',', ' ', '\t', '\n', '\r'];
|
||||||
|
|
||||||
|
@ -46,9 +48,48 @@ pub fn parse(reader: impl BufRead) -> Result<Vec<Story>> {
|
||||||
return Err(Error::custom("Invalid file structure"));
|
return Err(Error::custom("Invalid file structure"));
|
||||||
}
|
}
|
||||||
|
|
||||||
rx.recv().map_err(|e| match e {
|
let result = rx.recv().map_err(|e| match e {
|
||||||
_ => Error::custom("Missing parser result"),
|
_ => Error::custom("Missing parser result"),
|
||||||
})?
|
})?;
|
||||||
|
|
||||||
|
Ok(dedup(result?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dedup(mut stories: Vec<Story>) -> Vec<Story> {
|
||||||
|
let mut authors: HashSet<Arc<Author>> = HashSet::new();
|
||||||
|
let mut tags: HashSet<Arc<Tag>> = HashSet::new();
|
||||||
|
|
||||||
|
for story in stories.iter_mut() {
|
||||||
|
if let Some(author) = authors.get(&story.author) {
|
||||||
|
story.author = author.clone();
|
||||||
|
} else {
|
||||||
|
authors.insert(story.author.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for story in stories.iter_mut() {
|
||||||
|
let unseen = story
|
||||||
|
.tags
|
||||||
|
.iter()
|
||||||
|
.filter(|tag| !tags.contains(*tag))
|
||||||
|
.map(|tag| tag.clone())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
tags.extend(unseen);
|
||||||
|
|
||||||
|
story.tags = story
|
||||||
|
.tags
|
||||||
|
.iter()
|
||||||
|
.filter_map(|tag| tags.get(tag))
|
||||||
|
.map(|tag| tag.clone())
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
for tag in tags.iter() {
|
||||||
|
println!("{}: {}", tag.name, Arc::strong_count(tag));
|
||||||
|
}
|
||||||
|
|
||||||
|
stories
|
||||||
}
|
}
|
||||||
|
|
||||||
fn spawn_parser(stream: Receiver<String>) -> Receiver<Result<Vec<Story>>> {
|
fn spawn_parser(stream: Receiver<String>) -> Receiver<Result<Vec<Story>>> {
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
//! Story meta.
|
//! Story meta.
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::sync::Arc;
|
||||||
use std::sync::Mutex;
|
|
||||||
|
|
||||||
use chrono::prelude::*;
|
use chrono::prelude::*;
|
||||||
|
|
||||||
|
@ -9,16 +8,10 @@ use serde::de::Error;
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use lazy_static::lazy_static;
|
|
||||||
|
|
||||||
lazy_static! {
|
|
||||||
static ref TAGS: Mutex<HashSet<&'static Tag>> = Mutex::new(HashSet::new());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Deserialize)]
|
#[derive(Clone, Debug, Deserialize)]
|
||||||
pub struct Story {
|
pub struct Story {
|
||||||
pub archive: Archive,
|
pub archive: Archive,
|
||||||
pub author: Author,
|
pub author: Arc<Author>,
|
||||||
pub chapters: Vec<Chapter>,
|
pub chapters: Vec<Chapter>,
|
||||||
pub color: Option<Color>,
|
pub color: Option<Color>,
|
||||||
pub completion_status: CompletionStatus,
|
pub completion_status: CompletionStatus,
|
||||||
|
@ -43,8 +36,7 @@ pub struct Story {
|
||||||
pub short_description: String,
|
pub short_description: String,
|
||||||
pub status: Status,
|
pub status: Status,
|
||||||
pub submitted: bool,
|
pub submitted: bool,
|
||||||
#[serde(deserialize_with = "interned_tag")]
|
pub tags: Vec<Arc<Tag>>,
|
||||||
pub tags: Vec<&'static Tag>,
|
|
||||||
#[serde(deserialize_with = "null_to_text")]
|
#[serde(deserialize_with = "null_to_text")]
|
||||||
pub title: String,
|
pub title: String,
|
||||||
pub total_num_views: i32,
|
pub total_num_views: i32,
|
||||||
|
@ -60,7 +52,7 @@ pub struct Archive {
|
||||||
pub path: String,
|
pub path: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Deserialize)]
|
#[derive(Clone, Debug, Hash, PartialEq, Eq, Deserialize)]
|
||||||
pub struct Author {
|
pub struct Author {
|
||||||
pub avatar: Option<Avatar>,
|
pub avatar: Option<Avatar>,
|
||||||
pub bio_html: Option<String>,
|
pub bio_html: Option<String>,
|
||||||
|
@ -74,7 +66,7 @@ pub struct Author {
|
||||||
pub url: String,
|
pub url: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Deserialize)]
|
#[derive(Clone, Debug, Hash, PartialEq, Eq, Deserialize)]
|
||||||
pub struct Avatar {
|
pub struct Avatar {
|
||||||
#[serde(rename = "16")]
|
#[serde(rename = "16")]
|
||||||
pub x16: Option<String>,
|
pub x16: Option<String>,
|
||||||
|
@ -200,27 +192,6 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn interned_tag<'de, D>(d: D) -> Result<Vec<&'static Tag>, D::Error>
|
|
||||||
where
|
|
||||||
D: Deserializer<'de>,
|
|
||||||
{
|
|
||||||
let tags = Vec::<Tag>::deserialize(d)?;
|
|
||||||
let mut store = TAGS.lock().unwrap();
|
|
||||||
|
|
||||||
Ok(tags
|
|
||||||
.into_iter()
|
|
||||||
.map(|tag| match store.get(&tag) {
|
|
||||||
Some(tag) => tag,
|
|
||||||
None => {
|
|
||||||
let boxed: Box<Tag> = Box::new(tag);
|
|
||||||
let leaked: &'static Tag = Box::leak(boxed);
|
|
||||||
store.insert(leaked);
|
|
||||||
leaked
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect())
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'de> Deserialize<'de> for Color {
|
impl<'de> Deserialize<'de> for Color {
|
||||||
fn deserialize<D>(d: D) -> Result<Color, D::Error>
|
fn deserialize<D>(d: D) -> Result<Color, D::Error>
|
||||||
where
|
where
|
||||||
|
|
Loading…
Reference in a new issue