From ea4b95b0e63645644a703ef4ab537a2450b0ccce Mon Sep 17 00:00:00 2001
From: Joakim Soderlund
Date: Sun, 21 Jul 2019 15:37:13 +0000
Subject: [PATCH] Add parser module for index loading

---
Review notes:
- parse() no longer panics when the worker has already reported a
  deserialization error and hung up; it stops sending lines and
  returns that error instead.
- The worker ignores failed sends, which only happen once parse()
  has already returned a read error to its caller.
- The commit ID above and the blob ID on the src/parser.rs index line
  predate these changes.
- NOTE(review): the author address is missing from the From: header;
  restore it before feeding this to `git am`.

 src/main.rs   |  1 +
 src/parser.rs | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 src/parser.rs

diff --git a/src/main.rs b/src/main.rs
index 669cf15..d675c0b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
 //! Main module.
 
 pub mod error;
+pub mod parser;
 pub mod story;
 
 fn main() {
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..fbefcea
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,99 @@
+//! Index parser.
+
+use std::io::BufRead;
+use std::sync::mpsc::{channel, Receiver};
+use std::thread::spawn;
+
+use serde_json::from_str;
+
+use crate::error::{Error, Result};
+use crate::story::Story;
+
+/// Characters stripped from both ends of the key and value of an index line.
+const TRIM: &[char] = &['"', ',', ' ', '\t', '\n', '\r'];
+
+/// Parses the index in `reader` into a list of stories sorted by ID.
+///
+/// Lines are read on the calling thread and deserialized on a worker
+/// thread. Lines of three bytes or less are skipped.
+///
+/// # Errors
+///
+/// Returns a `SourceError` if a line cannot be read or deserialized.
+pub fn parse(reader: impl BufRead) -> Result<Vec<Story>> {
+    use Error::*;
+
+    let (tx, rx) = channel();
+    let rx = spawn_parser(rx);
+
+    for line in reader.lines() {
+        let line = line.map_err(|_| SourceError("Could not read index line."))?;
+
+        // The worker hangs up once it has reported an error, so a failed
+        // send means its result is already waiting to be received below.
+        if 3 < line.len() && tx.send(line).is_err() {
+            break;
+        }
+    }
+
+    drop(tx);
+
+    rx.recv().expect("parser thread exited without sending a result")
+}
+
+/// Spawns a worker thread which deserializes the lines sent on `stream`.
+///
+/// The returned channel yields a single message: the stories sorted by
+/// ID once `stream` is closed, or the first deserialization error.
+fn spawn_parser(stream: Receiver<String>) -> Receiver<Result<Vec<Story>>> {
+    let (tx, rx) = channel();
+
+    spawn(move || {
+        let mut stories = Vec::with_capacity(250_000);
+
+        while let Ok(line) = stream.recv() {
+            match deserialize(line) {
+                Ok(story) => stories.push(story),
+                Err(e) => {
+                    // A failed send means the caller has already given up.
+                    let _ = tx.send(Err(e));
+                    return;
+                }
+            };
+        }
+
+        stories.shrink_to_fit();
+        stories.sort_by_key(|story| story.id);
+
+        // A failed send means the caller has already given up.
+        let _ = tx.send(Ok(stories));
+    });
+
+    rx
+}
+
+/// Deserializes a single `key: value` index line into a story.
+///
+/// The key must be an integer equal to the ID of the story in the JSON value.
+fn deserialize(line: String) -> Result<Story> {
+    use Error::*;
+
+    let split = line
+        .splitn(2, ':')
+        .map(|value| value.trim_matches(TRIM))
+        .collect::<Vec<&str>>();
+
+    let (skey, json) = match split[..] {
+        [skey, json] => Ok((skey, json)),
+        _ => Err(SourceError("Invalid line format.")),
+    }?;
+
+    let key: i64 = skey.parse().map_err(|_| SourceError("Invalid meta key."))?;
+    let story: Story = from_str(json).map_err(|_| SourceError("Invalid meta value."))?;
+
+    if key != story.id {
+        return Err(SourceError("Meta key mismatch."));
+    }
+
+    Ok(story)
+}