diff --git a/Cargo.lock b/Cargo.lock index 9be74c0..ea401ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,6 +5,14 @@ name = "adler32" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "aho-corasick" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "arrayvec" version = "0.4.12" @@ -119,6 +127,7 @@ dependencies = [ "hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "nom 5.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)", "zip 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", @@ -319,6 +328,22 @@ name = "redox_syscall" version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "regex" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.7.9 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.16 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.16" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "rustc_version" version = "0.2.3" @@ -414,6 +439,14 @@ dependencies = [ "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "time" version = "0.1.42" @@ -485,6 +518,7 @@ dependencies = [ [metadata] "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" +"checksum aho-corasick 0.7.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d5e63fd144e18ba274ae7095c0197a870a7b9468abc801dd62f190d80817d2ec" "checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" "checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" @@ -522,6 +556,8 @@ dependencies = [ "checksum rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" "checksum rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" "checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" +"checksum regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" +"checksum regex-syntax 0.6.16 (registry+https://github.com/rust-lang/crates.io-index)" = "1132f845907680735a84409c3bebc64d1364a5683ffbce899550cd09d5eaefc1" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum rustyline 6.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "de64be8eecbe428b6924f1d8430369a01719fbb182c26fa431ddbb0a95f5315d" "checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" @@ -534,6 +570,7 @@ dependencies = [ "checksum serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)" = "9371ade75d4c2d6cb154141b9752cf3781ec9c05e0e5cf35060e1e70ee7b9c25" "checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" "checksum syn 1.0.16 (registry+https://github.com/rust-lang/crates.io-index)" = "123bd9499cfb380418d509322d7a6d52e5315f064fe4b3ad18a53d6b92c07859" +"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" "checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" "checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" "checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" diff --git a/Cargo.toml b/Cargo.toml index de0ce62..fe4af60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,9 @@ version = "5" [dependencies.rayon] version = "*" +[dependencies.regex] +version = "*" + [dependencies.serde] version = "*" features = ["derive"] diff --git a/src/query/optimizer.rs b/src/query/optimizer.rs index 06fa545..9e223d9 100644 --- a/src/query/optimizer.rs +++ b/src/query/optimizer.rs @@ -4,6 +4,9 @@ use chrono::prelude::*; use chrono_english::{parse_date_string, Dialect}; +use regex::RegexBuilder; +use regex::escape; + use super::parser::{Operator, Source}; use crate::archive::Story; use crate::error::{Error, Result}; @@ -32,14 +35,20 @@ pub fn optimize(src: Source, op: Operator, value: &str) -> Result { } fn strfn(f: StrFn, op: Operator, value: &str) -> Result { - let value: String = match op { - Fuzzy => value.to_lowercase(), - _ => value.to_owned(), - }; + let exact: String = value.into(); + + let result = RegexBuilder::new(&escape(value)) + .case_insensitive(true) + .size_limit(1_048_576) + .build(); + + let regex = result.map_err(|e| match e { + _ => Error::query("Invalid value for fuzzy match"), + })?; match op { - Exact => ok!(move |s| f(s) == value), - Fuzzy => ok!(move |s| f(s).to_lowercase().contains(&value)), + Exact => ok!(move |s| f(s) == exact), + Fuzzy => ok!(move |s| regex.is_match(f(s))), _ => Err(Error::query("Invalid operation for text type")), } }