mirror of
https://github.com/JockeTF/fimfareader.git
synced 2024-11-23 13:58:00 +01:00
Improve performance of fuzzy queries using regex
This commit is contained in:
parent
94d8bf9fe2
commit
96b7773e8f
3 changed files with 55 additions and 6 deletions
37
Cargo.lock
generated
37
Cargo.lock
generated
|
@ -5,6 +5,14 @@ name = "adler32"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "0.7.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arrayvec"
|
name = "arrayvec"
|
||||||
version = "0.4.12"
|
version = "0.4.12"
|
||||||
|
@ -119,6 +127,7 @@ dependencies = [
|
||||||
"hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"nom 5.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"nom 5.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"zip 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
"zip 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
@ -319,6 +328,22 @@ name = "redox_syscall"
|
||||||
version = "0.1.56"
|
version = "0.1.56"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.3.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick 0.7.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"regex-syntax 0.6.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.6.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc_version"
|
name = "rustc_version"
|
||||||
version = "0.2.3"
|
version = "0.2.3"
|
||||||
|
@ -414,6 +439,14 @@ dependencies = [
|
||||||
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thread_local"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.1.42"
|
version = "0.1.42"
|
||||||
|
@ -485,6 +518,7 @@ dependencies = [
|
||||||
|
|
||||||
[metadata]
|
[metadata]
|
||||||
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
|
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
|
||||||
|
"checksum aho-corasick 0.7.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d5e63fd144e18ba274ae7095c0197a870a7b9468abc801dd62f190d80817d2ec"
|
||||||
"checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
|
"checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
|
||||||
"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
|
"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
|
||||||
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||||
|
@ -522,6 +556,8 @@ dependencies = [
|
||||||
"checksum rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
|
"checksum rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
|
||||||
"checksum rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
|
"checksum rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
|
||||||
"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
|
"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
|
||||||
|
"checksum regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8"
|
||||||
|
"checksum regex-syntax 0.6.16 (registry+https://github.com/rust-lang/crates.io-index)" = "1132f845907680735a84409c3bebc64d1364a5683ffbce899550cd09d5eaefc1"
|
||||||
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
|
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
|
||||||
"checksum rustyline 6.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "de64be8eecbe428b6924f1d8430369a01719fbb182c26fa431ddbb0a95f5315d"
|
"checksum rustyline 6.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "de64be8eecbe428b6924f1d8430369a01719fbb182c26fa431ddbb0a95f5315d"
|
||||||
"checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8"
|
"checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8"
|
||||||
|
@ -534,6 +570,7 @@ dependencies = [
|
||||||
"checksum serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)" = "9371ade75d4c2d6cb154141b9752cf3781ec9c05e0e5cf35060e1e70ee7b9c25"
|
"checksum serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)" = "9371ade75d4c2d6cb154141b9752cf3781ec9c05e0e5cf35060e1e70ee7b9c25"
|
||||||
"checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
|
"checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
|
||||||
"checksum syn 1.0.16 (registry+https://github.com/rust-lang/crates.io-index)" = "123bd9499cfb380418d509322d7a6d52e5315f064fe4b3ad18a53d6b92c07859"
|
"checksum syn 1.0.16 (registry+https://github.com/rust-lang/crates.io-index)" = "123bd9499cfb380418d509322d7a6d52e5315f064fe4b3ad18a53d6b92c07859"
|
||||||
|
"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
|
||||||
"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f"
|
"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f"
|
||||||
"checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
|
"checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
|
||||||
"checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
|
"checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
|
||||||
|
|
|
@ -32,6 +32,9 @@ version = "5"
|
||||||
[dependencies.rayon]
|
[dependencies.rayon]
|
||||||
version = "*"
|
version = "*"
|
||||||
|
|
||||||
|
[dependencies.regex]
|
||||||
|
version = "*"
|
||||||
|
|
||||||
[dependencies.serde]
|
[dependencies.serde]
|
||||||
version = "*"
|
version = "*"
|
||||||
features = ["derive"]
|
features = ["derive"]
|
||||||
|
|
|
@ -4,6 +4,9 @@ use chrono::prelude::*;
|
||||||
|
|
||||||
use chrono_english::{parse_date_string, Dialect};
|
use chrono_english::{parse_date_string, Dialect};
|
||||||
|
|
||||||
|
use regex::RegexBuilder;
|
||||||
|
use regex::escape;
|
||||||
|
|
||||||
use super::parser::{Operator, Source};
|
use super::parser::{Operator, Source};
|
||||||
use crate::archive::Story;
|
use crate::archive::Story;
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
|
@ -32,14 +35,20 @@ pub fn optimize(src: Source, op: Operator, value: &str) -> Result<Filter> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn strfn(f: StrFn, op: Operator, value: &str) -> Result<Filter> {
|
fn strfn(f: StrFn, op: Operator, value: &str) -> Result<Filter> {
|
||||||
let value: String = match op {
|
let exact: String = value.into();
|
||||||
Fuzzy => value.to_lowercase(),
|
|
||||||
_ => value.to_owned(),
|
let result = RegexBuilder::new(&escape(value))
|
||||||
};
|
.case_insensitive(true)
|
||||||
|
.size_limit(1_048_576)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let regex = result.map_err(|e| match e {
|
||||||
|
_ => Error::query("Invalid value for fuzzy match"),
|
||||||
|
})?;
|
||||||
|
|
||||||
match op {
|
match op {
|
||||||
Exact => ok!(move |s| f(s) == value),
|
Exact => ok!(move |s| f(s) == exact),
|
||||||
Fuzzy => ok!(move |s| f(s).to_lowercase().contains(&value)),
|
Fuzzy => ok!(move |s| regex.is_match(f(s))),
|
||||||
_ => Err(Error::query("Invalid operation for text type")),
|
_ => Err(Error::query("Invalid operation for text type")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue