diff --git a/.gitignore b/.gitignore index 53eaa21..ebcc59a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ +/cache /target **/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock index 0acbf4c..e01a5c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -38,18 +44,75 @@ version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + +[[package]] +name = "async-trait" +version = "0.1.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +[[package]] +name = "bitpacking" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92" +dependencies = [ + "crunchy", +] + +[[package]] +name = "bon" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe7acc34ff59877422326db7d6f2d845a582b16396b6b08194942bf34c6528ab" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4159dd617a7fbc9be6a692fe69dc2954f8e6bb6bb5e4d7578467441390d77fd0" +dependencies = [ + "darling", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "bumpalo" version = "3.17.0" @@ -68,9 +131,17 @@ version = "1.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af" dependencies = [ + "jobserver", + "libc", "shlex", ] +[[package]] +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + [[package]] name = "cfg-if" version = "1.0.0" @@ -122,6 +193,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -147,6 +227,47 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" + +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "dateparser" version = "0.2.1" @@ -159,6 +280,16 @@ dependencies = [ "regex", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", + "serde", +] + [[package]] name = "derive_more" version = "2.0.1" @@ -179,18 +310,52 @@ dependencies = [ "syn", ] +[[package]] +name = "downcast-rs" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea8a8b81cacc08888170eef4d13b775126db426d0b348bee9d18c2c1eaf123cf" + [[package]] name = "either" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "error-code" version = "3.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5d9305ccc6942a704f4335694ecd3de2ea531b114ac2d51f5f843750787a92f" +[[package]] +name = "fastdivide" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fimfareader" version = "0.1.0" @@ -208,7 +373,7 @@ name = "fimfareader-cli" version = "0.1.0" dependencies = [ "fimfareader", - "fimfareader-query", + "fimfareader-search", "rustyline", ] @@ -225,6 +390,15 @@ dependencies = [ "regex", ] +[[package]] +name = "fimfareader-search" +version = "0.1.0" +dependencies = [ + "fimfareader", + "tantivy", + "zip", +] + [[package]] name = "flate2" version = "1.0.35" @@ -235,12 +409,120 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" + +[[package]] +name = "fs4" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +dependencies = [ + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-macro", + "futures-task", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets", +] + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + [[package]] name = "iana-time-zone" version = "0.1.61" @@ -264,12 +546,36 @@ dependencies = [ "cc", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -286,24 +592,75 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + [[package]] name = "libc" version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "log" version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" + +[[package]] +name = "measure_time" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" +dependencies = [ + "log", +] + [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -319,6 +676,12 @@ dependencies = [ "adler2", ] +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + [[package]] name = "nix" version = "0.29.0" @@ -341,6 +704,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-traits" version = "0.2.19" @@ -348,6 +717,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -356,6 +726,63 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "oneshot" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea" + +[[package]] +name = "ownedbytes" +version = "0.7.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.93" @@ -374,6 +801,46 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.15", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + [[package]] name = "rayon" version = "1.10.0" @@ -423,6 +890,35 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + [[package]] name = "rustversion" version = "1.0.19" @@ -445,7 +941,7 @@ dependencies = [ "unicode-segmentation", "unicode-width", "utf8parse", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -492,6 +988,42 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "sketches-ddsketch" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" +dependencies = [ + "serde", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.98" @@ -503,6 +1035,207 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tantivy" +version = "0.23.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64", + "bitpacking", + "bon", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "hyperloglogplus", + "itertools", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + "tempfile", + "thiserror", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.6.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.3.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "downcast-rs", + "fastdivide", + "itertools", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.7.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.22.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "nom", +] + +[[package]] +name = "tantivy-sstable" +version = "0.3.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "futures-util", + "itertools", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.3.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "murmurhash32", + "rand_distr", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.3.0" +source = "git+https://github.com/quickwit-oss/tantivy.git#58c0739953a9065afb4d677a8f569556216ef36a" +dependencies = [ + "serde", +] + +[[package]] +name = "tempfile" +version = "3.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +dependencies = [ + "cfg-if", + "fastrand", + "getrandom 0.3.1", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "thiserror" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "unicode-ident" version = "1.0.17" @@ -521,12 +1254,43 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d59ca99a559661b96bf898d8fce28ed87935fd2bea9f05983c1464dd6c71b1" +dependencies = [ + "getrandom 0.3.1", + "serde", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -585,6 +1349,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.52.0" @@ -594,6 +1380,15 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.59.0" @@ -667,6 +1462,36 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zip" version = "0.6.6" @@ -678,3 +1503,31 @@ dependencies = [ "crossbeam-utils", "flate2", ] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3051792fbdc2e1e143244dc28c60f73d8470e93f3f9cbd0ead44da5ed802722" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.14+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fb060d4926e4ac3a3ad15d864e99ceb5f343c6b34f5bd6d81ae6ed417311be5" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index 9136d02..ce4eb76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ edition = "2024" members = [ "cli", "query", + "search", ] default-members = [ diff --git a/cli/Cargo.toml b/cli/Cargo.toml index cfcd3e0..9016585 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -7,8 +7,8 @@ edition = "2024" [dependencies.fimfareader] path = ".." -[dependencies.fimfareader-query] -path = "../query" +[dependencies.fimfareader-search] +path = "../search" [dependencies.rustyline] default-features = false diff --git a/cli/src/main.rs b/cli/src/main.rs index 1a84bf7..2f9c35b 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -5,10 +5,11 @@ use std::error::Error; use std::result::Result; use std::time::Instant; -use fimfareader::archive::Fetcher; -use fimfareader_query::parse; use rustyline::DefaultEditor; +use fimfareader::archive::Fetcher; +use fimfareader_search::Searcher; + fn main() -> Result<(), Box> { let argv = args().collect::>(); let mut editor = DefaultEditor::new()?; @@ -28,33 +29,41 @@ fn main() -> Result<(), Box> { println!("Finished loading in {finish:?}."); println!("The archive contains {count} stories."); + let searcher = Searcher::new(&fetcher); + while let Ok(line) = editor.readline(">>> ") { editor.add_history_entry(&line)?; - - let filter = match parse(&line) { - Ok(filter) => filter, - Err(error) => { - println!("{}", error); - continue; - } - }; - let start = Instant::now(); - let stories = fetcher.filter(&filter); + + let result = searcher + .search(&line) + .into_iter() + .filter(|(_sid, score)| *score > 10f32) + .map(|(sid, score)| (i32::try_from(sid).unwrap(), score)) + .filter_map(|(sid, score)| Some((fetcher.fetch(sid)?, score))) + .collect::>(); + let finish = (Instant::now() - start).as_millis(); - let count = stories.len(); + let count = result.len(); println!("Found {} stories in {} milliseconds!", count, finish); - if count > 32 { - continue; - } - - for story in stories.iter() { + for (story, score) in result { let key = &story.id; let title = &story.title; - println!("[{}] {}", key, title); + let tags = story + .tags + .iter() + .map(|tag| tag.name.to_string()) + .collect::>() + .join(", "); + + println!("{:02.02}% [{:>6}] {}", score, key, title); + println!("{}", tags); + println!("{}", story.short_description); + println!("{}", story.url); + println!(); } } diff --git a/search/Cargo.toml b/search/Cargo.toml new file mode 100644 index 0000000..8617ee6 --- /dev/null +++ b/search/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "fimfareader-search" +version = "0.1.0" +authors = ["Joakim Soderlund "] +edition = "2024" + +[dependencies.fimfareader] +path = ".." + +[dependencies.tantivy] +git = "https://github.com/quickwit-oss/tantivy.git" + +[dependencies.zip] +version = "=0.6.6" +features = ["deflate"] +default-features = false diff --git a/search/src/lib.rs b/search/src/lib.rs new file mode 100644 index 0000000..43ea408 --- /dev/null +++ b/search/src/lib.rs @@ -0,0 +1,160 @@ +//! Main module. + +use std::fs::create_dir_all; +use std::io::Cursor; +use std::io::Read; +use std::io::Seek; +use std::io::Write; +use std::io::stdout; +use std::path::Path; +use std::time::Instant; + +use tantivy::Index; +use tantivy::ReloadPolicy; +use tantivy::TantivyDocument; +use tantivy::collector::TopDocs; +use tantivy::query::QueryParser; +use tantivy::schema; +use tantivy::schema::Schema; +use tantivy::schema::Value; +use zip::read::ZipArchive; + +use fimfareader::archive::Fetcher; +use fimfareader::archive::Story; + +pub struct Searcher { + index: Index, +} + +impl Searcher { + pub fn new(fetcher: &Fetcher) -> Self + where + T: Read + Seek + Send, + { + Searcher { + index: Self::load_index(fetcher), + } + } + + fn schema() -> Schema { + let mut builder = Schema::builder(); + + builder.add_i64_field("sid", schema::INDEXED | schema::STORED); + builder.add_text_field("content", schema::TEXT); + + builder.build() + } + + fn load_index(fetcher: &Fetcher) -> Index + where + T: Read + Seek + Send, + { + let identity = fetcher.identity().unwrap(); + let path = Path::new("cache").join(identity); + + if path.exists() { + Index::open_in_dir(path).unwrap() + } else { + Self::make_index(&path, fetcher) + } + } + + fn make_index(path: &Path, fetcher: &Fetcher) -> Index + where + T: Read + Seek + Send, + { + let start = Instant::now(); + print!("\r\rIndexing archive...\r\r"); + create_dir_all(path).unwrap(); + + let schema = Self::schema(); + let index = Index::create_in_dir(path, schema).unwrap(); + let mut writer = index.writer(1_073_741_824).unwrap(); + let mut buffer = String::with_capacity(1_048_576); + + let schema = index.schema(); + let identifier = schema.get_field("sid").unwrap(); + let content = schema.get_field("content").unwrap(); + let story_count = fetcher.iter().count() as f64; + + for (i, story) in fetcher.iter().enumerate() { + let progress = (i * 100) as f64 / story_count; + print!("\r\rIndexing archive... {progress:.2}%\r\r"); + + let cursor = Cursor::new(fetcher.read(story).unwrap()); + let mut epub = ZipArchive::new(cursor).unwrap(); + let mut document = TantivyDocument::default(); + + document.add_i64(identifier, story.id.into()); + + for i in 0..epub.len() { + let mut file = epub.by_index(i).unwrap(); + + if !file.name().ends_with(".html") { + continue; + } + + file.read_to_string(&mut buffer).unwrap(); + document.add_text(content, &buffer); + buffer.clear(); + } + + writer.add_document(document).unwrap(); + } + + print!("\r\rCommitting archive index...\r\r"); + stdout().flush().unwrap(); + + writer.commit().unwrap(); + writer.wait_merging_threads().unwrap(); + + let finish = (Instant::now() - start).as_secs(); + println!("Index generated in {finish} seconds."); + + index + } + + pub fn search(&self, text: &str) -> Vec<(i64, f32)> { + let reader = self + .index + .reader_builder() + .reload_policy(ReloadPolicy::OnCommitWithDelay) + .try_into() + .unwrap(); + + let schema = self.index.schema(); + let identifier = schema.get_field("sid").unwrap(); + let content = schema.get_field("content").unwrap(); + + let parser = QueryParser::for_index(&self.index, vec![content]); + let query = parser.parse_query(text).unwrap(); + + let searcher = reader.searcher(); + let limit = TopDocs::with_limit(32); + let docs = searcher.search(&query, &limit).unwrap(); + + docs.into_iter() + .map(|(score, address)| { + let doc: TantivyDocument = searcher.doc(address).unwrap(); + + match doc.get_first(identifier).map(|v| v.as_i64()) { + Some(Some(value)) => (value, score), + _ => panic!("Invalid story key type!"), + } + }) + .collect() + } + + pub fn parse(&self, text: &str) -> impl Fn(&Story) -> bool + Sync { + let mut sids: Vec<_> = self + .search(text) + .into_iter() + .filter(|(_, score)| *score > 10f32) + .map(|(sid, _)| sid) + .collect(); + + sids.sort(); + + move |story| sids.binary_search(&story.id.into()).is_ok() + } +} diff --git a/src/archive/fetcher.rs b/src/archive/fetcher.rs index 06f4737..6bc38c4 100644 --- a/src/archive/fetcher.rs +++ b/src/archive/fetcher.rs @@ -104,10 +104,26 @@ impl Fetcher { Ok(buf) } + pub fn identity(&self) -> Result { + let Ok(mut archive) = self.archive.lock() else { + return Err(Error::archive("Could not acquire fetcher lock")); + }; + + let Ok(index) = archive.by_name("index.json") else { + return Err(Error::archive("Could not open archive index")); + }; + + Ok(format!("{}", index.crc32())) + } + pub fn iter(&self) -> impl Iterator { self.index.iter() } + pub fn par_iter(&self) -> impl ParallelIterator { + self.index.par_iter() + } + pub fn filter(&self, function: &F) -> Vec<&Story> where F: Sync + Fn(&Story) -> bool,