From 83e5fdb0a85b10065ee628d2fae58069701a8739 Mon Sep 17 00:00:00 2001 From: Joakim Soderlund Date: Sun, 8 Mar 2020 00:54:56 +0100 Subject: [PATCH] Use Tantivy to query story content --- Cargo.lock | 1003 +++++++++++++++++++++++++++++++++++----- cli/Cargo.toml | 11 + cli/src/main.rs | 127 ++++- src/archive/fetcher.rs | 16 + 4 files changed, 1019 insertions(+), 138 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cc80605..b5b4222 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -17,6 +28,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "arrayvec" version = "0.5.2" @@ -24,10 +44,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] -name = "autocfg" -version = "1.0.1" +name = "async-trait" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "96cf8829f67d2eab0b2dfa42c5d0ef737e0724e4a82b01b3e292456202b19716" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" [[package]] name = "bitflags" @@ -35,6 +72,21 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitpacking" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" +dependencies = [ + "crunchy", +] + +[[package]] +name = "bumpalo" +version = "3.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" + [[package]] name = "byteorder" version = "1.4.3" @@ -43,9 +95,15 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cc" -version = "1.0.72" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "census" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5927edd8345aef08578bcbb4aea7314f340d80c7f4931f99fbeb40b99d8f5060" [[package]] name = "cfg-if" @@ -63,26 +121,25 @@ dependencies = [ "num-integer", "num-traits", "serde", - "time", + "time 0.1.44", "winapi", ] [[package]] name = "chrono-english" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0be5180df5f7c41fc2416bc038bc8d78d44db8136c415b94ccbc95f523dc38e9" +checksum = "f73d909da7eb4a7d88c679c3f5a1bc09d965754e0adb2e7627426cef96a00d6f" dependencies = [ "chrono", "scanlex", - "time", ] [[package]] name = "clipboard-win" -version = "4.2.2" +version = "4.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3db8340083d28acb43451166543b98c838299b7e0863621be53a338adceea0ed" +checksum = "2f3e1238132dc01f081e1cbb9dace14e5ef4c3a51ee244bd982275fb514605db" dependencies = [ "error-code", "str-buf", @@ -90,19 +147,28 @@ dependencies = [ ] [[package]] -name = "crc32fast" -version = "1.3.0" +name = "combine" +version = "4.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "738c290dfaea84fc1ca15ad9c168d083b05a714e1efddd8edaab678dc28d2836" +checksum = "2a604e93b79d1808327a6fca85a6f2d69de66461e7620f5a4cbf5fb4d1d7c948" +dependencies = [ + "memchr", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ "cfg-if", ] [[package]] name = "crossbeam-channel" -version = "0.5.1" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53" dependencies = [ "cfg-if", "crossbeam-utils", @@ -121,10 +187,11 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.5" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" +checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" dependencies = [ + "autocfg", "cfg-if", "crossbeam-utils", "lazy_static", @@ -134,14 +201,42 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" +checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" dependencies = [ "cfg-if", "lazy_static", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "ctor" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f877be4f7c9f246b183111634f75baa039715e3f46ce860677d3b19a69fb229c" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "diff" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" + +[[package]] +name = "downcast-rs" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" + [[package]] name = "either" version = "1.6.1" @@ -155,23 +250,80 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" [[package]] -name = "error-code" -version = "2.3.0" +name = "errno" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5115567ac25674e0043e472be13d14e537f37ea8aa4bdc4aef0c89add1db1ff" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "error-code" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21" dependencies = [ "libc", "str-buf", ] [[package]] -name = "fd-lock" -version = "3.0.1" +name = "fail" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfc110fe50727d46a428eed832df40affe9bf74d077cac1bf3f2718e823f14c5" +checksum = "ec3245a0ca564e7f3c797d20d833a6870f57a728ac967d5225b3ffdef4465011" +dependencies = [ + "lazy_static", + "log", + "rand", +] + +[[package]] +name = "fastdivide" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04" + +[[package]] +name = "fastfield_codecs" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dff2ee906bb242438742b5ecac909c0719cbd9db546f6c3d9ac86bd93f5b07e" +dependencies = [ + "tantivy-bitpacker", + "tantivy-common", +] + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "fd-lock" +version = "3.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e245f4c8ec30c6415c56cb132c07e69e74f1942f6b4a4061da748b49f486ca" dependencies = [ "cfg-if", - "libc", + "rustix", "windows-sys", ] @@ -194,7 +346,10 @@ version = "0.1.0" dependencies = [ "fimfareader", "fimfareader-query", + "rayon", "rustyline", + "tantivy", + "zip", ] [[package]] @@ -211,14 +366,61 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" +checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "generator" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1d9279ca822891c1a4dae06d185612cf8fc6acfe5dff37781b41297811b12ee" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "winapi", +] + +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" dependencies = [ "cfg-if", - "crc32fast", "libc", - "miniz_oxide", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", ] [[package]] @@ -237,10 +439,52 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] -name = "itoa" -version = "0.4.8" +name = "htmlescape" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "io-lifetimes" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9448015e586b611e5d322f6703812bbca2f1e709d5773ecd38ddb4e3bb649504" + +[[package]] +name = "itertools" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + +[[package]] +name = "js-sys" +version = "0.3.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671a26f820db17c2a2750743f1dd03bafd15b98c9f30c7c2628c024c05d73397" +dependencies = [ + "wasm-bindgen", +] [[package]] name = "lazy_static" @@ -248,6 +492,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + [[package]] name = "lexical-core" version = "0.7.6" @@ -263,24 +513,86 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.109" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98a04dce437184842841303488f70d0188c5f51437d2a834dc097eafa909a01" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "linux-raw-sys" +version = "0.0.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" [[package]] name = "log" -version = "0.4.14" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] [[package]] -name = "memchr" -version = "2.4.1" +name = "loom" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lru" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8015d95cb7b2ddd3c0d32ca38283ceb1eea09b4713ee380bceb942d85a244228" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "lz4_flex" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74141c8af4bb8136dafb5705826bdd9dce823021db897c1129191804140ddf84" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "measure_time" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56220900f1a0923789ecd6bf25fbae8af3b2f1ff3e9e297fc9b6b8674dd4d852" +dependencies = [ + "instant", + "log", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memmap2" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5172b50c23043ff43dd53e51392f36519d9b35a8f3a410d30ece5d1aedd58ae" +dependencies = [ + "libc", +] [[package]] name = "memoffset" @@ -293,12 +605,20 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.4.4" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" dependencies = [ "adler", - "autocfg", +] + +[[package]] +name = "murmurhash32" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a" +dependencies = [ + "byteorder", ] [[package]] @@ -312,9 +632,9 @@ dependencies = [ [[package]] name = "nix" -version = "0.23.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f305c2c2e4c39a82f7bf0bf65fb557f9070ce06781d4f2454295cc34b1c43188" +checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" dependencies = [ "bitflags", "cc", @@ -336,9 +656,9 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", @@ -346,37 +666,103 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ "hermit-abi", "libc", ] [[package]] -name = "proc-macro2" -version = "1.0.32" +name = "num_threads" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" dependencies = [ - "unicode-xid", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" + +[[package]] +name = "oneshot" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3b8d98df258e762e901eb4349d66d5a5f5a7a994db8df82ff596011773be535" +dependencies = [ + "loom", +] + +[[package]] +name = "output_vt100" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +dependencies = [ + "winapi", +] + +[[package]] +name = "ownedbytes" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2981bd7cfb2a70e6c50083c60561275a269fc7458f151c53b126ec1b15cc040" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + +[[package]] +name = "pretty_assertions" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c89f989ac94207d048d92db058e4f6ec7342b0971fc58d1271ca148b799b3563" +dependencies = [ + "ansi_term", + "ctor", + "diff", + "output_vt100", +] + +[[package]] +name = "proc-macro2" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +dependencies = [ + "unicode-ident", ] [[package]] name = "quote" -version = "1.0.10" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ "proc-macro2", ] @@ -392,10 +778,40 @@ dependencies = [ ] [[package]] -name = "rayon" -version = "1.5.1" +name = "rand" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" dependencies = [ "autocfg", "crossbeam-deque", @@ -405,39 +821,101 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "lazy_static", "num_cpus", ] [[package]] -name = "regex" -version = "1.5.4" +name = "redox_syscall" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.6.26", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.26", ] [[package]] name = "regex-syntax" -version = "0.6.25" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +checksum = "8e931c58b93d86f080c734bfd2bce7dd0079ae2331235818133c8be7f422e20e" + +[[package]] +name = "regex-syntax" +version = "0.6.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "rustix" +version = "0.34.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2079c267b8394eb529872c3cf92e181c378b41fea36e68130357b52493701d2e" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "winapi", +] + +[[package]] +name = "rustversion" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" [[package]] name = "rustyline" -version = "9.1.0" +version = "9.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec400b53af4f6551ad23214ba08af344125785a0508228af83a84d498a5ce33" +checksum = "db7826789c0e25614b03e5a54a0717a86f9ff6e6e5247f92b369472869320039" dependencies = [ "bitflags", "cfg-if", @@ -458,9 +936,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.6" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" [[package]] name = "scanlex" @@ -468,6 +946,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "088c5d71572124929ea7549a8ce98e1a6fd33d0a38367b09027b382e67c033db" +[[package]] +name = "scoped-tls" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" + [[package]] name = "scopeguard" version = "1.1.0" @@ -476,18 +960,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" -version = "1.0.130" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.130" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" dependencies = [ "proc-macro2", "quote", @@ -496,9 +980,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.72" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ffa0837f2dfa6fb90868c2b5468cad482e175f7dad97e7421951e663f2b527" +checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" dependencies = [ "itoa", "ryu", @@ -506,10 +990,25 @@ dependencies = [ ] [[package]] -name = "smallvec" -version = "1.7.0" +name = "sharded-slab" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "static_assertions" @@ -519,41 +1018,153 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "str-buf" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d44a3643b4ff9caf57abcee9c2c621d6c03d9135e0d8b589bd9afb5992cb176a" +checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" [[package]] name = "syn" -version = "1.0.82" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" +checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", +] + +[[package]] +name = "tantivy" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d3da9d47a874779d5a97c5f01c2d0a85fc96e38ed6d51ad21a3cf6d6eb8c47" +dependencies = [ + "async-trait", + "base64", + "bitpacking", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fail", + "fastdivide", + "fastfield_codecs", + "fnv", + "fs2", + "htmlescape", + "itertools", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "murmurhash32", + "num_cpus", + "once_cell", + "oneshot", + "ownedbytes", + "pretty_assertions", + "rayon", + "regex", + "rust-stemmers", + "serde", + "serde_json", + "smallvec", + "stable_deref_trait", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tempfile", + "thiserror", + "time 0.3.9", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f95c862d26a32e1fdb161ab139c5a3bba221f5fac512af40034e13e25f3131" + +[[package]] +name = "tantivy-common" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec19155b3ed963ae1653bc4995ab8175281f68400c39081205ae25b53fd9750" +dependencies = [ + "byteorder", + "ownedbytes", +] + +[[package]] +name = "tantivy-fst" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb20cdc0d83e9184560bdde9cd60142dbb4af2e0f770e88fce45770495224205" +dependencies = [ + "byteorder", + "regex-syntax 0.4.2", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6bbdce99f2b8dcbe24ee25acffb36a2b45b31344531374df1008f6a64bb583" +dependencies = [ + "combine", + "once_cell", + "regex", +] + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", ] [[package]] name = "thiserror" -version = "1.0.30" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.30" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "thread_local" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + [[package]] name = "time" version = "0.1.44" @@ -566,10 +1177,90 @@ dependencies = [ ] [[package]] -name = "unicode-segmentation" -version = "1.8.0" +name = "time" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" +checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" +dependencies = [ + "itoa", + "libc", + "num_threads", + "serde", +] + +[[package]] +name = "tracing" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0ecdcb44a79f0fe9844f0c4f33a342cbcbb5117de8001e6ba0dc2351327d09" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6b8ad3567499f98a1db7a752b07a7c8c7c7c34c332ec00effb2b0027974b7c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f" +dependencies = [ + "lazy_static", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bc28f93baff38037f64e6f43d34cfa1605f27a49c34e8a04c5e78b0babf2596" +dependencies = [ + "ansi_term", + "lazy_static", + "matchers", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" + +[[package]] +name = "unicode-segmentation" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" [[package]] name = "unicode-width" @@ -578,10 +1269,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" [[package]] -name = "unicode-xid" -version = "0.2.2" +name = "utf8-ranges" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" [[package]] name = "utf8parse" @@ -590,10 +1281,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" [[package]] -name = "version_check" -version = "0.9.3" +name = "uuid" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +checksum = "c6d5d669b51467dcf7b2f1a796ce0f955f05f01cafda6c19d6e95f730df29238" +dependencies = [ + "getrandom", + "serde", +] + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" @@ -601,6 +1308,70 @@ version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +[[package]] +name = "wasm-bindgen" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27370197c907c55e3f1a9fbe26f44e937fe6451368324e009cba39e139dc08ad" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53e04185bfa3a779273da532f5025e33398409573f348985af9a1cbf3774d3f4" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17cae7ff784d7e83a2fe7611cfe766ecf034111b49deb850a3dc7699c08251f5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99ec0dc7a4756fffc231aab1b9f2f578d23cd391390ab27f952ae0c9b3ece20b" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d554b7f530dee5964d9a9468d95c1f8b8acae4f282807e7d27d4b03099a46744" + +[[package]] +name = "web-sys" +version = "0.3.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" @@ -625,9 +1396,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82ca39602d5cbfa692c4b67e3bcbb2751477355141c1ed434c94da4186836ff6" +checksum = "030b7ff91626e57a05ca64a07c481973cbb2db774e4852c9c7ca342408c6a99a" dependencies = [ "windows_aarch64_msvc", "windows_i686_gnu", @@ -638,42 +1409,42 @@ dependencies = [ [[package]] name = "windows_aarch64_msvc" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52695a41e536859d5308cc613b4a022261a274390b25bd29dfff4bf08505f3c2" +checksum = "29277a4435d642f775f63c7d1faeb927adba532886ce0287bd985bffb16b6bca" [[package]] name = "windows_i686_gnu" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f54725ac23affef038fecb177de6c9bf065787c2f432f79e3c373da92f3e1d8a" +checksum = "1145e1989da93956c68d1864f32fb97c8f561a8f89a5125f6a2b7ea75524e4b8" [[package]] name = "windows_i686_msvc" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d5158a43cc43623c0729d1ad6647e62fa384a3d135fd15108d37c683461f64" +checksum = "d4a09e3a0d4753b73019db171c1339cd4362c8c44baf1bcea336235e955954a6" [[package]] name = "windows_x86_64_gnu" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc31f409f565611535130cfe7ee8e6655d3fa99c1c61013981e491921b5ce954" +checksum = "8ca64fcb0220d58db4c119e050e7af03c69e6f4f415ef69ec1773d9aab422d5a" [[package]] name = "windows_x86_64_msvc" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f2b8c7cbd3bfdddd9ab98769f9746a7fad1bca236554cd032b78d768bc0e89f" +checksum = "08cabc9f0066848fef4bc6a1c1668e6efce38b661d2aeec75d18d8617eebb5f1" [[package]] name = "zip" -version = "0.5.13" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815" +checksum = "bf225bcf73bb52cbb496e70475c7bd7a3f769df699c0020f6c7bd9a96dcf0b8d" dependencies = [ "byteorder", "crc32fast", + "crossbeam-utils", "flate2", - "thiserror", ] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index d60f343..bb725a9 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -13,3 +13,14 @@ path = "../query" [dependencies.rustyline] default-features = false version = "*" + +[dependencies.rayon] +version = "*" + +[dependencies.tantivy] +version = "*" + +[dependencies.zip] +version = "*" +features = ["deflate"] +default-features = false diff --git a/cli/src/main.rs b/cli/src/main.rs index b632ec8..6d49c2b 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,12 +1,30 @@ //! Main module. use std::env::args; +use std::io::BufReader; +use std::io::Cursor; +use std::io::Read; +use std::io::Seek; +use std::path::Path; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; use std::time::Instant; +use rayon::iter::ParallelIterator; use rustyline::Editor; +use zip::read::ZipArchive; + +use tantivy::collector::TopDocs; +use tantivy::directory::MmapDirectory; +use tantivy::query::QueryParser; +use tantivy::schema; +use tantivy::schema::Document; +use tantivy::schema::Schema; +use tantivy::schema::Value; +use tantivy::Index; +use tantivy::ReloadPolicy; use fimfareader::prelude::*; -use fimfareader_query::parse; fn exit(error: Error) -> ! { eprintln!("{}", error); @@ -14,6 +32,68 @@ fn exit(error: Error) -> ! { std::process::exit(1) } +fn load_index(schema: Schema, fetcher: &Fetcher) -> Index +where + T: Read + Seek + Send, +{ + let identity = fetcher.identity().unwrap(); + let directory = Path::new("search").join(identity); + + if directory.exists() { + let store = MmapDirectory::open(&directory).unwrap(); + return Index::open_or_create(store, schema).unwrap(); + } + + std::fs::create_dir_all(&directory).unwrap(); + let store = MmapDirectory::open(&directory).unwrap(); + let index = Index::create(store, schema).unwrap(); + + let schema = index.schema(); + let sid = schema.get_field("sid").unwrap(); + let content = schema.get_field("content").unwrap(); + let mut writer = index.writer(250_000_000).unwrap(); + + let counter = AtomicUsize::new(0); + let total = fetcher.iter().count(); + let start = Instant::now(); + + fetcher.par_iter().for_each(|story| { + let mut doc = Document::default(); + doc.add_i64(sid, story.id); + + let data = fetcher.read(story).unwrap(); + let count = counter.fetch_add(1, Ordering::SeqCst); + let mut arch = ZipArchive::new(Cursor::new(data)).unwrap(); + let mut text = String::with_capacity(1_048_576); + + if (count % 16) == 0 { + let percentage = (count as f64 / total as f64) * 100f64; + print!("\rIndexing archive... {:.2}%", percentage); + } + + for i in 0..arch.len() { + let file = arch.by_index(i).unwrap(); + + if !file.name().ends_with(".html") { + continue; + } + + BufReader::new(file).read_to_string(&mut text).unwrap(); + doc.add_text(content, &text); + text.clear(); + } + + writer.add_document(doc); + }); + + writer.commit().unwrap(); + + let finish = (Instant::now() - start).as_secs(); + println!("\rIndex generated in {} seconds.", finish); + + index +} + fn main() { let argv = args().collect::>(); let mut editor = Editor::<()>::new(); @@ -34,33 +114,36 @@ fn main() { println!("Finished loading in {} milliseconds.", finish); println!("The archive contains {} stories.", count); + let mut builder = Schema::builder(); + let sid = builder.add_i64_field("sid", schema::INDEXED | schema::STORED); + let content = builder.add_text_field("content", schema::TEXT); + let index = load_index(builder.build(), &fetcher); + + let reader = index + .reader_builder() + .reload_policy(ReloadPolicy::OnCommit) + .try_into() + .unwrap(); + + let searcher = reader.searcher(); + let parser = QueryParser::for_index(&index, vec![content]); + while let Ok(line) = editor.readline(">>> ") { editor.add_history_entry(&line); - let filter = match parse(&line) { - Ok(filter) => filter, - Err(error) => { - println!("{}", error); - continue; - } - }; + let limit = TopDocs::with_limit(16); + let query = parser.parse_query(&line).unwrap(); + let docs = searcher.search(&query, &limit).unwrap(); - let start = Instant::now(); - let stories = fetcher.filter(&filter); - let finish = (Instant::now() - start).as_millis(); - let count = stories.len(); + for (score, address) in docs { + let doc = searcher.doc(address).unwrap(); - println!("Found {} stories in {} milliseconds!", count, finish); + let story = match doc.get_first(sid).unwrap() { + Value::I64(value) => fetcher.fetch(*value).unwrap(), + _ => panic!("Invalid story key type!"), + }; - if count > 32 { - continue; - } - - for story in stories.iter() { - let key = &story.id; - let title = &story.title; - - println!("[{}] {}", key, title); + println!("{:02.0}% [{:06}] {}", score, story.id, story.title); } } } diff --git a/src/archive/fetcher.rs b/src/archive/fetcher.rs index fc014c5..dad670b 100644 --- a/src/archive/fetcher.rs +++ b/src/archive/fetcher.rs @@ -94,10 +94,26 @@ impl Fetcher { Ok(buf) } + pub fn identity(&self) -> Result { + let mut archive = self.archive.lock().map_err(|e| match e { + _ => Error::archive("Could not acquire fetcher lock"), + })?; + + let index = archive.by_name("index.json").map_err(|e| match e { + _ => Error::archive("Could not open archive index"), + })?; + + Ok(format!("{}", index.crc32())) + } + pub fn iter(&self) -> impl Iterator { self.index.iter() } + pub fn par_iter(&self) -> impl ParallelIterator { + self.index.par_iter() + } + pub fn filter(&self, function: &F) -> Vec<&Story> where F: Sync + Fn(&Story) -> bool,