Add initial HPM (Hugs Per Million (words)) counter

This commit is contained in:
Joakim Soderlund 2022-02-09 22:46:06 +01:00
parent e5fa23021f
commit f87ae9410f
5 changed files with 307 additions and 67 deletions

249
Cargo.lock generated
View file

@ -25,9 +25,9 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.0.1" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
@ -69,20 +69,19 @@ dependencies = [
[[package]] [[package]]
name = "chrono-english" name = "chrono-english"
version = "0.1.6" version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0be5180df5f7c41fc2416bc038bc8d78d44db8136c415b94ccbc95f523dc38e9" checksum = "f73d909da7eb4a7d88c679c3f5a1bc09d965754e0adb2e7627426cef96a00d6f"
dependencies = [ dependencies = [
"chrono", "chrono",
"scanlex", "scanlex",
"time",
] ]
[[package]] [[package]]
name = "clipboard-win" name = "clipboard-win"
version = "4.2.2" version = "4.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3db8340083d28acb43451166543b98c838299b7e0863621be53a338adceea0ed" checksum = "2f3e1238132dc01f081e1cbb9dace14e5ef4c3a51ee244bd982275fb514605db"
dependencies = [ dependencies = [
"error-code", "error-code",
"str-buf", "str-buf",
@ -90,19 +89,32 @@ dependencies = [
] ]
[[package]] [[package]]
name = "crc32fast" name = "console"
version = "1.3.0" version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "738c290dfaea84fc1ca15ad9c168d083b05a714e1efddd8edaab678dc28d2836" checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"terminal_size",
"winapi",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.1" version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"crossbeam-utils", "crossbeam-utils",
@ -121,9 +133,9 @@ dependencies = [
[[package]] [[package]]
name = "crossbeam-epoch" name = "crossbeam-epoch"
version = "0.9.5" version = "0.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" checksum = "c00d6d2ea26e8b151d99093005cb442fb9a37aeaca582a03ec70946f49ab5ed9"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"crossbeam-utils", "crossbeam-utils",
@ -134,9 +146,9 @@ dependencies = [
[[package]] [[package]]
name = "crossbeam-utils" name = "crossbeam-utils"
version = "0.8.5" version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"lazy_static", "lazy_static",
@ -148,6 +160,12 @@ version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]] [[package]]
name = "endian-type" name = "endian-type"
version = "0.1.2" version = "0.1.2"
@ -155,10 +173,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d"
[[package]] [[package]]
name = "error-code" name = "errno"
version = "2.3.0" version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5115567ac25674e0043e472be13d14e537f37ea8aa4bdc4aef0c89add1db1ff" checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
dependencies = [
"errno-dragonfly",
"libc",
"winapi",
]
[[package]]
name = "errno-dragonfly"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "error-code"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21"
dependencies = [ dependencies = [
"libc", "libc",
"str-buf", "str-buf",
@ -166,12 +205,12 @@ dependencies = [
[[package]] [[package]]
name = "fd-lock" name = "fd-lock"
version = "3.0.1" version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfc110fe50727d46a428eed832df40affe9bf74d077cac1bf3f2718e823f14c5" checksum = "fcef756dea9cf3db5ce73759cf0467330427a786b47711b8d6c97620d718ceb9"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "rustix",
"windows-sys", "windows-sys",
] ]
@ -197,6 +236,18 @@ dependencies = [
"rustyline", "rustyline",
] ]
[[package]]
name = "fimfareader-hpm"
version = "0.1.0"
dependencies = [
"chrono",
"fimfareader",
"indicatif",
"rayon",
"regex",
"zip",
]
[[package]] [[package]]
name = "fimfareader-query" name = "fimfareader-query"
version = "0.1.0" version = "0.1.0"
@ -237,10 +288,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]] [[package]]
name = "itoa" name = "indicatif"
version = "0.4.8" version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" checksum = "2d207dc617c7a380ab07ff572a6e52fa202a2a8f355860ac9c38e23f8196be1b"
dependencies = [
"console",
"lazy_static",
"number_prefix",
"rayon",
"regex",
]
[[package]]
name = "io-lifetimes"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ef6787e7f0faedc040f95716bdd0e62bcfcf4ba93da053b62dea2691c13864"
dependencies = [
"winapi",
]
[[package]]
name = "itoa"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
@ -263,9 +336,15 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.109" version = "0.2.117"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98a04dce437184842841303488f70d0188c5f51437d2a834dc097eafa909a01" checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c"
[[package]]
name = "linux-raw-sys"
version = "0.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95f5690fef754d905294c56f7ac815836f2513af966aa47f2e07ac79be07827f"
[[package]] [[package]]
name = "log" name = "log"
@ -312,9 +391,9 @@ dependencies = [
[[package]] [[package]]
name = "nix" name = "nix"
version = "0.23.0" version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f305c2c2e4c39a82f7bf0bf65fb557f9070ce06781d4f2454295cc34b1c43188" checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"cc", "cc",
@ -355,28 +434,40 @@ dependencies = [
[[package]] [[package]]
name = "num_cpus" name = "num_cpus"
version = "1.13.0" version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
dependencies = [ dependencies = [
"hermit-abi", "hermit-abi",
"libc", "libc",
] ]
[[package]] [[package]]
name = "proc-macro2" name = "number_prefix"
version = "1.0.32" version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "proc-macro2"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
dependencies = [ dependencies = [
"unicode-xid", "unicode-xid",
] ]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.10" version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
] ]
@ -434,10 +525,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]] [[package]]
name = "rustyline" name = "rustix"
version = "9.1.0" version = "0.32.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec400b53af4f6551ad23214ba08af344125785a0508228af83a84d498a5ce33" checksum = "7cee647393af53c750e15dcbf7781cdd2e550b246bde76e46c326e7ea3c73773"
dependencies = [
"bitflags",
"errno",
"io-lifetimes",
"libc",
"linux-raw-sys",
"winapi",
]
[[package]]
name = "rustyline"
version = "9.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db7826789c0e25614b03e5a54a0717a86f9ff6e6e5247f92b369472869320039"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"cfg-if", "cfg-if",
@ -458,9 +563,9 @@ dependencies = [
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.6" version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568" checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]] [[package]]
name = "scanlex" name = "scanlex"
@ -476,18 +581,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.130" version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.130" version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -496,9 +601,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.72" version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0ffa0837f2dfa6fb90868c2b5468cad482e175f7dad97e7421951e663f2b527" checksum = "d23c1ba4cf0efd44be32017709280b32d1cea5c3f1275c3b6d9e8bc54f758085"
dependencies = [ dependencies = [
"itoa", "itoa",
"ryu", "ryu",
@ -507,9 +612,9 @@ dependencies = [
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "1.7.0" version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
[[package]] [[package]]
name = "static_assertions" name = "static_assertions"
@ -525,15 +630,25 @@ checksum = "d44a3643b4ff9caf57abcee9c2c621d6c03d9135e0d8b589bd9afb5992cb176a"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.82" version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"unicode-xid", "unicode-xid",
] ]
[[package]]
name = "terminal_size"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
dependencies = [
"libc",
"winapi",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.30" version = "1.0.30"
@ -567,9 +682,9 @@ dependencies = [
[[package]] [[package]]
name = "unicode-segmentation" name = "unicode-segmentation"
version = "1.8.0" version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
@ -591,9 +706,9 @@ checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.3" version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]] [[package]]
name = "wasi" name = "wasi"
@ -625,9 +740,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.28.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82ca39602d5cbfa692c4b67e3bcbb2751477355141c1ed434c94da4186836ff6" checksum = "030b7ff91626e57a05ca64a07c481973cbb2db774e4852c9c7ca342408c6a99a"
dependencies = [ dependencies = [
"windows_aarch64_msvc", "windows_aarch64_msvc",
"windows_i686_gnu", "windows_i686_gnu",
@ -638,33 +753,33 @@ dependencies = [
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.28.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52695a41e536859d5308cc613b4a022261a274390b25bd29dfff4bf08505f3c2" checksum = "29277a4435d642f775f63c7d1faeb927adba532886ce0287bd985bffb16b6bca"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.28.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f54725ac23affef038fecb177de6c9bf065787c2f432f79e3c373da92f3e1d8a" checksum = "1145e1989da93956c68d1864f32fb97c8f561a8f89a5125f6a2b7ea75524e4b8"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.28.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d5158a43cc43623c0729d1ad6647e62fa384a3d135fd15108d37c683461f64" checksum = "d4a09e3a0d4753b73019db171c1339cd4362c8c44baf1bcea336235e955954a6"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.28.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc31f409f565611535130cfe7ee8e6655d3fa99c1c61013981e491921b5ce954" checksum = "8ca64fcb0220d58db4c119e050e7af03c69e6f4f415ef69ec1773d9aab422d5a"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.28.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f2b8c7cbd3bfdddd9ab98769f9746a7fad1bca236554cd032b78d768bc0e89f" checksum = "08cabc9f0066848fef4bc6a1c1668e6efce38b661d2aeec75d18d8617eebb5f1"
[[package]] [[package]]
name = "zip" name = "zip"

View file

@ -7,6 +7,7 @@ edition = "2021"
[workspace] [workspace]
members = [ members = [
"cli", "cli",
"hpm",
"query", "query",
] ]

26
hpm/Cargo.toml Normal file
View file

@ -0,0 +1,26 @@
[package]
name = "fimfareader-hpm"
version = "0.1.0"
authors = ["Joakim Soderlund <joakim.soderlund@gmail.com>"]
edition = "2021"
[dependencies.fimfareader]
path = ".."
[dependencies.chrono]
version = "*"
[dependencies.indicatif]
version = "*"
features = ["rayon"]
[dependencies.rayon]
version = "*"
[dependencies.regex]
version = "*"
[dependencies.zip]
version = "*"
features = ["deflate"]
default-features = false

94
hpm/src/main.rs Normal file
View file

@ -0,0 +1,94 @@
use std::collections::HashMap;
use std::env::args;
use std::io::Cursor;
use std::io::Read;
use chrono::prelude::*;
use fimfareader::prelude::*;
use rayon::prelude::*;
use indicatif::ParallelProgressIterator;
use regex::Regex;
use regex::RegexBuilder;
use zip::ZipArchive;
// TODO: More varied and less literal matches.
const PATTERN: &str = "[^a-z]hug(s|ged|ging)?[^a-z]";
struct Stat {
date: DateTime<Utc>,
count: u64,
words: u64,
}
fn count(regex: &Regex, story: &Story, data: Vec<u8>) -> Vec<Stat> {
let mut archive = ZipArchive::new(Cursor::new(data)).unwrap();
// TODO: Statistics per chapter.
let date = match story.date_published {
Some(published) => published,
None => return Vec::new(),
};
let mut matches = 0;
for i in 0..archive.len() {
let mut file = archive.by_index(i).unwrap();
let mut data = String::with_capacity(file.size() as usize);
file.read_to_string(&mut data).unwrap();
matches += regex.find_iter(&data).count();
}
let count = matches as u64;
let words = story.num_words as u64;
vec![Stat { date, count, words }]
}
fn main() {
let argv = args().collect::<Vec<String>>();
if argv.len() != 2 {
eprintln!("Usage: {} <ARCHIVE>", argv[0]);
std::process::exit(1);
}
let fetcher = Fetcher::new(&argv[1]).unwrap();
let pattern = RegexBuilder::new(PATTERN)
.case_insensitive(true)
.build()
.unwrap();
let stats = fetcher
.index()
.par_iter()
.progress_count(fetcher.index().len() as u64)
.map(|story| (story, fetcher.read(story).unwrap()))
.flat_map_iter(|(story, data)| count(&pattern, story, data))
.collect::<Vec<Stat>>();
// TODO: Finer granularity for better graphing.
let mut yearly = HashMap::<i32, (u64, u64)>::new();
for stat in stats {
let year = stat.date.year();
let value = yearly.remove(&year).unwrap_or_else(|| (0, 0));
yearly.insert(year, (value.0 + stat.count, value.1 + stat.words));
}
let mut yearly = yearly.into_iter().collect::<Vec<_>>();
yearly.sort();
for (year, (count, words)) in yearly.into_iter() {
let modifier = 1_000_000f64 / words as f64;
let hpm = modifier * count as f64;
println!("{year}: {hpm:.04}");
}
}

View file

@ -94,6 +94,10 @@ impl<T: Read + Seek> Fetcher<T> {
Ok(buf) Ok(buf)
} }
pub fn index(&self) -> &Vec<Story> {
&self.index
}
pub fn iter(&self) -> impl Iterator<Item = &Story> { pub fn iter(&self) -> impl Iterator<Item = &Story> {
self.index.iter() self.index.iter()
} }