From 501a50cb5566df1cc38c5f0c62f8e8d453e7a9ee Mon Sep 17 00:00:00 2001 From: Felisp Date: Mon, 1 Jan 2024 01:10:19 +0100 Subject: [PATCH 01/35] Added example slightly documented config --- res/ExampleConfig.edn | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 res/ExampleConfig.edn diff --git a/res/ExampleConfig.edn b/res/ExampleConfig.edn new file mode 100644 index 0000000..f379940 --- /dev/null +++ b/res/ExampleConfig.edn @@ -0,0 +1,33 @@ +;; :board-defaults is default config for every board and can be +;; overriden for every board +{:port 6969 + :default-board "/mlp/" ;Board to be used when no board=x param given + :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact]" + ;; /$board/catalog.json will be appended to this link + :target "https://api.4chan.org" + :boards-defaults {:refresh-rate 300 + :starting-page 7 + :default-chod 94 + ;; Commented parts bellow are still unimplemented + ;; Only download catalog when someone requests feed and cache is old + ;; Saves request to 4chan, usefull for borads that are checked rarely + ;; :lazy-load false + ;; Whether to allow regex search thru the threads (&qr= param) + ;; :regex-enable true + ;; Wheter to create cache by downloading whole catalog or every required + ;; one by one + ;; :request-type [:catalog] :pages + ;; If you want to do some preprocessing beforehand, you can override + ;; Target URL for the board, but the response must be samechan API would return + ;; :target-override + } + ;; List of all boards that are enabled for feed generation + ;; Yes they must be all listed manualy for now + :boards-enabled {"/mlp/" {} ;; Empty override map means that defaults are used + "/g/" {} + "/po/" {} + "/p/" {:starting-page 8 + :refresh-rate 1800} ;30 min + } + ;; When user requests board that is not enabled, this message is returned + } From 5551be7012504a23d655370cd30dea0da1c1a0d7 Mon Sep 17 00:00:00 2001 From: Felisp 
Date: Mon, 1 Jan 2024 14:58:05 +0100 Subject: [PATCH 02/35] Prepare for Config parsing and config extension --- res/ExampleConfig.edn | 1 + src/rss_thread_watch/core.clj | 26 ++++++++++++++++---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/res/ExampleConfig.edn b/res/ExampleConfig.edn index f379940..f94c566 100644 --- a/res/ExampleConfig.edn +++ b/res/ExampleConfig.edn @@ -3,6 +3,7 @@ {:port 6969 :default-board "/mlp/" ;Board to be used when no board=x param given :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact]" + :enable-board-listing true ;Whether to show list of enabled boards in /boards ;; /$board/catalog.json will be appended to this link :target "https://api.4chan.org" :boards-defaults {:refresh-rate 300 diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index a19555f..1d42065 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -13,20 +13,26 @@ ;; along with this program. If not, see . 
(ns rss-thread-watch.core - (:require [ring.adapter.jetty :as jetty] + (:require [clojure.java.io :As io] + [ring.adapter.jetty :as jetty] [ring.middleware.params :as rp] [rss-thread-watch.watcher :as watcher] [rss-thread-watch.feed-generator :as feed]) (:gen-class)) ;; Internal default config -(def CONFIG +(def CONFIG-DEFAULT "Internal default config" - {:target "https://api.4chan.org/mlp/catalog.json" ;Where to download catalog from - :starting-page 7 ;only monitor threads from this from this page and up - :refresh-delay (* 60 5) ;Redownload catalog every 5 mins - :port 6969 ;Listen on 6969 - }) + {:port 6969 + :default-board "/mlp/" + :enable-board-listing true + :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact]" + :target "https://api.4chan.org" + :boards-defaults {:refresh-rate 300 + :starting-page 7 + :default-chod 94} + :boards-enabled {"/mlp/" {} + "/g/" {}}}) (defn set-interval "Calls function every ms" @@ -36,14 +42,14 @@ (println "Recached") (catch Exception e (binding [*out* *err*] - (println "Error while updating cache: " e ", retrying in 5 minutes")))) + (println "Error while updating cache: " e ", retrying in " (/ ms 1000 60) " minutes")))) (Thread/sleep ms))))) (defn -main "Entry point, starts webserver" [& args] - (println "Starting on port: " (:port CONFIG) - "\nGonna recache every: " (:refresh-delay CONFIG) "s") + ;; Parse and validate config + ;; Think of a way to start repeated download for every catalog efficiently (set-interval (fn [] (println "Starting cache update") (watcher/update-thread-cache! 
(:target CONFIG) (:starting-page CONFIG))) From 43a9781893256d5bedfc0ffc0fa4b9edd00203f6 Mon Sep 17 00:00:00 2001 From: Felisp Date: Fri, 5 Jan 2024 01:45:41 +0100 Subject: [PATCH 03/35] Refactor generic functions to separate utils namespace, finaly bump version --- project.clj | 2 +- src/rss_thread_watch/feed_generator.clj | 11 +++---- src/rss_thread_watch/utils.clj | 41 +++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 8 deletions(-) create mode 100644 src/rss_thread_watch/utils.clj diff --git a/project.clj b/project.clj index 73ebdc5..7a7f6b9 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject rss-thread-watch "0.1.0-SNAPSHOT" +(defproject rss-thread-watch "0.3.0-SNAPSHOT" :description "RSS based thread watcher" :url "http://example.com/FIXME" :license {:name "AGPL-3.0-only" diff --git a/src/rss_thread_watch/feed_generator.clj b/src/rss_thread_watch/feed_generator.clj index 2ec8388..c5c4c63 100644 --- a/src/rss_thread_watch/feed_generator.clj +++ b/src/rss_thread_watch/feed_generator.clj @@ -18,15 +18,12 @@ [ring.util.response :as response] [clj-rss.core :as rss] [clojure.string :as s] - [rss-thread-watch.watcher :as watcher]) + [rss-thread-watch.watcher :as watcher] + [rss-thread-watch.utils :as ut]) (:gen-class)) -(defn indices - ;; https://stackoverflow.com/questions/8641305/find-index-of-an-element-matching-a-predicate-in-clojure - "Returns indexes of elements passing predicate" - [pred coll] - (keep-indexed #(when (pred %2) %1) coll)) + (defn new-guid-always "Generates always unique GUID for Feed item. @@ -56,7 +53,7 @@ (let [time-of-generation @watcher/time-of-cache guid-fn (if repeat? 
(fn [x] (new-guid-always x time-of-generation)) update-only-guid) - cache-start-index (first (indices (fn [x] (>= (:chod x) chod-treshold)) + cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold)) cache)) ;; So we don't have to search thru everything we have cached needed-cache-part (subvec cache cache-start-index) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj new file mode 100644 index 0000000..492677f --- /dev/null +++ b/src/rss_thread_watch/utils.clj @@ -0,0 +1,41 @@ +(ns rss-thread-watch.utils + "Util functions" + (:gen-class)) +;; ===== Generic functions ==== + +(defn indices + ;; https://stackoverflow.com/questions/8641305/find-index-of-an-element-matching-a-predicate-in-clojure + "Returns indexes of elements passing predicate" + [pred coll] + (keep-indexed #(when (pred %2) %1) coll)) + +;; ===== Macros ===== +(defmacro nil?-else + "Return x unless it's nil, the return y" + [x y] + `(let [result# ~x] + (if (nil? result#) + ~y + result#))) + +(defmacro when-else + "Evaluates tst, if it's true returns it's result. + If it's not, return else" + [tst else] + `(let [res# ~tst] + (if res# + res# + ~else))) + +(defmacro ret= + "compares two values using [=]. If the result is true + returns the value, else the result of [=]. + + Usefull with if-else" + [x y] + `(let [x# ~x + y# ~y + result# ~(= x y)] + (if result# + ~x + result#))) From 017c18112c3c3edbc7f67d4aa644dae68cf0dc9e Mon Sep 17 00:00:00 2001 From: Felisp Date: Fri, 5 Jan 2024 11:13:25 +0100 Subject: [PATCH 04/35] SYNC commit --- src/rss_thread_watch/core.clj | 36 ++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index 1d42065..f564bb4 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -13,7 +13,8 @@ ;; along with this program. If not, see . 
(ns rss-thread-watch.core - (:require [clojure.java.io :As io] + (:require [clojure.java.io :as io] + [clojure.edn :as edn] [ring.adapter.jetty :as jetty] [ring.middleware.params :as rp] [rss-thread-watch.watcher :as watcher] @@ -45,16 +46,41 @@ (println "Error while updating cache: " e ", retrying in " (/ ms 1000 60) " minutes")))) (Thread/sleep ms))))) +(defn load-config + "Attempts to load config from file [f]. + Returns loaded config map or nil if failed" + [f] + (let [fl (io/as-file f)] + (when (.exists fl) + (with-open [r (io/reader fl)] + (edn/read (java.io.PushbackReader. r)))))) + +(defn get-some-config + "Attempts to get config somehow, + first from command line argument + then from ./config.edn file + lastly uses default internal" + ;; args do not include path to executable so first arg + ;; should be config file + [cmd-args]) + +;; Todo: Add option to write default config to stdout +;; Todo: Discover config file if not arguments else +;; ./config.edn if not found spit error about +;; using default (defn -main "Entry point, starts webserver" [& args] ;; Parse and validate config ;; Think of a way to start repeated download for every catalog efficiently + (let [config (get-some-config args)]) + (println args) + (System/exit 0) (set-interval (fn [] (println "Starting cache update") - (watcher/update-thread-cache! (:target CONFIG) (:starting-page CONFIG))) - (* 1000 (:refresh-delay CONFIG))) - (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG) + (watcher/update-thread-cache! (:target config) (:starting-page config))) + (* 1000 (:refresh-delay config))) + (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT) :join? true})) ;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started @@ -62,7 +88,7 @@ "Development entry point" [] (jetty/run-jetty (rp/wrap-params #'feed/http-handler) - {:port (:port CONFIG) + {:port (:port CONFIG-DEFAULT) ;; Dont block REPL thread :join? 
false})) ;; (repl-main) From 9bb30b9863616010882cdf3664930f1e6b58fbb5 Mon Sep 17 00:00:00 2001 From: Felisp Date: Fri, 5 Jan 2024 14:14:47 +0100 Subject: [PATCH 05/35] Experimental when-else2 macro added, needs testing when I get online It seems to be working but I forgot everything about macros already lol --- src/rss_thread_watch/utils.clj | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 492677f..fa95d4f 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -27,6 +27,17 @@ res# ~else))) +;; Todo: Test this properly and if working, replace when-else with this +;; Todo: find a way to download clojure documentation for offline use +(defmacro when-else2 + "Evaluates [tst], if it's truthy value returns that value. + If it's not, execute everything in [else] and return last expr." + [tst & else] + `(let [res# ~tst] + (if res# + res# + (do ~@else)))) + (defmacro ret= "compares two values using [=]. If the result is true returns the value, else the result of [=]. 
From e230e33a55b3e63c5d84c9afc9d26914577ec480 Mon Sep 17 00:00:00 2001 From: Felisp Date: Fri, 5 Jan 2024 14:16:01 +0100 Subject: [PATCH 06/35] Add config loading from custom file, default file and internal config --- src/rss_thread_watch/core.clj | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index f564bb4..9b25b22 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -18,7 +18,8 @@ [ring.adapter.jetty :as jetty] [ring.middleware.params :as rp] [rss-thread-watch.watcher :as watcher] - [rss-thread-watch.feed-generator :as feed]) + [rss-thread-watch.feed-generator :as feed] + [rss-thread-watch.utils :as u]) (:gen-class)) ;; Internal default config @@ -62,17 +63,18 @@ lastly uses default internal" ;; args do not include path to executable so first arg ;; should be config file - [cmd-args]) + [cmd-args] + (let [file-to-try (u/nil?-else (first cmd-args) + "./config.edn")] + (u/when-else2 (load-config file-to-try) + (println "WARN: Using default internal config because suggessted " file-to-try " not found.") + CONFIG-DEFAULT))) ;; Todo: Add option to write default config to stdout -;; Todo: Discover config file if not arguments else -;; ./config.edn if not found spit error about -;; using default (defn -main "Entry point, starts webserver" [& args] - ;; Parse and validate config - ;; Think of a way to start repeated download for every catalog efficiently + ;; Todo: Think of a way to start repeated download for every catalog efficiently (let [config (get-some-config args)]) (println args) (System/exit 0) From bb02765233ece0e578d39631f90591f5c62d6714 Mon Sep 17 00:00:00 2001 From: Felisp Date: Fri, 5 Jan 2024 14:30:27 +0100 Subject: [PATCH 07/35] Added function for merging default board-config with custom parts Very important in the future. Needs mega testing, I even started writing tests because of this. 
--- src/rss_thread_watch/utils.clj | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index fa95d4f..13944fd 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -9,6 +9,18 @@ [pred coll] (keep-indexed #(when (pred %2) %1) coll)) +(defn map-deep-merge-missing + "Merges two maps but only keys missing from first map" + [m1 m2] + (into m1 + (for [k (keys m2)] + (let [val1 (get m1 k) + val2 (get m2 k)] + (if (and (map? val1) + (map? val2)) + {k (map-deep-merge-missing val1 val2)} + {k (nil?-else val2 val1)}))))) + ;; ===== Macros ===== (defmacro nil?-else "Return x unless it's nil, the return y" From c575d75de7c19339754dddb5f4dc58e89aacaa27 Mon Sep 17 00:00:00 2001 From: Felisp Date: Fri, 5 Jan 2024 14:31:44 +0100 Subject: [PATCH 08/35] SYNC commit Added incomplete tests for utils --- test/rss_thread_watch/utils_test.clj | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 test/rss_thread_watch/utils_test.clj diff --git a/test/rss_thread_watch/utils_test.clj b/test/rss_thread_watch/utils_test.clj new file mode 100644 index 0000000..332dc54 --- /dev/null +++ b/test/rss_thread_watch/utils_test.clj @@ -0,0 +1,12 @@ +(ns rss-thread-watch.utils-test + (:require [clojure.test :refer :all] + [rss-thread-watch.utils :refer :all])) + +(defn first-map {:a :b + :c "c" + + }) + +(deftest map-deep-merge-missing-test + (testing "Map deep merge missing" + (is ()))) From 008d609a8bdcd1ea7e92ef5e977245026abea03c Mon Sep 17 00:00:00 2001 From: Felisp Date: Sat, 6 Jan 2024 05:39:29 +0100 Subject: [PATCH 09/35] When-else2 should be correct, replaced original implementation What am i doing with my life, it's 5:37 in the morning ffs --- src/rss_thread_watch/core.clj | 2 +- src/rss_thread_watch/utils.clj | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index 9b25b22..c87da0c 
100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -66,7 +66,7 @@ [cmd-args] (let [file-to-try (u/nil?-else (first cmd-args) "./config.edn")] - (u/when-else2 (load-config file-to-try) + (u/when-else (load-config file-to-try) (println "WARN: Using default internal config because suggessted " file-to-try " not found.") CONFIG-DEFAULT))) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 13944fd..788852a 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -31,17 +31,6 @@ result#))) (defmacro when-else - "Evaluates tst, if it's true returns it's result. - If it's not, return else" - [tst else] - `(let [res# ~tst] - (if res# - res# - ~else))) - -;; Todo: Test this properly and if working, replace when-else with this -;; Todo: find a way to download clojure documentation for offline use -(defmacro when-else2 "Evaluates [tst], if it's truthy value returns that value. If it's not, execute everything in [else] and return last expr." 
[tst & else] From 7679844e084490173ebaad38328ea12d03d5871e Mon Sep 17 00:00:00 2001 From: Felisp Date: Sat, 6 Jan 2024 05:54:50 +0100 Subject: [PATCH 10/35] Move macros above functions so functions can use them --- src/rss_thread_watch/utils.clj | 39 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 788852a..98e3b20 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -1,25 +1,6 @@ (ns rss-thread-watch.utils "Util functions" (:gen-class)) -;; ===== Generic functions ==== - -(defn indices - ;; https://stackoverflow.com/questions/8641305/find-index-of-an-element-matching-a-predicate-in-clojure - "Returns indexes of elements passing predicate" - [pred coll] - (keep-indexed #(when (pred %2) %1) coll)) - -(defn map-deep-merge-missing - "Merges two maps but only keys missing from first map" - [m1 m2] - (into m1 - (for [k (keys m2)] - (let [val1 (get m1 k) - val2 (get m2 k)] - (if (and (map? val1) - (map? val2)) - {k (map-deep-merge-missing val1 val2)} - {k (nil?-else val2 val1)}))))) ;; ===== Macros ===== (defmacro nil?-else @@ -51,3 +32,23 @@ (if result# ~x result#))) + +;; ===== Generic functions ==== + +(defn indices + ;; https://stackoverflow.com/questions/8641305/find-index-of-an-element-matching-a-predicate-in-clojure + "Returns indexes of elements passing predicate" + [pred coll] + (keep-indexed #(when (pred %2) %1) coll)) + +(defn map-deep-merge-missing + "Merges two maps but only keys missing from first map" + [m1 m2] + (into m1 + (for [k (keys m2)] + (let [val1 (get m1 k) + val2 (get m2 k)] + (if (and (map? val1) + (map? 
val2)) + {k (map-deep-merge-missing val1 val2)} + {k (nil?-else val2 val1)}))))) From 68a74cf1b8bf3bf3ebc1e5d593ded47cc734f094 Mon Sep 17 00:00:00 2001 From: Felisp Date: Sat, 6 Jan 2024 06:05:21 +0100 Subject: [PATCH 11/35] Refactor map-deep-merge-missing -> map-apply-defaults; Tests for map-apply-defaults Needs more tests --- src/rss_thread_watch/utils.clj | 16 +++++++++------- test/rss_thread_watch/utils_test.clj | 14 ++++++++------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 98e3b20..847637b 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -41,13 +41,15 @@ [pred coll] (keep-indexed #(when (pred %2) %1) coll)) -(defn map-deep-merge-missing - "Merges two maps but only keys missing from first map" - [m1 m2] - (into m1 - (for [k (keys m2)] - (let [val1 (get m1 k) - val2 (get m2 k)] +(defn map-apply-defaults + "Apply default values from [defaults] to keys not present in [conf] + Order is very important. + Thus all missing values from config are replaced by defaults" + [conf defaults] + (into conf + (for [k (keys defaults)] + (let [val1 (get conf k) + val2 (get defaults k)] (if (and (map? val1) (map? 
val2)) {k (map-deep-merge-missing val1 val2)} diff --git a/test/rss_thread_watch/utils_test.clj b/test/rss_thread_watch/utils_test.clj index 332dc54..6673fee 100644 --- a/test/rss_thread_watch/utils_test.clj +++ b/test/rss_thread_watch/utils_test.clj @@ -2,11 +2,13 @@ (:require [clojure.test :refer :all] [rss-thread-watch.utils :refer :all])) -(defn first-map {:a :b - :c "c" - - }) +(def first-map {:a :b + :c "c"}) +(def empty-map {}) (deftest map-deep-merge-missing-test - (testing "Map deep merge missing" - (is ()))) + (testing "Default values in place of missing keys" + (is (= first-map (map-apply-defaults first-map empty-map)) + "No defaults should return conf map unchanged") + (is (= first-map (map-apply-defaults empty-map first-map)) + "Empty map should be completely replaced by defaults"))) From d1666da9466c671c2bd4ab610a406955255f665d Mon Sep 17 00:00:00 2001 From: Felisp Date: Sat, 6 Jan 2024 06:22:26 +0100 Subject: [PATCH 12/35] Make it actually compile And start repl, because it's annoying when that doesnt happen --- src/rss_thread_watch/core.clj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index c87da0c..bdd7cab 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -75,7 +75,7 @@ "Entry point, starts webserver" [& args] ;; Todo: Think of a way to start repeated download for every catalog efficiently - (let [config (get-some-config args)]) + (let [config (get-some-config args)] (println args) (System/exit 0) (set-interval (fn [] @@ -83,7 +83,7 @@ (watcher/update-thread-cache! (:target config) (:starting-page config))) (* 1000 (:refresh-delay config))) (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT) - :join? true})) + :join? 
true}))) ;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started (defn repl-main From d57175ecade1dfd897b08e56d4bf86a12bc37b3c Mon Sep 17 00:00:00 2001 From: Felisp Date: Mon, 8 Jan 2024 02:15:22 +0100 Subject: [PATCH 13/35] Add copyrights --- src/rss_thread_watch/utils.clj | 14 ++++++++++++++ test/rss_thread_watch/utils_test.clj | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 847637b..5265958 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -1,3 +1,17 @@ +;; Copyright (C) 2023 Felisp +;; +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU Affero General Public License as published by +;; the Free Software Foundation, version 3 of the License. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU Affero General Public License for more details. +;; +;; You should have received a copy of the GNU Affero General Public License +;; along with this program. If not, see . + (ns rss-thread-watch.utils "Util functions" (:gen-class)) diff --git a/test/rss_thread_watch/utils_test.clj b/test/rss_thread_watch/utils_test.clj index 6673fee..e90a305 100644 --- a/test/rss_thread_watch/utils_test.clj +++ b/test/rss_thread_watch/utils_test.clj @@ -1,3 +1,17 @@ +;; Copyright (C) 2023 Felisp +;; +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU Affero General Public License as published by +;; the Free Software Foundation, version 3 of the License. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU Affero General Public License for more details. +;; +;; You should have received a copy of the GNU Affero General Public License +;; along with this program. If not, see . + (ns rss-thread-watch.utils-test (:require [clojure.test :refer :all] [rss-thread-watch.utils :refer :all])) From 2325b154d1dc77300de3cbdf01b2185dc4ddf269 Mon Sep 17 00:00:00 2001 From: Felisp Date: Mon, 8 Jan 2024 02:24:16 +0100 Subject: [PATCH 14/35] Fix compilation --- src/rss_thread_watch/utils.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 5265958..5e28a87 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -66,5 +66,5 @@ val2 (get defaults k)] (if (and (map? val1) (map? val2)) - {k (map-deep-merge-missing val1 val2)} + {k (map-apply-defaults val1 val2)} {k (nil?-else val2 val1)}))))) From 67268acbc513921b12239e5fbd3256c2f600889c Mon Sep 17 00:00:00 2001 From: Felisp Date: Mon, 8 Jan 2024 03:01:42 +0100 Subject: [PATCH 15/35] Finish tests for ``map-apply-defaults`` and refactor internal names --- src/rss_thread_watch/utils.clj | 12 ++++---- test/rss_thread_watch/utils_test.clj | 42 +++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 5e28a87..0de1280 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -62,9 +62,9 @@ [conf defaults] (into conf (for [k (keys defaults)] - (let [val1 (get conf k) - val2 (get defaults k)] - (if (and (map? val1) - (map? val2)) - {k (map-apply-defaults val1 val2)} - {k (nil?-else val2 val1)}))))) + (let [conf-val (get conf k) + default-val (get defaults k)] + (if (and (map? conf-val) ; both are maps, we have to go level deeper + (map? 
default-val)) ; If only one is, we don't care cus then it's just assigment + {k (map-apply-defaults conf-val default-val)} + {k (nil?-else conf-val default-val)}))))) diff --git a/test/rss_thread_watch/utils_test.clj b/test/rss_thread_watch/utils_test.clj index e90a305..0115dce 100644 --- a/test/rss_thread_watch/utils_test.clj +++ b/test/rss_thread_watch/utils_test.clj @@ -16,13 +16,47 @@ (:require [clojure.test :refer :all] [rss-thread-watch.utils :refer :all])) -(def first-map {:a :b - :c "c"}) +(def first-map + "Example config map with two keys" + {:a :b + :c "c" + :nested {:fst 1 :scnd {:super :nested}}}) + +(def pony-map + "Map containing none of the items in map 1" + {:best-pony "Twilight Sparkle"}) + +(def conflicting-basic-merge (conj pony-map {:a 17 :c 15})) + +(def deep-pony-map {:a "x" + :c :something-else + :nested {:ponies "everywhere" + :fst 69}}) + (def empty-map {}) (deftest map-deep-merge-missing-test - (testing "Default values in place of missing keys" + (testing "Full and no-replace" (is (= first-map (map-apply-defaults first-map empty-map)) "No defaults should return conf map unchanged") (is (= first-map (map-apply-defaults empty-map first-map)) - "Empty map should be completely replaced by defaults"))) + "Empty map should be completely replaced by defaults")) + + (testing "Basic merge" + (is (= (conj pony-map first-map) (map-apply-defaults first-map pony-map)) + "When all keys unique, maps should be conjd") + (is (= (conj first-map pony-map) (map-apply-defaults first-map pony-map)) + "When all keys unique, maps should be conjd, order matters") + (is (= (conj first-map pony-map) (map-apply-defaults pony-map first-map)) + "When all keys unique, maps should be conjd, more order that matters") + (is (= (conj first-map pony-map) (map-apply-defaults first-map pony-map)) + "Conflicting basic merge")) + ;; Most important part, this is the reason we have the function in the first place + ;; Conj wont merge deep + (testing "Nested merge" + (is (= {:a :b 
+ :c "c" + :nested {:ponies "everywhere" + :fst 1 + :scnd {:super :nested}}} + (map-apply-defaults first-map deep-pony-map))))) From 8a627ce2560e4277d61c480c0cb4d00033351884 Mon Sep 17 00:00:00 2001 From: Felisp Date: Mon, 8 Jan 2024 03:22:50 +0100 Subject: [PATCH 16/35] Fixed incorrect test name --- test/rss_thread_watch/utils_test.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rss_thread_watch/utils_test.clj b/test/rss_thread_watch/utils_test.clj index 0115dce..c481037 100644 --- a/test/rss_thread_watch/utils_test.clj +++ b/test/rss_thread_watch/utils_test.clj @@ -35,7 +35,7 @@ (def empty-map {}) -(deftest map-deep-merge-missing-test +(deftest map-apply-defaults-test (testing "Full and no-replace" (is (= first-map (map-apply-defaults first-map empty-map)) "No defaults should return conf map unchanged") From 618304f5ebe041ef8d98cf24c0b03e7a8a2e07d7 Mon Sep 17 00:00:00 2001 From: Felisp Date: Mon, 8 Jan 2024 03:23:03 +0100 Subject: [PATCH 17/35] Add [fmap] for mapping over map --- src/rss_thread_watch/utils.clj | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 0de1280..0781220 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -68,3 +68,11 @@ (map? default-val)) ; If only one is, we don't care cus then it's just assigment {k (map-apply-defaults conf-val default-val)} {k (nil?-else conf-val default-val)}))))) + +(defn fmap [f m] + "Applies function [f] to every key and value in map [m] + Function signature should be (f [key value])." 
+ (into + (empty m) + (for [[key val] m] + [key (f key val)]))) From 039c48fe16f843081b48795218a3bca30e4fae5c Mon Sep 17 00:00:00 2001 From: Felisp Date: Mon, 8 Jan 2024 03:29:53 +0100 Subject: [PATCH 18/35] Add tests for fmap --- test/rss_thread_watch/utils_test.clj | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/rss_thread_watch/utils_test.clj b/test/rss_thread_watch/utils_test.clj index c481037..ccc31ad 100644 --- a/test/rss_thread_watch/utils_test.clj +++ b/test/rss_thread_watch/utils_test.clj @@ -60,3 +60,8 @@ :fst 1 :scnd {:super :nested}}} (map-apply-defaults first-map deep-pony-map))))) + +(deftest fmap-test + (testing "Applying function to values of map" + (is (= {:a 2 :b 3} (fmap (fn [k v] (inc v)) + {:a 1 :b 2}))))) From 6d08796568816e20894d3f787ae42bb3e2ab2005 Mon Sep 17 00:00:00 2001 From: Felisp Date: Mon, 8 Jan 2024 03:30:26 +0100 Subject: [PATCH 19/35] FIX typo, refactor config structure --- res/ExampleConfig.edn | 9 ++++----- src/rss_thread_watch/core.clj | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/res/ExampleConfig.edn b/res/ExampleConfig.edn index f94c566..5892e8f 100644 --- a/res/ExampleConfig.edn +++ b/res/ExampleConfig.edn @@ -4,11 +4,13 @@ :default-board "/mlp/" ;Board to be used when no board=x param given :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact]" :enable-board-listing true ;Whether to show list of enabled boards in /boards - ;; /$board/catalog.json will be appended to this link - :target "https://api.4chan.org" :boards-defaults {:refresh-rate 300 :starting-page 7 :default-chod 94 + ;; If you want to do some preprocessing beforehand, you can override + ;; Target URL for the board, but the response must be same 4chan API would return + ;; /$board/catalog.json will be appended to this link + :target "https://api.4chan.org" ;; Commented parts bellow are still unimplemented ;; Only download catalog when someone requests feed and cache is old 
;; Saves request to 4chan, usefull for borads that are checked rarely @@ -18,9 +20,6 @@ ;; Wheter to create cache by downloading whole catalog or every required ;; one by one ;; :request-type [:catalog] :pages - ;; If you want to do some preprocessing beforehand, you can override - ;; Target URL for the board, but the response must be samechan API would return - ;; :target-override } ;; List of all boards that are enabled for feed generation ;; Yes they must be all listed manualy for now diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index bdd7cab..79ff61a 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -28,7 +28,7 @@ {:port 6969 :default-board "/mlp/" :enable-board-listing true - :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact]" + :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact] and I may enable it for you" :target "https://api.4chan.org" :boards-defaults {:refresh-rate 300 :starting-page 7 From 693c47d46569eb1d56b3d54b018bad0aef112ee9 Mon Sep 17 00:00:00 2001 From: Felisp Date: Fri, 19 Jan 2024 00:55:42 +0100 Subject: [PATCH 20/35] Bump copyrights, sync --- src/rss_thread_watch/core.clj | 7 +++++-- src/rss_thread_watch/feed_generator.clj | 2 +- src/rss_thread_watch/utils.clj | 2 +- src/rss_thread_watch/watcher.clj | 2 +- test/rss_thread_watch/utils_test.clj | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index 79ff61a..c84131e 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -1,4 +1,4 @@ -;; Copyright (C) 2023 Felisp +;; Copyright (C) 2024 Felisp ;; ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU Affero General Public License as published by @@ -75,7 +75,10 @@ "Entry point, starts webserver" [& args] ;; Todo: 
Think of a way to start repeated download for every catalog efficiently - (let [config (get-some-config args)] + (let [config (get-some-config args) + expanded-config + + ] (println args) (System/exit 0) (set-interval (fn [] diff --git a/src/rss_thread_watch/feed_generator.clj b/src/rss_thread_watch/feed_generator.clj index c5c4c63..83b7438 100644 --- a/src/rss_thread_watch/feed_generator.clj +++ b/src/rss_thread_watch/feed_generator.clj @@ -1,4 +1,4 @@ -;; Copyright (C) 2023 Felisp +;; Copyright (C) 2024 Felisp ;; ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU Affero General Public License as published by diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index 0781220..782ae5c 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -1,4 +1,4 @@ -;; Copyright (C) 2023 Felisp +;; Copyright (C) 2024 Felisp ;; ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU Affero General Public License as published by diff --git a/src/rss_thread_watch/watcher.clj b/src/rss_thread_watch/watcher.clj index eaf2df8..5eba442 100644 --- a/src/rss_thread_watch/watcher.clj +++ b/src/rss_thread_watch/watcher.clj @@ -1,4 +1,4 @@ -;; Copyright (C) 2023 Felisp +;; Copyright (C) 2024 Felisp ;; ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU Affero General Public License as published by diff --git a/test/rss_thread_watch/utils_test.clj b/test/rss_thread_watch/utils_test.clj index ccc31ad..92525c3 100644 --- a/test/rss_thread_watch/utils_test.clj +++ b/test/rss_thread_watch/utils_test.clj @@ -1,4 +1,4 @@ -;; Copyright (C) 2023 Felisp +;; Copyright (C) 2024 Felisp ;; ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU Affero General Public License as published by From 22f6c54bf4fde676ff578c14fdb58f42b7e86bb9 Mon Sep 17 
00:00:00 2001 From: Felisp Date: Sun, 28 Jul 2024 14:56:55 +0200 Subject: [PATCH 21/35] Add some docs to example config --- res/ExampleConfig.edn | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/res/ExampleConfig.edn b/res/ExampleConfig.edn index 5892e8f..7ed8364 100644 --- a/res/ExampleConfig.edn +++ b/res/ExampleConfig.edn @@ -4,7 +4,7 @@ :default-board "/mlp/" ;Board to be used when no board=x param given :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact]" :enable-board-listing true ;Whether to show list of enabled boards in /boards - :boards-defaults {:refresh-rate 300 + :boards-defaults {:refresh-rate 300 ;how often new data should be downloaded in seconds :starting-page 7 :default-chod 94 ;; If you want to do some preprocessing beforehand, you can override @@ -14,7 +14,8 @@ ;; Commented parts bellow are still unimplemented ;; Only download catalog when someone requests feed and cache is old ;; Saves request to 4chan, usefull for borads that are checked rarely - ;; :lazy-load false + ;; Generally the better option, first request in :refresh-rate may take longer + :lazy-load true ;; Whether to allow regex search thru the threads (&qr= param) ;; :regex-enable true ;; Wheter to create cache by downloading whole catalog or every required From cd540fda369c8007f046819de0699f05eff477a1 Mon Sep 17 00:00:00 2001 From: Felisp Date: Sun, 28 Jul 2024 14:57:35 +0200 Subject: [PATCH 22/35] Config prep, but Mainly SYNC to desktop --- src/rss_thread_watch/core.clj | 19 ++++++++++++++++++- src/rss_thread_watch/watcher.clj | 12 +++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index c84131e..f14cd61 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -56,6 +56,21 @@ (with-open [r (io/reader fl)] (edn/read (java.io.PushbackReader. 
r)))))) +(defn config-fill-board-defaults + "Fills every enabled board with default values" + [conifg] + ;; Získat všechny klíče desek které musíme vyplnit + ;; Každý klíč mergnout s default mapou + (u/map-apply-defaults ) + (let [board-defaults (:boards-default config) + boards (keys (:boards-enabled config))] + (update-in config '(:boards-enabled) (fn [val] + (u/fmap (fn []) + val) ) + ;; just do fmap of boards + ()) + ) + (defn get-some-config "Attempts to get config somehow, first from command line argument @@ -67,8 +82,10 @@ (let [file-to-try (u/nil?-else (first cmd-args) "./config.edn")] (u/when-else (load-config file-to-try) - (println "WARN: Using default internal config because suggessted " file-to-try " not found.") + (println "WARN: Using default internal config because suggessted file: '" file-to-try "' not found.") CONFIG-DEFAULT))) +;;TODO: We want to copy default board-local config into every board, that way we won't have to always look +;; into defaults, but ;; Todo: Add option to write default config to stdout (defn -main diff --git a/src/rss_thread_watch/watcher.clj b/src/rss_thread_watch/watcher.clj index 5eba442..1a00299 100644 --- a/src/rss_thread_watch/watcher.clj +++ b/src/rss_thread_watch/watcher.clj @@ -19,10 +19,16 @@ (:gen-class)) (def chod-threads-cache - "Cached vector of threads that have CHanceOfDeath > configured" - (atom [])) + "Cached map of threads that have CHanceOfDeath > configured" + nil) -(def time-of-cache (atom 0)) +(def time-of-cache nil) + +(defn init-global-cache + "Initializes global cache of catalogs" + [config] + (keys (:boards-enabled config)) + ) (defn process-page "Procesess every thread in page, leaving only relevant information From 6391e115d1a26f2be7133cfe46f3d9876bb1f5db Mon Sep 17 00:00:00 2001 From: Felisp Date: Sun, 28 Jul 2024 18:26:38 +0200 Subject: [PATCH 23/35] Copy global boards-defaults to every board-config --- src/rss_thread_watch/core.clj | 29 ++++++++++++----------------- 1 file changed, 12 
insertions(+), 17 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index f14cd61..ee96881 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -57,19 +57,15 @@ (edn/read (java.io.PushbackReader. r)))))) (defn config-fill-board-defaults - "Fills every enabled board with default values" - [conifg] - ;; Získat všechny klíče desek které musíme vyplnit - ;; Každý klíč mergnout s default mapou - (u/map-apply-defaults ) - (let [board-defaults (:boards-default config) - boards (keys (:boards-enabled config))] - (update-in config '(:boards-enabled) (fn [val] - (u/fmap (fn []) - val) ) - ;; just do fmap of boards - ()) - ) + "Fills every enabled board with default config values" + [config] + (let [defaults (:boards-defaults config)] + (update-in config + '(:boards-enabled) + (fn [mp] + (u/fmap (fn [k v] + (u/map-apply-defaults v defaults)) + mp))))) (defn get-some-config "Attempts to get config somehow, @@ -79,13 +75,12 @@ ;; args do not include path to executable so first arg ;; should be config file [cmd-args] - (let [file-to-try (u/nil?-else (first cmd-args) + (config-fill-board-defaults + (let [file-to-try (u/nil?-else (first cmd-args) "./config.edn")] (u/when-else (load-config file-to-try) (println "WARN: Using default internal config because suggessted file: '" file-to-try "' not found.") - CONFIG-DEFAULT))) -;;TODO: We want to copy default board-local config into every board, that way we won't have to always look -;; into defaults, but + CONFIG-DEFAULT)))) ;; Todo: Add option to write default config to stdout (defn -main From 66b2b445dff8bef83dd47cc9a6bc62763fb68781 Mon Sep 17 00:00:00 2001 From: Felisp Date: Sun, 28 Jul 2024 18:31:46 +0200 Subject: [PATCH 24/35] Formating and incorrect docstring position Curse you Hy and your inversed do string --- src/rss_thread_watch/utils.clj | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rss_thread_watch/utils.clj 
b/src/rss_thread_watch/utils.clj index 782ae5c..daed328 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -53,7 +53,7 @@ ;; https://stackoverflow.com/questions/8641305/find-index-of-an-element-matching-a-predicate-in-clojure "Returns indexes of elements passing predicate" [pred coll] - (keep-indexed #(when (pred %2) %1) coll)) + (keep-indexed #(when (pred %2) %1) coll)) (defn map-apply-defaults "Apply default values from [defaults] to keys not present in [conf] @@ -69,9 +69,10 @@ {k (map-apply-defaults conf-val default-val)} {k (nil?-else conf-val default-val)}))))) -(defn fmap [f m] +(defn fmap "Applies function [f] to every key and value in map [m] Function signature should be (f [key value])." + [f m] (into (empty m) (for [[key val] m] From 98a58ca1c373db74c3ffe7db49590fc4fdffea65 Mon Sep 17 00:00:00 2001 From: Felisp Date: Sun, 28 Jul 2024 18:39:32 +0200 Subject: [PATCH 25/35] Fix unwanted case sensitivity as noted in #9 fixes: #9 --- src/rss_thread_watch/feed_generator.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rss_thread_watch/feed_generator.clj b/src/rss_thread_watch/feed_generator.clj index 83b7438..965524a 100644 --- a/src/rss_thread_watch/feed_generator.clj +++ b/src/rss_thread_watch/feed_generator.clj @@ -63,7 +63,7 @@ ;; Would be so much easier for user to figure out why is it showing ;; and it would solve the problem of super long titles (or OPs instead of titles) (when (some (fn [querry] - (s/includes? title querry)) + (s/includes? 
(s/lower-case title) (s/lower-case querry))) query-vec) t))) (reverse needed-cache-part))] From 8c1cfbed332b5b7e6e0865d9405185377e3ba31d Mon Sep 17 00:00:00 2001 From: Felisp Date: Tue, 30 Jul 2024 01:20:32 +0200 Subject: [PATCH 26/35] Update interla config to corespondent to Example This should be solved differently, who wants to keep updating this by hand --- src/rss_thread_watch/core.clj | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index ee96881..1543206 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -29,10 +29,11 @@ :default-board "/mlp/" :enable-board-listing true :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact] and I may enable it for you" - :target "https://api.4chan.org" :boards-defaults {:refresh-rate 300 :starting-page 7 - :default-chod 94} + :default-chod 94 + :target "https://api.4chan.org" + :lazy-load true} :boards-enabled {"/mlp/" {} "/g/" {}}}) From 8a7981c27a99a25328e46b1b066b4a8779d3814a Mon Sep 17 00:00:00 2001 From: Felisp Date: Tue, 30 Jul 2024 02:55:15 +0200 Subject: [PATCH 27/35] Implement multiboard and lazyloading as result, added todos And shitton of them --- project.clj | 2 +- src/rss_thread_watch/core.clj | 39 ++++++------ src/rss_thread_watch/feed_generator.clj | 46 ++++++++------ src/rss_thread_watch/watcher.clj | 81 +++++++++++++++++++------ 4 files changed, 111 insertions(+), 57 deletions(-) diff --git a/project.clj b/project.clj index 7a7f6b9..6438fc2 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject rss-thread-watch "0.3.0-SNAPSHOT" +(defproject rss-thread-watch "0.3.5-SNAPSHOT" :description "RSS based thread watcher" :url "http://example.com/FIXME" :license {:name "AGPL-3.0-only" diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index 1543206..bd47155 100644 --- a/src/rss_thread_watch/core.clj +++ 
b/src/rss_thread_watch/core.clj @@ -61,12 +61,13 @@ "Fills every enabled board with default config values" [config] (let [defaults (:boards-defaults config)] - (update-in config - '(:boards-enabled) - (fn [mp] - (u/fmap (fn [k v] - (u/map-apply-defaults v defaults)) - mp))))) + (dissoc (update-in config + '(:boards-enabled) + (fn [mp] + (u/fmap (fn [k v] + (u/map-apply-defaults v defaults)) + mp))) + :boards-defaults))) (defn get-some-config "Attempts to get config somehow, @@ -88,18 +89,20 @@ "Entry point, starts webserver" [& args] ;; Todo: Think of a way to start repeated download for every catalog efficiently - (let [config (get-some-config args) - expanded-config - - ] - (println args) - (System/exit 0) - (set-interval (fn [] - (println "Starting cache update") - (watcher/update-thread-cache! (:target config) (:starting-page config))) - (* 1000 (:refresh-delay config))) - (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT) - :join? true}))) + (let [config (get-some-config args)] + ;; Init the few globals we have + (reset! watcher/GLOBAL-CONFIG config) + (reset! feed/boards-enabled-cache (set (keys (get config :boards-enabled)))) + (reset! watcher/chod-threads-cache (watcher/generate-chod-cache-structure config)) + (println args) + (clojure.pprint/pprint config) + ;; Needs to be redone and probably removed from here + ;; (set-interval (fn [] + ;; (println "Starting cache update") + ;; (watcher/update-board-cache! (:target config) (:starting-page config))) + ;; (* 1000 (:refresh-delay config))) + (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT) + :join? 
true}))) ;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started (defn repl-main diff --git a/src/rss_thread_watch/feed_generator.clj b/src/rss_thread_watch/feed_generator.clj index 965524a..e619965 100644 --- a/src/rss_thread_watch/feed_generator.clj +++ b/src/rss_thread_watch/feed_generator.clj @@ -22,8 +22,8 @@ [rss-thread-watch.utils :as ut]) (:gen-class)) - - +(def boards-enabled-cache + (atom nil)) (defn new-guid-always "Generates always unique GUID for Feed item. @@ -48,9 +48,11 @@ (defn filter-chod-posts "Return list of all threads with equal or higher ChoD than requested - READS FROM GLOBALS: watcher.time-of-cache" ;Todo: best thing would be to add timestamp to cache - [query-vec chod-treshold repeat? cache] - (let [time-of-generation @watcher/time-of-cache + READS FROM GLOBALS: watcher.time-of-cache" + [query-vec chod-treshold repeat? board-cache] + + (let [{time-of-generation :time + cache :data} board-cache guid-fn (if repeat? (fn [x] (new-guid-always x time-of-generation)) update-only-guid) cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold)) @@ -73,9 +75,9 @@ (defn thread-to-rss-item "If I wasnt retarded I could have made the cached version look like rss item already but what can you do. 
I'll refactor I promise, I just need this done ASAP" ;Todo: do what the docstring says - [t] + [t] ;TODO: oh Luna the hardcodes ;;RESUME (let [link-url (str "https://boards.4chan.org/mlp/thread/" (:no t))] ; jesus, well I said only /mlp/ is supported now so fuck it - {:title (format "%.2f%% - %s" (:chod t) (:title t)) + {:title (format "%.2f%% - %s" (:chod t) (:title t)) ;TODO: Generate link from the target somehow, or just include it from API response ;; :url link-url <- this is supposed to be for images according to: https://cyber.harvard.edu/rss/rss.html :description (format "The thread: '%s' has %.2f%% chance of dying" (:title t) (:chod t)) :link link-url @@ -97,9 +99,11 @@ READS FROM GLOBALS: rss-thread-watch.watcher.chod-threads-cache - rss-thread-watch.core.CONFIG" + rss-thread-watch.watcher.GLOBAL-CONFIG" ;TODO: Update if it really reads from there anymore [rqst] - (try (let [{{chod "chod" :or {chod "94"} + (try (let [{{chod "chod" + board "board" :or {chod "94" + board (get @watcher/GLOBAL-CONFIG :default-board)} :as prms} :params uri :uri} rqst qrs (prms "q") @@ -110,19 +114,23 @@ chod)] (try ;If we can't parse number from chod, use default 94 (if (or (vector? chod) - (<= (Integer/parseInt chod) 60)) ; Never accept chod lower that 60 TODO: don't hardcode this + (<= (Integer/parseInt chod) 60)) ; Never accept chod lower than 60 TODO: don't hardcode this 60 (Integer/parseInt chod)) (catch Exception e 94))) cache @watcher/chod-threads-cache] - ;; (println "RCVD: " rqst) - (println rqst) + (println "\n\nRCVD: " rqst) + ;; (println rqst) ;; ====== Errors ===== ;; Something other than feed.xml requested (when-not (s/ends-with? uri "feed.xml") (throw (ex-info "404" {:status 404 :header {"Content-Type" "text/plain"} :body "404 This server has nothing but /feed.xml"}))) + (when-not (contains? 
@boards-enabled-cache board) + (throw (ex-info "403" {:status 403 + :header {"Content-Type" "text/plain"} + :body (get @watcher/GLOBAL-CONFIG :board-disabled-message)}))) ;; No url params -> we redirect to documentation about params (when (empty? prms) (throw (ex-info "302" @@ -146,13 +154,15 @@ ;; There shouldn't be any problems with this mime type but if there are ;; replace with "text/xml", or even better, get RSS reader that is not utter shit :header {"Content-Type" "application/rss+xml"} - :body (generate-feed queries real-chod repeat? cache)}) + :body (generate-feed queries real-chod repeat? (watcher/get-thread-data board @watcher/GLOBAL-CONFIG))}) (catch Exception e ;; Ex-info has been crafted to match HTTP response body so we can send it (if-let [caught (ex-data e)] caught ;We have custom crafted error - {:status 500 ;Something else fucked up, we print what happened - :header {"Content-Type" "text/plain"} - :body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n" - (ex-cause e) "\n" - e)})))) + (do + (print "WTF??: " e) + {:status 500 ;Something else fucked up, we print what happened + :header {"Content-Type" "text/plain"} + :body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n" + (ex-cause e) "\n" + e)}))))) diff --git a/src/rss_thread_watch/watcher.clj b/src/rss_thread_watch/watcher.clj index 1a00299..6b03f27 100644 --- a/src/rss_thread_watch/watcher.clj +++ b/src/rss_thread_watch/watcher.clj @@ -18,17 +18,23 @@ [clojure.data.json :as js]) (:gen-class)) +(def GLOBAL-CONFIG + "Global config with defaults for missing entires" + ;; I know globals are ew in Clojure but I don't know any + ;; better way of doing this + (atom nil)) + (def chod-threads-cache "Cached map of threads that have CHanceOfDeath > configured" - nil) + (atom {})) -(def time-of-cache nil) - -(defn init-global-cache - "Initializes global cache of 
catalogs" +(defn generate-chod-cache-structure + "Generates initial structure for global cache + Structure is returned, you have to set it yourself" [config] - (keys (:boards-enabled config)) - ) + (let [ks (keys (:boards-enabled config))] + (zipmap ks + (repeatedly (count ks) #(atom nil))))) (defn process-page "Procesess every thread in page, leaving only relevant information @@ -50,27 +56,62 @@ (defn build-cache "Build cache of near-death threads so the values don't have to be recalculated on each request." [pages-to-index pages-total threads-per-page threads-total] - (vec (flatten (map (fn [single-page] - ;; We have to (dec page-number) bcs otherwise we would get the total number of threads - ;; including the whole page of threads - (let [page-number (dec (:page single-page))] ; inc to get to the actuall page - (process-page (:threads single-page) threads-total (inc (* page-number threads-per-page))))) - pages-to-index)))) + {:time (System/currentTimeMillis) + :data (vec (flatten (map (fn [single-page] + ;; We have to (dec page-number) bcs otherwise we would get the total number of threads + ;; including the whole page of threads + (let [page-number (dec (:page single-page))] ; inc to get to the actuall page + (process-page (:threads single-page) threads-total (inc (* page-number threads-per-page))))) + pages-to-index)))}) -(defn update-thread-cache! +(defn update-board-cache! "Updates cache of near-death threads. Writes to chod-threads-cache as side effect. 
[url] - Url to download data from - [starting-page] - From which page consider threads to be fit for near-death cache" - [url starting-page] - ;; Todo: surround with try so we can timeout and other stuff + [board] - Board to assign cached data to, it's existence is NOT checked here + [starting-page] - From which page consider threads to be fit for near-death cache + THIS FUNCTION WRITES TO chod-threads-cache + Returns :data part of [board] cache" + [url board starting-page] + ;; Todo: surround with try so we can timeout, 40x and other stuff (let [catalog (with-open [readr (io/reader url)] (js/read readr :key-fn keyword)) pages-total (count catalog) ;; universal calculation for total number of threads: - ;; (pages-total-1) * threadsPerPage + threadsOnLastpage ;;accounts for boards which have stickied threads making them have 11pages + ;; (pages-total -1) * threadsPerPage + threadsOnLastpage ;;accounts for boards which have stickied threads making them have 11pages threads-per-page (count (:threads (first catalog))) threads-total (+ (* threads-per-page (dec pages-total)) (count (:threads (last catalog)))) ;; Todo: Yeah, maybe this calculation could be refactored into let to-index (filter (fn [item] (<= starting-page (:page item))) catalog)] - (reset! chod-threads-cache (build-cache to-index pages-total threads-per-page threads-total)) - (reset! time-of-cache (System/currentTimeMillis)))) + ;; TODO: there absolutely must be try catch for missing - not enabled boards, + ;; This will return nill and that fuck everything up + (reset! (get @chod-threads-cache board) + (build-cache to-index pages-total threads-per-page threads-total)))) + +(defn board-enabled? + "Checks whether board is enabled in config" + [board config] + (contains? 
board (keys (get config :boards-enabled)))) + +(defn get-board-url + "Gets board url from :target if " + [board config] + ;; TODO: jesus, this needs sanitization and should be probably crafted by some URL class + (str (get-in config [:boards-enabled board :target]) board "catalog.json")) + +(defn get-thread-data + "Gets thread cache for given board. + If board is lazy loaded, downloads new one if needed. + + MAY CAUSE WRITE TO chod-thread-cache IF NECCESARRY" + [board config] + (let [refresh-rate (* 1000 (get-in config `(:boards-enabled ~board :refresh-rate))) + {data :data + time-downloaded :time + :or {time-downloaded 0} + :as board-atom } @(get @chod-threads-cache board) + ;; TODO: This also makes it implictly lazy-load -> if disabled make the check here + time-to-update? (or (nil? board-atom) + (> (System/currentTimeMillis) (+ refresh-rate time-downloaded)))] + (if time-to-update? + (update-board-cache! (get-board-url "/mlp/" config) board (get-in config [:boards-enabled board :starting-page])) + @(get @chod-threads-cache board)))) From 69111968a6c49de1384416ff254f8f10e3561318 Mon Sep 17 00:00:00 2001 From: Felisp Date: Sun, 4 Aug 2024 10:52:01 +0200 Subject: [PATCH 28/35] Fix very dumb bug This is why you keep notes about hardcoded things when starting a project --- src/rss_thread_watch/watcher.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rss_thread_watch/watcher.clj b/src/rss_thread_watch/watcher.clj index 6b03f27..d4844a5 100644 --- a/src/rss_thread_watch/watcher.clj +++ b/src/rss_thread_watch/watcher.clj @@ -113,5 +113,5 @@ time-to-update? (or (nil? board-atom) (> (System/currentTimeMillis) (+ refresh-rate time-downloaded)))] (if time-to-update? - (update-board-cache! (get-board-url "/mlp/" config) board (get-in config [:boards-enabled board :starting-page])) + (update-board-cache! 
(get-board-url board config) board (get-in config [:boards-enabled board :starting-page])) @(get @chod-threads-cache board)))) From d6327a526514e7f7e22d34ad7d1a4a4526722212 Mon Sep 17 00:00:00 2001 From: Felisp Date: Sun, 4 Aug 2024 18:24:11 +0200 Subject: [PATCH 29/35] Added tools.cli dependency for command line parsing --- project.clj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 6438fc2..db93458 100644 --- a/project.clj +++ b/project.clj @@ -7,7 +7,8 @@ [ring/ring-core "1.8.2"] [ring/ring-jetty-adapter "1.8.2"] [clj-rss "0.4.0"] - [org.clojure/data.json "2.4.0"]] + [org.clojure/data.json "2.4.0"] + [org.clojure/tools.cli "1.1.230"]] :main ^:skip-aot rss-thread-watch.core :target-path "target/%s" :profiles {:uberjar {:aot :all}}) From ec73dd0c1adb3ff7f111a98984121a18b5cb2be5 Mon Sep 17 00:00:00 2001 From: Felisp Date: Tue, 13 Aug 2024 02:56:20 +0200 Subject: [PATCH 30/35] Added CLI parsing template --- project.clj | 2 +- src/rss_thread_watch/core.clj | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/project.clj b/project.clj index db93458..3345fd6 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject rss-thread-watch "0.3.5-SNAPSHOT" +(defproject rss-thread-watch "0.3.7-SNAPSHOT" :description "RSS based thread watcher" :url "http://example.com/FIXME" :license {:name "AGPL-3.0-only" diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index bd47155..6802b1f 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -15,6 +15,7 @@ (ns rss-thread-watch.core (:require [clojure.java.io :as io] [clojure.edn :as edn] + [clojure.tools.cli :refer [parse-opts]] [ring.adapter.jetty :as jetty] [ring.middleware.params :as rp] [rss-thread-watch.watcher :as watcher] @@ -22,6 +23,8 @@ [rss-thread-watch.utils :as u]) (:gen-class)) +(def VERSION "0.3.7") + ;; Internal default config (def CONFIG-DEFAULT "Internal default config" @@ 
-37,8 +40,19 @@ :boards-enabled {"/mlp/" {} "/g/" {}}}) +(def cli-options + "Configuration defining program arguments for cli.tools" + [["-v" "--version" "Print version and license information"] + ["-h" "--help" "Prints help"] + ["-c" "--config CONFIG_FILE" "Specify config file to use for this run" + :default "./config.edn" + :validate [#(u/file-exists? %) "Specified config file does not exist or is not readable"]] + [nil "--print-default-config" "Prints internal default config file to STDOUT and exits"]]) + +;; Todo: Think of a way to start repeated download for every catalog efficiently (defn set-interval "Calls function every ms" + ^{:deprecated true} [callback ms] (future (while true (do (try (callback) From 2ca45803e53efd259ad281a4c6b1f295d65b71fb Mon Sep 17 00:00:00 2001 From: Felisp Date: Tue, 13 Aug 2024 02:57:32 +0200 Subject: [PATCH 31/35] Add support for config file, implement CLI args --- project.clj | 2 +- src/rss_thread_watch/core.clj | 63 ++++++++++++++++++++-------------- src/rss_thread_watch/utils.clj | 22 ++++++++++++ 3 files changed, 60 insertions(+), 27 deletions(-) diff --git a/project.clj b/project.clj index 3345fd6..c9a1d3b 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject rss-thread-watch "0.3.7-SNAPSHOT" +(defproject rss-thread-watch "0.3.8-SNAPSHOT" :description "RSS based thread watcher" :url "http://example.com/FIXME" :license {:name "AGPL-3.0-only" diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index 6802b1f..6a08fe5 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -23,7 +23,7 @@ [rss-thread-watch.utils :as u]) (:gen-class)) -(def VERSION "0.3.7") +(def VERSION "0.3.8") ;; Internal default config (def CONFIG-DEFAULT @@ -85,38 +85,49 @@ (defn get-some-config "Attempts to get config somehow, - first from command line argument - then from ./config.edn file - lastly uses default internal" - ;; args do not include path to executable so first arg - ;; should be 
config file - [cmd-args] + first from [custom-file], if it's nil, + then from ./config.edn file. + If is neither exists, default internal one is used." + [custom-file] (config-fill-board-defaults - (let [file-to-try (u/nil?-else (first cmd-args) + ;; TODO: There has to be try/catch for when file is invalid edn + ;; This is gonna be done when config validation comes in Beta 2 + (let [file-to-try (u/nil?-else custom-file "./config.edn")] - (u/when-else (load-config file-to-try) - (println "WARN: Using default internal config because suggessted file: '" file-to-try "' not found.") + (u/when-else (load-config file-to-try) + CONFIG-DEFAULT)))) -;; Todo: Add option to write default config to stdout (defn -main "Entry point, starts webserver" [& args] - ;; Todo: Think of a way to start repeated download for every catalog efficiently - (let [config (get-some-config args)] - ;; Init the few globals we have - (reset! watcher/GLOBAL-CONFIG config) - (reset! feed/boards-enabled-cache (set (keys (get config :boards-enabled)))) - (reset! watcher/chod-threads-cache (watcher/generate-chod-cache-structure config)) - (println args) - (clojure.pprint/pprint config) - ;; Needs to be redone and probably removed from here - ;; (set-interval (fn [] - ;; (println "Starting cache update") - ;; (watcher/update-board-cache! (:target config) (:starting-page config))) - ;; (* 1000 (:refresh-delay config))) - (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT) - :join? 
true}))) + (let [parsed-args (parse-opts args cli-options) + options (get parsed-args :options)] + (when-let [err (get parsed-args :errors)] + (println "Error: " err) + (System/exit 1)) + (when (get options :version) + (println "RSS Thread Watcher " VERSION " Licensed under AGPL-3.0-only") + (System/exit 0)) + (when (get options :help) + (println "RSS Thread Watcher help:\n" (get parsed-args :summary)) + (System/exit 0)) + (when (get options :print-default-config) + (println ";;Default internal config file from RSS Thread Watcher " VERSION) + (clojure.pprint/pprint CONFIG-DEFAULT) + ;; In case someone was copying by hand, this might be useful + (println ";;END of Default internal config file") + (System/exit 0)) + + (let [config (get-some-config (:config options))] + ;; TODO: probably refactor to use separate config.clj file when validation will be added + ;; Init the few globals we have + (reset! watcher/GLOBAL-CONFIG config) + (reset! feed/boards-enabled-cache (set (keys (get config :boards-enabled)))) + (reset! watcher/chod-threads-cache (watcher/generate-chod-cache-structure config)) + (clojure.pprint/pprint config) + (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT) + :join? true})))) ;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started (defn repl-main diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index daed328..db53c12 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -77,3 +77,25 @@ (empty m) (for [[key val] m] [key (f key val)]))) + +(defn expand-home + "Expands ~ to home directory" + ;;modified from sauce: https://stackoverflow.com/questions/29585928/how-to-substitute-path-to-home-for + [s] + (if (clojure.string/starts-with? s "~") + (clojure.string/replace-first s "~" (System/getProperty "user.home")) + s)) + +(defn expand-path + [s] + (if (clojure.string/starts-with? s "./") + (clojure.string/replace-first s "." 
(System/getProperty "user.dir")) + (expand-home s))) + +(defn file-exists? + "Returns true if file exists" + [file] + (let [path (if (vector? file) + (first file) + file)] + (.exists (clojure.java.io/file (expand-path path))))) From 7f5d560baeebf210785418e88babe7c9cc3b4d4a Mon Sep 17 00:00:00 2001 From: Felisp Date: Tue, 13 Aug 2024 03:12:43 +0200 Subject: [PATCH 32/35] Added documented default config --- res/ExampleConfig-documented.edn | 47 ++++++++++++++++++++++++++++++++ res/ExampleConfig.edn | 34 ----------------------- 2 files changed, 47 insertions(+), 34 deletions(-) create mode 100644 res/ExampleConfig-documented.edn delete mode 100644 res/ExampleConfig.edn diff --git a/res/ExampleConfig-documented.edn b/res/ExampleConfig-documented.edn new file mode 100644 index 0000000..87d2a8f --- /dev/null +++ b/res/ExampleConfig-documented.edn @@ -0,0 +1,47 @@ +{:port 6969 ;Port to listen on + :default-board "/mlp/" ;Board to be used when no board=x param given + ;; Message displayed when the requested board is not enabled + :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact]" + ;; :enable-board-listing true ;Whether to show list of enabled boards in /boards UNIMPLEMENTED + + ;; This map defines default values for all enabled boards. If you wish for some board + ;; to use different values, specify them below in :boards-enabled + :boards-defaults { + ;; After how many seconds to fetch a fresh catalog.json from :target + :refresh-rate 300 + ;; Page from which to start indexing threads, threads on pages with lower + ;; numbers will not be detectable by the feed watcher + :starting-page 7 + ;; Default ChOD to use if none is specified by the user + :default-chod 94 + ;; If you want to do some preprocessing beforehand, you can override + ;; the target URL for the board, but the response must be the same as the 4chan API would return + ;; /$board/catalog.json will be appended to this link + :target "https://api.4chan.org" + ;; Commented
parts below are still unimplemented + ;; ------ + ;; Only download catalog when someone requests feed and cache is old + ;; Saves requests to 4chan, useful for boards that are checked rarely + ;; Generally the better option, first request in each :refresh-rate interval may take longer + ;; Currently the only option + :lazy-load true + ;; Whether to allow regex search through the threads (&qr= param) UNIMPLEMENTED + ;; :regex-enable true + ;; Whether to create cache by downloading whole catalog or every required + ;; page one by one UNIMPLEMENTED + ;; :request-type [:catalog] :pages + } + ;; List of all boards that are enabled for feed generation + ;; Yes they must be all listed manually for now + ;; Each such board must have map of altered config options if applicable + ;; otherwise empty one must be provided + :boards-enabled {"/mlp/" {} ;; Empty override map means that defaults are used + ;; This means that board "/g/" will have :starting-page set to 7 but all + ;; the other config options are copied from :boards-defaults + "/g/" {:starting-page 7} + "/po/" {:starting-page 8 + :refresh-rate 86400} ;1 day + "/p/" {:starting-page 8 + :refresh-rate 1800} ;30 min + } +} diff --git a/res/ExampleConfig.edn b/res/ExampleConfig.edn deleted file mode 100644 index 7ed8364..0000000 --- a/res/ExampleConfig.edn +++ /dev/null @@ -1,34 +0,0 @@ -;; :board-defaults is default config for every board and can be -;; overriden for every board -{:port 6969 - :default-board "/mlp/" ;Board to be used when no board=x param given - :board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact]" - :enable-board-listing true ;Whether to show list of enabled boards in /boards - :boards-defaults {:refresh-rate 300 ;how often new data should be downloaded in seconds - :starting-page 7 - :default-chod 94 - ;; If you want to do some preprocessing beforehand, you can override - ;; Target URL for the board, but the response must be same 4chan API would return -
;; /$board/catalog.json will be appended to this link - :target "https://api.4chan.org" - ;; Commented parts bellow are still unimplemented - ;; Only download catalog when someone requests feed and cache is old - ;; Saves request to 4chan, usefull for borads that are checked rarely - ;; Generally the better option, first request in :refresh-rate may take longer - :lazy-load true - ;; Whether to allow regex search thru the threads (&qr= param) - ;; :regex-enable true - ;; Wheter to create cache by downloading whole catalog or every required - ;; one by one - ;; :request-type [:catalog] :pages - } - ;; List of all boards that are enabled for feed generation - ;; Yes they must be all listed manualy for now - :boards-enabled {"/mlp/" {} ;; Empty override map means that defaults are used - "/g/" {} - "/po/" {} - "/p/" {:starting-page 8 - :refresh-rate 1800} ;30 min - } - ;; When user requests board that is not enabled, this message is returned - } From 02ceccb97323499291418f06160f36019b6deb9b Mon Sep 17 00:00:00 2001 From: Felisp Date: Tue, 13 Aug 2024 03:16:19 +0200 Subject: [PATCH 33/35] Syncrhonize external default config and internal one --- src/rss_thread_watch/core.clj | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index 6a08fe5..558ba0a 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -37,8 +37,12 @@ :default-chod 94 :target "https://api.4chan.org" :lazy-load true} - :boards-enabled {"/mlp/" {} - "/g/" {}}}) + :boards-enabled {"/mlp/" {:lazy-load false} + "/g/" {:starting-page 7} + "/po/" {:starting-page 8 + :refresh-rate 86400} + "/p/" {:starting-page 8 + :refresh-rate 1800}}}) (def cli-options "Configuration defining program arguments for cli.tools" From 533dcc992424f5226ccb171e9f6c33a8668899fb Mon Sep 17 00:00:00 2001 From: Felisp Date: Tue, 13 Aug 2024 03:43:25 +0200 Subject: [PATCH 34/35] Redon README a little bit beafore release But will 
need proof reading, it's almost 4 in the morning and thats not good for writing accuracy --- README.org | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/README.org b/README.org index df34812..e5537c4 100644 --- a/README.org +++ b/README.org @@ -1,7 +1,7 @@ #+OPTIONS: toc:nil * RSS based thread watcher -Get notifications from your feed reader when your favourite /mlp/ thread is about to die +Get notifications from your feed reader when your favourite thread is about to die ** Usage @@ -24,11 +24,14 @@ Right now there is no automated way to generate your feed url but making one by **** URL parameters +Please note that default values may vary depending on which host you use, these are the defaults that come with this software but +anyone running instance of RSS thread watcher can change them + | Param name | Values [default] | Can have multiple? | Mandatory? | Short description | |------------+-------------------------+--------------------+-------------------------+--------------------------------------------------------------------------------------------------| -| board | "mlp" | No | No (not implemented) | Which board to generate feed for, *ONLY* ​/mlp​/ is supported | -| q | nil | Yes | Yes (1 or more) | This string is used to filter threads according to their titles | -| chod | 60-99 [94] | No | No | CHanceOfDeath - will include thread in the feed if it's chance to death i > chod | +| board | "mlp" | No | No | Which board to generate feed for, only boards enabled by host will work | +| q | nil | Yes | Yes (1 or more) | This string is used to filter threads according to their titles, *REGEX NOT supported* yet | +| chod | 60-99 [94] | No | No | CHanceOfDeath - will include thread in the feed if it's chance to death is > chod | | repeat | true, paranoid, [false] | No | No (partly implemented) | Whether to make new notification on every server update even when thread doesnt have higher chod | | 
recreate | ~bool~ | Not implemented | Not implemented | Whether to notify when creation of new thread matching querry is detected (uses 4chans RSS) | @@ -50,62 +53,54 @@ Standart rules of URLs apply, if you know how to pass params in URL to any websi - Are in the lowest 98% part of catalog (it's on position ~147/150 e.g. 3 threads before being bumped off) - Note that ~//~ are not special characters ~q=/general/~ will work as expected and match thread with "​/general​/" in it's title - Also note that regex is *NOT* supported for now, so something like ~q=rainbow*~ will only match threads with "rainbow" followed - immedidatelly by ~*~ - in their title + immedidatelly by ~*~ in their title *** Generating URL interactively -Coming soon +Coming soon (not really) -** Limitations +** Bugs -This is an experimental project. There are several limitations: -- Only supported board is ​/mlp​/ (You can choose your own when self hosting) -- Only searched threads are those who are in the 50% closer to death part of the catalog - -*** Bugs - -See [[https://git.treebrary.org/Treebrary.org/rss-thread-watcher/issues][issues]] +See [[https://git.treebrary.org/Treebrary.org/rss-thread-watcher/issues?q=&type=all&state=open&labels=1&milestone=0&assignee=0&poster=0][issues]] ** Feature set -- Planned/finnished features [23%] +- Planned/finnished features [38%] - [X] [DONE] Super basic features done (feed, query, repeat) - [X] Have proper sorting - The most likely to die threads first - [X] No params request should redirect to url generator or (for now) documentation - - [ ] Config file instead of hardcoding config values + - [X] Config file instead of hardcoding config values - [ ] Include time of latest data fetch - [ ] Make threads have preview images taken from the actuall thread OP - [ ] Show which query matched the thread you were notified of - [ ] Option to include advanced HTML formating of text (different color text for ChoD etc) - [ ] Support notification on watched thread 
re-creation after it died - [ ] Support notification for thread death - - [ ] Support multiple boards at once + - [X] Support multiple boards at once - [ ] Support async responses - [ ] Graal VM support for native configuration ** Self hosting -This is not supported until release 1.0. You can do it if you figure it out (probably not that hard tbh) but there will be much -more detailed instructions in the future. +As of the first Beta release, self hosting is supported; please refer to the [[file:res/ExampleConfig-documented.edn][documented example config]] for information on configuration +options. *** Prebuilt -There will be instructions at some point I promise. Until then you can download binaries from the releases page and run them like -you would any other java executable, default port is ~6969~. - -And you need Java for now if that isn't clear. - +Download the newest release from [[https://git.treebrary.org/Treebrary.org/rss-thread-watcher/releases][releases]] and run it like you would any other java executable, default port is ~6969~ ~$ java -jar whatEverNameTheReleaseHas.jar~~ *** From source +Not officially supported. If you attempt this, please use the source from a release tarball or check out the ~release~ or ~stable~ +branch. The ~dev~ branch is unstable and untested and may not even build. The ~stable~ branch should always build but may contain a newer version +than is released. -If you know Clojure, then just clone and build with lein. If you don't either RTFM to lein or wait before instructions will be +If you know Clojure, then just clone and build with lein. If you don't, either RTFM for lein or wait until instructions are avaiabile here. *** Configuring -Self hosting is not supported at the moment so no configuration for you. +All documentation is for now included in the [[file:res/ExampleConfig-documented.edn][documented example config]].
*** Contributing From 2266ecae354d4b1180c0097a71e546232eff2db7 Mon Sep 17 00:00:00 2001 From: Felisp Date: Tue, 13 Aug 2024 17:45:31 +0200 Subject: [PATCH 35/35] Final version bumps and todos Ready for release --- project.clj | 2 +- src/rss_thread_watch/core.clj | 3 +-- src/rss_thread_watch/feed_generator.clj | 2 +- src/rss_thread_watch/watcher.clj | 8 +++++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/project.clj b/project.clj index c9a1d3b..89fb513 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject rss-thread-watch "0.3.8-SNAPSHOT" +(defproject rss-thread-watch "0.4.0-SNAPSHOT" :description "RSS based thread watcher" :url "http://example.com/FIXME" :license {:name "AGPL-3.0-only" diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index 558ba0a..7799a10 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -23,7 +23,7 @@ [rss-thread-watch.utils :as u]) (:gen-class)) -(def VERSION "0.3.8") +(def VERSION "0.4.0") ;; Internal default config (def CONFIG-DEFAULT @@ -99,7 +99,6 @@ (let [file-to-try (u/nil?-else custom-file "./config.edn")] (u/when-else (load-config file-to-try) - CONFIG-DEFAULT)))) (defn -main diff --git a/src/rss_thread_watch/feed_generator.clj b/src/rss_thread_watch/feed_generator.clj index e619965..7c6c15d 100644 --- a/src/rss_thread_watch/feed_generator.clj +++ b/src/rss_thread_watch/feed_generator.clj @@ -87,7 +87,7 @@ "Generates feed from matching items" [query-vec chod-treshold repeat? cache] (let [items (filter-chod-posts query-vec chod-treshold repeat? 
cache) - head {:title "RSS Thread watcher v0.1" + head {:title "RSS Thread watcher v0.4" ;TODO: hardcoded string here, remake to reference to config.clj :link "https://tools.treebrary.org/thread-watcher/feed.xml" :feed-url "https://tools.treebrary.org/thread-watcher/feed.xml" :description "RSS based thread watcher"} diff --git a/src/rss_thread_watch/watcher.clj b/src/rss_thread_watch/watcher.clj index d4844a5..93b068c 100644 --- a/src/rss_thread_watch/watcher.clj +++ b/src/rss_thread_watch/watcher.clj @@ -78,14 +78,16 @@ pages-total (count catalog) ;; universal calculation for total number of threads: ;; (pages-total -1) * threadsPerPage + threadsOnLastpage ;;accounts for boards which have stickied threads making them have 11pages - threads-per-page (count (:threads (first catalog))) + threads-per-page (count (:threads (first catalog))) ;; TODO: last could be remade to peek if it's a vector threads-total (+ (* threads-per-page (dec pages-total)) (count (:threads (last catalog)))) ;; Todo: Yeah, maybe this calculation could be refactored into let to-index (filter (fn [item] (<= starting-page (:page item))) catalog)] ;; TODO: there absolutely must be try catch for missing - not enabled boards, + ;; This is probably resolved now, but keeping it just in case ;; This will return nill and that fuck everything up - (reset! (get @chod-threads-cache board) - (build-cache to-index pages-total threads-per-page threads-total)))) + (println "Refreshed cache for " board) + (reset! (get @chod-threads-cache board) + (build-cache to-index pages-total threads-per-page threads-total)))) (defn board-enabled? "Checks whether board is enabled in config"