Implement multi-board support and, as a result, lazy loading; add TODOs

And shitton of them
This commit is contained in:
Felisp 2024-07-30 02:55:15 +02:00
parent 8c1cfbed33
commit 8a7981c27a
4 changed files with 111 additions and 57 deletions

View file

@ -1,4 +1,4 @@
(defproject rss-thread-watch "0.3.0-SNAPSHOT" (defproject rss-thread-watch "0.3.5-SNAPSHOT"
:description "RSS based thread watcher" :description "RSS based thread watcher"
:url "http://example.com/FIXME" :url "http://example.com/FIXME"
:license {:name "AGPL-3.0-only" :license {:name "AGPL-3.0-only"

View file

@ -61,12 +61,13 @@
"Fills every enabled board with default config values" "Fills every enabled board with default config values"
[config] [config]
(let [defaults (:boards-defaults config)] (let [defaults (:boards-defaults config)]
(update-in config (dissoc (update-in config
'(:boards-enabled) '(:boards-enabled)
(fn [mp] (fn [mp]
(u/fmap (fn [k v] (u/fmap (fn [k v]
(u/map-apply-defaults v defaults)) (u/map-apply-defaults v defaults))
mp))))) mp)))
:boards-defaults)))
(defn get-some-config (defn get-some-config
"Attempts to get config somehow, "Attempts to get config somehow,
@ -88,18 +89,20 @@
"Entry point, starts webserver" "Entry point, starts webserver"
[& args] [& args]
;; Todo: Think of a way to start repeated download for every catalog efficiently ;; Todo: Think of a way to start repeated download for every catalog efficiently
(let [config (get-some-config args) (let [config (get-some-config args)]
expanded-config ;; Init the few globals we have
(reset! watcher/GLOBAL-CONFIG config)
] (reset! feed/boards-enabled-cache (set (keys (get config :boards-enabled))))
(println args) (reset! watcher/chod-threads-cache (watcher/generate-chod-cache-structure config))
(System/exit 0) (println args)
(set-interval (fn [] (clojure.pprint/pprint config)
(println "Starting cache update") ;; Needs to be redone and probably removed from here
(watcher/update-thread-cache! (:target config) (:starting-page config))) ;; (set-interval (fn []
(* 1000 (:refresh-delay config))) ;; (println "Starting cache update")
(jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT) ;; (watcher/update-board-cache! (:target config) (:starting-page config)))
:join? true}))) ;; (* 1000 (:refresh-delay config)))
(jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT)
:join? true})))
;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started ;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started
(defn repl-main (defn repl-main

View file

@ -22,8 +22,8 @@
[rss-thread-watch.utils :as ut]) [rss-thread-watch.utils :as ut])
(:gen-class)) (:gen-class))
(def boards-enabled-cache
  "Atom caching which boards are enabled, so request handling can do a
  cheap membership test. Starts out holding nil; it is expected to be
  reset! during startup (presumably to a set of the enabled board keys
  taken from the config -- confirm against the entry point)."
  (atom nil))
(defn new-guid-always (defn new-guid-always
"Generates always unique GUID for Feed item. "Generates always unique GUID for Feed item.
@ -48,9 +48,11 @@
(defn filter-chod-posts (defn filter-chod-posts
"Return list of all threads with equal or higher ChoD than requested "Return list of all threads with equal or higher ChoD than requested
READS FROM GLOBALS: watcher.time-of-cache" ;Todo: best thing would be to add timestamp to cache READS FROM GLOBALS: watcher.time-of-cache"
[query-vec chod-treshold repeat? cache] [query-vec chod-treshold repeat? board-cache]
(let [time-of-generation @watcher/time-of-cache
(let [{time-of-generation :time
cache :data} board-cache
guid-fn (if repeat? (fn [x] (new-guid-always x time-of-generation)) guid-fn (if repeat? (fn [x] (new-guid-always x time-of-generation))
update-only-guid) update-only-guid)
cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold)) cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold))
@ -73,9 +75,9 @@
(defn thread-to-rss-item (defn thread-to-rss-item
"If I wasnt retarded I could have made the cached version look like "If I wasnt retarded I could have made the cached version look like
rss item already but what can you do. I'll refactor I promise, I just need this done ASAP" ;Todo: do what the docstring says rss item already but what can you do. I'll refactor I promise, I just need this done ASAP" ;Todo: do what the docstring says
[t] [t] ;TODO: oh Luna the hardcodes ;;RESUME
(let [link-url (str "https://boards.4chan.org/mlp/thread/" (:no t))] ; jesus, well I said only /mlp/ is supported now so fuck it (let [link-url (str "https://boards.4chan.org/mlp/thread/" (:no t))] ; jesus, well I said only /mlp/ is supported now so fuck it
{:title (format "%.2f%% - %s" (:chod t) (:title t)) {:title (format "%.2f%% - %s" (:chod t) (:title t)) ;TODO: Generate link from the target somehow, or just include it from API response
;; :url link-url <- this is supposed to be for images according to: https://cyber.harvard.edu/rss/rss.html ;; :url link-url <- this is supposed to be for images according to: https://cyber.harvard.edu/rss/rss.html
:description (format "The thread: '%s' has %.2f%% chance of dying" (:title t) (:chod t)) :description (format "The thread: '%s' has %.2f%% chance of dying" (:title t) (:chod t))
:link link-url :link link-url
@ -97,9 +99,11 @@
READS FROM GLOBALS: READS FROM GLOBALS:
rss-thread-watch.watcher.chod-threads-cache rss-thread-watch.watcher.chod-threads-cache
rss-thread-watch.core.CONFIG" rss-thread-watch.watcher.GLOBAL-CONFIG" ;TODO: Update if it really reads from there anymore
[rqst] [rqst]
(try (let [{{chod "chod" :or {chod "94"} (try (let [{{chod "chod"
board "board" :or {chod "94"
board (get @watcher/GLOBAL-CONFIG :default-board)}
:as prms} :params :as prms} :params
uri :uri} rqst uri :uri} rqst
qrs (prms "q") qrs (prms "q")
@ -110,19 +114,23 @@
chod)] chod)]
(try ;If we can't parse number from chod, use default 94 (try ;If we can't parse number from chod, use default 94
(if (or (vector? chod) (if (or (vector? chod)
(<= (Integer/parseInt chod) 60)) ; Never accept chod lower that 60 TODO: don't hardcode this (<= (Integer/parseInt chod) 60)) ; Never accept chod lower than 60 TODO: don't hardcode this
60 (Integer/parseInt chod)) 60 (Integer/parseInt chod))
(catch Exception e (catch Exception e
94))) 94)))
cache @watcher/chod-threads-cache] cache @watcher/chod-threads-cache]
;; (println "RCVD: " rqst) (println "\n\nRCVD: " rqst)
(println rqst) ;; (println rqst)
;; ====== Errors ===== ;; ====== Errors =====
;; Something other than feed.xml requested ;; Something other than feed.xml requested
(when-not (s/ends-with? uri "feed.xml") (when-not (s/ends-with? uri "feed.xml")
(throw (ex-info "404" {:status 404 (throw (ex-info "404" {:status 404
:header {"Content-Type" "text/plain"} :header {"Content-Type" "text/plain"}
:body "404 This server has nothing but /feed.xml"}))) :body "404 This server has nothing but /feed.xml"})))
(when-not (contains? @boards-enabled-cache board)
(throw (ex-info "403" {:status 403
:header {"Content-Type" "text/plain"}
:body (get @watcher/GLOBAL-CONFIG :board-disabled-message)})))
;; No url params -> we redirect to documentation about params ;; No url params -> we redirect to documentation about params
(when (empty? prms) (when (empty? prms)
(throw (ex-info "302" (throw (ex-info "302"
@ -146,13 +154,15 @@
;; There shouldn't be any problems with this mime type but if there are ;; There shouldn't be any problems with this mime type but if there are
;; replace with "text/xml", or even better, get RSS reader that is not utter shit ;; replace with "text/xml", or even better, get RSS reader that is not utter shit
:header {"Content-Type" "application/rss+xml"} :header {"Content-Type" "application/rss+xml"}
:body (generate-feed queries real-chod repeat? cache)}) :body (generate-feed queries real-chod repeat? (watcher/get-thread-data board @watcher/GLOBAL-CONFIG))})
(catch Exception e (catch Exception e
;; Ex-info has been crafted to match HTTP response body so we can send it ;; Ex-info has been crafted to match HTTP response body so we can send it
(if-let [caught (ex-data e)] (if-let [caught (ex-data e)]
caught ;We have custom crafted error caught ;We have custom crafted error
{:status 500 ;Something else fucked up, we print what happened (do
:header {"Content-Type" "text/plain"} (print "WTF??: " e)
:body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n" {:status 500 ;Something else fucked up, we print what happened
(ex-cause e) "\n" :header {"Content-Type" "text/plain"}
e)})))) :body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n"
(ex-cause e) "\n"
e)})))))

View file

@ -18,17 +18,23 @@
[clojure.data.json :as js]) [clojure.data.json :as js])
(:gen-class)) (:gen-class))
;; Globals are frowned upon in Clojure, but no better way of sharing
;; the configuration has been found so far.
(def GLOBAL-CONFIG
  "Atom holding the global configuration, with defaults filled in for
  missing entries. Holds nil until something reset!s it."
  (atom nil))
(def chod-threads-cache (def chod-threads-cache
"Cached map of threads that have CHanceOfDeath > configured" "Cached map of threads that have CHanceOfDeath > configured"
nil) (atom {}))
(def time-of-cache nil) (defn generate-chod-cache-structure
"Generates initial structure for global cache
(defn init-global-cache Structure is returned, you have to set it yourself"
"Initializes global cache of catalogs"
[config] [config]
(keys (:boards-enabled config)) (let [ks (keys (:boards-enabled config))]
) (zipmap ks
(repeatedly (count ks) #(atom nil)))))
(defn process-page (defn process-page
"Procesess every thread in page, leaving only relevant information "Procesess every thread in page, leaving only relevant information
@ -50,27 +56,62 @@
(defn build-cache (defn build-cache
"Build cache of near-death threads so the values don't have to be recalculated on each request." "Build cache of near-death threads so the values don't have to be recalculated on each request."
[pages-to-index pages-total threads-per-page threads-total] [pages-to-index pages-total threads-per-page threads-total]
(vec (flatten (map (fn [single-page] {:time (System/currentTimeMillis)
;; We have to (dec page-number) bcs otherwise we would get the total number of threads :data (vec (flatten (map (fn [single-page]
;; including the whole page of threads ;; We have to (dec page-number) bcs otherwise we would get the total number of threads
(let [page-number (dec (:page single-page))] ; inc to get to the actuall page ;; including the whole page of threads
(process-page (:threads single-page) threads-total (inc (* page-number threads-per-page))))) (let [page-number (dec (:page single-page))] ; inc to get to the actuall page
pages-to-index)))) (process-page (:threads single-page) threads-total (inc (* page-number threads-per-page)))))
pages-to-index)))})
(defn update-thread-cache! (defn update-board-cache!
"Updates cache of near-death threads. Writes to chod-threads-cache as side effect. "Updates cache of near-death threads. Writes to chod-threads-cache as side effect.
[url] - Url to download data from [url] - Url to download data from
[starting-page] - From which page consider threads to be fit for near-death cache" [board] - Board to assign cached data to, it's existence is NOT checked here
[url starting-page] [starting-page] - From which page consider threads to be fit for near-death cache
;; Todo: surround with try so we can timeout and other stuff THIS FUNCTION WRITES TO chod-threads-cache
Returns :data part of [board] cache"
[url board starting-page]
;; Todo: surround with try so we can timeout, 40x and other stuff
(let [catalog (with-open [readr (io/reader url)] (let [catalog (with-open [readr (io/reader url)]
(js/read readr :key-fn keyword)) (js/read readr :key-fn keyword))
pages-total (count catalog) pages-total (count catalog)
;; universal calculation for total number of threads: ;; universal calculation for total number of threads:
;; (pages-total-1) * threadsPerPage + threadsOnLastpage ;;accounts for boards which have stickied threads making them have 11pages ;; (pages-total -1) * threadsPerPage + threadsOnLastpage ;;accounts for boards which have stickied threads making them have 11pages
threads-per-page (count (:threads (first catalog))) threads-per-page (count (:threads (first catalog)))
threads-total (+ (* threads-per-page (dec pages-total)) (count (:threads (last catalog)))) ;; Todo: Yeah, maybe this calculation could be refactored into let threads-total (+ (* threads-per-page (dec pages-total)) (count (:threads (last catalog)))) ;; Todo: Yeah, maybe this calculation could be refactored into let
to-index (filter (fn [item] to-index (filter (fn [item]
(<= starting-page (:page item))) catalog)] (<= starting-page (:page item))) catalog)]
(reset! chod-threads-cache (build-cache to-index pages-total threads-per-page threads-total)) ;; TODO: there absolutely must be try catch for missing - not enabled boards,
(reset! time-of-cache (System/currentTimeMillis)))) ;; This will return nill and that fuck everything up
(reset! (get @chod-threads-cache board)
(build-cache to-index pages-total threads-per-page threads-total))))
(defn board-enabled?
  "Checks whether board is enabled in config.

  [board]  - key identifying the board, as used in (:boards-enabled config)
  [config] - configuration map; may lack :boards-enabled entirely

  Returns true when board is a key of the :boards-enabled map, false otherwise
  (including when config or its :boards-enabled entry is nil)."
  [board config]
  ;; contains? takes the collection first and tests for a KEY, so the lookup
  ;; is done on the :boards-enabled map itself rather than on a seq of its keys.
  (contains? (get config :boards-enabled) board))
(defn get-board-url
  "Builds the catalog URL for board.

  The result is the board's :target value from (:boards-enabled config),
  followed by board itself and the literal \"catalog.json\", concatenated
  with no separators added. A missing :target contributes an empty string."
  [board config]
  ;; TODO: jesus, this needs sanitization and should be probably crafted by some URL class
  (let [target (-> config :boards-enabled (get board) :target)]
    (str target board "catalog.json")))
(defn get-thread-data
  "Gets thread cache for given board.
  If the cached value is missing or older than the board's :refresh-rate,
  a fresh one is downloaded first.
  MAY CAUSE WRITE TO chod-threads-cache IF NECESSARY

  [board]  - key of the board in (:boards-enabled config) and in chod-threads-cache
  [config] - configuration map; :refresh-rate and :starting-page are read from
             the board's entry under :boards-enabled

  Returns the value stored in the board's cache atom, or whatever
  update-board-cache! returns when a refresh was needed.
  Throws IllegalArgumentException when chod-threads-cache has no slot for board."
  [board config]
  (let [board-atom (get @chod-threads-cache board)]
    ;; Fail with a readable message instead of a NullPointerException from
    ;; dereferencing nil. A plain exception is used rather than ex-info
    ;; because the ex-data of an ex-info may be interpreted by callers.
    (when (nil? board-atom)
      (throw (IllegalArgumentException.
              (str "No cache slot exists for board: " (pr-str board)))))
    (let [board-config (get-in config [:boards-enabled board])
          ;; :refresh-rate is scaled by 1000 to compare against milliseconds
          ;; (presumably it is configured in seconds -- confirm)
          refresh-rate (* 1000 (get board-config :refresh-rate))
          {time-downloaded :time
           :or {time-downloaded 0}
           :as cached} @board-atom
          ;; TODO: This also makes it implicitly lazy-load -> if disabled make the check here
          time-to-update? (or (nil? cached)
                              (> (System/currentTimeMillis)
                                 (+ refresh-rate time-downloaded)))]
      (if time-to-update?
        ;; Use the requested board for the URL, not a hard-coded one,
        ;; so every enabled board downloads its own catalog.
        (update-board-cache! (get-board-url board config)
                             board
                             (get board-config :starting-page))
        cached))))