Implement multiboard and lazyloading as result, added todos
And shitton of them
This commit is contained in:
parent
8c1cfbed33
commit
8a7981c27a
4 changed files with 111 additions and 57 deletions
|
@ -1,4 +1,4 @@
|
|||
(defproject rss-thread-watch "0.3.0-SNAPSHOT"
|
||||
(defproject rss-thread-watch "0.3.5-SNAPSHOT"
|
||||
:description "RSS based thread watcher"
|
||||
:url "http://example.com/FIXME"
|
||||
:license {:name "AGPL-3.0-only"
|
||||
|
|
|
@ -61,12 +61,13 @@
|
|||
"Fills every enabled board with default config values"
|
||||
[config]
|
||||
(let [defaults (:boards-defaults config)]
|
||||
(update-in config
|
||||
(dissoc (update-in config
|
||||
'(:boards-enabled)
|
||||
(fn [mp]
|
||||
(u/fmap (fn [k v]
|
||||
(u/map-apply-defaults v defaults))
|
||||
mp)))))
|
||||
mp)))
|
||||
:boards-defaults)))
|
||||
|
||||
(defn get-some-config
|
||||
"Attempts to get config somehow,
|
||||
|
@ -88,16 +89,18 @@
|
|||
"Entry point, starts webserver"
|
||||
[& args]
|
||||
;; Todo: Think of a way to start repeated download for every catalog efficiently
|
||||
(let [config (get-some-config args)
|
||||
expanded-config
|
||||
|
||||
]
|
||||
(let [config (get-some-config args)]
|
||||
;; Init the few globals we have
|
||||
(reset! watcher/GLOBAL-CONFIG config)
|
||||
(reset! feed/boards-enabled-cache (set (keys (get config :boards-enabled))))
|
||||
(reset! watcher/chod-threads-cache (watcher/generate-chod-cache-structure config))
|
||||
(println args)
|
||||
(System/exit 0)
|
||||
(set-interval (fn []
|
||||
(println "Starting cache update")
|
||||
(watcher/update-thread-cache! (:target config) (:starting-page config)))
|
||||
(* 1000 (:refresh-delay config)))
|
||||
(clojure.pprint/pprint config)
|
||||
;; Needs to be redone and probably removed from here
|
||||
;; (set-interval (fn []
|
||||
;; (println "Starting cache update")
|
||||
;; (watcher/update-board-cache! (:target config) (:starting-page config)))
|
||||
;; (* 1000 (:refresh-delay config)))
|
||||
(jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT)
|
||||
:join? true})))
|
||||
|
||||
|
|
|
@ -22,8 +22,8 @@
|
|||
[rss-thread-watch.utils :as ut])
|
||||
(:gen-class))
|
||||
|
||||
|
||||
|
||||
(def boards-enabled-cache
|
||||
(atom nil))
|
||||
|
||||
(defn new-guid-always
|
||||
"Generates always unique GUID for Feed item.
|
||||
|
@ -48,9 +48,11 @@
|
|||
(defn filter-chod-posts
|
||||
"Return list of all threads with equal or higher ChoD than requested
|
||||
|
||||
READS FROM GLOBALS: watcher.time-of-cache" ;Todo: best thing would be to add timestamp to cache
|
||||
[query-vec chod-treshold repeat? cache]
|
||||
(let [time-of-generation @watcher/time-of-cache
|
||||
READS FROM GLOBALS: watcher.time-of-cache"
|
||||
[query-vec chod-treshold repeat? board-cache]
|
||||
|
||||
(let [{time-of-generation :time
|
||||
cache :data} board-cache
|
||||
guid-fn (if repeat? (fn [x] (new-guid-always x time-of-generation))
|
||||
update-only-guid)
|
||||
cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold))
|
||||
|
@ -73,9 +75,9 @@
|
|||
(defn thread-to-rss-item
|
||||
"If I wasnt retarded I could have made the cached version look like
|
||||
rss item already but what can you do. I'll refactor I promise, I just need this done ASAP" ;Todo: do what the docstring says
|
||||
[t]
|
||||
[t] ;TODO: oh Luna the hardcodes ;;RESUME
|
||||
(let [link-url (str "https://boards.4chan.org/mlp/thread/" (:no t))] ; jesus, well I said only /mlp/ is supported now so fuck it
|
||||
{:title (format "%.2f%% - %s" (:chod t) (:title t))
|
||||
{:title (format "%.2f%% - %s" (:chod t) (:title t)) ;TODO: Generate link from the target somehow, or just include it from API response
|
||||
;; :url link-url <- this is supposed to be for images according to: https://cyber.harvard.edu/rss/rss.html
|
||||
:description (format "The thread: '%s' has %.2f%% chance of dying" (:title t) (:chod t))
|
||||
:link link-url
|
||||
|
@ -97,9 +99,11 @@
|
|||
|
||||
READS FROM GLOBALS:
|
||||
rss-thread-watch.watcher.chod-threads-cache
|
||||
rss-thread-watch.core.CONFIG"
|
||||
rss-thread-watch.watcher.GLOBAL-CONFIG" ;TODO: Update if it really reads from there anymore
|
||||
[rqst]
|
||||
(try (let [{{chod "chod" :or {chod "94"}
|
||||
(try (let [{{chod "chod"
|
||||
board "board" :or {chod "94"
|
||||
board (get @watcher/GLOBAL-CONFIG :default-board)}
|
||||
:as prms} :params
|
||||
uri :uri} rqst
|
||||
qrs (prms "q")
|
||||
|
@ -110,19 +114,23 @@
|
|||
chod)]
|
||||
(try ;If we can't parse number from chod, use default 94
|
||||
(if (or (vector? chod)
|
||||
(<= (Integer/parseInt chod) 60)) ; Never accept chod lower that 60 TODO: don't hardcode this
|
||||
(<= (Integer/parseInt chod) 60)) ; Never accept chod lower than 60 TODO: don't hardcode this
|
||||
60 (Integer/parseInt chod))
|
||||
(catch Exception e
|
||||
94)))
|
||||
cache @watcher/chod-threads-cache]
|
||||
;; (println "RCVD: " rqst)
|
||||
(println rqst)
|
||||
(println "\n\nRCVD: " rqst)
|
||||
;; (println rqst)
|
||||
;; ====== Errors =====
|
||||
;; Something other than feed.xml requested
|
||||
(when-not (s/ends-with? uri "feed.xml")
|
||||
(throw (ex-info "404" {:status 404
|
||||
:header {"Content-Type" "text/plain"}
|
||||
:body "404 This server has nothing but /feed.xml"})))
|
||||
(when-not (contains? @boards-enabled-cache board)
|
||||
(throw (ex-info "403" {:status 403
|
||||
:header {"Content-Type" "text/plain"}
|
||||
:body (get @watcher/GLOBAL-CONFIG :board-disabled-message)})))
|
||||
;; No url params -> we redirect to documentation about params
|
||||
(when (empty? prms)
|
||||
(throw (ex-info "302"
|
||||
|
@ -146,13 +154,15 @@
|
|||
;; There shouldn't be any problems with this mime type but if there are
|
||||
;; replace with "text/xml", or even better, get RSS reader that is not utter shit
|
||||
:header {"Content-Type" "application/rss+xml"}
|
||||
:body (generate-feed queries real-chod repeat? cache)})
|
||||
:body (generate-feed queries real-chod repeat? (watcher/get-thread-data board @watcher/GLOBAL-CONFIG))})
|
||||
(catch Exception e
|
||||
;; Ex-info has been crafted to match HTTP response body so we can send it
|
||||
(if-let [caught (ex-data e)]
|
||||
caught ;We have custom crafted error
|
||||
(do
|
||||
(print "WTF??: " e)
|
||||
{:status 500 ;Something else fucked up, we print what happened
|
||||
:header {"Content-Type" "text/plain"}
|
||||
:body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n"
|
||||
(ex-cause e) "\n"
|
||||
e)}))))
|
||||
e)})))))
|
||||
|
|
|
@ -18,17 +18,23 @@
|
|||
[clojure.data.json :as js])
|
||||
(:gen-class))
|
||||
|
||||
(def GLOBAL-CONFIG
|
||||
"Global config with defaults for missing entries"
|
||||
;; I know globals are ew in Clojure but I don't know any
|
||||
;; better way of doing this
|
||||
(atom nil))
|
||||
|
||||
(def chod-threads-cache
|
||||
"Cached map of threads that have CHanceOfDeath > configured"
|
||||
nil)
|
||||
(atom {}))
|
||||
|
||||
(def time-of-cache nil)
|
||||
|
||||
(defn init-global-cache
|
||||
"Initializes global cache of catalogs"
|
||||
(defn generate-chod-cache-structure
|
||||
"Generates initial structure for global cache
|
||||
Structure is returned, you have to set it yourself"
|
||||
[config]
|
||||
(keys (:boards-enabled config))
|
||||
)
|
||||
(let [ks (keys (:boards-enabled config))]
|
||||
(zipmap ks
|
||||
(repeatedly (count ks) #(atom nil)))))
|
||||
|
||||
(defn process-page
|
||||
"Processes every thread in page, leaving only relevant information
|
||||
|
@ -50,19 +56,23 @@
|
|||
(defn build-cache
|
||||
"Build cache of near-death threads so the values don't have to be recalculated on each request."
|
||||
[pages-to-index pages-total threads-per-page threads-total]
|
||||
(vec (flatten (map (fn [single-page]
|
||||
{:time (System/currentTimeMillis)
|
||||
:data (vec (flatten (map (fn [single-page]
|
||||
;; We have to (dec page-number) bcs otherwise we would get the total number of threads
|
||||
;; including the whole page of threads
|
||||
(let [page-number (dec (:page single-page))] ; inc to get to the actuall page
|
||||
(process-page (:threads single-page) threads-total (inc (* page-number threads-per-page)))))
|
||||
pages-to-index))))
|
||||
pages-to-index)))})
|
||||
|
||||
(defn update-thread-cache!
|
||||
(defn update-board-cache!
|
||||
"Updates cache of near-death threads. Writes to chod-threads-cache as side effect.
|
||||
[url] - Url to download data from
|
||||
[starting-page] - From which page consider threads to be fit for near-death cache"
|
||||
[url starting-page]
|
||||
;; Todo: surround with try so we can timeout and other stuff
|
||||
[board] - Board to assign cached data to, it's existence is NOT checked here
|
||||
[starting-page] - From which page consider threads to be fit for near-death cache
|
||||
THIS FUNCTION WRITES TO chod-threads-cache
|
||||
Returns :data part of [board] cache"
|
||||
[url board starting-page]
|
||||
;; Todo: surround with try so we can timeout, 40x and other stuff
|
||||
(let [catalog (with-open [readr (io/reader url)]
|
||||
(js/read readr :key-fn keyword))
|
||||
pages-total (count catalog)
|
||||
|
@ -72,5 +82,36 @@
|
|||
threads-total (+ (* threads-per-page (dec pages-total)) (count (:threads (last catalog)))) ;; Todo: Yeah, maybe this calculation could be refactored into let
|
||||
to-index (filter (fn [item]
|
||||
(<= starting-page (:page item))) catalog)]
|
||||
(reset! chod-threads-cache (build-cache to-index pages-total threads-per-page threads-total))
|
||||
(reset! time-of-cache (System/currentTimeMillis))))
|
||||
;; TODO: there absolutely must be try catch for missing - not enabled boards,
|
||||
;; This will return nill and that fuck everything up
|
||||
(reset! (get @chod-threads-cache board)
|
||||
(build-cache to-index pages-total threads-per-page threads-total))))
|
||||
|
||||
(defn board-enabled?
  "Checks whether board is enabled in config.

  [board]  - Board identifier, looked up as a key of the :boards-enabled map
  [config] - Config map; when it has no :boards-enabled entry no board is enabled

  Returns true when [board] is a key of (:boards-enabled config), false otherwise."
  [board config]
  ;; `contains?` takes the collection first and the key second, and it tests for
  ;; KEYS. It is therefore applied to the map itself and not to the seq from `keys`.
  (contains? (get config :boards-enabled) board))
|
||||
|
||||
(defn get-board-url
  "Builds the catalog url for [board].

  The result is the plain concatenation of three parts:
    1. the :target stored for [board] under :boards-enabled in [config]
    2. [board] itself
    3. the literal \"catalog.json\"

  A missing :target contributes nothing to the resulting string."
  [board config]
  ;; TODO: jesus, this needs sanitization and should be probably crafted by some URL class
  (let [target (-> config :boards-enabled (get board) :target)]
    (str target board "catalog.json")))
|
||||
|
||||
(defn get-thread-data
  "Gets thread cache for given board.
  If the cached value is missing or older than the board's refresh rate,
  downloads a new one.

  [board]  - Board whose cache is requested; used as the key into both
             chod-threads-cache and the :boards-enabled part of [config]
  [config] - Config map; :refresh-rate and :starting-page are read from
             [:boards-enabled board]

  Returns the result of update-board-cache! when a refresh was needed,
  otherwise the value currently stored for [board].

  MAY CAUSE WRITE TO chod-threads-cache IF NECESSARY

  NOTE: a board that has no entry in chod-threads-cache, or that has no
  :refresh-rate configured, makes this function throw - callers are expected
  to check that the board is enabled first."
  [board config]
  (let [;; :refresh-rate is multiplied by 1000 because it is compared against
        ;; System/currentTimeMillis below
        refresh-rate (* 1000 (get-in config [:boards-enabled board :refresh-rate]))
        ;; `cached` is the dereferenced content of the board's atom (nil until the
        ;; first download), not the atom itself
        {time-downloaded :time
         :or {time-downloaded 0}
         :as cached} @(get @chod-threads-cache board)
        ;; TODO: This also makes it implicitly lazy-load -> if disabled make the check here
        time-to-update? (or (nil? cached)
                            (> (System/currentTimeMillis) (+ refresh-rate time-downloaded)))]
    (if time-to-update?
      ;; Build the url from the requested board; a hard-coded board here would
      ;; fill every board's cache with the same catalog
      (update-board-cache! (get-board-url board config)
                           board
                           (get-in config [:boards-enabled board :starting-page]))
      @(get @chod-threads-cache board))))
|
||||
|
|
Loading…
Reference in a new issue