Compare commits
2 commits
98a58ca1c3
...
8a7981c27a
Author | SHA1 | Date | |
---|---|---|---|
8a7981c27a | |||
8c1cfbed33 |
4 changed files with 114 additions and 59 deletions
|
@ -1,4 +1,4 @@
|
||||||
(defproject rss-thread-watch "0.3.0-SNAPSHOT"
|
(defproject rss-thread-watch "0.3.5-SNAPSHOT"
|
||||||
:description "RSS based thread watcher"
|
:description "RSS based thread watcher"
|
||||||
:url "http://example.com/FIXME"
|
:url "http://example.com/FIXME"
|
||||||
:license {:name "AGPL-3.0-only"
|
:license {:name "AGPL-3.0-only"
|
||||||
|
|
|
@ -29,10 +29,11 @@
|
||||||
:default-board "/mlp/"
|
:default-board "/mlp/"
|
||||||
:enable-board-listing true
|
:enable-board-listing true
|
||||||
:board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact] and I may enable it for you"
|
:board-disabled-message "This board is not enabled for feed generation.\n\nYou can contact me here: [contact] and I may enable it for you"
|
||||||
:target "https://api.4chan.org"
|
|
||||||
:boards-defaults {:refresh-rate 300
|
:boards-defaults {:refresh-rate 300
|
||||||
:starting-page 7
|
:starting-page 7
|
||||||
:default-chod 94}
|
:default-chod 94
|
||||||
|
:target "https://api.4chan.org"
|
||||||
|
:lazy-load true}
|
||||||
:boards-enabled {"/mlp/" {}
|
:boards-enabled {"/mlp/" {}
|
||||||
"/g/" {}}})
|
"/g/" {}}})
|
||||||
|
|
||||||
|
@ -60,12 +61,13 @@
|
||||||
"Fills every enabled board with default config values"
|
"Fills every enabled board with default config values"
|
||||||
[config]
|
[config]
|
||||||
(let [defaults (:boards-defaults config)]
|
(let [defaults (:boards-defaults config)]
|
||||||
(update-in config
|
(dissoc (update-in config
|
||||||
'(:boards-enabled)
|
'(:boards-enabled)
|
||||||
(fn [mp]
|
(fn [mp]
|
||||||
(u/fmap (fn [k v]
|
(u/fmap (fn [k v]
|
||||||
(u/map-apply-defaults v defaults))
|
(u/map-apply-defaults v defaults))
|
||||||
mp)))))
|
mp)))
|
||||||
|
:boards-defaults)))
|
||||||
|
|
||||||
(defn get-some-config
|
(defn get-some-config
|
||||||
"Attempts to get config somehow,
|
"Attempts to get config somehow,
|
||||||
|
@ -87,18 +89,20 @@
|
||||||
"Entry point, starts webserver"
|
"Entry point, starts webserver"
|
||||||
[& args]
|
[& args]
|
||||||
;; Todo: Think of a way to start repeated download for every catalog efficiently
|
;; Todo: Think of a way to start repeated download for every catalog efficiently
|
||||||
(let [config (get-some-config args)
|
(let [config (get-some-config args)]
|
||||||
expanded-config
|
;; Init the few globals we have
|
||||||
|
(reset! watcher/GLOBAL-CONFIG config)
|
||||||
]
|
(reset! feed/boards-enabled-cache (set (keys (get config :boards-enabled))))
|
||||||
(println args)
|
(reset! watcher/chod-threads-cache (watcher/generate-chod-cache-structure config))
|
||||||
(System/exit 0)
|
(println args)
|
||||||
(set-interval (fn []
|
(clojure.pprint/pprint config)
|
||||||
(println "Starting cache update")
|
;; Needs to be redone and probably removed from here
|
||||||
(watcher/update-thread-cache! (:target config) (:starting-page config)))
|
;; (set-interval (fn []
|
||||||
(* 1000 (:refresh-delay config)))
|
;; (println "Starting cache update")
|
||||||
(jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT)
|
;; (watcher/update-board-cache! (:target config) (:starting-page config)))
|
||||||
:join? true})))
|
;; (* 1000 (:refresh-delay config)))
|
||||||
|
(jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port CONFIG-DEFAULT)
|
||||||
|
:join? true})))
|
||||||
|
|
||||||
;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started
|
;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started
|
||||||
(defn repl-main
|
(defn repl-main
|
||||||
|
|
|
@ -22,8 +22,8 @@
|
||||||
[rss-thread-watch.utils :as ut])
|
[rss-thread-watch.utils :as ut])
|
||||||
(:gen-class))
|
(:gen-class))
|
||||||
|
|
||||||
|
(def boards-enabled-cache
|
||||||
|
(atom nil))
|
||||||
|
|
||||||
(defn new-guid-always
|
(defn new-guid-always
|
||||||
"Generates always unique GUID for Feed item.
|
"Generates always unique GUID for Feed item.
|
||||||
|
@ -48,9 +48,11 @@
|
||||||
(defn filter-chod-posts
|
(defn filter-chod-posts
|
||||||
"Return list of all threads with equal or higher ChoD than requested
|
"Return list of all threads with equal or higher ChoD than requested
|
||||||
|
|
||||||
READS FROM GLOBALS: watcher.time-of-cache" ;Todo: best thing would be to add timestamp to cache
|
READS FROM GLOBALS: watcher.time-of-cache"
|
||||||
[query-vec chod-treshold repeat? cache]
|
[query-vec chod-treshold repeat? board-cache]
|
||||||
(let [time-of-generation @watcher/time-of-cache
|
|
||||||
|
(let [{time-of-generation :time
|
||||||
|
cache :data} board-cache
|
||||||
guid-fn (if repeat? (fn [x] (new-guid-always x time-of-generation))
|
guid-fn (if repeat? (fn [x] (new-guid-always x time-of-generation))
|
||||||
update-only-guid)
|
update-only-guid)
|
||||||
cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold))
|
cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold))
|
||||||
|
@ -73,9 +75,9 @@
|
||||||
(defn thread-to-rss-item
|
(defn thread-to-rss-item
|
||||||
"If I wasnt retarded I could have made the cached version look like
|
"If I wasnt retarded I could have made the cached version look like
|
||||||
rss item already but what can you do. I'll refactor I promise, I just need this done ASAP" ;Todo: do what the docstring says
|
rss item already but what can you do. I'll refactor I promise, I just need this done ASAP" ;Todo: do what the docstring says
|
||||||
[t]
|
[t] ;TODO: oh Luna the hardcodes ;;RESUME
|
||||||
(let [link-url (str "https://boards.4chan.org/mlp/thread/" (:no t))] ; jesus, well I said only /mlp/ is supported now so fuck it
|
(let [link-url (str "https://boards.4chan.org/mlp/thread/" (:no t))] ; jesus, well I said only /mlp/ is supported now so fuck it
|
||||||
{:title (format "%.2f%% - %s" (:chod t) (:title t))
|
{:title (format "%.2f%% - %s" (:chod t) (:title t)) ;TODO: Generate link from the target somehow, or just include it from API response
|
||||||
;; :url link-url <- this is supposed to be for images according to: https://cyber.harvard.edu/rss/rss.html
|
;; :url link-url <- this is supposed to be for images according to: https://cyber.harvard.edu/rss/rss.html
|
||||||
:description (format "The thread: '%s' has %.2f%% chance of dying" (:title t) (:chod t))
|
:description (format "The thread: '%s' has %.2f%% chance of dying" (:title t) (:chod t))
|
||||||
:link link-url
|
:link link-url
|
||||||
|
@ -97,9 +99,11 @@
|
||||||
|
|
||||||
READS FROM GLOBALS:
|
READS FROM GLOBALS:
|
||||||
rss-thread-watch.watcher.chod-threads-cache
|
rss-thread-watch.watcher.chod-threads-cache
|
||||||
rss-thread-watch.core.CONFIG"
|
rss-thread-watch.watcher.GLOBAL-CONFIG" ;TODO: Update if it really reads from there anymore
|
||||||
[rqst]
|
[rqst]
|
||||||
(try (let [{{chod "chod" :or {chod "94"}
|
(try (let [{{chod "chod"
|
||||||
|
board "board" :or {chod "94"
|
||||||
|
board (get @watcher/GLOBAL-CONFIG :default-board)}
|
||||||
:as prms} :params
|
:as prms} :params
|
||||||
uri :uri} rqst
|
uri :uri} rqst
|
||||||
qrs (prms "q")
|
qrs (prms "q")
|
||||||
|
@ -110,19 +114,23 @@
|
||||||
chod)]
|
chod)]
|
||||||
(try ;If we can't parse number from chod, use default 94
|
(try ;If we can't parse number from chod, use default 94
|
||||||
(if (or (vector? chod)
|
(if (or (vector? chod)
|
||||||
(<= (Integer/parseInt chod) 60)) ; Never accept chod lower that 60 TODO: don't hardcode this
|
(<= (Integer/parseInt chod) 60)) ; Never accept chod lower than 60 TODO: don't hardcode this
|
||||||
60 (Integer/parseInt chod))
|
60 (Integer/parseInt chod))
|
||||||
(catch Exception e
|
(catch Exception e
|
||||||
94)))
|
94)))
|
||||||
cache @watcher/chod-threads-cache]
|
cache @watcher/chod-threads-cache]
|
||||||
;; (println "RCVD: " rqst)
|
(println "\n\nRCVD: " rqst)
|
||||||
(println rqst)
|
;; (println rqst)
|
||||||
;; ====== Errors =====
|
;; ====== Errors =====
|
||||||
;; Something other than feed.xml requested
|
;; Something other than feed.xml requested
|
||||||
(when-not (s/ends-with? uri "feed.xml")
|
(when-not (s/ends-with? uri "feed.xml")
|
||||||
(throw (ex-info "404" {:status 404
|
(throw (ex-info "404" {:status 404
|
||||||
:header {"Content-Type" "text/plain"}
|
:header {"Content-Type" "text/plain"}
|
||||||
:body "404 This server has nothing but /feed.xml"})))
|
:body "404 This server has nothing but /feed.xml"})))
|
||||||
|
(when-not (contains? @boards-enabled-cache board)
|
||||||
|
(throw (ex-info "403" {:status 403
|
||||||
|
:header {"Content-Type" "text/plain"}
|
||||||
|
:body (get @watcher/GLOBAL-CONFIG :board-disabled-message)})))
|
||||||
;; No url params -> we redirect to documentation about params
|
;; No url params -> we redirect to documentation about params
|
||||||
(when (empty? prms)
|
(when (empty? prms)
|
||||||
(throw (ex-info "302"
|
(throw (ex-info "302"
|
||||||
|
@ -146,13 +154,15 @@
|
||||||
;; There shouldn't be any problems with this mime type but if there are
|
;; There shouldn't be any problems with this mime type but if there are
|
||||||
;; replace with "text/xml", or even better, get RSS reader that is not utter shit
|
;; replace with "text/xml", or even better, get RSS reader that is not utter shit
|
||||||
:header {"Content-Type" "application/rss+xml"}
|
:header {"Content-Type" "application/rss+xml"}
|
||||||
:body (generate-feed queries real-chod repeat? cache)})
|
:body (generate-feed queries real-chod repeat? (watcher/get-thread-data board @watcher/GLOBAL-CONFIG))})
|
||||||
(catch Exception e
|
(catch Exception e
|
||||||
;; Ex-info has been crafted to match HTTP response body so we can send it
|
;; Ex-info has been crafted to match HTTP response body so we can send it
|
||||||
(if-let [caught (ex-data e)]
|
(if-let [caught (ex-data e)]
|
||||||
caught ;We have custom crafted error
|
caught ;We have custom crafted error
|
||||||
{:status 500 ;Something else fucked up, we print what happened
|
(do
|
||||||
:header {"Content-Type" "text/plain"}
|
(print "WTF??: " e)
|
||||||
:body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n"
|
{:status 500 ;Something else fucked up, we print what happened
|
||||||
(ex-cause e) "\n"
|
:header {"Content-Type" "text/plain"}
|
||||||
e)}))))
|
:body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n"
|
||||||
|
(ex-cause e) "\n"
|
||||||
|
e)})))))
|
||||||
|
|
|
@ -18,17 +18,23 @@
|
||||||
[clojure.data.json :as js])
|
[clojure.data.json :as js])
|
||||||
(:gen-class))
|
(:gen-class))
|
||||||
|
|
||||||
|
(def GLOBAL-CONFIG
|
||||||
|
"Global config with defaults for missing entries"
|
||||||
|
;; I know globals are ew in Clojure but I don't know any
|
||||||
|
;; better way of doing this
|
||||||
|
(atom nil))
|
||||||
|
|
||||||
(def chod-threads-cache
|
(def chod-threads-cache
|
||||||
"Cached map of threads that have CHanceOfDeath > configured"
|
"Cached map of threads that have CHanceOfDeath > configured"
|
||||||
nil)
|
(atom {}))
|
||||||
|
|
||||||
(def time-of-cache nil)
|
(defn generate-chod-cache-structure
|
||||||
|
"Generates initial structure for global cache
|
||||||
(defn init-global-cache
|
Structure is returned, you have to set it yourself"
|
||||||
"Initializes global cache of catalogs"
|
|
||||||
[config]
|
[config]
|
||||||
(keys (:boards-enabled config))
|
(let [ks (keys (:boards-enabled config))]
|
||||||
)
|
(zipmap ks
|
||||||
|
(repeatedly (count ks) #(atom nil)))))
|
||||||
|
|
||||||
(defn process-page
|
(defn process-page
|
||||||
"Procesess every thread in page, leaving only relevant information
|
"Procesess every thread in page, leaving only relevant information
|
||||||
|
@ -50,27 +56,62 @@
|
||||||
(defn build-cache
|
(defn build-cache
|
||||||
"Build cache of near-death threads so the values don't have to be recalculated on each request."
|
"Build cache of near-death threads so the values don't have to be recalculated on each request."
|
||||||
[pages-to-index pages-total threads-per-page threads-total]
|
[pages-to-index pages-total threads-per-page threads-total]
|
||||||
(vec (flatten (map (fn [single-page]
|
{:time (System/currentTimeMillis)
|
||||||
;; We have to (dec page-number) bcs otherwise we would get the total number of threads
|
:data (vec (flatten (map (fn [single-page]
|
||||||
;; including the whole page of threads
|
;; We have to (dec page-number) bcs otherwise we would get the total number of threads
|
||||||
(let [page-number (dec (:page single-page))] ; inc to get to the actuall page
|
;; including the whole page of threads
|
||||||
(process-page (:threads single-page) threads-total (inc (* page-number threads-per-page)))))
|
(let [page-number (dec (:page single-page))] ; inc to get to the actuall page
|
||||||
pages-to-index))))
|
(process-page (:threads single-page) threads-total (inc (* page-number threads-per-page)))))
|
||||||
|
pages-to-index)))})
|
||||||
|
|
||||||
(defn update-thread-cache!
|
(defn update-board-cache!
|
||||||
"Updates cache of near-death threads. Writes to chod-threads-cache as side effect.
|
"Updates cache of near-death threads. Writes to chod-threads-cache as side effect.
|
||||||
[url] - Url to download data from
|
[url] - Url to download data from
|
||||||
[starting-page] - From which page consider threads to be fit for near-death cache"
|
[board] - Board to assign cached data to, its existence is NOT checked here
|
||||||
[url starting-page]
|
[starting-page] - From which page consider threads to be fit for near-death cache
|
||||||
;; Todo: surround with try so we can timeout and other stuff
|
THIS FUNCTION WRITES TO chod-threads-cache
|
||||||
|
Returns the new [board] cache map (keys :time and :data)"
|
||||||
|
[url board starting-page]
|
||||||
|
;; Todo: surround with try so we can timeout, 40x and other stuff
|
||||||
(let [catalog (with-open [readr (io/reader url)]
|
(let [catalog (with-open [readr (io/reader url)]
|
||||||
(js/read readr :key-fn keyword))
|
(js/read readr :key-fn keyword))
|
||||||
pages-total (count catalog)
|
pages-total (count catalog)
|
||||||
;; universal calculation for total number of threads:
|
;; universal calculation for total number of threads:
|
||||||
;; (pages-total-1) * threadsPerPage + threadsOnLastpage ;;accounts for boards which have stickied threads making them have 11pages
|
;; (pages-total -1) * threadsPerPage + threadsOnLastpage ;;accounts for boards which have stickied threads making them have 11pages
|
||||||
threads-per-page (count (:threads (first catalog)))
|
threads-per-page (count (:threads (first catalog)))
|
||||||
threads-total (+ (* threads-per-page (dec pages-total)) (count (:threads (last catalog)))) ;; Todo: Yeah, maybe this calculation could be refactored into let
|
threads-total (+ (* threads-per-page (dec pages-total)) (count (:threads (last catalog)))) ;; Todo: Yeah, maybe this calculation could be refactored into let
|
||||||
to-index (filter (fn [item]
|
to-index (filter (fn [item]
|
||||||
(<= starting-page (:page item))) catalog)]
|
(<= starting-page (:page item))) catalog)]
|
||||||
(reset! chod-threads-cache (build-cache to-index pages-total threads-per-page threads-total))
|
;; TODO: there absolutely must be try catch for missing - not enabled boards,
|
||||||
(reset! time-of-cache (System/currentTimeMillis))))
|
;; This will return nil and that will fuck everything up
|
||||||
|
(reset! (get @chod-threads-cache board)
|
||||||
|
(build-cache to-index pages-total threads-per-page threads-total))))
|
||||||
|
|
||||||
|
(defn board-enabled?
  "Checks whether board is enabled in config.

  [board]  - Board name as used for the keys of :boards-enabled, e.g. \"/mlp/\"
  [config] - Config map; boards are looked up under its :boards-enabled key

  Returns true when [board] is a key of (:boards-enabled config), false
  otherwise (including when :boards-enabled is missing)."
  [board config]
  ;; contains? takes the collection first and the key second, and it must be
  ;; given the map itself: on a seq of keys it throws instead of searching.
  (contains? (get config :boards-enabled) board))
|
||||||
|
|
||||||
|
(defn get-board-url
  "Builds the catalog URL for [board].

  The result is the board's :target entry (read from [config] at
  [:boards-enabled board :target]) followed by [board] and \"catalog.json\".
  Nothing is validated or escaped here."
  [board config]
  ;; TODO: jesus, this needs sanitization and should be probably crafted by some URL class
  (let [board-target (get-in config [:boards-enabled board :target])]
    (str board-target board "catalog.json")))
|
||||||
|
|
||||||
|
(defn get-thread-data
  "Gets thread cache for given board.
  If the cached copy is missing or older than the board's :refresh-rate,
  a new catalog is downloaded first.

  [board]  - Board to get data for; its existence in chod-threads-cache is
             NOT checked here
  [config] - Config map, read at [:boards-enabled board ...]

  Returns the board's cache map (keys :time and :data).

  MAY CAUSE WRITE TO chod-threads-cache IF NECESSARY"
  [board config]
  (let [;; :refresh-rate is multiplied by 1000 so it can be compared with millisecond timestamps
        refresh-rate (* 1000 (get-in config [:boards-enabled board :refresh-rate]))
        ;; `cached` is the dereferenced content of the board's atom, nil until first download
        {time-downloaded :time
         :or {time-downloaded 0}
         :as cached} @(get @chod-threads-cache board)
        ;; TODO: This also makes it implicitly lazy-load -> if disabled make the check here
        time-to-update? (or (nil? cached)
                            (> (System/currentTimeMillis) (+ refresh-rate time-downloaded)))]
    (if time-to-update?
      ;; Download the catalog of the board that was asked for; a hard-coded
      ;; board here would store another board's threads under this board's key.
      (update-board-cache! (get-board-url board config)
                           board
                           (get-in config [:boards-enabled board :starting-page]))
      @(get @chod-threads-cache board))))
|
||||||
|
|
Loading…
Reference in a new issue