2024-01-19 00:55:42 +01:00
|
|
|
;; Copyright (C) 2024 Felisp
|
2023-12-23 02:19:29 +01:00
|
|
|
;;
|
|
|
|
;; This program is free software: you can redistribute it and/or modify
|
|
|
|
;; it under the terms of the GNU Affero General Public License as published by
|
|
|
|
;; the Free Software Foundation, version 3 of the License.
|
|
|
|
;;
|
|
|
|
;; This program is distributed in the hope that it will be useful,
|
|
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
;; GNU Affero General Public License for more details.
|
|
|
|
;;
|
|
|
|
;; You should have received a copy of the GNU Affero General Public License
|
|
|
|
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
(ns rss-thread-watch.feed-generator
|
2023-12-24 00:20:11 +01:00
|
|
|
"Generates feeds for requests"
|
2023-12-23 02:19:29 +01:00
|
|
|
(:require [ring.middleware.params :as rp]
|
2023-12-24 02:41:53 +01:00
|
|
|
[ring.util.response :as response]
|
2023-12-23 02:19:29 +01:00
|
|
|
[clj-rss.core :as rss]
|
|
|
|
[clojure.string :as s]
|
2024-01-05 01:45:41 +01:00
|
|
|
[rss-thread-watch.watcher :as watcher]
|
2024-09-10 17:15:49 +02:00
|
|
|
[rss-thread-watch.utils :as ut]
|
|
|
|
[rss-thread-watch.config :as conf])
|
2023-12-23 02:19:29 +01:00
|
|
|
(:gen-class))
|
|
|
|
|
2024-07-30 02:55:15 +02:00
|
|
|
;; Mutable holder that starts out as nil. `http-handler` derefs it and answers
;; 403 for every board that `contains?` does not find in it.
;; NOTE(review): presumably filled elsewhere with the enabled boards - confirm.
(def boards-enabled-cache (atom nil))
|
2023-12-23 02:19:29 +01:00
|
|
|
|
|
|
|
(defn new-guid-always
  "Returns `thread` with :guid set to \"<thread-number>-<time>\".

  Meant for users who want a notification on every server update, no matter
  whether the thread actually changed its position: when `time` is the
  UNIX time of the data update, every update yields a different GUID."
  [thread time]
  (let [thread-no (:no thread)
        guid      (str thread-no "-" time)]
    (assoc thread :guid guid)))
|
|
|
|
|
2024-08-15 17:29:56 +02:00
|
|
|
(defn new-guid-paranoid
  "Returns `thread` with a GUID that is regenerated on EVERY request to the feed.

  Useful when you really don't want a thread to die: the GUID is built from
  the thread number and the current wall-clock time in milliseconds, so what
  you see in your feed reader is not a cached item."
  [thread]
  (let [now-ms (System/currentTimeMillis)]
    (new-guid-always thread now-ms)))
|
|
|
|
|
2023-12-23 02:19:29 +01:00
|
|
|
(defn update-only-guid
  "Returns `thread` with a GUID that stays the same until the thread's data changes.

  The GUID is \"<thread-number>-<last-modified>-<chod>\" with the ChoD
  formatted to two decimal places, so a new GUID appears only when
  :last-modified or the rounded :chod differs."
  [thread]
  (let [{thread-no :no
         modified  :last-modified
         chod      :chod} thread
        guid (format "%d-%d-%.2f" thread-no modified chod)]
    (assoc thread :guid guid)))
|
2023-12-23 02:19:29 +01:00
|
|
|
|
|
|
|
(defn filter-chod-posts
  "Returns a lazy seq of cached threads at or above the requested ChoD whose
  title contains at least one of the queries, each with a :guid attached.

  Arguments:
    query-vec     - collection of search strings, matched case-insensitively
                    as substrings of the thread :title
    chod-treshold - number; threads are taken from the first cached thread
                    whose :chod is >= this value onwards
    repeat?       - GUID strategy: \"paranoid\" -> new GUID on every request,
                    \"true\" -> new GUID on every cache update,
                    anything else -> GUID changes only with the thread data
    board-cache   - map with :time (time the cache was generated) and
                    :data (vector of cached threads)

  Returns an empty seq when no cached thread reaches the threshold.
  Does not read any globals; everything comes from `board-cache`."
  [query-vec chod-treshold repeat? board-cache]
  (let [{time-of-generation :time
         cache              :data} board-cache
        guid-fn (case repeat?
                  "paranoid" new-guid-paranoid
                  "true"     (fn [x] (new-guid-always x time-of-generation))
                  update-only-guid)
        cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold))
                                             cache))
        ;; So we don't have to search thru everything we have cached.
        ;; No thread reaching the threshold leaves the index nil; `subvec`
        ;; would throw on that, so fall back to an empty vector instead.
        ;; NOTE(review): taking everything after the first hit only equals
        ;; "all threads >= threshold" if the cache is ordered by ascending
        ;; ChoD - confirm against the watcher.
        needed-cache-part (if (nil? cache-start-index)
                            []
                            (subvec cache cache-start-index))
        ;; Todo: Man, wouldn't it be cool to know which querry matched the thread?
        ;;       Would be so much easier for user to figure out why is it showing
        ;;       and it would solve the problem of super long titles (or OPs instead of titles)
        matches-query? (fn [t]
                         ;; Lower-case the title once per thread, not once per query
                         (let [title (s/lower-case (:title t))]
                           (some (fn [querry]
                                   (s/includes? title (s/lower-case querry)))
                                 query-vec)))
        actuall-matches (filter matches-query? (reverse needed-cache-part))]
    ;; Finally generate and append GUIDs
    (map guid-fn actuall-matches)))
|
|
|
|
|
2023-12-23 03:24:38 +01:00
|
|
|
(defn thread-to-rss-item
  "Turns a cached thread `t` into a feed item map that can be serialized into RSS.

  `host` is a URL template; its \"{threadnum}\" placeholder is replaced by the
  thread number to build the item link. `board` is accepted but not used."
  [t host board]
  (let [{thread-no :no
         chod      :chod
         title     :title
         guid      :guid} t
        ;; Hardcode emergency bugfix
        link-url (s/replace host "{threadnum}" (str thread-no))]
    ;; TODO: Generate link from the target somehow, or just include it from API response
    ;; No :url key here - that one is supposed to be for images according to:
    ;; https://cyber.harvard.edu/rss/rss.html
    {:guid        guid
     :link        link-url
     :title       (format "%.2f%% - %s" chod title)
     :description (format "The thread: '%s' has %.2f%% chance of dying" title chod)}))
|
|
|
|
|
|
|
|
(defn generate-feed
  "Builds the RSS channel XML for all cached threads matching the request.

  The matching threads come from `filter-chod-posts` (called with `query-vec`,
  `chod-treshold`, `repeat?` and `cache`); each one is converted with
  `thread-to-rss-item` using the :host and :name of `board-config`.

  READS FROM GLOBALS: rss-thread-watch.config/GLOBAL-CONFIG (:served-filename)"
  [query-vec chod-treshold repeat? cache board-config]
  (let [served-filename (get @conf/GLOBAL-CONFIG :served-filename)
        ;; TODO: hardcoded homepage
        feed-address    (str "https://tools.treebrary.org/thread-watcher/" served-filename)
        channel         {:title       (str "RSS Thread watcher v" conf/VERSION)
                         :link        feed-address
                         :feed-url    feed-address
                         :description "RSS based thread watcher"}
        board-host      (get board-config :host)
        board-name      (get board-config :name)
        matching        (filter-chod-posts query-vec chod-treshold repeat? cache)
        feed-items      (map (fn [thread]
                               (thread-to-rss-item thread board-host board-name))
                             matching)]
    (rss/channel-xml channel feed-items)))
|
|
|
|
|
2023-12-23 02:19:29 +01:00
|
|
|
(defn- plain-text-error
  "Builds an ex-info whose ex-data is a ready-to-send plain-text Ring response
  with the given HTTP `status` and `body`."
  [status body]
  (ex-info (str status)
           {:status  status
            :headers {"Content-Type" "text/plain"}
            :body    body}))

(defn http-handler
  "Handles HTTP requests, returns a Ring response map with the generated feed.

  Recognised url parameters (string keys in the request's :params):
    q      - one or more search terms (required)
    chod   - minimal chance of death, defaults to 94 and is never lower than 60;
             an unparsable value falls back to 94, a repeated parameter to 60
    board  - board to watch, defaults to :default-board from the config
    repeat - GUID strategy handed over to `generate-feed`

  Responses:
    200 - RSS feed
    302 - no url parameters at all, redirect to the documentation
    400 - no `q` parameter
    403 - board is not in `boards-enabled-cache`
    404 - uri does not end with the configured :served-filename
    500 - unexpected exception while generating the feed
    503 - thread cache is still empty

  READS FROM GLOBALS:
    boards-enabled-cache
    rss-thread-watch.watcher/chod-threads-cache
    rss-thread-watch.config/GLOBAL-CONFIG"
  [rqst]
  (try
    (let [config          @conf/GLOBAL-CONFIG
          served-filename (get config :served-filename)
          {{chod    "chod"
            board   "board"
            repeat? "repeat"
            :or     {chod    "94"
                     board   (get config :default-board)
                     repeat? false}
            :as     prms} :params
           uri :uri} rqst
          ;; `get` instead of calling `prms` as a function: :params may be nil
          qrs          (get prms "q")
          queries      (if (vector? qrs) qrs [qrs]) ; to always return vector
          min-chod     60 ; Never accept chod lower than this TODO: don't hardcode this
          default-chod 94 ; Used when chod can't be parsed as a number
          ;; NOTE(review): a repeated chod parameter arrives as a vector and is
          ;; clamped to the minimum, exactly as before - confirm this is wanted.
          real-chod    (if (vector? chod)
                         min-chod
                         (try
                           (max min-chod (Integer/parseInt chod))
                           (catch Exception _
                             default-chod)))
          board-config (get-in config [:boards-enabled board])
          cache        @watcher/chod-threads-cache]
      (println "\n\nRCVD: " rqst)
      ;; ====== Errors =====
      ;; Something other than $served-filename requested
      (when-not (s/ends-with? uri served-filename)
        (throw (plain-text-error 404 (str "404 This server has nothing but " served-filename))))
      (when-not (contains? @boards-enabled-cache board)
        (throw (plain-text-error 403 (get config :board-disabled-message))))
      ;; No url params -> we redirect to documentation about params
      (when (empty? prms)
        (throw (ex-info "302" ;TODO: remove hardcode redirect
                        (response/redirect "https://git.treebrary.org/Treebrary.org/rss-thread-watcher#headline-4"))))
      ;; No querry specified - don't know what to search for
      (when-not qrs
        (throw (plain-text-error
                400
                (str "400 You MUST specify query with one OR more'q=searchTerm' url parameter(s)\n\n\n"
                     "Exmple: '" served-filename "?q=pony&q=IWTCIRD' will show in your feed all threads with 'pony' or 'IWTCIRD'"
                     " in their title that are about to die."))))
      ;; Whether cache has been generated yet
      (when (empty? cache)
        (throw (plain-text-error
                503
                (str "503 Service Unavailable\n"
                     "Cache is empty, cannot generate feed. Try again later, it may work."))))
      ;; ==== Everything good ====
      {:status  200
       ;; There shouldn't be any problems with this mime type but if there are
       ;; replace with "text/xml", or even better, get a better RSS reader
       :headers {"Content-Type" "application/rss+xml"}
       :body    (generate-feed queries real-chod repeat? (watcher/get-thread-data board config) board-config)})
    (catch Exception e
      ;; Our own ex-infos carry a complete Ring response as their ex-data, so
      ;; it can be sent as is. Anything without :status is not one of ours.
      (let [caught (ex-data e)]
        (if (and (map? caught) (contains? caught :status))
          caught ;We have custom crafted error
          (do
            (println "WTF??: " e)
            {:status  500 ;Something else went wrong, we print what happened
             :headers {"Content-Type" "text/plain"}
             :body    (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n"
                           (ex-cause e) "\n"
                           e)}))))))
|