;; Copyright (C) 2024 Felisp
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU Affero General Public License as published by
;; the Free Software Foundation, version 3 of the License.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU Affero General Public License for more details.
;;
;; You should have received a copy of the GNU Affero General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

(ns rss-thread-watch.feed-generator
  "Generates feeds for requests"
  (:require [ring.middleware.params :as rp]
            [ring.util.response :as response]
            [clj-rss.core :as rss]
            [clojure.string :as s]
            [rss-thread-watch.watcher :as watcher]
            [rss-thread-watch.utils :as ut]
            [rss-thread-watch.config :as conf])
  (:gen-class))

;; Consulted by `http-handler` with `contains?` to decide whether a board may be
;; served. Starts as nil, so every board is rejected until something fills it.
(def boards-enabled-cache (atom nil))

;; ChoD threshold used when the request carries none, or one that can't be parsed.
(def ^:private default-chod 94)

;; Lowest ChoD threshold a request is allowed to ask for.
(def ^:private minimum-chod 60)

(defn new-guid-always
  "Returns `thread` with a :guid of the form <thread-number>-<time>.

  Used when the user wants a notification on every server update, no matter
  whether the thread actually changed its position: a new `time` (time of the
  data update) always yields a new GUID."
  [thread time]
  (assoc thread :guid (str (:no thread) "-" time)))

(defn new-guid-paranoid
  "Returns `thread` with a GUID that is regenerated on EVERY request to the feed.

  Useful when you really don't want a thread to die: the GUID is built from the
  current system time in milliseconds, so what you see in your feed reader
  can't be a cached item."
  [thread]
  (new-guid-always thread (System/currentTimeMillis)))

(defn update-only-guid
  "Returns `thread` with a GUID that changes only when the thread itself changes.

  The GUID is <thread-number>-<last-modified>-<ChoD with two decimals>, built
  from the :no, :last-modified and :chod keys of `thread`."
  [thread]
  (assoc thread :guid (format "%d-%d-%.2f"
                              (:no thread)
                              (:last-modified thread)
                              (:chod thread))))

(defn filter-chod-posts
  "Returns a lazy seq of cached threads whose ChoD is at or above `chod-treshold`
  and whose title contains at least one of the strings in `query-vec`
  (case-insensitive). Each returned thread carries a freshly generated :guid.

  `repeat?` selects the GUID strategy:
    \"paranoid\" -> `new-guid-paranoid`
    \"true\"     -> `new-guid-always`, using the :time of `board-cache`
    otherwise  -> `update-only-guid`

  `board-cache` is a map with :time and :data, where :data is a vector of
  threads. The matching part of :data is taken from the first thread reaching
  the threshold to the end, and is walked in reverse order.
  Returns an empty seq when no cached thread reaches the threshold."
  [query-vec chod-treshold repeat? board-cache]
  (let [{time-of-generation :time
         cache :data} board-cache
        guid-fn (case repeat?
                  "paranoid" new-guid-paranoid
                  "true" (fn [x] (new-guid-always x time-of-generation))
                  update-only-guid)
        ;; So we don't have to search through everything we have cached
        cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold))
                                             cache))]
    (if (nil? cache-start-index)
      ;; Nothing is close enough to dying; `subvec` would throw on a nil index
      ()
      (let [needed-cache-part (subvec cache cache-start-index)
            lowered-queries (map s/lower-case query-vec)
            ;; TODO: It would be nice to know which query matched the thread.
            ;; It would be much easier for the user to see why a thread is shown,
            ;; and it would solve the problem of super long titles
            ;; (or OPs used instead of titles).
            matches? (fn [thread]
                       (let [title (s/lower-case (:title thread))]
                         (some (fn [query] (s/includes? title query))
                               lowered-queries)))
            actual-matches (filter matches? (reverse needed-cache-part))]
        ;; Finally generate and attach GUIDs
        (map guid-fn actual-matches)))))

(defn thread-to-rss-item
  "Converts a cached thread `t` into a feed item map which can be serialized
  into RSS.

  `host` is a URL template; its {threadnum} placeholder is replaced with the
  thread number to build the item link. `board` is accepted but currently
  unused."
  [t host board]
  (let [link-url (s/replace host "{threadnum}" (str (:no t)))] ; Hardcoded emergency bugfix
    {:title (format "%.2f%% - %s" (:chod t) (:title t))
     ;; TODO: Generate link from the target somehow, or just include it from API response
     ;; :url link-url <- this is supposed to be for images according to:
     ;; https://cyber.harvard.edu/rss/rss.html
     :description (format "The thread: '%s' has %.2f%% chance of dying" (:title t) (:chod t))
     :link link-url
     :guid (:guid t)}))

(defn generate-feed
  "Generates the RSS XML for all threads in `cache` that match `query-vec` and
  `chod-treshold` (see `filter-chod-posts` for `repeat?`).

  `board-config` supplies :host (link template) and :name for the items."
  [query-vec chod-treshold repeat? cache board-config]
  (let [items (filter-chod-posts query-vec chod-treshold repeat? cache)
        head {:title (str "RSS Thread watcher v" conf/VERSION)
              :link "https://tools.treebrary.org/thread-watcher/feed.xml"
              :feed-url "https://tools.treebrary.org/thread-watcher/feed.xml"
              :description "RSS based thread watcher"}
        body (map #(thread-to-rss-item %1
                                       (get board-config :host)
                                       (get board-config :name))
                  items)]
    (rss/channel-xml head body)))

(defn- parse-chod
  "Turns the raw `chod` request parameter into a numeric threshold.

  A vector (parameter given several times) yields `minimum-chod`; a parsable
  string yields its integer value, but never less than `minimum-chod`;
  nil or anything unparsable yields `default-chod`."
  [chod]
  (cond
    (nil? chod) default-chod
    (vector? chod) minimum-chod
    :else (try
            ;; Never accept a chod lower than the minimum
            (max minimum-chod (Integer/parseInt chod))
            (catch Exception _
              default-chod))))

(defn http-handler
  "Handles HTTP requests and returns the generated feed as a Ring response map.

  Recognised query parameters: `q` (one or more search terms, required),
  `chod` (threshold, defaults to 94, never below 60), `board` (defaults to
  :default-board of the global config) and `repeat` (GUID strategy).

  Responses:
    404 - the URI does not end with feed.xml
    403 - the board is not in `boards-enabled-cache`
    302 - no parameters at all; redirects to the documentation
    400 - no `q` parameter
    503 - the thread cache is still empty
    500 - any unexpected exception
    200 - the feed

  READS FROM GLOBALS:
  rss-thread-watch.watcher/chod-threads-cache
  rss-thread-watch.config/GLOBAL-CONFIG"
  ;; TODO: Update if it really reads from there anymore
  [rqst]
  (try
    (let [{{chod "chod"
            board "board"
            repeat? "repeat"
            :or {chod "94"
                 board (get @conf/GLOBAL-CONFIG :default-board)
                 repeat? false}
            :as prms} :params
           uri :uri} rqst
          qrs (get prms "q")
          queries (if (vector? qrs) qrs [qrs]) ; to always return a vector
          real-chod (parse-chod chod)
          board-config (get-in @conf/GLOBAL-CONFIG [:boards-enabled board])
          cache @watcher/chod-threads-cache]
      (println "\n\nRCVD: " rqst)
      ;; ====== Errors =====
      ;; Something other than feed.xml requested
      (when-not (s/ends-with? uri "feed.xml")
        (throw (ex-info "404" {:status 404
                               :headers {"Content-Type" "text/plain"}
                               :body "404 This server has nothing but /feed.xml"})))
      (when-not (contains? @boards-enabled-cache board)
        (throw (ex-info "403" {:status 403
                               :headers {"Content-Type" "text/plain"}
                               :body (get @conf/GLOBAL-CONFIG :board-disabled-message)})))
      ;; No url params -> we redirect to documentation about params
      (when (empty? prms)
        (throw (ex-info "302"
                        (response/redirect "https://git.treebrary.org/Treebrary.org/rss-thread-watcher#headline-4"))))
      ;; No query specified - don't know what to search for
      (when-not qrs
        (throw (ex-info "400" {:status 400
                               :headers {"Content-Type" "text/plain"}
                               :body (str "400 You MUST specify query with one OR more'q=searchTerm' url parameter(s)\n\n\n"
                                          "Exmple: '/feed.xml?q=pony&q=IWTCIRD' will show in your feed all threads with 'pony' or 'IWTCIRD'"
                                          " in their title that are about to die.")})))
      ;; Whether cache has been generated yet
      (when (empty? cache)
        (throw (ex-info "503" {:status 503
                               :headers {"Content-Type" "text/plain"}
                               :body (str "503 Service Unavailable\n"
                                          "Cache is empty, cannot generate feed. Try again later, it may work.")})))
      ;; ==== Everything good ====
      {:status 200
       ;; There shouldn't be any problems with this mime type, but if there are,
       ;; replace it with "text/xml" - or better, use a more capable RSS reader
       :headers {"Content-Type" "application/rss+xml"}
       :body (generate-feed queries
                            real-chod
                            repeat?
                            (watcher/get-thread-data board @conf/GLOBAL-CONFIG)
                            board-config)})
    (catch Exception e
      ;; The ex-info data has been crafted to match a Ring response, so we can send it as is
      (if-let [caught (ex-data e)]
        caught ; We have a custom crafted error
        (do
          (print "WTF??: " e)
          {:status 500 ; Something unexpected failed, we print what happened
           :headers {"Content-Type" "text/plain"}
           :body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n"
                      (ex-cause e)
                      "\n"
                      e)})))))