rss-thread-watcher/src/rss_thread_watch/feed_generator.clj

150 lines
7.5 KiB
Clojure
Raw Normal View History

;; Copyright (C) 2023 Felisp
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU Affero General Public License as published by
;; the Free Software Foundation, version 3 of the License.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU Affero General Public License for more details.
;;
;; You should have received a copy of the GNU Affero General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
(ns rss-thread-watch.feed-generator
"Generates feeds for requests"
(:require [ring.middleware.params :as rp]
[clj-rss.core :as rss]
[clojure.string :as s]
[rss-thread-watch.watcher :as watcher])
(:gen-class))
(defn indices
  "Lazily yields the zero-based positions of the elements of `coll`
  for which `pred` returns a truthy value, in ascending order.
  Yields an empty sequence when nothing matches."
  ;; Idea taken from:
  ;; https://stackoverflow.com/questions/8641305/find-index-of-an-element-matching-a-predicate-in-clojure
  [pred coll]
  (for [[position element] (map-indexed vector coll)
        :when (pred element)]
    position))
(defn new-guid-always
  "Returns `thread` with a :guid built as <thread number>-<time>.
  Meant for users who want a notification on every server update,
  whether or not the thread actually changed its position: because
  `time` is part of the GUID, a different `time` always gives a
  different GUID for the same thread."
  [thread time]
  (let [thread-number (:no thread)
        fresh-guid    (str thread-number "-" time)]
    (assoc thread :guid fresh-guid)))
(defn update-only-guid
  "Returns `thread` with a :guid built as <thread number>-<ChoD>,
  where the ChoD is formatted with two decimal places.
  The GUID therefore stays the same for a thread until its ChoD,
  rounded to two decimals, changes."
  [thread]
  (let [{:keys [no chod]} thread
        ;; %d needs an integer :no and %.2f a floating point :chod
        stable-guid (format "%d-%.2f" no chod)]
    (assoc thread :guid stable-guid)))
(defn filter-chod-posts
  "Returns a sequence of cached threads that match the request, each with
  a :guid attached.

  Arguments:
    query-vec     - collection of search strings; a thread matches when its
                    :title contains at least one of them (case sensitive)
    chod-treshold - minimal :chod a thread must have to be considered
    repeat?       - when truthy every thread gets a GUID containing the time
                    of this call (see `new-guid-always`), otherwise the GUID
                    depends only on thread number and ChoD
                    (see `update-only-guid`)
    cache         - vector of thread maps (`subvec` is used on it)
                    NOTE(review): presumably sorted by ascending :chod,
                    confirm against the watcher that fills it

  Everything from the first thread whose :chod reaches the threshold up to
  the end of `cache` is searched; matches are returned in reverse cache order.
  Returns an empty sequence when no thread reaches the threshold."
  [query-vec chod-treshold repeat? cache]
  (let [time-of-generation (System/currentTimeMillis)
        guid-fn (if repeat?
                  (fn [thread] (new-guid-always thread time-of-generation))
                  update-only-guid)
        ;; So we don't have to search through everything we have cached
        cache-start-index (first (indices (fn [thread]
                                            (>= (:chod thread) chod-treshold))
                                          cache))]
    (if (nil? cache-start-index)
      ;; Nothing is close enough to dying. Calling (subvec cache nil) would
      ;; throw here, which used to surface to the user as a 500 error.
      ()
      (->> (subvec cache cache-start-index)
           reverse
           ;; Todo: it would be nice to know which query matched the thread,
           ;; it would make it easier for the user to see why it is shown
           ;; and would help with very long titles (or OPs used as titles)
           (filter (fn [{:keys [title]}]
                     ;; threads without a title can't match any query
                     (and title
                          (some (fn [query] (s/includes? title query))
                                query-vec))))
           ;; Finally generate and append GUIDs
           (map guid-fn)))))
2023-12-23 03:24:38 +01:00
(defn thread-to-rss-item
  "Converts one cached thread map `t` into an RSS item map with the keys
  :title, :description, :link and :guid.
  Reads :no, :chod, :title and :guid from `t`.
  Todo: store threads in the cache in this shape already so this
  conversion is not needed."
  [t]
  (let [{:keys [no chod title guid]} t
        ;; Only the /mlp/ board is supported for now, hence the fixed URL
        thread-url (str "https://boards.4chan.org/mlp/thread/" no)
        headline   (format "%.2f%% - %s" chod title)
        summary    (format "The thread: '%s' has %.2f%% chance of dying" title chod)]
    ;; :url is deliberately not set, it is meant for images according to
    ;; https://cyber.harvard.edu/rss/rss.html
    {:title       headline
     :description summary
     :link        thread-url
     :guid        guid}))
(defn generate-feed
  "Builds the RSS document for one request.
  Selects the matching threads with `filter-chod-posts` (all four arguments
  are passed to it unchanged), converts each of them with
  `thread-to-rss-item` and hands the channel description together with the
  items to `rss/channel-xml`, whose result is returned."
  [query-vec chod-treshold repeat? cache]
  (let [channel {:title "RSS Thread watcher v0.1"
                 :link "https://tools.treebrary.org/thread-watcher/feed.xml"
                 :feed-url "https://tools.treebrary.org/thread-watcher/feed.xml"
                 :description "RSS based thread watcher"}]
    (->> (filter-chod-posts query-vec chod-treshold repeat? cache)
         (map thread-to-rss-item)
         (rss/channel-xml channel))))
(defn http-handler
  "Handles HTTP requests, returns a Ring response map with the generated feed.

  Reads from the request map `rqst`:
    :uri    - must end with feed.xml, otherwise a 404 response is returned
    :params - string keyed map of url parameters:
      q      - one search term or a vector of them; required, a 400
               response is returned when it is missing
      chod   - minimal ChoD as an integer string; values that are missing,
               given more than once, not parsable or lower than 60
               are replaced by 94
      repeat - when present (with any value) every feed item gets a new
               GUID on each request

  A 503 response is returned while the thread cache is empty and a 500
  response when anything unexpected is thrown.

  READS FROM GLOBALS:
    rss-thread-watch.watcher/chod-threads-cache (dereferenced on every call)"
  [rqst]
  (try
    (let [{prms :params uri :uri} rqst
          ;; `get` instead of calling the map so a missing :params map
          ;; does not blow up with a NullPointerException
          q       (get prms "q")
          chod    (get prms "chod")
          queries (if (vector? q) q [q]) ; to always have a vector
          repeat? (get prms "repeat")
          ;; chod is a vector when the parameter was given more than once
          ;; and nil when it was not given at all; neither is parsed
          parsed-chod (when (string? chod)
                        (try
                          (Integer/parseInt ^String chod)
                          (catch NumberFormatException _ nil)))
          ;; Never accept chod lower than 60 TODO: don't hardcode this
          real-chod (if (and parsed-chod (>= parsed-chod 60))
                      parsed-chod
                      94)
          cache @watcher/chod-threads-cache]
      (println rqst)
      ;; ====== Errors =====
      ;; Something other than feed.xml requested
      (when-not (s/ends-with? (str uri) "feed.xml")
        (throw (ex-info "404" {:status 404
                               :headers {"Content-Type" "text/plain"}
                               :body "404 This server has nothing but /feed.xml"})))
      ;; No query specified - don't know what to search for
      (when-not q
        (throw (ex-info "400" {:status 400
                               :headers {"Content-Type" "text/plain"}
                               :body (str "400 You MUST specify query with one OR more'q=searchTerm' url parameter(s)\n\n\n"
                                          "Exmple: '/feed.xml?q=pony&q=IWTCIRD' will show in your feed all threads with 'pony' or 'IWTCIRD'"
                                          " in their title that are about to die.")})))
      ;; Whether cache has been generated yet
      (when (empty? cache)
        (throw (ex-info "503" {:status 503
                               :headers {"Content-Type" "text/plain"}
                               :body (str "503 Service Unavailable\n"
                                          "Cache is empty, cannot generate feed. Try again later, it may work.")})))
      ;; ==== Everything good ====
      {:status 200
       ;; Ring reads response headers from :headers (plural).
       ;; There shouldn't be any problems with this mime type but if there
       ;; are, replace it with "text/xml"
       :headers {"Content-Type" "application/rss+xml"}
       :body (generate-feed queries real-chod repeat? cache)})
    (catch Exception e
      ;; The ex-info data thrown above is shaped like a Ring response so it
      ;; can be sent as is. ex-data of an exception coming from elsewhere
      ;; may be nil or an unrelated map, so :status is checked before the
      ;; data is trusted as a response.
      (let [caught (ex-data e)]
        (if (:status caught)
          caught ; We have custom crafted error
          {:status 500 ; Something else failed, we print what happened
           :headers {"Content-Type" "text/plain"}
           :body (str "500 - Something fucked up while generating feed, If you decide to report it, please include url adress you used:\n"
                      (ex-cause e) "\n"
                      e)})))))