diff --git a/src/rss_thread_watch/config.clj b/src/rss_thread_watch/config.clj index 0626b83..0f2c959 100644 --- a/src/rss_thread_watch/config.clj +++ b/src/rss_thread_watch/config.clj @@ -78,6 +78,8 @@ boards)))) (defn config-fill-board-defaults + ;; TODO: must have check that if board is default, it's enabled, if it's not, give some big fat warning + ;; that users must always specify board, maybe change the error? "Fills every enabled board with default config values" [config] (let [defaults (:boards-defaults config)] diff --git a/src/rss_thread_watch/core.clj b/src/rss_thread_watch/core.clj index 4fab9df..892f85a 100644 --- a/src/rss_thread_watch/core.clj +++ b/src/rss_thread_watch/core.clj @@ -72,17 +72,24 @@ (let [config (conf/get-some-config (:config options))] ;; TODO: probably refactor to use separate config.clj file when validation will be added ;; Init the few globals we have + ;; TODO: this all needs to go in separate function so it doesn't have to be duplicated in repl-main (reset! conf/GLOBAL-CONFIG config) (reset! feed/boards-enabled-cache (set (keys (get config :boards-enabled)))) (reset! watcher/chod-threads-cache (watcher/generate-chod-cache-structure config)) (clojure.pprint/pprint config) - (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port conf/CONFIG-DEFAULT) + (jetty/run-jetty (rp/wrap-params feed/http-handler) {:port (:port config) :join? true})))) ;; Docs: https://github.com/ring-clojure/ring/wiki/Getting-Started (defn repl-main "Development entry point" [] + (let [config (conf/get-some-config nil)] + ;; TODO: probably refactor to use separate config.clj file when validation will be added + ;; Init the few globals we have + (reset! conf/GLOBAL-CONFIG config) + (reset! feed/boards-enabled-cache (set (keys (get config :boards-enabled)))) + (reset! 
watcher/chod-threads-cache (watcher/generate-chod-cache-structure config))) (jetty/run-jetty (rp/wrap-params #'feed/http-handler) {:port (:port conf/CONFIG-DEFAULT) ;; Dont block REPL thread diff --git a/src/rss_thread_watch/feed_generator.clj b/src/rss_thread_watch/feed_generator.clj index 6f34a62..c9b340c 100644 --- a/src/rss_thread_watch/feed_generator.clj +++ b/src/rss_thread_watch/feed_generator.clj @@ -20,7 +20,8 @@ [clojure.string :as s] [rss-thread-watch.watcher :as watcher] [rss-thread-watch.utils :as ut] - [rss-thread-watch.config :as conf]) + [rss-thread-watch.config :as conf] + [rss-thread-watch.filters :as f]) (:gen-class)) (def boards-enabled-cache @@ -34,8 +35,8 @@ This is done by always making new GUID - (concat thread-number UNIX-time-of-data-update)" [thread time] (assoc thread :guid (str (:no thread) - "-" - time))) + "-" + time))) (defn new-guid-paranoid "Generate unique GUID on EVERY request to the feed. @@ -56,12 +57,20 @@ (:last-modified thread) (:chod thread)))) +(defn make-filters + "Creates map of functions and filters from query string. + Return format is: {filter-fun ['words' 'to' 'filter' 'using this function']}" + [query-string known-filter-map] + (let [filterable (select-keys query-string + (keys known-filter-map))] + (ut/fkmap (fn [k v] + {(get known-filter-map k) (ut/vectorize v)}) + filterable))) + (defn filter-chod-posts "Return list of all threads with equal or higher ChoD than requested - READS FROM GLOBALS: watcher.time-of-cache" - [query-vec chod-treshold repeat? board-cache] - + [filters chod-treshold repeat? board-cache] (let [{time-of-generation :time cache :data} board-cache guid-fn (case repeat? 
@@ -69,27 +78,24 @@ "true" (fn [x] (new-guid-always x time-of-generation)) update-only-guid) cache-start-index (first (ut/indices (fn [x] (>= (:chod x) chod-treshold)) - cache)) + cache)) ;; So we don't have to search thru everything we have cached needed-cache-part (subvec cache cache-start-index) - actuall-matches (keep (fn [t] - (let [title (:title t)] - ;; Todo: Man, wouldn't it be cool to know which querry matched the thread? - ;; Would be so much easier for user to figure out why is it showing - ;; and it would solve the problem of super long titles (or OPs instead of titles) - (when (some (fn [querry] - (s/includes? (s/lower-case title) (s/lower-case querry))) - query-vec) - t))) + actuall-matches (keep (fn [thread] + (some + (fn [fun] + (when (fun thread (get filters fun)) + thread)) + (keys filters))) (reverse needed-cache-part))] ;; Finally generate and append GUIDs (map guid-fn actuall-matches))) (defn thread-to-rss-item "Converts cached thread item to feed item which can be serialized into RSS" - [t host board] - (let [link-url (s/replace host "{threadnum}" (str (:no t)))] ;Hardcode emergency bugfix - {:title (format "%.2f%% - %s" (:chod t) (:title t)) ;TODO: Generate link from the target somehow, or just include it from API response + [t host] + (let [link-url (s/replace host "{threadnum}" (str (:no t)))] + {:title (format "%.2f%% - %s" (:chod t) (:title t)) ;; :url link-url <- this is supposed to be for images according to: https://cyber.harvard.edu/rss/rss.html :description (format "The thread: '%s' has %.2f%% chance of dying" (:title t) (:chod t)) :link link-url @@ -97,9 +103,8 @@ (defn generate-feed "Generates feed from matching items" - [query-vec chod-treshold repeat? cache board-config self-link] - (let [items (filter-chod-posts query-vec chod-treshold repeat? cache) - served-filename (get @conf/GLOBAL-CONFIG :served-filename) + [filters chod-treshold repeat? 
cache board-config self-link] + (let [items (filter-chod-posts filters chod-treshold repeat? cache) head {:title (str "RSS Thread watcher v" conf/VERSION) ;; :link is the homepage of the channel :link (get @conf/GLOBAL-CONFIG :homepage) @@ -108,8 +113,7 @@ :description "RSS based thread watcher"} body (map #(thread-to-rss-item %1 - (get board-config :host) - (get board-config :name)) items)] + (get board-config :host)) items)] (rss/channel-xml head body))) (defn http-handler @@ -130,21 +134,17 @@ query :query-string scheme :scheme server-name :server-name} rqst - qrs (prms "q") - self-uri (str (s/replace-first scheme ":" "") + filters (make-filters prms f/known-filters) + ;; BUG if local fileserver not running -> FileNotFound exception is thrown and it fucks up the feed generation + ;; Should be handled because wrong config and thus url generation could do the same + self-uri (str (s/replace-first scheme ":" "") ; "://" server-name uri "?" query) - queries (if (vector? qrs) qrs [qrs]) ; to always return vector - real-chod (if-let [ch (or (and (vector? chod) - (first chod)) - chod)] - (try ;If we can't parse number from chod, use default 94 - (if (or (vector? chod) - ;; TODO: Do we seriously parse this twice? - (<= (Integer/parseInt chod) 60)) ; Never accept chod lower than 60 TODO: don't hardcode this - 60 (Integer/parseInt chod)) - (catch Exception e - 94))) board-config (get-in @conf/GLOBAL-CONFIG [:boards-enabled board]) + real-chod (try (max (Integer/parseInt (or (and (vector? 
chod) + (first chod)) + chod)) 60) ;HARDCODED CHoD + (catch Exception _ + (get board-config :default-chod))) cache @watcher/chod-threads-cache] (println "\n\nRCVD: " rqst) ;; (println rqst) @@ -164,13 +164,14 @@ (response/redirect (get @conf/GLOBAL-CONFIG :homepage))))) ;; No querry specified - don't know what to search for - (when-not (prms "q") + (when-not (some f/known-filter-set (keys prms)) (throw (ex-info "400" {:status 400 :header {"Content-Type" "text/plain"} - :body (str "400 You MUST specify query with one OR more'q=searchTerm' url parameter(s)\n\n\n" + :body (str "400 You MUST specify query with one OR more'q=searchTerm' (or 'Q=SeARChteRm' for case sensitive) url parameter(s)\n\n\n" "Exmple: '" served-filename "?q=pony&q=IWTCIRD' will show in your feed all threads with 'pony' or 'IWTCIRD'" " in their title that are about to die.")}))) ;; Whether cache has been generated yet + (when (empty? cache) (throw (ex-info "503" {:status 503 :header {"Content-Type" "text/plain"} @@ -181,7 +182,7 @@ ;; There shouldn't be any problems with this mime type but if there are ;; replace with "text/xml", or even better, get RSS reader that is not utter shit :header {"Content-Type" "application/rss+xml"} - :body (generate-feed queries real-chod repeat? (watcher/get-thread-data board @conf/GLOBAL-CONFIG) board-config self-uri)}) + :body (generate-feed filters real-chod repeat? 
(watcher/get-thread-data board @conf/GLOBAL-CONFIG) board-config self-uri)}) (catch Exception e ;; Ex-info has been crafted to match HTTP response body so we can send it (if-let [caught (ex-data e)] diff --git a/src/rss_thread_watch/filters.clj b/src/rss_thread_watch/filters.clj new file mode 100644 index 0000000..071da96 --- /dev/null +++ b/src/rss_thread_watch/filters.clj @@ -0,0 +1,38 @@ +;; Copyright (C) 2024 Felisp +;; +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU Affero General Public License as published by +;; the Free Software Foundation, version 3 of the License. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU Affero General Public License for more details. +;; +;; You should have received a copy of the GNU Affero General Public License +;; along with this program. If not, see . + +(ns rss-thread-watch.filters + "Functions filtering posts" + (:require [clojure.string :as cs] + [rss-thread-watch.utils :as u]) + (:gen-class)) + +(defn case-sensitive-filter + "Returns truthy if the thread's :title contains any of [queries]. Matching is case sensitive" + [{:keys [title]} queries] + (some (fn [querry] + (cs/includes? 
title querry)) + queries)) + +(defn case-insensitive-filter + "Returns truthy if the thread's :title contains any of [queries], ignoring case" + [{:keys [title]} queries] + (case-sensitive-filter {:title (cs/lower-case title)} (map cs/lower-case queries))) + +(def known-filters + {"Q" case-sensitive-filter + "q" case-insensitive-filter}) + +(def known-filter-set (set (keys known-filters))) + diff --git a/src/rss_thread_watch/utils.clj b/src/rss_thread_watch/utils.clj index db53c12..5d81d9b 100644 --- a/src/rss_thread_watch/utils.clj +++ b/src/rss_thread_watch/utils.clj @@ -47,6 +47,11 @@ ~x result#))) +(defn vectorize + "If [v] is not a vector, wraps it in one, otherwise returns it. A function, so the check sees the runtime value" + [v] + (if (vector? v) v [v])) + ;; ===== Generic functions ==== (defn indices @@ -69,15 +74,28 @@ {k (map-apply-defaults conf-val default-val)} {k (nil?-else conf-val default-val)}))))) +;; This is a shitty version of reduce-kv (defn fmap "Applies function [f] to every key and value in map [m] - Function signature should be (f [key value])." + Function signature should be (f [key value]). + Key stays unchanged" [f m] (into (empty m) (for [[key val] m] [key (f key val)]))) +(defn fkmap + ;; I am horrible with docstrings, I don't deny that + "Applies function [f] to every key and value in map [m] + Function signature should be (f [key value]). + Unlike fmap, you can change key too, so return both {key value} in map" + [f m] + (into + (empty m) + (for [[key val] m] + (f key val)))) + (defn expand-home "Expands ~ to home directory" ;;modified from sauce: https://stackoverflow.com/questions/29585928/how-to-substitute-path-to-home-for