|
| 1 | +(ns repl-sessions.stats-from-indexer |
| 2 | + (:require [clojurians-log.time-util :as time-util]) |
| 3 | + (:import (java.time LocalDate))) |
| 4 | + |
| 5 | +;; Fetching all messages for a given channel and day is really fast, but we also |
| 6 | +;; want to know the list of all channels for a given day, and how many messages |
| 7 | +;; each had, and which day was the previous or next day that had messages, and |
| 8 | +;; these queries are really slow, since they basically need to traverse all |
| 9 | +;; messages. |
| 10 | + |
| 11 | +;; To work around that we build up these "indexes" and keep them in an atom. We |
| 12 | +;; have an "indexer" component that rebuilds them regularly. |
| 13 | + |
| 14 | +(keys @clojurians-log.db.queries/!indexes) |
| 15 | +;; => (:chan-day-cnt :day-chan-cnt :chan-id->name :chan-name->id) |
| 16 | + |
(defn day-chan-cnt
  "Returns whatever is currently stored under :day-chan-cnt in the
  clojurians-log.db.queries/!indexes atom: a map keyed first by day string
  (e.g. \"2018-02-02\"), then by channel id, whose leaves are message counts."
  []
  (get @clojurians-log.db.queries/!indexes :day-chan-cnt))
| 19 | + |
| 20 | +;; For example day-chan-cnt groups first on day, then on channel, and then shows |
| 21 | +;; the count of messages. |
| 22 | + |
| 23 | +(day-chan-cnt) |
| 24 | +;;=> |
| 25 | +{"2018-01-28" {"C03S1KBA2" 1}, |
| 26 | + "2018-02-02" {"C064BA6G2" 28, |
| 27 | + "C099W16KZ" 19, |
| 28 | + "C0617A8PQ" 51, |
| 29 | + ,,,}} |
| 30 | + |
| 31 | +;; So we can easily sum up all messages for a given day. |
| 32 | + |
(defn day-total
  "Sums the per-channel message counts recorded for `day` (a string such as
  \"2018-02-02\") in the day-chan-cnt index. A day that is not present in the
  index yields 0."
  [day]
  (let [per-channel (get (day-chan-cnt) day)]
    ;; `vals` of nil is nil, and reducing over nil just returns the 0 seed.
    (reduce + 0 (vals per-channel))))
| 35 | + |
| 36 | +(day-total "2018-02-02") |
| 37 | +;; => 887 |
| 38 | + |
| 39 | +;; If we want to query this we just need to extrapolate to a range of days. We |
| 40 | +;; use clojure.java-time elsewhere but actually the java.time API is quite nice |
| 41 | +;; and nowadays I tend to use it directly. A simple date (so year+month+day) |
| 42 | +;; without any time or timezone information is represented as a |
| 43 | +;; java.time.LocalDate. |
| 44 | + |
;; Good example here of recursion and lazy-seq. Note that the lazy-seq is
;; optional here: you can remove it and still get a valid result for short
;; ranges, it would just be eager instead of lazy. (Without lazy-seq every day
;; in the range adds a stack frame, so a very long range could overflow the
;; stack.)
| 48 | + |
| 49 | +;; There are obviously more ways to write this, for instance with loop/recur. |
| 50 | +;; This use of recursion + cons is a very "classic lisp" approach. |
| 51 | + |
(defn range-of-local-dates
  "Sequence of consecutive LocalDates starting at `ld1` and stopping just
  before `ld2` (half-open range). The first date is produced immediately, the
  remainder is a lazy seq. Returns nil when `ld1` is not strictly before `ld2`."
  [^LocalDate ld1 ^LocalDate ld2]
  (if (.isAfter ld2 ld1)
    ;; Only the tail is deferred; each realized step recurses one day further.
    (cons ld1
          (lazy-seq
           (range-of-local-dates (.plusDays ld1 1) ld2)))
    nil))
| 55 | + |
| 56 | +;; A bit more clojure-y, use vectors with [year month day], return strings |
| 57 | +;; like "2018-02-02", since that is what we have in the indexes. Note that just |
| 58 | +;; calling `str` on any java.time class usually gives a nicely formatted result. |
| 59 | + |
(defn range-of-days
  "Takes two [year month day] vectors and returns a seq of date strings such as
  \"2018-02-02\", from the first date up to but not including the second.
  The strings come from calling `str` on each LocalDate."
  [[y1 m1 d1] [y2 m2 d2]]
  (let [start (LocalDate/of y1 m1 d1)
        end   (LocalDate/of y2 m2 d2)]
    (->> (range-of-local-dates start end)
         (map str))))
| 65 | + |
| 66 | +;; So this is what that looks like now. I made the range half-open (not |
| 67 | +;; including the end date), might make more sense to make it inclusive. |
| 68 | + |
| 69 | +(range-of-days [2018 2 2] [2018 2 5]) |
| 70 | +;; => ("2018-02-02" "2018-02-03" "2018-02-04") |
| 71 | + |
;; So now we can grab the numbers for these days and sum them up. This is a
;; textbook example of where a transducer works nicely. Check the Lambda Island
;; (LI) episode on transducers if you haven't seen this before. Obviously there
;; are other ways to write this too, like a simple (apply + (map ...)).
| 76 | + |
(defn days-total
  "Adds up `day-total` for every day string in `days`. An empty or nil
  collection gives 0."
  [days]
  (reduce + 0 (map day-total days)))
| 79 | + |
| 80 | +;; And there you go |
| 81 | + |
| 82 | +(days-total |
| 83 | + (range-of-days [2018 2 2] [2018 2 5])) |
| 84 | +;; => 1913 |
0 commit comments