Skip to content

Commit 9d18a15

Browse files
authored
Merge pull request #127 from clojureverse/arne/emoji-deploy-followup
Arne/emoji deploy followup
2 parents 85202ac + c9f6322 commit 9d18a15

File tree

6 files changed

+165
-73
lines changed

6 files changed

+165
-73
lines changed

repl/emoji_cleanup.clj

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
(ns repl.emoji-cleanup
2+
(:require [clojurians-log.datomic :as d]
3+
[clojurians-log.repl :as repl]
4+
[clojurians-log.slack-api :as slack]))
5+
6+
;; Our :emoji/shortcode property did not originally have an index, meaning we
7+
;; kept creating more entities for emojis, instead of upserting them.
8+
;;
9+
;; To fix this, first retract all shortcode attributes, then transact the schema
10+
;; so it gets the index, then re-import
11+
12+
;; Do the following before deploying the new schema changes:
13+
14+
(def shortcodes (d/q '[:find ?i ?s :where [?i :emoji/shortcode ?s]] (repl/db)))
15+
(def tx-data (for [[i s] shortcodes]
16+
[:db/retract i :emoji/shortcode s]))
17+
18+
;; Deref every batch: d/transact returns a derefable (see the @(d/transact ...)
;; call below), and without the deref a failed retraction is silently dropped.
(run! #(deref (d/transact (repl/conn) %)) (partition-all 1000 tx-data))
19+
20+
@(d/transact (repl/conn) [{:db/ident :emoji/shortcode
21+
:db/valueType :db.type/string
22+
:db/cardinality :db.cardinality/one
23+
:db/unique :db.unique/identity}])
24+
25+
(slack/import-emojis! (repl/conn))
26+
27+
;; After deploying the new version, re-import the data to get the emoji reactions in there
28+
29+
;; load-files! and log-files live in clojurians-log.repl, which this namespace
;; only aliases as `repl`, so they have to be qualified to resolve here.
(def result (repl/load-files! (repl/log-files))) ;; [(volatile count) (promise)]
30+
31+
(future
32+
(while (not (realized? (second result)))
33+
(println @(first result))
34+
(Thread/sleep 2000))
35+
(println :done))
36+
37+
(group-by second
38+
(map (juxt :reaction/type
39+
(comp :emoji/shortcode :reaction/emoji))
40+
(:reaction/_message
41+
(datomic.api/entity (repl/db)
42+
[:message/key "C0GLTDB2T--1608789973.125700"]))))

src/clojurians_log/components/datomic_schema.clj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
(defrecord DatomicSchema [datomic]
77
component/Lifecycle
88
(start [this]
9-
(d/transact (:conn datomic)
10-
(if d/cloud?
11-
(map #(dissoc % :db/index) schema/full-schema)
12-
schema/full-schema)))
9+
@(d/transact (:conn datomic)
10+
(if d/cloud?
11+
(map #(dissoc % :db/index) schema/full-schema)
12+
schema/full-schema)))
1313
(stop [this]))
1414

1515
(defn new-datomic-schema []

src/clojurians_log/db/import.clj

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -94,18 +94,26 @@
9494
#:emoji {:shortcode (name shortcode)
9595
:url url})
9696

97-
(defmethod event->tx ["reaction_added" nil] [{:keys [user item reaction item_user event_ts ts]}]
98-
{:reaction/type "reaction_added"
99-
:reaction/emoji {:emoji/shortcode reaction}
97+
;; TODO: deal with reactions on files (I guess this will depend on us actually dealing with files in the first place :))
98+
;; {:reaction "joy", :event_ts "1521818444.000850", :item {:type "file", :file "F9W0B0LHM"}, :user "U06P56UUB", :item_user "U4E5W80P7", :type "reaction_added"}
99+
100+
(defn- reaction-entity [{:keys [user item reaction ts]}]
101+
{:reaction/emoji {:emoji/shortcode reaction}
100102
:reaction/ts ts
101103
:reaction/user [:user/slack-id user]
102104
:reaction/message {:message/key (message-key item)}})
103105

104-
(defmethod event->tx ["reaction_removed" nil] [{:keys [user item reaction item_user event_ts ts]}]
105-
;; Placeholder just to show that we're getting some data.
106-
;; TODO: return Datomic transaction data to retract a reaction entity
107-
(println "-" reaction)
108-
nil)
106+
(defmethod event->tx ["reaction_added" nil] [{:keys [item] :as msg}]
107+
(when (and (:channel item) (:ts item)) ; exclude reactions on things other than messages
108+
(assoc
109+
(reaction-entity msg)
110+
:reaction/type "reaction_added")))
111+
112+
(defmethod event->tx ["reaction_removed" nil] [{:keys [item] :as msg}]
113+
(when (and (:channel item) (:ts item)) ; exclude reactions on things other than messages
114+
(assoc
115+
(reaction-entity msg)
116+
:reaction/type "reaction_removed")))
109117

110118
(defn lines-reducible [^BufferedReader rdr]
111119
(reify clojure.lang.IReduceInit
@@ -133,9 +141,11 @@
133141
(rf acc (persistent! @part))
134142
(vreset! part (transient []))
135143
(vreset! part-keys (transient #{}))))
144+
message-key #(or (:message/key %)
145+
(:message/key (:reaction/message %)))
136146
append (fn [msg]
137147
(conj! @part msg)
138-
(conj! @part-keys (:message/key msg)))]
148+
(conj! @part-keys (message-key msg)))]
139149
(fn
140150
([]
141151
(rf))

src/clojurians_log/db/queries.clj

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,9 @@
5252
[:message/text
5353
:message/ts
5454
:message/thread-ts
55-
{:reaction/_message [:reaction/user
56-
:reaction/type {:reaction/emoji [*]}]}
55+
{:reaction/_message [:reaction/user
56+
:reaction/type
57+
{:reaction/emoji [*]}]}
5758
{:message/user [:user/name
5859
:user/slack-id
5960
:user-profile/real-name

src/clojurians_log/repl.clj

Lines changed: 87 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
(uncaughtException [_ thread throwable]
2020
(println (.getMessage throwable)))))
2121

22-
2322
(defn read-edn [filepath]
2423
(-> filepath
2524
slurp
@@ -48,11 +47,11 @@
4847
(slack/import-emojis! (conn))
4948
(let [channel->db-id (q/channel-id-map (d/db (conn)))
5049
channels (mapv import/channel->tx (slack/channels))]
51-
(d/transact (conn) (map (fn [{slack-id :channel/slack-id :as ch}]
52-
(if-let [db-id (channel->db-id slack-id)]
53-
(assoc ch :db/id db-id)
54-
ch))
55-
channels))))
50+
@(d/transact (conn) (map (fn [{slack-id :channel/slack-id :as ch}]
51+
(if-let [db-id (channel->db-id slack-id)]
52+
(assoc ch :db/id db-id)
53+
ch))
54+
channels))))
5655

5756
(defn build-indexes! []
5857
(q/build-indexes! (d/db (conn))))
@@ -100,13 +99,27 @@
10099
(run! load-log-file! (log-files (java.io.File. directory "logs")))
101100
(build-indexes!))
102101

102+
(defn files-from
  "Get a sequence of log files starting at a given date.

  `date` is a string prefix such as \"2019-05-20\". Files returned by
  `(log-files)` are skipped until the first one whose file name starts with
  `date`; that file and every file after it are returned. Returns an empty
  sequence when no file name matches."
  [date]
  ;; Use the default log location via the zero-arity (log-files), as the other
  ;; callers in this namespace do, rather than a path on one developer's machine.
  (->> (log-files)
       (drop-while #(not (clojure.string/starts-with? (.getName %) date)))))
107+
103108
(defn load-from
104109
"Load log files starting from a certain date (a string like \"2019-05-20\")"
105110
[date]
106-
(->> (log-files)
107-
(drop-while #(not (clojure.string/starts-with? (.getName %) date)))
111+
(->> (files-from date)
108112
(run! load-log-file!)))
109113

114+
(defn wrap-catch [f]
115+
(fn [& args]
116+
(try
117+
(apply f args)
118+
(catch Throwable t
119+
(let [ex-sym (gensym "ex-")]
120+
(intern *ns* ex-sym t)
121+
(println (str f " threw " (class t) " see " ex-sym)))))))
122+
110123
(def file->tx
111124
"Transducer which consumes files and produces transaction data"
112125
(comp (mapcat #(import/lines-reducible (io/reader %)))
@@ -116,8 +129,7 @@
116129
(println "Error decoding JSON: " %)
117130
(println e)
118131
nil)))
119-
(filter #(= (:type %) "message"))
120-
(keep import/event->tx)))
132+
(keep (wrap-catch import/event->tx))))
121133

122134
(def tx-thread-count
123135
"The number of threads to use for processing transactions"
@@ -135,19 +147,20 @@
135147
(mod-hash
136148
(if (vector? msg)
137149
(second (second msg))
138-
(:message/key msg)))))
150+
(or (:message/key msg)
151+
(:message/key (:reaction/message msg)))))))
139152

140153
(defn load-files!
141154
"Bulk import a set of files (e.g. from (log-files)), uses multiple threads to speed things up"
142155
[files]
143-
(let [tx-chs (into [] (repeatedly tx-thread-count
144-
#(async/chan 100 (import/partition-messages tx-size))))
145-
file-ch (async/chan 100)
146-
pubsub-ch (async/chan 100)
147-
pubsub (async/pub pubsub-ch msg-topic)
148-
conn (conn)
149-
counter (volatile! 0)
150-
done? (promise)]
156+
(let [make-tx-chan #(async/chan 100 (import/partition-messages tx-size))
157+
tx-chs (into [] (repeatedly tx-thread-count make-tx-chan))
158+
file-ch (async/chan 100)
159+
pubsub-ch (async/chan 100)
160+
pubsub (async/pub pubsub-ch (wrap-catch msg-topic))
161+
conn (conn)
162+
counter (volatile! 0)
163+
done? (promise)]
151164

152165
(doseq [tx-ch tx-chs]
153166
(async/thread
@@ -162,14 +175,10 @@
162175
(println e)))
163176
(recur (<!! tx-ch)))))))
164177

165-
(go-loop [[f & files] files]
166-
(>! file-ch f)
167-
(if (seq files)
168-
(recur files)
169-
(async/close! file-ch)))
178+
(async/onto-chan file-ch files)
170179

171-
(doseq [i (range tx-thread-count)]
172-
(async/sub pubsub i (get tx-chs i)))
180+
(doseq [[i tx-ch] (map-indexed vector tx-chs)]
181+
(async/sub pubsub i tx-ch))
173182

174183
(async/pipeline-blocking 10 pubsub-ch file->tx file-ch true)
175184

@@ -189,45 +198,70 @@
189198
(use 'clojurians-log.repl)
190199
(in-ns 'clojurians-log.repl)
191200
(load-slack-data!)
201+
192202
(def result (load-files! (log-files)))
193-
result
203+
;; or
204+
(def result (load-files! (files-from "2019-01-01")))
194205

195-
(load-files! [f])
206+
;; see progress
207+
(future
208+
(while (not (realized? (second result)))
209+
(println (java.util.Date.) "\t" @(first result))
210+
(Thread/sleep 5000)))
196211

197-
(def result (load-files! (drop 1508 (log-files))))
198-
(def rrr (load-files! (filter #(and (.contains (str %) "2020-08") (.contains (str %) "backfill")) (log-files))))
212+
;; Run this after importing, otherwise the imported data won't show up
213+
(build-indexes!)
199214

200-
(while (not (realized? (second result)))
201-
(println (java.util.Date.) "\t" @(first result))
202-
(Thread/sleep 5000))
215+
(def result
216+
(load-files!
217+
(filter #(and (.contains (str %) "2020-08")
218+
(.contains (str %) "backfill"))
219+
(log-files))))
203220

204-
;; old way (slower)
205-
(run! load-log-file! (log-files))
206221

207-
;; incremental
208-
(load-from "2019-08-23")
209222

223+
;; old way, this does not use multi-thread core.async magic, and does not
224+
;; batch transactions, it literally transacts each slack event separately.
225+
;; Very slow but always works.
226+
(run! load-log-file! (log-files))
210227

228+
;; Fetch and store slack data
211229
(do
212230
(write-edn "users.edn" (map import/user->tx (slack/users)))
213-
(write-edn "channels.edn" (map import/channel->tx (slack/channels))))
214-
215-
(time
216-
(do
217-
(time (clojurians-log.db.queries/channel-day-messages db "clojurescript" "2018-02-04"))
218-
(time (clojurians-log.db.queries/thread-messages db '("1517722327.000023" "1517722363.000043" "1517722613.000012" "1517724278.000043" "1517724340.000044" "1517724770.000024" "1517724836.000023" "1517725105.000054")))
219-
(time (ffirst (clojurians-log.db.queries/channel db "clojurescript")))
220-
(time (clojurians-log.db.queries/channel-list db "2018-02-04"))
221-
(time (clojurians-log.db.queries/user-names db #{"U2TUBBPNU"}))
222-
(time (clojurians-log.db.queries/channel-days db "clojurescript"))
223-
224-
nil))
225-
231+
(write-edn "channels.edn" (map import/channel->tx (slack/channels)))
232+
(write-edn "emoji.edn" (map import/emoji->tx (slack/emoji))))
233+
234+
;; Micro-benchmark some queries, good to check if anything is unreasonably
235+
;; slow
236+
(let [db (db)]
237+
(time
238+
(do
239+
(time (clojurians-log.db.queries/channel-day-messages db "clojurescript" "2018-02-04"))
240+
(time (clojurians-log.db.queries/thread-messages db '("1517722327.000023" "1517722363.000043" "1517722613.000012" "1517724278.000043" "1517724340.000044" "1517724770.000024" "1517724836.000023" "1517725105.000054")))
241+
(time (ffirst (clojurians-log.db.queries/channel db "clojurescript")))
242+
(time (clojurians-log.db.queries/channel-list db "2018-02-04"))
243+
(time (clojurians-log.db.queries/user-names db #{"U2TUBBPNU"}))
244+
(time (clojurians-log.db.queries/channel-days db "clojurescript"))
245+
246+
nil)))
247+
248+
;; Original
226249
"Elapsed time: 18.166254 msecs"
227-
"Elapsed time: 631.458841 msecs"
250+
"Elapsed time: 631.458841 msecs" ; -> we optimized this
228251
"Elapsed time: 1.568807 msecs"
229252
"Elapsed time: 16.425878 msecs"
230253
"Elapsed time: 1.126005 msecs"
231-
"Elapsed time: 1535.355001 msecs"
232-
"Elapsed time: 2205.20762 msecs"
254+
"Elapsed time: 1535.355001 msecs" ; -> and this
255+
"Elapsed time: 2205.20762 msecs" ; Total
256+
257+
;; Latest
258+
"Elapsed time: 31.38712 msecs" ; -> seems fetching channel-day-messages is
259+
; slower now
260+
"Elapsed time: 0.844338 msecs"
261+
"Elapsed time: 1.582986 msecs"
262+
"Elapsed time: 0.22545 msecs"
263+
"Elapsed time: 1.120628 msecs"
264+
"Elapsed time: 0.00676 msecs"
265+
"Elapsed time: 37.954533 msecs" ; Total
266+
233267
)

src/clojurians_log/views.clj

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,9 @@
187187
;; :image_512 :email :real_name_normalized :image_48 :image_192 :real_name :image_72 :image_24
188188
;; :avatar_hash :title :team :image_32 :display_name :display_name_normalized
189189
[:div.message
190-
{:id (cl.tu/format-inst-id inst) :class (when (thread-child? message) "thread-msg")}
190+
{:id (cl.tu/format-inst-id inst)
191+
:class (when (thread-child? message) "thread-msg")
192+
:data-message-key (:message/key message)}
191193
[:a.message_profile-pic {:href (str "/_/_/users/" slack-id) :style (str "background-image: url(" image-48 ");")}]
192194
[:a.message_username {:href (str "/_/_/users/" slack-id)}
193195
(some #(when-not (str/blank? %) %) [display-name real-name name])]
@@ -201,12 +203,15 @@
201203
[:span.message_star]
202204
[:span.message_content [:p (slack-messages/message->hiccup text usernames emojis)]]
203205
[:div.message-reaction-bar
204-
(let [reaction-group (group-by #(get-in % [:reaction/emoji :emoji/shortcode]) (:reaction/_message message))]
205-
(for [[emoji-shortcode reactions] reaction-group]
206-
[:div.message-reaction
206+
(let [reaction-groups (group-by #(get-in % [:reaction/emoji :emoji/shortcode]) (:reaction/_message message))]
207+
(for [[emoji-shortcode reactions] reaction-groups
208+
:let [{:strs [reaction_added reaction_removed]} (group-by :reaction/type reactions)
209+
reaction-count (- (count reaction_added) (count reaction_removed))]
210+
:when (< 0 reaction-count)]
211+
[:div.message-reaction {:title emoji-shortcode}
207212
[:span.emoji (slack-messages/text->emoji emoji-shortcode emojis)]
208213
" "
209-
(count reactions)]))]]))
214+
reaction-count]))]]))
210215

211216
(defn- message-hiccup
212217
"Returns either a single message hiccup, or if the given message starts a thread,

0 commit comments

Comments
 (0)