@@ -2,7 +2,9 @@ package kafka
2
2
3
3
import (
4
4
"hash"
5
+ "hash/crc32"
5
6
"hash/fnv"
7
+ "math/rand"
6
8
"sort"
7
9
"sync"
8
10
)
@@ -158,3 +160,128 @@ func (h *Hash) Balance(msg Message, partitions ...int) (partition int) {
158
160
159
161
return
160
162
}
163
+
164
// randomBalancer picks a partition at random from the ones it is offered.
type randomBalancer struct {
	// mock, when non-zero, is handed back by Balance in place of a random
	// pick. It exists so tests can obtain a deterministic result.
	mock int
}
167
+
168
+ func (b randomBalancer ) Balance (msg Message , partitions ... int ) (partition int ) {
169
+ if b .mock != 0 {
170
+ return b .mock
171
+ }
172
+ return partitions [rand .Int ()% len (partitions )]
173
+ }
174
+
175
+ // CRC32Balancer is a Balancer that uses the CRC32 hash function to determine
176
+ // which partition to route messages to. This ensures that messages with the
177
+ // same key are routed to the same partition. This balancer is compatible with
178
+ // the built-in hash partitioners in librdkafka and the language bindings that
179
+ // are built on top of it, including the
180
+ // github.com/confluentinc/confluent-kafka-go Go package.
181
+ //
182
+ // With the Consistent field false (default), this partitioner is equivalent to
183
+ // the "consistent_random" setting in librdkafka. When Consistent is true, this
184
+ // partitioner is equivalent to the "consistent" setting. The latter will hash
185
+ // empty or nil keys into the same partition.
186
+ //
187
+ // Unless you are absolutely certain that all your messages will have keys, it's
188
+ // best to leave the Consistent flag off. Otherwise, you run the risk of
189
+ // creating a very hot partition.
190
+ type CRC32Balancer struct {
191
+ Consistent bool
192
+ random randomBalancer
193
+ }
194
+
195
+ func (b CRC32Balancer ) Balance (msg Message , partitions ... int ) (partition int ) {
196
+ // NOTE: the crc32 balancers in librdkafka don't differentiate between nil
197
+ // and empty keys. both cases are treated as unset.
198
+ if len (msg .Key ) == 0 && ! b .Consistent {
199
+ return b .random .Balance (msg , partitions ... )
200
+ }
201
+
202
+ idx := crc32 .ChecksumIEEE (msg .Key ) % uint32 (len (partitions ))
203
+ return partitions [idx ]
204
+ }
205
+
206
+ // Murmur2Balancer is a Balancer that uses the Murmur2 hash function to
207
+ // determine which partition to route messages to. This ensures that messages
208
+ // with the same key are routed to the same partition. This balancer is
209
+ // compatible with the partitioner used by the Java library and by librdkafka's
210
+ // "murmur2" and "murmur2_random" partitioners. /
211
+ //
212
+ // With the Consistent field false (default), this partitioner is equivalent to
213
+ // the "murmur2_random" setting in librdkafka. When Consistent is true, this
214
+ // partitioner is equivalent to the "murmur2" setting. The latter will hash
215
+ // nil keys into the same partition. Empty, non-nil keys are always hashed to
216
+ // the same partition regardless of configuration.
217
+ //
218
+ // Unless you are absolutely certain that all your messages will have keys, it's
219
+ // best to leave the Consistent flag off. Otherwise, you run the risk of
220
+ // creating a very hot partition.
221
+ //
222
+ // Note that the librdkafka documentation states that the "murmur2_random" is
223
+ // functionally equivalent to the default Java partitioner. That's because the
224
+ // Java partitioner will use a round robin balancer instead of random on nil
225
+ // keys. We choose librdkafka's implementation because it arguably has a larger
226
+ // install base.
227
+ type Murmur2Balancer struct {
228
+ Consistent bool
229
+ random randomBalancer
230
+ }
231
+
232
+ func (b Murmur2Balancer ) Balance (msg Message , partitions ... int ) (partition int ) {
233
+ // NOTE: the murmur2 balancers in java and librdkafka treat a nil key as
234
+ // non-existent while treating an empty slice as a defined value.
235
+ if msg .Key == nil && ! b .Consistent {
236
+ return b .random .Balance (msg , partitions ... )
237
+ }
238
+
239
+ idx := (murmur2 (msg .Key ) & 0x7fffffff ) % uint32 (len (partitions ))
240
+ return partitions [idx ]
241
+ }
242
+
243
// murmur2 returns the 32-bit Murmur2 hash of data. It is a Go port of the Java
// library's murmur2 function:
// https://github.com/apache/kafka/blob/1.0/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L353
//
// A nil slice and an empty slice hash to the same value.
func murmur2(data []byte) uint32 {
	const (
		seed uint32 = 0x9747b28c
		// mixMul and mixShift are mixing constants generated offline.
		// They're not really 'magic', they just happen to work well.
		mixMul   = 0x5bd1e995
		mixShift = 24
	)

	// Start from the seed combined with the input length.
	hash := seed ^ uint32(len(data))

	// Consume the input four bytes at a time, reading every group as a
	// little-endian 32-bit word.
	rest := data
	for ; len(rest) >= 4; rest = rest[4:] {
		word := uint32(rest[0]) | uint32(rest[1])<<8 | uint32(rest[2])<<16 | uint32(rest[3])<<24
		word *= mixMul
		word ^= word >> mixShift
		word *= mixMul
		hash *= mixMul
		hash ^= word
	}

	// Fold in the one to three bytes left over. Each case falls through to
	// the next so that every remaining byte is mixed in.
	switch len(rest) {
	case 3:
		hash ^= uint32(rest[2]) << 16
		fallthrough
	case 2:
		hash ^= uint32(rest[1]) << 8
		fallthrough
	case 1:
		hash ^= uint32(rest[0])
		hash *= mixMul
	}

	// Final avalanche.
	hash ^= hash >> 13
	hash *= mixMul
	hash ^= hash >> 15

	return hash
}
0 commit comments