redis
diff --git a/‎.clang-format‎
Lines changed: 4 additions & 4 deletions b/‎.clang-format‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎client.cpp‎
Lines changed: 12 additions & 4 deletions b/‎client.cpp‎
Lines changed: 12 additions & 4 deletions
diff --git a/‎cluster_client.cpp‎
Lines changed: 122 additions & 0 deletions b/‎cluster_client.cpp‎
Lines changed: 122 additions & 0 deletions
diff --git a/‎cluster_client.h‎
Lines changed: 11 additions & 0 deletions b/‎cluster_client.h‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎memtier_benchmark.cpp‎
Lines changed: 25 additions & 5 deletions b/‎memtier_benchmark.cpp‎
Lines changed: 25 additions & 5 deletions
diff --git a/‎memtier_benchmark.h‎
Lines changed: 20 additions & 0 deletions b/‎memtier_benchmark.h‎
Lines changed: 20 additions & 0 deletions
@@ -3,6 +3,10 @@
 
 BasedOnStyle: LLVM
 
+# C++ standard: keep Cpp03 so nested templates use "> >" (not ">>"),
+# which is required for the macOS C++03-compatible build.
+Standard: Cpp03
+
 # Indentation
 IndentWidth: 4
 TabWidth: 4
@@ -53,10 +57,6 @@ AllowShortFunctionsOnASingleLine: Inline
 AllowShortIfStatementsOnASingleLine: WithoutElse
 AllowShortLoopsOnASingleLine: false
 
-# C++ standard: keep Cpp03 so nested templates use "> >" (not ">>"),
-# which is required for the macOS C++03-compatible build.
-Standard: Cpp03
-
 # Other
 AlignTrailingComments: true
 AlwaysBreakAfterReturnType: None
 
@@ -585,12 +585,13 @@ bool client::create_mget_request(struct timeval &timestamp, unsigned int conn_id
     m_keylist->clear();
     for (unsigned int i = 0; i < keys_count; i++) {
         get_key_response res = get_key_for_conn(GET_CMD_IDX, conn_id, &key_index);
-        /* Not supported in cluster mode */
-        assert(res == available_for_conn);
+        if (res != available_for_conn) continue;
 
         m_keylist->add_key(m_obj_gen->get_key(), m_obj_gen->get_key_len());
     }
 
+    if (m_keylist->get_keys_count() == 0) return false;
+
     m_connections[conn_id]->send_mget_command(&timestamp, m_keylist);
     return true;
 }
@@ -655,9 +656,16 @@ void client::create_request(struct timeval timestamp, unsigned int conn_id)
         }
 
         // MGET command
-        if (!create_mget_request(timestamp, conn_id)) return;
+        if (!create_mget_request(timestamp, conn_id)) {
+            // No MGET could be sent (e.g. this cluster connection owns no
+            // slots that map to the configured key range). Force the ratio
+            // counter past the threshold so the next create_request() call
+            // resets both counters instead of busy-spinning here forever.
+            m_get_ratio_count = m_config->ratio.b;
+            return;
+        }
 
-        m_get_ratio_count += m_config->multi_key_get;
+        m_get_ratio_count += m_keylist->get_keys_count();
         m_reqs_generated++;
     } else {
         // overlap counters
 
@@ -279,6 +279,68 @@ bool cluster_client::connect_shard_connection(shard_connection *sc, char *addres
     return res == 0;
 }
 
+void cluster_client::build_mget_slot_cache()
+{ // (Re)builds the shared slot→key table (first caller only) and this client's conn→slot lists for MGET.
+    if (!m_config->multi_key_get) return; // MGET batching disabled: nothing to build
+
+    mget_slot_cache *cache = m_config->mget_cache;
+    assert(cache != NULL);
+
+    unsigned int num_conns = (unsigned int) m_connections.size();
+
+    // Slot→key mapping is topology-independent: build it once across all threads.
+    pthread_mutex_lock(&cache->mutex);
+    if (!cache->built.load(std::memory_order_relaxed)) { // checked while holding cache->mutex
+        unsigned long long key_min = m_config->key_minimum;
+        unsigned long long key_max = m_config->key_maximum;
+
+        // Cap per-slot storage at min(multi_key_get * 4, 4096), but never below multi_key_get.
+        // The scan below stops early only once every slot has reached this cap.
+        unsigned int cap = (unsigned int) m_config->multi_key_get * 4;
+        if (cap > 4096) cap = 4096;
+        if (cap < (unsigned int) m_config->multi_key_get) cap = (unsigned int) m_config->multi_key_get;
+
+        benchmark_error_log("Building MGET slot cache for key range [%llu, %llu] "
+                            "(cap %u keys/slot)...\n",
+                            key_min, key_max, cap);
+
+        cache->slot_keys.assign(MAX_CLUSTER_HSLOT + 1, std::vector<unsigned long long>());
+
+        unsigned int filled_slots = 0;
+        for (unsigned long long idx = key_min; idx <= key_max && filled_slots < MAX_CLUSTER_HSLOT + 1; idx++) {
+            m_obj_gen->generate_key(idx);
+            unsigned int slot = calc_hslot_crc16_with_hash_tag(m_obj_gen->get_key(), m_obj_gen->get_key_len());
+            if (cache->slot_keys[slot].size() < cap) {
+                cache->slot_keys[slot].push_back(idx);
+                if (cache->slot_keys[slot].size() == cap) filled_slots++; // slot just reached its cap
+            }
+        }
+
+        cache->built.store(true, std::memory_order_release); // pairs with the acquire load in hold_pipeline()
+
+        // Count slots that ended up with at least one key (informational).
+        unsigned int populated = 0;
+        for (unsigned int s = 0; s <= MAX_CLUSTER_HSLOT; s++) {
+            if (!cache->slot_keys[s].empty()) populated++;
+        }
+        benchmark_error_log("MGET slot cache built: %u/%u slots populated.\n", populated, MAX_CLUSTER_HSLOT + 1);
+    }
+    pthread_mutex_unlock(&cache->mutex);
+
+    // Per-thread cursor: one entry per slot, sized to match the shared table; reset to 0 on every call.
+    m_mget_slot_cursor.assign(MAX_CLUSTER_HSLOT + 1, 0);
+
+    // Conn→slot mapping depends on topology: rebuild on every refresh.
+    m_mget_conn_slots.assign(num_conns, std::vector<unsigned int>());
+    m_mget_conn_slot_cursor.assign(num_conns, 0);
+
+    for (unsigned int slot = 0; slot <= MAX_CLUSTER_HSLOT; slot++) {
+        if (cache->slot_keys[slot].empty()) continue; // no cached key for this slot
+        unsigned int cid = m_slot_to_shard[slot];
+        if (cid < num_conns) m_mget_conn_slots[cid].push_back(slot); // ignore indices outside m_connections
+    }
+}
+
 void cluster_client::handle_cluster_slots(protocol_response *r)
 {
     /*
@@ -362,6 +424,19 @@ void cluster_client::handle_cluster_slots(protocol_response *r)
             }
         }
     }
+
+    // Rebuild same-slot key index cache for MGET if enabled.
+    build_mget_slot_cache();
+
+    // Wake all connected shard connections so each one re-evaluates hold_pipeline()
+    // with the freshly-built m_mget_conn_slots.  Without this, a connection that
+    // was bufferevent_disable()'d before the cache existed would never re-run
+    // fill_pipeline() and would stay permanently idle.
+    if (m_config->multi_key_get > 0) {
+        for (size_t i = 0; i < m_connections.size(); i++) {
+            if (m_connections[i]->get_connection_state() != conn_disconnected) m_connections[i]->schedule_fill();
+        }
+    }
 }
 
 bool cluster_client::hold_pipeline(unsigned int conn_id)
@@ -392,6 +467,17 @@ bool cluster_client::hold_pipeline(unsigned int conn_id)
         }
     }
 
+    /* In GET-only MGET mode, a connection whose slots own no keys in the
+     * configured key range can never generate a request.  Returning true here
+     * breaks the fill_pipeline while-loop for that connection so it does not
+     * spin consuming CPU.  Other connections (which do have eligible slots)
+     * continue to operate normally. */
+    if (m_config->multi_key_get > 0 && m_config->ratio.a == 0 && m_config->mget_cache != NULL &&
+        m_config->mget_cache->built.load(std::memory_order_acquire) && conn_id < m_mget_conn_slots.size() &&
+        m_mget_conn_slots[conn_id].empty() && m_staged_monitor_commands[conn_id].empty()) {
+        return true;
+    }
+
     /* In transaction mode the pin connection drives the entire rotation.
      * Non-pin connections must not spin in fill_pipeline; they will be
      * rescheduled via schedule_fill() when the pin is cleared. If the pin
@@ -649,6 +735,42 @@ bool cluster_client::create_arbitrary_request(unsigned int command_index, struct
     return true;
 }
 
+bool cluster_client::create_mget_request(struct timeval &timestamp, unsigned int conn_id)
+{
+    // Only reached when --multi-key-get is set. Sends one MGET on conn_id whose
+    // keys all come from a single hash slot in the pre-built slot cache — Redis
+    // requires exact same-slot (not just same-node) for MGET in cluster mode.
+    // Returns false without sending when there is nothing to request. The cache
+    // is refreshed by build_mget_slot_cache() at the end of handle_cluster_slots().
+    unsigned int keys_count = m_config->ratio.b - m_get_ratio_count;
+    if ((int) keys_count > m_config->multi_key_get) keys_count = m_config->multi_key_get; // clamp to batch size
+    if (keys_count == 0) return false;
+
+    if (conn_id >= m_mget_conn_slots.size() || m_mget_conn_slots[conn_id].empty()) {
+        // Cache not ready or no key in the configured range maps to this shard.
+        return false;
+    }
+
+    // Round-robin over the slots owned by this connection.
+    size_t &sc = m_mget_conn_slot_cursor[conn_id]; // reference: the increment below persists across calls
+    unsigned int target_slot = m_mget_conn_slots[conn_id][sc % m_mget_conn_slots[conn_id].size()];
+    sc++;
+
+    std::vector<unsigned long long> &slot_keys = m_config->mget_cache->slot_keys[target_slot];
+    size_t &kc = m_mget_slot_cursor[target_slot]; // this client's position within the slot's key list
+
+    m_keylist->clear();
+    for (unsigned int i = 0; i < keys_count; i++) {
+        unsigned long long idx = slot_keys[kc % slot_keys.size()]; // wraps: keys repeat if the slot has < keys_count
+        kc++;
+        m_obj_gen->generate_key(idx);
+        m_keylist->add_key(m_obj_gen->get_key(), m_obj_gen->get_key_len());
+    }
+
+    m_connections[conn_id]->send_mget_command(&timestamp, m_keylist);
+    return true;
+}
+
 void cluster_client::create_request(struct timeval timestamp, unsigned int conn_id)
 {
     /* Drain staged monitor commands that were routed here from another shard connection. */
 
@@ -22,6 +22,7 @@
 #include <set>
 #include <queue>
 #include <string>
+#include <vector>
 #include "client.h"
 
 typedef std::queue<unsigned long long> key_index_pool;
@@ -58,6 +59,15 @@ class cluster_client : public client
     // Set when we emit the "pin connection lost mid-rotation" warning so we
     // don't spam it on every hold_pipeline() call during the outage.
     bool m_txn_pin_lost_warned;
+    // Per-slot key index cache for cluster MGET. The slot→key table
+    // (slot_keys) is shared across threads via m_config->mget_cache and is
+    // read-only after the first thread builds it.  Only the round-robin
+    // cursors (m_mget_slot_cursor, m_mget_conn_slot_cursor) and the
+    // per-connection slot list (m_mget_conn_slots) are per-thread.
+    std::vector<size_t> m_mget_slot_cursor;                    // [slot] → per-thread round-robin cursor
+    std::vector<std::vector<unsigned int> > m_mget_conn_slots; // [conn] → owned slot list
+    std::vector<size_t> m_mget_conn_slot_cursor;               // [conn] → slot round-robin cursor
+    void build_mget_slot_cache();
 
     virtual int connect(void);
     virtual void disconnect(void);
@@ -89,6 +99,7 @@ class cluster_client : public client
     virtual get_key_response get_key_for_conn(unsigned int command_index, unsigned int conn_id,
                                               unsigned long long *key_index);
     virtual bool create_arbitrary_request(unsigned int command_index, struct timeval &timestamp, unsigned int conn_id);
+    virtual bool create_mget_request(struct timeval &timestamp, unsigned int conn_id);
 
     // client manager api's
     virtual void handle_cluster_slots(protocol_response *r);
 
@@ -561,6 +561,7 @@ static int parse_uri(const char *uri, struct benchmark_config *cfg)
 
 static void config_init_defaults(struct benchmark_config *cfg)
 {
+    cfg->mget_cache = NULL;
     if (!cfg->server && !cfg->unix_socket) cfg->server = "localhost";
     if (!cfg->port && !cfg->unix_socket) cfg->port = 6379;
     if (!cfg->resolution) cfg->resolution = AF_UNSPEC;
@@ -616,9 +617,6 @@ static bool verify_cluster_option(struct benchmark_config *cfg)
     if (cfg->reconnect_interval) {
         fprintf(stderr, "error: cluster mode dose not support reconnect-interval option.\n");
         return false;
-    } else if (cfg->multi_key_get) {
-        fprintf(stderr, "error: cluster mode dose not support multi-key-get option.\n");
-        return false;
     } else if (cfg->wait_ratio.is_defined()) {
         fprintf(stderr, "error: cluster mode dose not support wait-ratio option.\n");
         return false;
@@ -1252,7 +1250,7 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
         case o_multi_key_get:
             endptr = NULL;
             cfg->multi_key_get = (unsigned int) strtoul(optarg, &endptr, 10);
-            if (cfg->multi_key_get <= 0 || !endptr || *endptr != '\0') {
+            if (cfg->multi_key_get < 1 || !endptr || *endptr != '\0') {
                 fprintf(stderr, "error: multi-key-get must be greater than zero.\n");
                 return -1;
             }
@@ -1724,7 +1722,9 @@ void usage()
         "(default: 0)\n"
         "      --thread-conn-start-max-jitter-micros=NUM Maximum jitter in microseconds between connection creation "
         "(default: 0)\n"
-        "      --multi-key-get=NUM        Enable multi-key get commands, up to NUM keys (default: 0)\n"
+        "      --multi-key-get=NUM        Enable multi-key get commands, up to NUM keys (default: 0).\n"
+        "                                 In cluster mode, keys are probed from the key space so that all\n"
+        "                                 keys in one batch route to the same shard (no hash-tag prefix).\n"
         "      --select-db=DB             DB number to select, when testing a redis server\n"
         "      --distinct-client-seed     Use a different random seed for each client\n"
         "      --randomize                random seed based on timestamp (default is constant value)\n"
@@ -2117,6 +2117,13 @@ run_stats run_benchmark(int run_id, benchmark_config *cfg, object_generator *obj
 {
     fprintf(stderr, "[RUN #%u] Preparing benchmark client...\n", run_id);
 
+    // Shared MGET slot cache: allocate fresh for this run so the lazy build
+    // inside build_mget_slot_cache() fires again (topology may have changed).
+    if (cfg->cluster_mode && cfg->multi_key_get > 0) {
+        delete cfg->mget_cache;
+        cfg->mget_cache = new mget_slot_cache();
+    }
+
     // prepare threads data
     std::vector<cg_thread *> threads;
     g_threads = &threads; // Set global pointer for crash handler
@@ -2127,6 +2134,8 @@ run_stats run_benchmark(int run_id, benchmark_config *cfg, object_generator *obj
 
         if (t->prepare() < 0) {
             benchmark_error_log("error: failed to prepare thread %u for test.\n", i);
+            delete cfg->mget_cache;
+            cfg->mget_cache = NULL;
             exit(1);
         }
         threads.push_back(t);
@@ -2562,6 +2571,9 @@ run_stats run_benchmark(int run_id, benchmark_config *cfg, object_generator *obj
 
     g_threads = NULL; // Clear global pointer
 
+    delete cfg->mget_cache;
+    cfg->mget_cache = NULL;
+
     return stats;
 }
 
@@ -3196,6 +3208,14 @@ int main(int argc, char *argv[])
         fprintf(stderr, "error: select-db can only be used with redis protocol.\n");
         usage();
     }
+    if (cfg.multi_key_get > 0 && cfg.protocol == PROTOCOL_MEMCACHE_BINARY) {
+        fprintf(stderr, "error: --multi-key-get is not supported with memcache_binary.\n");
+        usage();
+    }
+    if (cfg.multi_key_get > 0 && cfg.arbitrary_commands->is_defined()) {
+        fprintf(stderr, "error: --multi-key-get cannot be combined with --command.\n");
+        usage();
+    }
     if (cfg.data_offset > 0) {
         if (cfg.data_offset > (1 << 29) - 1) {
             fprintf(stderr, "error: data-offset too long\n");
 
@@ -19,8 +19,10 @@
 #ifndef _MEMTIER_BENCHMARK_H
 #define _MEMTIER_BENCHMARK_H
 
+#include <atomic>
 #include <vector>
 #include <sys/time.h>
+#include <pthread.h>
 #include "config_types.h"
 
 #ifdef USE_TLS
@@ -53,6 +55,23 @@ enum PROTOCOL_TYPE
     PROTOCOL_MEMCACHE_BINARY,
 };
 
+// Shared MGET slot cache: built once (lazily, on first topology load) and
+// read concurrently by all cluster_client threads.  slot_keys is identical
+// for every thread — only the per-slot round-robin cursors differ.
+struct mget_slot_cache
+{
+    std::vector<std::vector<unsigned long long> > slot_keys; // [slot] → key indices; read-only after built
+    std::atomic<bool> built; // starts false; set to true once slot_keys has been populated
+    pthread_mutex_t mutex; // serializes the one-time build of slot_keys
+
+    mget_slot_cache() : built(false) { pthread_mutex_init(&mutex, NULL); }
+    ~mget_slot_cache() { pthread_mutex_destroy(&mutex); }
+
+private: // copy operations are declared private with no body here, so outside code cannot copy the cache
+    mget_slot_cache(const mget_slot_cache &);
+    mget_slot_cache &operator=(const mget_slot_cache &);
+};
+
 struct benchmark_config
 {
     const char *server;
@@ -124,6 +143,7 @@ struct benchmark_config
     unsigned int thread_conn_start_min_jitter_micros;
     unsigned int thread_conn_start_max_jitter_micros;
     int multi_key_get;
+    struct mget_slot_cache *mget_cache; // NULL unless cluster_mode && multi_key_get > 0
     const char *authenticate;
     int select_db;
     const char *uri;