sripathikrishnan · loisaidasam · Aug 21, 2014 · Aug 21, 2014 · Aug 21, 2014
diff --git a/README.md b/README.md
@@ -5,8 +5,9 @@ Rdbtools is a parser for Redis' dump.rdb files. The parser generates events simi
 In addition, rdbtools provides utilities to :
 
  1.  Generate a Memory Report of your data across all databases and keys
- 2.  Convert dump files to JSON
- 3.  Compare two dump files using standard diff tools
+ 2.  Analyze memory used across keys using an interactive Memory Shell
+ 3.  Convert dump files to JSON
+ 4.  Compare two dump files using standard diff tools
 
 Rdbtools is written in Python, though there are similar projects in other languages. See [FAQs](https://github.com/sripathikrishnan/redis-rdb-tools/wiki/FAQs) for more information.
 
@@ -84,6 +85,40 @@ NOTE :
 1. This was added to redis-rdb-tools version 0.1.3
 2. This command depends [redis-py](https://github.com/andymccurdy/redis-py) package
 
+## Open an interactive Memory Shell ##
+
+Sometimes you want to do a quick memory analysis across a few keys or even entire key namespaces, but summing up dump report values can be time-consuming.
+
+For these cases, RDB tools offers an interactive Memory Shell dubbed "MShell", which loads the approximate memory sizes of all keys into memory for quick querying.
+
+*Beware: for large dump files this can take a minute to prepare and can use a lot of memory, since the size of every key is held in memory.*
+
+Start MShell :
+
+    rdb -c mshell dump.rdb
+
+    Welcome to RDB MShell!
+    mshell> 
+
+Get approximate memory usage for a specific key, `user:123:items` :
+
+    mshell> info user:123:items
+    25136234 bytes (2.829%)
+
+Get approximate memory usage across a namespace with a trailing wildcard, e.g. all keys starting with `user:` :
+
+    mshell> info user:*
+    710816090 bytes (80.000%)
+
+See approximate memory usage of all keys :
+
+    mshell> info *
+    888520112 bytes (100.000%)
+
+
+MShell also offers auto-complete support, which is helpful when you don't remember the exact key or namespace names.
+
+
 ## Comparing RDB files ##
 
 First, use the --command diff option, and pipe the output to standard sort utility
@@ -171,4 +206,5 @@ Sripathi Krishnan : @srithedabbler
  5. [Josep M. Pujol](https://github.com/solso)
  6. [Charles Gordon](https://github.com/cgordon)
  7. [Justin Poliey](https://github.com/jdp)
+ 8. [Loisaida Sam Sandberg](https://github.com/loisaidasam)
 
diff --git a/rdbtools/__init__.py b/rdbtools/__init__.py
@@ -1,10 +1,10 @@
 from rdbtools.parser import RdbCallback, RdbParser, DebugCallback
 from rdbtools.callbacks import JSONCallback, DiffCallback, ProtocolCallback
-from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StatsAggregator
+from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StoreAllKeysInMemory, StatsAggregator, MShell
 
 __version__ = '0.1.6'
 VERSION = tuple(map(int, __version__.split('.')))
 
 __all__ = [
-    'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'PrintAllKeys']
+    'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'PrintAllKeys', 'StoreAllKeysInMemory', 'MShell']
 
diff --git a/rdbtools/cli/rdb.py b/rdbtools/cli/rdb.py
@@ -2,7 +2,7 @@
 import os
 import sys
 from optparse import OptionParser
-from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys
+from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys, StoreAllKeysInMemory, MShell
 
 VALID_TYPES = ("hash", "set", "string", "list", "sortedset")
 def main():
@@ -12,7 +12,7 @@ def main():
 
     parser = OptionParser(usage=usage)
     parser.add_option("-c", "--command", dest="command",
-                  help="Command to execute. Valid commands are json, diff, and protocol", metavar="FILE")
+                  help="Command to execute. Valid commands are json, diff, protocol, and mshell", metavar="FILE")
     parser.add_option("-f", "--file", dest="output",
                   help="Output file", metavar="FILE")
     parser.add_option("-n", "--db", dest="dbs", action="append",
@@ -75,12 +75,18 @@ def main():
             callback = MemoryCallback(reporter, 64)
         elif 'protocol' == options.command:
             callback = ProtocolCallback(sys.stdout)
+        elif 'mshell' == options.command:
+            reporter = StoreAllKeysInMemory()
+            callback = MemoryCallback(reporter, 64)
         else:
             raise Exception('Invalid Command %s' % options.command)
 
         parser = RdbParser(callback, filters=filters)
         parser.parse(dump_file)
-
+
+        if 'mshell' == options.command:
+            MShell(reporter).cmdloop()
+
 if __name__ == '__main__':
     main()
 
diff --git a/rdbtools/memprofiler.py b/rdbtools/memprofiler.py
@@ -1,3 +1,5 @@
+
+import cmd
 from collections import namedtuple
 import random
 import json
@@ -76,6 +78,15 @@ def __init__(self, out):
     def next_record(self, record) :
         self._out.write("%d,%s,%s,%d,%s,%d,%d\n" % (record.database, record.type, encode_key(record.key), 
                                                  record.bytes, record.encoding, record.size, record.len_largest_element))
+
+class StoreAllKeysInMemory(object):
+    '''Reporter: maps key name -> approximate bytes (a name seen more than once adds up).'''
+    def __init__(self):
+        self.store, self.total_bytes = {}, 0
+
+    def next_record(self, record):
+        self.store[record.key] = self.store.get(record.key, 0) + record.bytes
+        self.total_bytes += record.bytes
 
 class MemoryCallback(RdbCallback):
     '''Calculates the memory used if this rdb file were loaded into RAM
@@ -336,4 +347,80 @@ def element_length(element):
         return 16
     else:
         return len(element)
-
+
+
+class MShell(cmd.Cmd):
+    def __init__(self, stream, limit_results=True):
+        '''MShell is a "Memory Shell" for getting info about memory consumption
+        of keys in the redis store.
+
+        `stream` must expose `store` (dict of key -> bytes) and `total_bytes`;
+        the total is read once here, so build the shell after parsing is done.
+
+        `limit_results` is an optional parameter for limiting the auto-complete
+            results returned to the nearest following ':' character of the key
+            namespace. For example, if set to True and the stored keys are
+            "foo:bar:1", "foo:bar:2", and "foo:baz:1", and the input to the
+            auto-complete function is "foo:", then the returned results will be
+            "foo:bar:" and "foo:baz:" (instead of "foo:bar:1", "foo:bar:2",
+            "foo:baz:1", ... which can get rather large)
+        '''
+        self.store = stream.store
+        self.total_bytes = stream.total_bytes
+        self.limit_results = limit_results
+        cmd.Cmd.__init__(self)
+        self.prompt = "mshell> "
+        self.intro  = "Welcome to RDB MShell!"
+
+    def do_info(self, key):
+        '''Show approximate memory used by a key, or by all keys matching a
+        'prefix*' pattern (only a single trailing * wildcard is supported)
+        '''
+        if '*' in key:
+            prefix = key[:-1]
+            if '*' in prefix:
+                # TODO: Support better wildcarding
+                print("Only supporting tail wildcard matches right now (ex: 'foo:*')")
+                return
+            size = sum(self.store[k] for k in self.store if k.startswith(prefix))
+        elif key in self.store:
+            size = self.store[key]
+        else:
+            print("No such key")
+            return
+        # An empty dump has total_bytes == 0; report 0% instead of dividing by zero
+        pct = 100.0 * size / self.total_bytes if self.total_bytes else 0.0
+        print("%d bytes (%0.3f%%)" % (size, pct))
+
+    def complete_info(self, text, line, begidx, endidx):
+        '''Auto-complete functionality for `info` command
+
+        Caveat: ':' is a readline word delimiter, so `text` only holds the part
+        after the last ':' (and is empty when the input ends in ':'). The key
+        typed so far is therefore taken from `line`, and each candidate is
+        returned relative to the last ':' so that it replaces exactly `text`.
+        '''
+        # Everything after the command word, up to the cursor
+        key_text = line[:endidx].partition(' ')[2].lstrip()
+        if not key_text:
+            return []
+        # Namespace already typed, including its trailing ':' ('' if none)
+        typed_ns = key_text[:key_text.rfind(':') + 1]
+        results = set()
+        for key in self.store:
+            if not key.startswith(key_text):
+                continue
+            candidate = key[len(typed_ns):]
+            if self.limit_results:
+                # Offer only the next namespace level, e.g. 'bar:' not 'bar:1'
+                colon = candidate.find(':')
+                if colon != -1:
+                    candidate = candidate[:colon + 1]
+            results.add(candidate)
+        return sorted(results)
+
+    def do_EOF(self, line):
+        '''Exit the shell on EOF (Ctrl-D)
+        '''
+        print("")
+        return True