diff --git a/README.md b/README.md index fcf2841..84f8126 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,9 @@ Rdbtools is a parser for Redis' dump.rdb files. The parser generates events simi In addition, rdbtools provides utilities to : 1. Generate a Memory Report of your data across all databases and keys - 2. Convert dump files to JSON - 3. Compare two dump files using standard diff tools + 2. Analyze memory used across keys using an interactive Memory Shell + 3. Convert dump files to JSON + 4. Compare two dump files using standard diff tools Rdbtools is written in Python, though there are similar projects in other languages. See [FAQs](https://github.com/sripathikrishnan/redis-rdb-tools/wiki/FAQs) for more information. @@ -84,6 +85,40 @@ NOTE : 1. This was added to redis-rdb-tools version 0.1.3 2. This command depends [redis-py](https://github.com/andymccurdy/redis-py) package +## Open an interactive Memory Shell ## + +Sometimes you want to do a quick memory analysis across a few keys or even entire key namespaces, but summing up dump report values can be time consuming. + +For these cases, RDB tools offers an interactive Memory Shell dubbed "MShell", which loads the approximate memory sizes of all keys into memory for quick querying. + +*Beware, this can take a minute to prepare and be memory-consuming for large-scale dump files since it's all stored in memory.* + +Start MShell : + + rdb -c mshell dump.rdb + + Welcome to RDB MShell! + mshell> + +Get approximate memory usage for a specific key, `user:123:items` : + + mshell> info user:123:items + 25136234 bytes (2.829%) + +Get a wildcard range across a specific namespace, all keys starting with `user:*` : + + mshell> info user:* + 710816090 bytes (80.000%) + +See approximate memory usage of all keys : + + mshell> info * + 888520112 bytes (100.000%) + + +MShell also offers auto-complete support, which is helpful for when you don't remember exact key or namespace values. 
+ + ## Comparing RDB files ## First, use the --command diff option, and pipe the output to standard sort utility @@ -171,4 +206,5 @@ Sripathi Krishnan : @srithedabbler 5. [Josep M. Pujol](https://github.com/solso) 6. [Charles Gordon](https://github.com/cgordon) 7. [Justin Poliey](https://github.com/jdp) + 8. [Loisaida Sam Sandberg](https://github.com/loisaidasam) diff --git a/rdbtools/__init__.py b/rdbtools/__init__.py index e3fe8b2..449811f 100644 --- a/rdbtools/__init__.py +++ b/rdbtools/__init__.py @@ -1,10 +1,10 @@ from rdbtools.parser import RdbCallback, RdbParser, DebugCallback from rdbtools.callbacks import JSONCallback, DiffCallback, ProtocolCallback -from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StatsAggregator +from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StoreAllKeysInMemory, StatsAggregator, MShell __version__ = '0.1.6' VERSION = tuple(map(int, __version__.split('.'))) __all__ = [ - 'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'PrintAllKeys'] + 'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'PrintAllKeys', 'StoreAllKeysInMemory', 'MShell'] diff --git a/rdbtools/cli/rdb.py b/rdbtools/cli/rdb.py index 348102f..508c31d 100755 --- a/rdbtools/cli/rdb.py +++ b/rdbtools/cli/rdb.py @@ -2,7 +2,7 @@ import os import sys from optparse import OptionParser -from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys +from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys, StoreAllKeysInMemory, MShell VALID_TYPES = ("hash", "set", "string", "list", "sortedset") def main(): @@ -12,7 +12,7 @@ def main(): parser = OptionParser(usage=usage) parser.add_option("-c", "--command", dest="command", - help="Command to execute. Valid commands are json, diff, and protocol", metavar="FILE") + help="Command to execute. 
Valid commands are json, diff, protocol, and mshell", metavar="FILE") parser.add_option("-f", "--file", dest="output", help="Output file", metavar="FILE") parser.add_option("-n", "--db", dest="dbs", action="append", @@ -75,12 +75,18 @@ def main(): callback = MemoryCallback(reporter, 64) elif 'protocol' == options.command: callback = ProtocolCallback(sys.stdout) + elif 'mshell' == options.command: + reporter = StoreAllKeysInMemory() + callback = MemoryCallback(reporter, 64) else: raise Exception('Invalid Command %s' % options.command) parser = RdbParser(callback, filters=filters) parser.parse(dump_file) - + + if 'mshell' == options.command: + MShell(reporter).cmdloop() + if __name__ == '__main__': main() diff --git a/rdbtools/memprofiler.py b/rdbtools/memprofiler.py index 3a80ba2..d3410be 100644 --- a/rdbtools/memprofiler.py +++ b/rdbtools/memprofiler.py @@ -1,3 +1,5 @@ + +import cmd from collections import namedtuple import random import json @@ -76,6 +78,15 @@ def __init__(self, out): def next_record(self, record) : self._out.write("%d,%s,%s,%d,%s,%d,%d\n" % (record.database, record.type, encode_key(record.key), record.bytes, record.encoding, record.size, record.len_largest_element)) + +class StoreAllKeysInMemory(): + def __init__(self): + self.store = {} + self.total_bytes = 0 + + def next_record(self, record): + self.store[record.key] = record.bytes + self.total_bytes += record.bytes class MemoryCallback(RdbCallback): '''Calculates the memory used if this rdb file were loaded into RAM @@ -336,4 +347,80 @@ def element_length(element): return 16 else: return len(element) - + + +class MShell(cmd.Cmd): + def __init__(self, stream, limit_results=True): + '''MShell is a "Memory Shell" for getting info about memory consumption + of keys in the redis store. + + `limit_results` is an optional parameter for limiting the auto-complete + results returned to the nearest following ':' character of the key + namespace. 
For example, if set to True and the stored keys are + "foo:bar:1", "foo:bar:2", and "foo:baz:1", and the input to the + auto-complete function is "foo:", then the returned results will be + "foo:bar:" and "foo:baz:" (instead of "foo:bar:1", "foo:bar:2", + "foo:baz:1", ... which can get rather large) + ''' + self.store = stream.store + self.total_bytes = stream.total_bytes + self.limit_results = limit_results + cmd.Cmd.__init__(self) + self.prompt = "mshell> " + self.intro = "Welcome to RDB MShell!" + + def do_info(self, key): + '''Get memory info about a specified key or pattern using * as the wildcard char + ''' + if '*' in key: + # Wildcard match + subkey = key[:len(key)-1] + if '*' in subkey: + # TODO: Support better wildcarding + print "Only supporting tail wildcard matches right now (ex: 'foo:*')" + return + bytes = 0 + for key in self.store.iterkeys(): + if key.startswith(subkey): + bytes += self.store[key] + else: + # Exact match + if key not in self.store: + print "No such key" + return + bytes = self.store[key] + pct = 100.0 * bytes / self.total_bytes + print "%d bytes (%0.3f%%)" % (bytes, pct) + + def complete_info(self, text, line, begidx, endidx): + '''Auto-complete functionality for `info` command + + Caveat: when auto-complete input ends in ':' (a character that many + namespaces contain, as per redis common practice), `text` parameter + becomes an empty string, so `key_text` is extracted from the remainder + of the line starting after the "info " prefix. 
+ ''' + key_text = line[5:] + if not key_text: + return [] + existing_prefix = ':'.join(key_text.split(':')[:-1]) + if existing_prefix: + existing_prefix += ':' + results = set() + for key in self.store.iterkeys(): + if not key.startswith(key_text): + continue + key_result = key[len(existing_prefix):] + if self.limit_results: + pos_of_colon = key_result.find(':') + if pos_of_colon != -1: + # If there is a colon, only add part up to the colon + key_result = key_result[:pos_of_colon] + ':' + results.add(key_result) + return sorted(results) + + def do_EOF(self, line): + '''Support expected EOF behavior + ''' + print "" + return True