Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 38 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ Rdbtools is a parser for Redis' dump.rdb files. The parser generates events simi
In addition, rdbtools provides utilities to :

1. Generate a Memory Report of your data across all databases and keys
2. Convert dump files to JSON
3. Compare two dump files using standard diff tools
2. Analyze memory used across keys using an interactive Memory Shell
3. Convert dump files to JSON
4. Compare two dump files using standard diff tools

Rdbtools is written in Python, though there are similar projects in other languages. See [FAQs](https://github.com/sripathikrishnan/redis-rdb-tools/wiki/FAQs) for more information.

Expand Down Expand Up @@ -84,6 +85,40 @@ NOTE :
1. This was added to redis-rdb-tools version 0.1.3
2. This command depends on the [redis-py](https://github.com/andymccurdy/redis-py) package

## Open an interactive Memory Shell ##

Sometimes you want to do a quick memory analysis across a few keys or even entire key namespaces, but summing up dump report values can be time consuming.

For these cases, RDB tools offers an interactive Memory Shell dubbed "MShell", which loads the approximate memory sizes of all keys into memory for quick querying.

*Beware: for large dump files this can take a minute to prepare and can consume a lot of memory, because the size of every key is held in memory.*

Start MShell :

rdb -c mshell dump.rdb

Welcome to RDB MShell!
mshell>

Get approximate memory usage for a specific key, `user:123:items` :

mshell> info user:123:items
25136234 bytes (2.829%)

Get the combined approximate memory usage of a whole namespace with a trailing wildcard (only a trailing `*` is supported), e.g. all keys starting with `user:` :

mshell> info user:*
710816090 bytes (80.000%)

See approximate memory usage of all keys :

mshell> info *
888520112 bytes (100.000%)


MShell also offers auto-complete support, which is helpful when you don't remember exact key or namespace values.


## Comparing RDB files ##

First, use the --command diff option, and pipe the output to the standard sort utility
Expand Down Expand Up @@ -171,4 +206,5 @@ Sripathi Krishnan : @srithedabbler
5. [Josep M. Pujol](https://github.com/solso)
6. [Charles Gordon](https://github.com/cgordon)
7. [Justin Poliey](https://github.com/jdp)
8. [Loisaida Sam Sandberg](https://github.com/loisaidasam)

4 changes: 2 additions & 2 deletions rdbtools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from rdbtools.parser import RdbCallback, RdbParser, DebugCallback
from rdbtools.callbacks import JSONCallback, DiffCallback, ProtocolCallback
from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StatsAggregator
from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StoreAllKeysInMemory, StatsAggregator, MShell

__version__ = '0.1.6'
VERSION = tuple(map(int, __version__.split('.')))

__all__ = [
'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'PrintAllKeys']
'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'PrintAllKeys', 'StoreAllKeysInMemory', 'MShell']

12 changes: 9 additions & 3 deletions rdbtools/cli/rdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import sys
from optparse import OptionParser
from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys
from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys, StoreAllKeysInMemory, MShell

VALID_TYPES = ("hash", "set", "string", "list", "sortedset")
def main():
Expand All @@ -12,7 +12,7 @@ def main():

parser = OptionParser(usage=usage)
parser.add_option("-c", "--command", dest="command",
help="Command to execute. Valid commands are json, diff, and protocol", metavar="FILE")
help="Command to execute. Valid commands are json, diff, protocol, and mshell", metavar="FILE")
parser.add_option("-f", "--file", dest="output",
help="Output file", metavar="FILE")
parser.add_option("-n", "--db", dest="dbs", action="append",
Expand Down Expand Up @@ -75,12 +75,18 @@ def main():
callback = MemoryCallback(reporter, 64)
elif 'protocol' == options.command:
callback = ProtocolCallback(sys.stdout)
elif 'mshell' == options.command:
reporter = StoreAllKeysInMemory()
callback = MemoryCallback(reporter, 64)
else:
raise Exception('Invalid Command %s' % options.command)

parser = RdbParser(callback, filters=filters)
parser.parse(dump_file)


if 'mshell' == options.command:
MShell(reporter).cmdloop()

if __name__ == '__main__':
main()

89 changes: 88 additions & 1 deletion rdbtools/memprofiler.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@

import cmd
from collections import namedtuple
import random
import json
Expand Down Expand Up @@ -76,6 +78,15 @@ def __init__(self, out):
def next_record(self, record):
    '''Write one record to the output stream as a single comma-separated line.

    The fields are written in this order: database, type, encoded key,
    bytes, encoding, size and length of the largest element.
    '''
    row = (record.database, record.type, encode_key(record.key),
           record.bytes, record.encoding, record.size,
           record.len_largest_element)
    self._out.write("%d,%s,%s,%d,%s,%d,%d\n" % row)

class StoreAllKeysInMemory(object):
    '''Record sink that keeps the reported size of every key in memory.

    Attributes:
        store: dict mapping a key name to the number of bytes reported for it.
        total_bytes: running sum of the bytes of every record received.
    '''

    def __init__(self):
        self.store = {}
        self.total_bytes = 0

    def next_record(self, record):
        '''Add `record.bytes` to the entry for `record.key` and to the total.

        Sizes are accumulated rather than overwritten: if the same key name
        is reported more than once (for example from two different
        databases), the per-key figure stays consistent with `total_bytes`
        instead of silently dropping the earlier record.
        '''
        self.store[record.key] = self.store.get(record.key, 0) + record.bytes
        self.total_bytes += record.bytes

class MemoryCallback(RdbCallback):
'''Calculates the memory used if this rdb file were loaded into RAM
Expand Down Expand Up @@ -336,4 +347,80 @@ def element_length(element):
return 16
else:
return len(element)



class MShell(cmd.Cmd):
    '''Interactive "Memory Shell" for querying approximate key memory usage.

    Commands:
        info <key>      bytes used by exactly that key
        info <prefix>*  combined bytes used by all keys starting with <prefix>
    '''

    def __init__(self, stream, limit_results=True):
        '''MShell is a "Memory Shell" for getting info about memory consumption
        of keys in the redis store.

        `stream` must expose `store` (a dict mapping key name to size in
        bytes) and `total_bytes` (the figure treated as 100%). The value of
        `total_bytes` is read once, here, so the stream has to be fully
        populated before the shell is created.

        `limit_results` is an optional parameter for limiting the auto-complete
        results returned to the nearest following ':' character of the key
        namespace. For example, if set to True and the stored keys are
        "foo:bar:1", "foo:bar:2", and "foo:baz:1", and the input to the
        auto-complete function is "foo:", then the returned results will be
        "foo:bar:" and "foo:baz:" (instead of "foo:bar:1", "foo:bar:2",
        "foo:baz:1", ... which can get rather large)
        '''
        # cmd.Cmd is an old-style class on Python 2, so super() is not usable.
        cmd.Cmd.__init__(self)
        self.store = stream.store
        self.total_bytes = stream.total_bytes
        self.limit_results = limit_results
        self.prompt = "mshell> "
        self.intro = "Welcome to RDB MShell!"

    def do_info(self, key):
        '''Get memory info about a specified key or pattern using * as the wildcard char.
        Only a single trailing wildcard is supported (ex: 'foo:*'); '*' alone matches every key.
        '''
        if '*' in key:
            # Wildcard match: everything before the final character must be
            # a literal prefix, i.e. the only '*' allowed is the last char.
            prefix = key[:-1]
            if '*' in prefix:
                # TODO: Support better wildcarding
                print("Only supporting tail wildcard matches right now (ex: 'foo:*')")
                return
            used = sum(self.store[name] for name in self.store
                       if name.startswith(prefix))
        else:
            # Exact match
            if key not in self.store:
                print("No such key")
                return
            used = self.store[key]
        # An empty (or fully filtered) dump has total_bytes == 0; report 0%
        # instead of raising ZeroDivisionError and killing the shell.
        if self.total_bytes:
            pct = 100.0 * used / self.total_bytes
        else:
            pct = 0.0
        print("%d bytes (%0.3f%%)" % (used, pct))

    def complete_info(self, text, line, begidx, endidx):
        '''Auto-complete functionality for `info` command

        Caveat: when auto-complete input ends in ':' (a character that many
        namespaces contain, as per redis common practice), `text` parameter
        becomes an empty string, so `key_text` is extracted from the remainder
        of the line starting after the "info " prefix.

        Returns a sorted list of candidate completions. Each candidate is the
        part of a matching key that follows the last ':' already typed.
        '''
        key_text = line[len("info "):]
        if not key_text:
            return []
        # Everything up to and including the last ':' the user typed. Using
        # rpartition keeps a leading ':' (input such as ":abc") in the prefix,
        # so it is not repeated inside the returned completions.
        head, colon, _ = key_text.rpartition(':')
        existing_prefix = head + colon
        results = set()
        for name in self.store:
            if not name.startswith(key_text):
                continue
            candidate = name[len(existing_prefix):]
            if self.limit_results:
                # If there is a colon, only keep the part up to the colon
                namespace, sep, _ = candidate.partition(':')
                candidate = namespace + sep
            results.add(candidate)
        return sorted(results)

    def do_EOF(self, line):
        '''Support expected EOF behavior
        '''
        # Emit a newline so the parent shell's prompt starts on a fresh line.
        print("")
        return True