Skip to content

Commit de437bb

Browse files
test(arbitrary-misses): regression coverage for findings #4, #6, #7
Adds four RLTest cases that observably exercise the Bugbot-found bugs that previously had no automated coverage (verified only via local smoke tests): - test_arbitrary_spop_with_count_empty_array_is_miss (#7) Pre-loads 3 sets * 5 members, runs SPOP key COUNT 1; without the *0 null-sentinel fix, every call would record a hit. With the fix only ~15 destructive pops succeed and the rest report misses. - test_arbitrary_blpop_variadic_keys_single_bucket (#6) Pre-loads memtier-1 with enough items for every BLPOP call to return immediately, and verifies "Per-Key Misses" carries exactly key[0] (not key[0..2]). Without the fix, response->get_hits()=2 from the [key, value] reply would mark two of three buckets as hit and one as miss per call. - test_arbitrary_xread_keyword_spec_evaluates_correctly (#4 Keyword) Pre-loads 3 streams; XREAD COUNT 1 STREAMS __key__ 0 across a 10-key range must produce both hits and misses, proving the Keyword begin_search resolved correctly. Without the args[idx] fix, the STREAMS keyword search would land on the wrong slot. - test_arbitrary_eval_keynum_spec_runs_cleanly (#4 Keynum) EVAL return 1 1 __key__ exercises the Keynum begin_search path. EVAL is NotMissable, so we assert no Per-Key entry but Count > 0, proving resolve_command_meta read numkeys from the right slot. Findings #2 (lazy resize), #5 (empty bulk), #8 (alloc hygiene), #9 (mbulk NULL), #10 (default switch) and #11 (Range step>1) remain without dedicated tests because they're either parser-collapsed (#5), defensive/unreachable (#9, #10, #11), or non-functional (#8); documented in PR comments. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b32c19d commit de437bb

1 file changed

Lines changed: 142 additions & 0 deletions

File tree

tests/test_arbitrary_command_misses.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,3 +257,145 @@ def test_arbitrary_set_not_missable_no_per_key_section(env):
257257
per_key = result["ALL STATS"].get("Per-Key Misses", {})
258258
# SET should not produce any per-key bookkeeping.
259259
env.assertNotContains("SET", per_key)
260+
261+
262+
def test_arbitrary_spop_with_count_empty_array_is_miss(env):
263+
"""SPOP key COUNT N returns *0 (empty array) on missing key, not null bulk.
264+
265+
Regression for Bugbot finding #7: the SingleNullBulk null-sentinel set
266+
was {$-1, *-1} only; *0 was misclassified as a hit. With the fix and
267+
pre-loaded 3 keys × 5 members each, destructive SPOPs should drain
268+
populated sets quickly (~15 hits total) and report the rest as misses.
269+
Without the fix, every call would record a hit.
270+
"""
271+
env.skipOnCluster()
272+
env.flush()
273+
conn = env.getConnection()
274+
# 3 keys pre-populated, each with 5 members. SPOP count=1 destroys
275+
# one member per successful call, so total successful pops = 15.
276+
for i in range(1, _PRELOADED_KEYS + 1):
277+
conn.sadd("{}{}".format(_KEY_PREFIX, i), "m1", "m2", "m3", "m4", "m5")
278+
279+
result = _run_benchmark(env, "SPOP __key__ 1")
280+
per_key = result["ALL STATS"].get("Per-Key Misses", {})
281+
env.assertContains("SPOP", per_key)
282+
cmd_stats = per_key["SPOP"]
283+
284+
total_ops = cmd_stats["Total Hits"] + cmd_stats["Total Misses"]
285+
env.assertEqual(total_ops, _REQUESTS * 2)
286+
# Misses MUST dominate. Without the *0 fix, Total Misses would be 0.
287+
env.assertTrue(cmd_stats["Total Misses"] > cmd_stats["Total Hits"],
288+
message="expected SPOP key COUNT misses on empty/missing keys "
289+
"(seeing {}/{} hits/misses; *0 may not be classified as miss)"
290+
.format(cmd_stats["Total Hits"], cmd_stats["Total Misses"]))
291+
292+
293+
def test_arbitrary_blpop_variadic_keys_single_bucket(env):
294+
"""BLPOP key1 key2 ... timeout returns [winning_key, value] on success.
295+
296+
Regression for Bugbot finding #6: the SingleNullBulk handler used
297+
response->get_hits() (which counts every non-null bulk recursively),
298+
causing BLPOP/BRPOP/BZPOPMAX/BZPOPMIN replies to inflate hit counts
299+
and report multiple per-key buckets. With the fix, BLPOP must always
300+
show exactly one bucket regardless of how many key slots the user
301+
supplied; we don't currently parse the winning key from the reply.
302+
"""
303+
env.skipOnCluster()
304+
env.flush()
305+
conn = env.getConnection()
306+
# Preload memtier-1 with enough items so every BLPOP call returns
307+
# immediately (the test fixes key range to [1,1] below to guarantee that).
308+
for _ in range(_REQUESTS * 2 + 10):
309+
conn.rpush("{}1".format(_KEY_PREFIX), "v")
310+
311+
test_dir = tempfile.mkdtemp()
312+
benchmark_specs = {
313+
"name": env.testName,
314+
# Three key slots + 1s timeout. Key range [1,1] makes every pick
315+
# land on the populated memtier-1 list, so BLPOP returns instantly.
316+
"args": [
317+
"--command=BLPOP __key__ __key__ __key__ 1",
318+
"--command-key-pattern=R",
319+
"--command-stats-breakdown=line",
320+
"--key-prefix={}".format(_KEY_PREFIX),
321+
"--key-minimum=1",
322+
"--key-maximum=1",
323+
"--hide-histogram",
324+
],
325+
}
326+
addTLSArgs(benchmark_specs, env)
327+
config = get_default_memtier_config(threads=1, clients=2, requests=_REQUESTS)
328+
add_required_env_arguments(benchmark_specs, config, env, env.getMasterNodesList())
329+
config = RunConfig(test_dir, env.testName, config, {})
330+
ensure_clean_benchmark_folder(config.results_dir)
331+
benchmark = Benchmark.from_json(config, benchmark_specs)
332+
env.assertTrue(benchmark.run())
333+
debugPrintMemtierOnError(config, env)
334+
335+
with open("{}/mb.json".format(config.results_dir)) as fh:
336+
result = json.load(fh)
337+
per_key = result["ALL STATS"].get("Per-Key Misses", {})
338+
env.assertContains("BLPOP", per_key)
339+
cmd_stats = per_key["BLPOP"]
340+
# Exactly one bucket, regardless of the 3 __key__ placeholders.
341+
# Without the fix, get_hits()=2 for [key,value] reply would mark two
342+
# buckets as hit and one as miss per call.
343+
env.assertContains("key[0] Hits", cmd_stats)
344+
env.assertNotContains("key[1] Hits", cmd_stats,
345+
message="BLPOP should produce a single bucket; per-key "
346+
"attribution beyond hit/miss isn't extracted")
347+
348+
349+
def test_arbitrary_xread_keyword_spec_evaluates_correctly(env):
350+
"""XREAD COUNT N STREAMS key id uses Keyword begin_search.
351+
352+
Regression for Bugbot finding #4 (Keyword/Keynum off-by-one in
353+
evaluate_key_spec). Before the fix, args[idx-1] always read the
354+
command name first, mis-locating the STREAMS keyword. With the fix
355+
XREAD's spec resolves correctly and miss tracking works against the
356+
SingleNullBulk shape (object reply on hit, null on miss).
357+
"""
358+
env.skipOnCluster()
359+
env.flush()
360+
conn = env.getConnection()
361+
# Preload 3 streams (memtier-1..3) each with one entry.
362+
for i in range(1, _PRELOADED_KEYS + 1):
363+
conn.execute_command("XADD", "{}{}".format(_KEY_PREFIX, i), "*", "f", "v")
364+
365+
result = _run_benchmark(env, "XREAD COUNT 1 STREAMS __key__ 0")
366+
per_key = result["ALL STATS"].get("Per-Key Misses", {})
367+
env.assertContains("XREAD", per_key)
368+
cmd_stats = per_key["XREAD"]
369+
total_ops = cmd_stats["Total Hits"] + cmd_stats["Total Misses"]
370+
env.assertEqual(total_ops, _REQUESTS * 2)
371+
# Both hits and misses must be observable, proving the Keyword spec
372+
# resolved correctly and SingleNullBulk routed status to is_null check.
373+
env.assertTrue(cmd_stats["Total Hits"] > 0,
374+
message="expected XREAD hits on populated streams")
375+
env.assertTrue(cmd_stats["Total Misses"] > 0,
376+
message="expected XREAD misses on missing streams")
377+
378+
379+
def test_arbitrary_eval_keynum_spec_runs_cleanly(env):
380+
"""EVAL has Keynum begin_search; verify resolve_command_meta doesn't crash.
381+
382+
EVAL's reply_shape is NotMissable (script return value is opaque), so
383+
no Per-Key entry should appear — but the Keynum spec evaluation must
384+
not abort the run. Regression for Bugbot finding #4 (Keynum path
385+
previously read the wrong arg via args[idx-1] = "script body" instead
386+
of args[idx] = "numkeys", causing strtol to fail).
387+
"""
388+
env.skipOnCluster()
389+
_preload_strings(env)
390+
391+
# Minimal script returning 1; numkeys=1 followed by one __key__.
392+
result = _run_benchmark(env, "EVAL return 1 1 __key__")
393+
all_stats = result["ALL STATS"]
394+
per_key = all_stats.get("Per-Key Misses", {})
395+
# NotMissable -> no per-key bookkeeping for EVAL.
396+
env.assertNotContains("EVAL", per_key)
397+
# But the run must have produced ops — proves EVAL command was sent
398+
# and parsed without the Keynum evaluator aborting startup.
399+
eval_entry = all_stats.get("Evals", {})
400+
env.assertTrue(eval_entry.get("Count", 0) > 0,
401+
message="expected EVAL to run (Keynum spec evaluation)")

0 commit comments

Comments
 (0)