Skip to content

Commit a2c48c0

Browse files
authored
Restructure redis-core to follow the agentskills.io spec (#20)
* chore(validate): allow evals/ directory in spec-format skills
* feat(redis-core): add spec-compliant skill for data modeling
* feat(redis-core): move eval suite from redis-development
* chore(eval): show generation cost only in skill-vs-baseline comparison
* docs: note agentskills.io migration in root AGENTS.md
1 parent 9032f7e commit a2c48c0

19 files changed

Lines changed: 923 additions & 543 deletions

File tree

AGENTS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ This file provides guidance to AI coding agents (Claude Code, Cursor, Copilot, e
66

77
A collection of skills for AI coding agents working with Redis. Skills are packaged instructions and resources that extend agent capabilities.
88

9+
## Skill Format
10+
11+
New skills follow the [agentskills.io specification](https://agentskills.io/specification): a `SKILL.md` with required `name` and `description` frontmatter, plus optional `references/`, `scripts/`, and `assets/` directories. See [skills/redis-core/](skills/redis-core/) for the reference layout.
12+
13+
`skills/redis-development/` is the legacy compiled layout (`rules/` + generated `AGENTS.md`) and is being migrated category-by-category to the spec layout. The instructions below for `rules/`, the build system, and config wiring apply only to that legacy skill until the migration is complete.
14+
915
## Creating a New Skill
1016

1117
### Directory Structure

packages/redis-development-build/src/eval/aggregate.ts

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ interface BaselineOverallReport {
9494
mean_delta_pass_rate?: number;
9595
mean_delta_tokens?: number;
9696
mean_delta_time_seconds?: number;
97-
total_cost_usd?: number;
9897
mean_delta_cost_usd?: number;
9998
}
10099

@@ -106,7 +105,6 @@ interface BaselineModelReport {
106105
time_seconds?: number;
107106
};
108107
cost?: {
109-
total_usd?: number;
110108
delta_usd?: number;
111109
};
112110
verdict?: string;
@@ -609,7 +607,8 @@ async function summarizeModelCost(
609607
}
610608
}
611609

612-
const meanWithSkillUsd = withSkillRuns === 0 ? 0 : withSkillUsd / withSkillRuns;
610+
const meanWithSkillUsd =
611+
withSkillRuns === 0 ? 0 : withSkillUsd / withSkillRuns;
613612
const meanWithoutSkillUsd =
614613
withoutSkillRuns === 0 ? 0 : withoutSkillUsd / withoutSkillRuns;
615614

@@ -714,9 +713,7 @@ function normalizedEvalName(
714713
return trimmed;
715714
}
716715

717-
function summarizeOverall(
718-
modelSummaries: ModelSummary[],
719-
) {
716+
function summarizeOverall(modelSummaries: ModelSummary[]) {
720717
return {
721718
models: modelSummaries.length,
722719
total_cost_usd: roundUsd(
@@ -729,7 +726,10 @@ function summarizeOverall(
729726
),
730727
),
731728
grading_cost_usd: roundUsd(
732-
modelSummaries.reduce((sum, summary) => sum + summary.cost.grading_usd, 0),
729+
modelSummaries.reduce(
730+
(sum, summary) => sum + summary.cost.grading_usd,
731+
0,
732+
),
733733
),
734734
mean_delta_pass_rate: mean(
735735
modelSummaries.map((summary) => summary.delta.pass_rate),
@@ -746,8 +746,8 @@ function summarizeOverall(
746746
models_improved: modelSummaries.filter(
747747
(summary) => summary.verdict === "improves",
748748
).length,
749-
models_neutral: modelSummaries.filter(
750-
(summary) => isNeutralVerdict(summary.verdict),
749+
models_neutral: modelSummaries.filter((summary) =>
750+
isNeutralVerdict(summary.verdict),
751751
).length,
752752
models_degraded: modelSummaries.filter(
753753
(summary) => summary.verdict === "degrades",
@@ -762,7 +762,6 @@ function currentOverallSnapshot(
762762
mean_pass_delta: overall.mean_delta_pass_rate,
763763
mean_token_delta: overall.mean_delta_tokens,
764764
mean_time_delta_seconds: overall.mean_delta_time_seconds,
765-
total_cost_usd: overall.total_cost_usd,
766765
mean_cost_delta_usd: overall.mean_delta_cost_usd,
767766
};
768767
}
@@ -774,7 +773,6 @@ function baselineOverallSnapshot(
774773
mean_pass_delta: numberOrZero(overall?.mean_delta_pass_rate),
775774
mean_token_delta: numberOrZero(overall?.mean_delta_tokens),
776775
mean_time_delta_seconds: numberOrZero(overall?.mean_delta_time_seconds),
777-
total_cost_usd: numberOrZero(overall?.total_cost_usd),
778776
mean_cost_delta_usd: numberOrZero(overall?.mean_delta_cost_usd),
779777
};
780778
}
@@ -788,7 +786,6 @@ function subtractOverallSnapshot(
788786
mean_token_delta: current.mean_token_delta - baseline.mean_token_delta,
789787
mean_time_delta_seconds:
790788
current.mean_time_delta_seconds - baseline.mean_time_delta_seconds,
791-
total_cost_usd: current.total_cost_usd - baseline.total_cost_usd,
792789
mean_cost_delta_usd:
793790
current.mean_cost_delta_usd - baseline.mean_cost_delta_usd,
794791
};
@@ -799,7 +796,6 @@ function currentModelSnapshot(summary: ModelSummary): BaselineModelSnapshot {
799796
pass_delta: summary.delta.pass_rate,
800797
token_delta: summary.delta.tokens,
801798
time_delta_seconds: summary.delta.time_seconds,
802-
total_cost_usd: summary.cost.total_usd,
803799
cost_delta_usd: summary.cost.delta_usd,
804800
};
805801
}
@@ -811,7 +807,6 @@ function baselineModelSnapshot(
811807
pass_delta: numberOrZero(model.delta?.pass_rate),
812808
token_delta: numberOrZero(model.delta?.tokens),
813809
time_delta_seconds: numberOrZero(model.delta?.time_seconds),
814-
total_cost_usd: numberOrZero(model.cost?.total_usd),
815810
cost_delta_usd: numberOrZero(model.cost?.delta_usd),
816811
};
817812
}
@@ -823,8 +818,8 @@ function subtractModelSnapshot(
823818
return {
824819
pass_delta: current.pass_delta - baseline.pass_delta,
825820
token_delta: current.token_delta - baseline.token_delta,
826-
time_delta_seconds: current.time_delta_seconds - baseline.time_delta_seconds,
827-
total_cost_usd: current.total_cost_usd - baseline.total_cost_usd,
821+
time_delta_seconds:
822+
current.time_delta_seconds - baseline.time_delta_seconds,
828823
cost_delta_usd: current.cost_delta_usd - baseline.cost_delta_usd,
829824
};
830825
}
@@ -843,7 +838,9 @@ function summarizeRuns(runs: BenchmarkRun[]): {
843838
};
844839
}
845840

846-
function summarizeRunSummaries(summaries: Array<ReturnType<typeof summarizeRuns>>): {
841+
function summarizeRunSummaries(
842+
summaries: Array<ReturnType<typeof summarizeRuns>>,
843+
): {
847844
count: number;
848845
pass_rate: number;
849846
time_seconds: number;
@@ -1034,12 +1031,6 @@ function renderBaselineMarkdown(comparison: BaselineComparison): string {
10341031
`${signedNumber(comparison.overall.current.mean_time_delta_seconds, 1)}s`,
10351032
`${signedNumber(comparison.overall.change.mean_time_delta_seconds, 1)}s`,
10361033
],
1037-
[
1038-
"Total eval cost",
1039-
formatUsd(comparison.overall.baseline.total_cost_usd),
1040-
formatUsd(comparison.overall.current.total_cost_usd),
1041-
signedUsd(comparison.overall.change.total_cost_usd),
1042-
],
10431034
[
10441035
"Mean cost delta",
10451036
signedUsd(comparison.overall.baseline.mean_cost_delta_usd),

packages/redis-development-build/src/eval/html-template.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,13 @@ export interface BaselineOverallSnapshot {
9393
mean_pass_delta: number;
9494
mean_token_delta: number;
9595
mean_time_delta_seconds: number;
96-
total_cost_usd: number;
9796
mean_cost_delta_usd: number;
9897
}
9998

10099
export interface BaselineModelSnapshot {
101100
pass_delta: number;
102101
token_delta: number;
103102
time_delta_seconds: number;
104-
total_cost_usd: number;
105103
cost_delta_usd: number;
106104
}
107105

@@ -772,7 +770,6 @@ function renderBaselineSection(comparison: BaselineComparison): string {
772770
${summaryMetric("Pass Delta Change", "Current mean pass delta minus baseline mean pass delta. Positive means this run improved the skill uplift versus the baseline.", signedPercent(comparison.overall.change.mean_pass_delta), deltaClass(comparison.overall.change.mean_pass_delta, "pass_rate"))}
773771
${summaryMetric("Token Delta Change", "Current mean token delta minus baseline mean token delta. Negative means this run reduced token overhead versus the baseline.", signedNumber(comparison.overall.change.mean_token_delta, 0), deltaClass(comparison.overall.change.mean_token_delta, "tokens"))}
774772
${summaryMetric("Time Delta Change", "Current mean time delta minus baseline mean time delta. Negative means this run reduced runtime overhead versus the baseline.", `${signedNumber(comparison.overall.change.mean_time_delta_seconds, 1)}s`, deltaClass(comparison.overall.change.mean_time_delta_seconds, "time_seconds"))}
775-
${summaryMetric("Total Cost Change", "Current total eval cost minus baseline total eval cost. Negative means this run was cheaper than the baseline.", signedUsd(comparison.overall.change.total_cost_usd), deltaClass(comparison.overall.change.total_cost_usd, "cost_usd"))}
776773
${summaryMetric("Cost Delta Change", "Current mean cost delta minus baseline mean cost delta. Negative means the with-skill cost overhead improved versus the baseline.", signedUsd(comparison.overall.change.mean_cost_delta_usd), deltaClass(comparison.overall.change.mean_cost_delta_usd, "cost_usd"))}
777774
</section>
778775
<div class="panel">
@@ -821,7 +818,6 @@ function renderLegend(input: AggregateHtmlInput): string {
821818
addTone(baseline.overall.change.mean_pass_delta, "pass_rate");
822819
addTone(baseline.overall.change.mean_token_delta, "tokens");
823820
addTone(baseline.overall.change.mean_time_delta_seconds, "time_seconds");
824-
addTone(baseline.overall.change.total_cost_usd, "cost_usd");
825821
addTone(baseline.overall.change.mean_cost_delta_usd, "cost_usd");
826822
for (const model of baseline.models) {
827823
if (!model.change) continue;

scripts/validate-skill-structure.mjs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ import process from "node:process";
88
const repoRoot = process.cwd();
99
const skillsRoot = path.join(repoRoot, "skills");
1010

11+
// Subdirectories that exist for repo tooling (not the agent runtime) and that
12+
// skill-validator should accept without "unknown directory" warnings. The
13+
// agentskills.io spec explicitly allows additional files and directories at
14+
// the skill root.
15+
const ALLOW_DIRS = ["evals"];
16+
1117
const options = parseArgs(process.argv.slice(2));
1218
const skillDirs = resolveSkillDirs(options);
1319

@@ -170,11 +176,16 @@ function changedSkillDirs(baseRef) {
170176
}
171177

172178
function validateSkill(skillDir) {
173-
const command = spawnSync(
174-
"skill-validator",
175-
["check", "-o", "json", skillDir],
176-
{ cwd: repoRoot, encoding: "utf8" },
177-
);
179+
const args = ["check", "-o", "json"];
180+
if (ALLOW_DIRS.length > 0) {
181+
args.push(`--allow-dirs=${ALLOW_DIRS.join(",")}`);
182+
}
183+
args.push(skillDir);
184+
185+
const command = spawnSync("skill-validator", args, {
186+
cwd: repoRoot,
187+
encoding: "utf8",
188+
});
178189

179190
if (command.error) {
180191
fail(`Failed to run skill-validator: ${command.error.message}`);

skills/redis-core/SKILL.md

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
---
2+
name: redis-core
3+
description: Core Redis modeling guidance — choose the right data structure (String, Hash, List, Set, Sorted Set, JSON, Stream, Vector Set) and use consistent colon-separated key names. Use when designing a Redis data model, caching objects, deciding between Hash and JSON, building counters, leaderboards, membership sets, or session stores, or when reviewing/cleaning up Redis key naming.
4+
license: MIT
5+
metadata:
6+
author: Redis, Inc.
7+
version: "0.1.0"
8+
---
9+
10+
# Redis Core
11+
12+
Foundational guidance for modeling data in Redis. Covers data-type selection and key-name conventions — the two decisions that most directly drive memory, performance, and maintainability.
13+
14+
## When to apply
15+
16+
- Caching objects, sessions, or per-user state.
17+
- Counters, leaderboards, recent-items lists, unique-membership sets.
18+
- Reviewing or refactoring Redis key names.
19+
- Deciding between a Redis Hash and a JSON document for an entity.
20+
21+
## 1. Choose the right data structure
22+
23+
Pick the type that matches the *access pattern*, not just the shape of the data.
24+
25+
| Use case | Recommended type | Why |
26+
|---|---|---|
27+
| Simple values, counters | String | Atomic `INCR`/`DECR`, `SET`/`GET` |
28+
| Object with independently updated fields | Hash | Per-field reads/writes, no whole-object rewrite |
29+
| Queue, recent-N items | List | O(1) push/pop at ends |
30+
| Unique items, membership checks | Set | O(1) `SADD`/`SISMEMBER`/`SCARD` |
31+
| Rankings, score-based ranges | Sorted Set | Score-ordered; `ZADD`/`ZRANGE`/`ZRANK` |
32+
| Nested / hierarchical data | JSON | Path-level updates, nested arrays, RQE indexing |
33+
| Event log, fan-out messaging | Stream | Persistent, consumer groups |
34+
| Vector similarity | Vector Set | Native vector storage with HNSW |
35+
36+
**Common anti-pattern:** stuffing a flat object into a serialized string. Updating one field means fetch + parse + mutate + rewrite. Use a Hash instead.
37+
38+
See [references/choose-data-structure.md](references/choose-data-structure.md) for full rationale and Python/Java examples.
39+
40+
## 2. Use consistent key names
41+
42+
Use colon-separated segments with a stable hierarchy:
43+
44+
```
45+
{entity}:{id}:{attribute}
46+
user:1001:profile
47+
user:1001:settings
48+
order:2024:items
49+
session:abc123
50+
article:987:likes
51+
game:space-invaders:leaderboard
52+
```
53+
54+
Rules of thumb:
55+
56+
- **Lowercase, colon-separated.** No spaces, no mixed casing (`User_1001_Profile` is bad).
57+
- **Keep keys short but readable** — keys live in memory and appear in every command.
58+
- **Don't use full URLs or long strings as keys.** Extract a short identifier, or use a hash digest of the URL.
59+
- **Prefix for multi-tenancy** (`tenant:42:user:7:cart`) so scans and ACLs can target a tenant cleanly.
60+
- **Be consistent.** Pick one convention per service and apply it across all keys.
61+
62+
See [references/key-naming.md](references/key-naming.md) for cleanup examples and edge cases.
63+
64+
## References
65+
66+
- [Redis: Choosing the right data type](https://redis.io/docs/latest/develop/data-types/compare-data-types/)
67+
- [Redis: Keys](https://redis.io/docs/latest/develop/use/keyspace/)

skills/redis-development/evals/data-structures-key-naming/baselines/README.md renamed to skills/redis-core/evals/core/baselines/README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
# data-structures-key-naming Baseline
1+
# core Baseline
22

3-
Updated: 2026-05-20T07:13:25.176Z
3+
Updated: 2026-05-22T10:36:23.051Z
44

5-
Skill: `redis-development`
5+
Skill: `redis-core`
66

7-
Suite: `data-structures-key-naming`
7+
Suite: `core`
88

99
Source iteration: `iteration-1`
1010

11-
Source output: `eval-workspaces/redis-development/data-structures-key-naming/iteration-1`
11+
Source output: `eval-workspaces/redis-core/core/iteration-1`
1212

1313
This is a curated aggregate benchmark snapshot. Use it as the shared reference
1414
when comparing future skill changes against the current accepted behavior.
@@ -24,5 +24,5 @@ when comparing future skill changes against the current accepted behavior.
2424
## Update Command
2525

2626
```bash
27-
npm run eval:baseline -- --skill redis-development --suite data-structures-key-naming --iteration iteration-1
27+
npm run eval:baseline -- --skill redis-core --suite core --iteration iteration-1
2828
```

0 commit comments

Comments
 (0)