redis
diff --git a/AGENTS.md b/AGENTS.md
Lines changed: 6 additions & 0 deletions
diff --git a/packages/redis-development-build/src/eval/aggregate.ts b/packages/redis-development-build/src/eval/aggregate.ts
Lines changed: 14 additions & 23 deletions
diff --git a/packages/redis-development-build/src/eval/html-template.ts b/packages/redis-development-build/src/eval/html-template.ts
Lines changed: 0 additions & 4 deletions
diff --git a/scripts/validate-skill-structure.mjs b/scripts/validate-skill-structure.mjs
Lines changed: 16 additions & 5 deletions
diff --git a/skills/redis-core/SKILL.md b/skills/redis-core/SKILL.md
Lines changed: 67 additions & 0 deletions
diff --git a/skills/redis-development/evals/data-structures-key-naming/baselines/README.md b/skills/redis-core/evals/core/baselines/README.md
skills/redis-development/evals/data-structures-key-naming/baselines/README.md renamed to skills/redis-core/evals/core/baselines/README.md
Lines changed: 6 additions & 6 deletions
@@ -6,6 +6,12 @@ This file provides guidance to AI coding agents (Claude Code, Cursor, Copilot, e
 
 A collection of skills for AI coding agents working with Redis. Skills are packaged instructions and resources that extend agent capabilities.
 
+## Skill Format
+
+New skills follow the [agentskills.io specification](https://agentskills.io/specification): a `SKILL.md` with required `name` and `description` frontmatter, plus optional `references/`, `scripts/`, and `assets/`. See [skills/redis-core/](skills/redis-core/) for the reference layout.
+
+`skills/redis-development/` is the legacy compiled layout (`rules/` + generated `AGENTS.md`) and is being migrated category-by-category to the spec layout. The instructions below for `rules/`, the build system, and config wiring apply only to that legacy skill until the migration is complete.
+
 ## Creating a New Skill
 
 ### Directory Structure
 
@@ -94,7 +94,6 @@ interface BaselineOverallReport {
   mean_delta_pass_rate?: number;
   mean_delta_tokens?: number;
   mean_delta_time_seconds?: number;
-  total_cost_usd?: number;
   mean_delta_cost_usd?: number;
 }
 
@@ -106,7 +105,6 @@ interface BaselineModelReport {
     time_seconds?: number;
   };
   cost?: {
-    total_usd?: number;
     delta_usd?: number;
   };
   verdict?: string;
@@ -609,7 +607,8 @@ async function summarizeModelCost(
     }
   }
 
-  const meanWithSkillUsd = withSkillRuns === 0 ? 0 : withSkillUsd / withSkillRuns;
+  const meanWithSkillUsd =
+    withSkillRuns === 0 ? 0 : withSkillUsd / withSkillRuns;
   const meanWithoutSkillUsd =
     withoutSkillRuns === 0 ? 0 : withoutSkillUsd / withoutSkillRuns;
 
@@ -714,9 +713,7 @@ function normalizedEvalName(
   return trimmed;
 }
 
-function summarizeOverall(
-  modelSummaries: ModelSummary[],
-) {
+function summarizeOverall(modelSummaries: ModelSummary[]) {
   return {
     models: modelSummaries.length,
     total_cost_usd: roundUsd(
@@ -729,7 +726,10 @@ function summarizeOverall(
       ),
     ),
     grading_cost_usd: roundUsd(
-      modelSummaries.reduce((sum, summary) => sum + summary.cost.grading_usd, 0),
+      modelSummaries.reduce(
+        (sum, summary) => sum + summary.cost.grading_usd,
+        0,
+      ),
     ),
     mean_delta_pass_rate: mean(
       modelSummaries.map((summary) => summary.delta.pass_rate),
@@ -746,8 +746,8 @@ function summarizeOverall(
     models_improved: modelSummaries.filter(
       (summary) => summary.verdict === "improves",
     ).length,
-    models_neutral: modelSummaries.filter(
-      (summary) => isNeutralVerdict(summary.verdict),
+    models_neutral: modelSummaries.filter((summary) =>
+      isNeutralVerdict(summary.verdict),
     ).length,
     models_degraded: modelSummaries.filter(
       (summary) => summary.verdict === "degrades",
@@ -762,7 +762,6 @@ function currentOverallSnapshot(
     mean_pass_delta: overall.mean_delta_pass_rate,
     mean_token_delta: overall.mean_delta_tokens,
     mean_time_delta_seconds: overall.mean_delta_time_seconds,
-    total_cost_usd: overall.total_cost_usd,
     mean_cost_delta_usd: overall.mean_delta_cost_usd,
   };
 }
@@ -774,7 +773,6 @@ function baselineOverallSnapshot(
     mean_pass_delta: numberOrZero(overall?.mean_delta_pass_rate),
     mean_token_delta: numberOrZero(overall?.mean_delta_tokens),
     mean_time_delta_seconds: numberOrZero(overall?.mean_delta_time_seconds),
-    total_cost_usd: numberOrZero(overall?.total_cost_usd),
     mean_cost_delta_usd: numberOrZero(overall?.mean_delta_cost_usd),
   };
 }
@@ -788,7 +786,6 @@ function subtractOverallSnapshot(
     mean_token_delta: current.mean_token_delta - baseline.mean_token_delta,
     mean_time_delta_seconds:
       current.mean_time_delta_seconds - baseline.mean_time_delta_seconds,
-    total_cost_usd: current.total_cost_usd - baseline.total_cost_usd,
     mean_cost_delta_usd:
       current.mean_cost_delta_usd - baseline.mean_cost_delta_usd,
   };
@@ -799,7 +796,6 @@ function currentModelSnapshot(summary: ModelSummary): BaselineModelSnapshot {
     pass_delta: summary.delta.pass_rate,
     token_delta: summary.delta.tokens,
     time_delta_seconds: summary.delta.time_seconds,
-    total_cost_usd: summary.cost.total_usd,
     cost_delta_usd: summary.cost.delta_usd,
   };
 }
@@ -811,7 +807,6 @@ function baselineModelSnapshot(
     pass_delta: numberOrZero(model.delta?.pass_rate),
     token_delta: numberOrZero(model.delta?.tokens),
     time_delta_seconds: numberOrZero(model.delta?.time_seconds),
-    total_cost_usd: numberOrZero(model.cost?.total_usd),
     cost_delta_usd: numberOrZero(model.cost?.delta_usd),
   };
 }
@@ -823,8 +818,8 @@ function subtractModelSnapshot(
   return {
     pass_delta: current.pass_delta - baseline.pass_delta,
     token_delta: current.token_delta - baseline.token_delta,
-    time_delta_seconds: current.time_delta_seconds - baseline.time_delta_seconds,
-    total_cost_usd: current.total_cost_usd - baseline.total_cost_usd,
+    time_delta_seconds:
+      current.time_delta_seconds - baseline.time_delta_seconds,
     cost_delta_usd: current.cost_delta_usd - baseline.cost_delta_usd,
   };
 }
@@ -843,7 +838,9 @@ function summarizeRuns(runs: BenchmarkRun[]): {
   };
 }
 
-function summarizeRunSummaries(summaries: Array<ReturnType<typeof summarizeRuns>>): {
+function summarizeRunSummaries(
+  summaries: Array<ReturnType<typeof summarizeRuns>>,
+): {
   count: number;
   pass_rate: number;
   time_seconds: number;
@@ -1034,12 +1031,6 @@ function renderBaselineMarkdown(comparison: BaselineComparison): string {
       `${signedNumber(comparison.overall.current.mean_time_delta_seconds, 1)}s`,
       `${signedNumber(comparison.overall.change.mean_time_delta_seconds, 1)}s`,
     ],
-    [
-      "Total eval cost",
-      formatUsd(comparison.overall.baseline.total_cost_usd),
-      formatUsd(comparison.overall.current.total_cost_usd),
-      signedUsd(comparison.overall.change.total_cost_usd),
-    ],
     [
       "Mean cost delta",
       signedUsd(comparison.overall.baseline.mean_cost_delta_usd),
 
@@ -93,15 +93,13 @@ export interface BaselineOverallSnapshot {
   mean_pass_delta: number;
   mean_token_delta: number;
   mean_time_delta_seconds: number;
-  total_cost_usd: number;
   mean_cost_delta_usd: number;
 }
 
 export interface BaselineModelSnapshot {
   pass_delta: number;
   token_delta: number;
   time_delta_seconds: number;
-  total_cost_usd: number;
   cost_delta_usd: number;
 }
 
@@ -772,7 +770,6 @@ function renderBaselineSection(comparison: BaselineComparison): string {
       ${summaryMetric("Pass Delta Change", "Current mean pass delta minus baseline mean pass delta. Positive means this run improved the skill uplift versus the baseline.", signedPercent(comparison.overall.change.mean_pass_delta), deltaClass(comparison.overall.change.mean_pass_delta, "pass_rate"))}
       ${summaryMetric("Token Delta Change", "Current mean token delta minus baseline mean token delta. Negative means this run reduced token overhead versus the baseline.", signedNumber(comparison.overall.change.mean_token_delta, 0), deltaClass(comparison.overall.change.mean_token_delta, "tokens"))}
       ${summaryMetric("Time Delta Change", "Current mean time delta minus baseline mean time delta. Negative means this run reduced runtime overhead versus the baseline.", `${signedNumber(comparison.overall.change.mean_time_delta_seconds, 1)}s`, deltaClass(comparison.overall.change.mean_time_delta_seconds, "time_seconds"))}
-      ${summaryMetric("Total Cost Change", "Current total eval cost minus baseline total eval cost. Negative means this run was cheaper than the baseline.", signedUsd(comparison.overall.change.total_cost_usd), deltaClass(comparison.overall.change.total_cost_usd, "cost_usd"))}
       ${summaryMetric("Cost Delta Change", "Current mean cost delta minus baseline mean cost delta. Negative means the with-skill cost overhead improved versus the baseline.", signedUsd(comparison.overall.change.mean_cost_delta_usd), deltaClass(comparison.overall.change.mean_cost_delta_usd, "cost_usd"))}
     </section>
     <div class="panel">
@@ -821,7 +818,6 @@ function renderLegend(input: AggregateHtmlInput): string {
     addTone(baseline.overall.change.mean_pass_delta, "pass_rate");
     addTone(baseline.overall.change.mean_token_delta, "tokens");
     addTone(baseline.overall.change.mean_time_delta_seconds, "time_seconds");
-    addTone(baseline.overall.change.total_cost_usd, "cost_usd");
     addTone(baseline.overall.change.mean_cost_delta_usd, "cost_usd");
     for (const model of baseline.models) {
       if (!model.change) continue;
 
@@ -8,6 +8,12 @@ import process from "node:process";
 const repoRoot = process.cwd();
 const skillsRoot = path.join(repoRoot, "skills");
 
+// Subdirectories that exist for repo tooling (not the agent runtime) and that
+// skill-validator should accept without "unknown directory" warnings. The
+// agentskills.io spec explicitly allows additional files and directories at
+// the skill root.
+const ALLOW_DIRS = ["evals"];
+
 const options = parseArgs(process.argv.slice(2));
 const skillDirs = resolveSkillDirs(options);
 
@@ -170,11 +176,16 @@ function changedSkillDirs(baseRef) {
 }
 
 function validateSkill(skillDir) {
-  const command = spawnSync(
-    "skill-validator",
-    ["check", "-o", "json", skillDir],
-    { cwd: repoRoot, encoding: "utf8" },
-  );
+  const args = ["check", "-o", "json"];
+  if (ALLOW_DIRS.length > 0) {
+    args.push(`--allow-dirs=${ALLOW_DIRS.join(",")}`);
+  }
+  args.push(skillDir);
+
+  const command = spawnSync("skill-validator", args, {
+    cwd: repoRoot,
+    encoding: "utf8",
+  });
 
   if (command.error) {
     fail(`Failed to run skill-validator: ${command.error.message}`);
 
@@ -0,0 +1,67 @@
+---
+name: redis-core
+description: Core Redis modeling guidance — choose the right data structure (String, Hash, List, Set, Sorted Set, JSON, Stream, Vector Set) and use consistent colon-separated key names. Use when designing a Redis data model, caching objects, deciding between Hash and JSON, building counters, leaderboards, membership sets, or session stores, or when reviewing/cleaning up Redis key naming.
+license: MIT
+metadata:
+  author: Redis, Inc.
+  version: "0.1.0"
+---
+
+# Redis Core
+
+Foundational guidance for modeling data in Redis. Covers data-type selection and key-name conventions — the two decisions that most directly drive memory, performance, and maintainability.
+
+## When to apply
+
+- Caching objects, sessions, or per-user state.
+- Counters, leaderboards, recent-items lists, unique-membership sets.
+- Reviewing or refactoring Redis key names.
+- Deciding between a Redis Hash and a JSON document for an entity.
+
+## 1. Choose the right data structure
+
+Pick the type that matches the *access pattern*, not just the shape of the data.
+
+| Use case | Recommended type | Why |
+|---|---|---|
+| Simple values, counters | String | Atomic `INCR`/`DECR`, `SET`/`GET` |
+| Object with independently updated fields | Hash | Per-field reads/writes, no whole-object rewrite |
+| Queue, recent-N items | List | O(1) push/pop at ends |
+| Unique items, membership checks | Set | O(1) `SADD`/`SISMEMBER`/`SCARD` |
+| Rankings, score-based ranges | Sorted Set | Score-ordered; `ZADD`/`ZRANGE`/`ZRANK` |
+| Nested / hierarchical data | JSON | Path-level updates, nested arrays, Redis Query Engine (RQE) indexing |
+| Event log, fan-out messaging | Stream | Persistent, consumer groups |
+| Vector similarity | Vector Set | Native vector storage with HNSW |
+
+**Common anti-pattern:** stuffing a flat object into a serialized string. Updating one field means fetch + parse + mutate + rewrite. Use a Hash instead.
+
+See [references/choose-data-structure.md](references/choose-data-structure.md) for full rationale and Python/Java examples.
+
+## 2. Use consistent key names
+
+Use `colon-separated` segments with a stable hierarchy:
+
+```
+<entity>:<id>:<attribute>
+user:1001:profile
+user:1001:settings
+order:2024:items
+session:abc123
+article:987:likes
+game:space-invaders:leaderboard
+```
+
+Rules of thumb:
+
+- **Lowercase, colon-separated.** No spaces, no mixed casing (`User_1001_Profile` is bad).
+- **Keep keys short but readable** — keys live in memory and appear in every command.
+- **Don't use full URLs or long strings as keys.** Extract a short identifier, or use a hash digest of the URL.
+- **Prefix for multi-tenancy** (`tenant:42:user:7:cart`) so scans and ACLs can target a tenant cleanly.
+- **Be consistent.** Pick one convention per service and apply it across all keys.
+
+See [references/key-naming.md](references/key-naming.md) for cleanup examples and edge cases.
+
+## References
+
+- [Redis: Choosing the right data type](https://redis.io/docs/latest/develop/data-types/compare-data-types/)
+- [Redis: Keys](https://redis.io/docs/latest/develop/use/keyspace/)
@@ -1,14 +1,14 @@
-# data-structures-key-naming Baseline
+# core Baseline
 
-Updated: 2026-05-20T07:13:25.176Z
+Updated: 2026-05-22T10:36:23.051Z
 
-Skill: `redis-development`
+Skill: `redis-core`
 
-Suite: `data-structures-key-naming`
+Suite: `core`
 
 Source iteration: `iteration-1`
 
-Source output: `eval-workspaces/redis-development/data-structures-key-naming/iteration-1`
+Source output: `eval-workspaces/redis-core/core/iteration-1`
 
 This is a curated aggregate benchmark snapshot. Use it as the shared reference
 when comparing future skill changes against the current accepted behavior.
@@ -24,5 +24,5 @@ when comparing future skill changes against the current accepted behavior.
 ## Update Command
 
 ```bash
-npm run eval:baseline -- --skill redis-development --suite data-structures-key-naming --iteration iteration-1
+npm run eval:baseline -- --skill redis-core --suite core --iteration iteration-1
 ```