From 8d981607862139ab1eb7a4b31123615292588126 Mon Sep 17 00:00:00 2001
From: Louise Lau <QodeXcli@users.noreply.github.com>
Date: Fri, 26 Jun 2026 09:16:43 +0800
Subject: [PATCH] =?UTF-8?q?feat(skills):=20strengthen=20UCB1=20=E2=80=94?=
 =?UTF-8?q?=20composite=20reward,=20tunable=20c,=20trial=20floor,=20scores?=
 =?UTF-8?q?,=20off-switch?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All five requested upgrades to the Phase-4 bandit, in the pure tested core:

1. Tunable exploration factor — RouteOptions.explorationFactor (config
   learning.versioning.ucbExplorationFactor; default √2). Higher c keeps a
   barely-tested challenger in play longer.
2. Composite reward — UCB1's per-arm value is no longer raw success rate but a
   weighted blend of success + token-efficiency + time-efficiency (default
   0.7/0.15/0.15, configurable). Success dominates (a cheap failure never beats an
   expensive success), but between two equally-successful versions the cheaper +
   faster one wins. Added totalDurationMs to VersionStats (back-compat optional);
   recordVersionExecution now takes durationMs; decideChampion converges on the
   composite reward too.
3. Minimum challenger trials — RouteOptions.minChallengerTrials (default 5): a
   challenger is force-routed until it clears the floor, so UCB1 never starves it
   (or a decision is never made) on too little signal.
4. UCB score history/analysis — ucbScores(manifest) returns reward + exploration
   bonus + ucb per arm; `qodex skill versions <name>` now prints the breakdown
   (and avg ms/run) for debugging.
5. Off-switch — routingStrategy 'champion-only' (config strategy) disables UCB and
   always routes the stable version — for sensitive skills you don't want
   experimented on.

config: learning.versioning { ucbExplorationFactor, minChallengerTrials,
rewardWeights, strategy }, threaded through versioned-store + the CLI.

Live-verified: between two 100%-success versions, the cheaper/faster one won on
composite reward and was promoted to champion. Tests: +6 (champion-only, trial
floor, exploration-factor explore/exploit, composite reward incl. success-
dominates, ucbScores snapshot) + the existing 12. typecheck + full suite (1222) +
build green.
---
 src/cli/skill-command.ts                |  17 +++-
 src/config/defaults.ts                  |  14 +++
 src/skills/learning/skill-versioning.ts | 119 ++++++++++++++++++------
 src/skills/learning/versioned-store.ts  |  21 +++--
 test/skill-versioning-ucb.test.ts       |  76 +++++++++++++++
 test/skill-versioning.test.ts           |   6 +-
 6 files changed, 214 insertions(+), 39 deletions(-)
 create mode 100644 test/skill-versioning-ucb.test.ts
diff --git a/src/cli/skill-command.ts b/src/cli/skill-command.ts
index 486ea27..fce88f5 100644
--- a/src/cli/skill-command.ts
+++ b/src/cli/skill-command.ts
@@ -196,18 +196,29 @@ export function buildSkillCommand(): Command {
     .action(async (name: string) => {
       const { loadSkillByName } = await import('../skills/loader.js');
       const { readManifest } = await import('../skills/learning/versioned-store.js');
-      const { routeSkillVersion } = await import('../skills/learning/skill-versioning.js');
+      const { routeSkillVersion, ucbScores } = await import('../skills/learning/skill-versioning.js');
+      const { loadConfig } = await import('../config/loader.js');
       const spec = await loadSkillByName(name, process.cwd());
       if (!spec) { console.error(`✗ no skill named "${name}"`); process.exit(1); }
       const m = await readManifest(spec.dir);
       if (!m) { console.log(`"${name}" is a single-version (legacy) skill — no version history yet.`); return; }
-      const routed = routeSkillVersion(m);
+      const vcfg = (await loadConfig(process.cwd()) as any).learning?.versioning ?? {};
+      const opts = {
+        explorationFactor: vcfg.ucbExplorationFactor,
+        minChallengerTrials: vcfg.minChallengerTrials,
+        weights: vcfg.rewardWeights ? { success: vcfg.rewardWeights.success ?? 0.7, token: vcfg.rewardWeights.token ?? 0.15, time: vcfg.rewardWeights.time ?? 0.15 } : undefined,
+      };
+      const routed = routeSkillVersion(m, opts);
+      const scores = new Map(ucbScores(m, opts).map(s => [s.version, s]));
       console.log(`Skill "${m.skillId}"  ·  strategy: ${m.routingStrategy}  ·  routed this turn → ${routed}\n`);
       for (const v of Object.values(m.versions).sort((a, b) => a.version.localeCompare(b.version, undefined, { numeric: true }))) {
         const tag = v.version === m.activeVersion ? '★ champion' : v.version === m.challengerVersion ? '⚡ challenger' : v.retired ? '✗ retired' : '';
         const rate = v.stats.executions ? `${Math.round((v.stats.successes / v.stats.executions) * 100)}% over ${v.stats.executions}` : 'untested';
+        const avgMs = v.stats.executions && v.stats.totalDurationMs ? `  ·  ${Math.round(v.stats.totalDurationMs / v.stats.executions)}ms/run` : '';
         console.log(`  ${v.version}  [${v.author}]  ${tag}`);
-        console.log(`      success: ${rate}  ·  tokens: ${v.stats.totalTokensUsed}  ·  confidence: ${v.confidence}`);
+        console.log(`      success: ${rate}  ·  tokens: ${v.stats.totalTokensUsed}${avgMs}  ·  confidence: ${v.confidence}`);
+        const s = scores.get(v.version);
+        if (s) console.log(`      UCB: reward ${s.reward.toFixed(3)} + bonus ${s.bonus === Infinity ? '∞' : s.bonus.toFixed(3)} = ${s.ucb === Infinity ? '∞' : s.ucb.toFixed(3)}`);
       }
     });
 
diff --git a/src/config/defaults.ts b/src/config/defaults.ts
index 15ef6a0..a713504 100644
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@@ -439,6 +439,20 @@ export interface QodexConfig {
      *  judge is unsure (grey-zone average or high cross-dimension variance). Must differ from
      *  defaults.model and judgeModel. Unset ⇒ no escalation (Tier-1 verdict stands). */
     judgeModelTier2?: string;
+    /** Skill versioning + UCB1 adaptive-bandit routing knobs. */
+    versioning?: {
+      /** UCB1 exploration factor `c` — higher explores challengers more. Default √2 (~1.41). */
+      ucbExplorationFactor?: number;
+      /** Force-route a challenger at least this many times before UCB1 can starve it, so a
+       *  decision is never made on too little signal. Default 5. */
+      minChallengerTrials?: number;
+      /** Composite-reward weights (success + token-efficiency + time-efficiency). Defaults
+       *  { success: 0.7, token: 0.15, time: 0.15 }. */
+      rewardWeights?: { success?: number; token?: number; time?: number };
+      /** Routing strategy when a manifest doesn't pin one: 'ucb1' (default), 'static', or
+       *  'champion-only' (UCB OFF — always the stable version, for sensitive skills). */
+      strategy?: 'ucb1' | 'static' | 'champion-only';
+    };
     /** When auto-promoting, require at least this confidence (0–100). Default 0 (the
      *  judge's pass is sufficient); raise it to gate low-confidence captures. */
     autoPromoteMinConfidence?: number;
diff --git a/src/skills/learning/skill-versioning.ts b/src/skills/learning/skill-versioning.ts
index 1b76175..1840de1 100644
--- a/src/skills/learning/skill-versioning.ts
+++ b/src/skills/learning/skill-versioning.ts
@@ -22,6 +22,29 @@ export interface VersionStats {
   executions: number;
   successes: number;
   totalTokensUsed: number;
+  /** Total wall-clock across executions (ms) — fuels the time term of the composite reward.
+   *  Optional for backward-compat with manifests written before reward weighting. */
+  totalDurationMs?: number;
+}
+
+export interface RewardWeights {
+  /** Weight on the success rate (dominant). */
+  success: number;
+  /** Weight on token efficiency (cheaper = better). */
+  token: number;
+  /** Weight on time efficiency (faster = better). */
+  time: number;
+}
+export const DEFAULT_WEIGHTS: RewardWeights = { success: 0.7, token: 0.15, time: 0.15 };
+
+export interface RouteOptions {
+  /** UCB1 exploration factor `c` (higher = more exploration). Default √2. */
+  explorationFactor?: number;
+  /** Force-route a challenger until it has at least this many trials, BEFORE UCB1 can
+   *  starve it — so a decision is never made on too little signal. Default 5. */
+  minChallengerTrials?: number;
+  /** Composite-reward weights. */
+  weights?: RewardWeights;
 }
 
 export interface VersionDetail {
@@ -40,13 +63,15 @@ export interface SkillManifest {
   skillId: string;
   activeVersion: string;          // the stable champion
   challengerVersion?: string;     // the version under test
-  routingStrategy: 'static' | 'ucb1';
+  /** 'ucb1' (adaptive bandit), 'static' (~25% challenger), or 'champion-only' (UCB OFF —
+   *  always the stable version; for sensitive skills you don't want experimented on). */
+  routingStrategy: 'static' | 'ucb1' | 'champion-only';
   versions: Record<string, VersionDetail>;
 }
 
 /** A fresh manifest for a brand-new skill (its first version is v1, the champion). */
 export function initManifest(skillId: string, author: 'human' | 'machine', confidence = 50, nowIso = ''): { manifest: SkillManifest; fileName: string } {
-  const v: VersionDetail = { version: 'v1', createdAt: nowIso, author, confidence, stats: { executions: 0, successes: 0, totalTokensUsed: 0 } };
+  const v: VersionDetail = { version: 'v1', createdAt: nowIso, author, confidence, stats: { executions: 0, successes: 0, totalTokensUsed: 0, totalDurationMs: 0 } };
   return {
     manifest: { skillId, activeVersion: 'v1', routingStrategy: 'ucb1', versions: { v1: v } },
     fileName: 'SKILL.v1.md',
@@ -92,31 +117,72 @@ export function createNextVersion(
   };
 }
 
-/** UCB1 score for a version given the total trials N. */
-function ucb1(v: VersionDetail, N: number, c: number): number {
-  if (v.stats.executions === 0) return Infinity; // always try an unsampled arm first
-  const mean = v.stats.successes / v.stats.executions;
-  return mean + c * Math.sqrt(Math.log(Math.max(1, N)) / v.stats.executions);
+const clamp01 = (x: number) => Math.max(0, Math.min(1, x));
+const perExec = (total: number, exec: number) => (exec ? total / exec : 0);
+
+interface RewardNorm { maxTokensPerExec: number; maxMsPerExec: number }
+function rewardNorm(arms: VersionDetail[]): RewardNorm {
+  let maxTokensPerExec = 0, maxMsPerExec = 0;
+  for (const v of arms) {
+    maxTokensPerExec = Math.max(maxTokensPerExec, perExec(v.stats.totalTokensUsed, v.stats.executions));
+    maxMsPerExec = Math.max(maxMsPerExec, perExec(v.stats.totalDurationMs ?? 0, v.stats.executions));
+  }
+  return { maxTokensPerExec, maxMsPerExec };
+}
+
+/**
+ * COMPOSITE reward in [0,1]: success rate dominates, with token- and time-EFFICIENCY
+ * nudges (cheaper / faster relative to the other arm scores higher). Efficiency terms are
+ * neutral (0.5) when there's no scale to normalize against. PURE.
+ */
+export function compositeReward(v: VersionDetail, norm: RewardNorm, weights: RewardWeights = DEFAULT_WEIGHTS): number {
+  if (v.stats.executions === 0) return 0;
+  const successRate = v.stats.successes / v.stats.executions;
+  const tokScore = norm.maxTokensPerExec > 0 ? 1 - perExec(v.stats.totalTokensUsed, v.stats.executions) / norm.maxTokensPerExec : 0.5;
+  const timeScore = norm.maxMsPerExec > 0 ? 1 - perExec(v.stats.totalDurationMs ?? 0, v.stats.executions) / norm.maxMsPerExec : 0.5;
+  const w = weights, denom = w.success + w.token + w.time || 1;
+  return (w.success * successRate + w.token * clamp01(tokScore) + w.time * clamp01(timeScore)) / denom;
+}
+
+export interface UcbScore { version: string; reward: number; bonus: number; ucb: number; executions: number }
+
+/** Per-arm UCB breakdown (reward + exploration bonus) for the active+challenger — a pure
+ *  snapshot for debugging / `qodex skill versions` / analysis. */
+export function ucbScores(manifest: SkillManifest, opts: RouteOptions = {}): UcbScore[] {
+  const c = opts.explorationFactor ?? Math.sqrt(2);
+  const arms = [manifest.activeVersion, manifest.challengerVersion]
+    .filter((x): x is string => !!x).map(v => manifest.versions[v]).filter((v): v is VersionDetail => !!v);
+  const norm = rewardNorm(arms);
+  const N = arms.reduce((s, v) => s + v.stats.executions, 0);
+  return arms.map(v => {
+    const reward = compositeReward(v, norm, opts.weights);
+    const bonus = v.stats.executions === 0 ? Infinity : c * Math.sqrt(Math.log(Math.max(1, N)) / v.stats.executions);
+    return { version: v.version, reward, bonus, ucb: reward + bonus, executions: v.stats.executions };
+  });
 }
 
 /**
- * Choose which version to inject this turn. No challenger → the champion. UCB1 balances
- * exploring the challenger against exploiting the better arm; an unsampled version is
- * tried first (Infinity), and a challenger whose success rate collapses loses traffic.
+ * Choose which version to inject this turn:
+ *   - no challenger / champion-only strategy → the champion (UCB OFF; sensitive skills),
+ *   - static → ~25% challenger,
+ *   - ucb1 → force-explore the challenger until it clears `minChallengerTrials` (so we never
+ *     judge on too little signal), then pick the higher UCB (composite reward + bonus);
+ *     ties go to the champion (don't disturb the stable version).
  */
-export function routeSkillVersion(manifest: SkillManifest, c: number = Math.sqrt(2)): string {
+export function routeSkillVersion(manifest: SkillManifest, opts: RouteOptions = {}): string {
   const champ = manifest.activeVersion;
   const chal = manifest.challengerVersion;
   if (!chal || !manifest.versions[chal] || chal === champ) return champ;
+  if (manifest.routingStrategy === 'champion-only') return champ;
+  if (manifest.routingStrategy === 'static') return deterministicStatic(manifest) ? chal : champ;
 
-  if (manifest.routingStrategy === 'static') {
-    return deterministicStatic(manifest) ? chal : champ;
-  }
-  const v1 = manifest.versions[champ]!;
-  const v2 = manifest.versions[chal]!;
-  const N = v1.stats.executions + v2.stats.executions;
-  // Tie → champion (conservative: don't disturb the stable version on a tie).
-  return ucb1(v2, N, c) > ucb1(v1, N, c) ? chal : champ;
+  const minTrials = opts.minChallengerTrials ?? 5;
+  if (manifest.versions[chal]!.stats.executions < minTrials) return chal; // exploration floor
+
+  const scores = ucbScores(manifest, opts);
+  const champU = scores.find(s => s.version === champ)?.ucb ?? -Infinity;
+  const chalU = scores.find(s => s.version === chal)?.ucb ?? -Infinity;
+  return chalU > champU ? chal : champ;
 }
 
 /** Deterministic ~25% challenger pick for the 'static' strategy (no Math.random — keeps the
@@ -131,7 +197,7 @@ function deterministicStatic(manifest: SkillManifest): boolean {
 export function recordVersionExecution(
   manifest: SkillManifest,
   version: string,
-  outcome: { success: boolean; tokens?: number },
+  outcome: { success: boolean; tokens?: number; durationMs?: number },
 ): SkillManifest {
   const v = manifest.versions[version];
   if (!v) return manifest;
@@ -141,6 +207,7 @@ export function recordVersionExecution(
       executions: v.stats.executions + 1,
       successes: v.stats.successes + (outcome.success ? 1 : 0),
       totalTokensUsed: v.stats.totalTokensUsed + (outcome.tokens ?? 0),
+      totalDurationMs: (v.stats.totalDurationMs ?? 0) + (outcome.durationMs ?? 0),
     },
   };
   return { ...manifest, versions: { ...manifest.versions, [version]: updated } };
@@ -152,16 +219,15 @@ export interface ChampionDecision {
   reason: string;
 }
 
-const rate = (v: VersionDetail) => (v.stats.executions ? v.stats.successes / v.stats.executions : 0);
-
 /**
- * Converge the A/B test. Once the challenger has enough samples:
+ * Converge the A/B test on the COMPOSITE reward (success + token + time), once the
+ * challenger has enough samples:
  *   - clearly BETTER than the champion (by `margin`) → PROMOTE it to active.
- *   - clearly WORSE → RETIRE it (drop the challenger, keep the champion).
+ *   - clearly WORSE → RETIRE it (kept in history, marked retired).
  *   - otherwise keep testing. Below `minExecutions` we never decide (too little signal).
  * PURE.
  */
-export function decideChampion(manifest: SkillManifest, opts: { minExecutions?: number; margin?: number } = {}): ChampionDecision {
+export function decideChampion(manifest: SkillManifest, opts: { minExecutions?: number; margin?: number; weights?: RewardWeights } = {}): ChampionDecision {
   const minExec = opts.minExecutions ?? 8;
   const margin = opts.margin ?? 0.1;
   const chal = manifest.challengerVersion;
@@ -171,7 +237,8 @@ export function decideChampion(manifest: SkillManifest, opts: { minExecutions?:
   const chalV = manifest.versions[chal]!;
   if (chalV.stats.executions < minExec) return { manifest, action: 'keep-testing', reason: `challenger has ${chalV.stats.executions}/${minExec} executions` };
 
-  const cr = rate(chalV), pr = rate(champV);
+  const norm = rewardNorm([champV, chalV]);
+  const cr = compositeReward(chalV, norm, opts.weights), pr = compositeReward(champV, norm, opts.weights);
   if (cr >= pr + margin) {
     const m: SkillManifest = { ...manifest, activeVersion: chal, challengerVersion: undefined };
     return { manifest: m, action: 'promote', reason: `challenger ${(cr * 100).toFixed(0)}% beat champion ${(pr * 100).toFixed(0)}% by ≥${margin * 100}%` };
diff --git a/src/skills/learning/versioned-store.ts b/src/skills/learning/versioned-store.ts
index a1b5cbd..d2cde6d 100644
--- a/src/skills/learning/versioned-store.ts
+++ b/src/skills/learning/versioned-store.ts
@@ -12,7 +12,7 @@
 import { promises as fs } from 'fs';
 import * as path from 'path';
 import {
-  type SkillManifest, type ChampionDecision,
+  type SkillManifest, type ChampionDecision, type RouteOptions,
   initManifest, createNextVersion, routeSkillVersion, recordVersionExecution, decideChampion, versionFileName,
 } from './skill-versioning.js';
 
@@ -29,11 +29,12 @@ async function writeManifest(skillDir: string, m: SkillManifest): Promise<void>
   await fs.rename(tmp, manifestPath(skillDir)); // atomic
 }
 
-/** The skill body to inject this turn + which version it is. Falls back to legacy SKILL.md. */
-export async function routedSkillBody(skillDir: string): Promise<{ version: string; body: string } | null> {
+/** The skill body to inject this turn + which version it is. Falls back to legacy SKILL.md.
+ *  `opts` carries the configurable UCB knobs (exploration factor, min trials, weights). */
+export async function routedSkillBody(skillDir: string, opts: RouteOptions = {}): Promise<{ version: string; body: string } | null> {
   const m = await readManifest(skillDir);
   if (m) {
-    const version = routeSkillVersion(m);
+    const version = routeSkillVersion(m, opts);
     try { return { version, body: await fs.readFile(path.join(skillDir, versionFileName(version)), 'utf-8') }; }
     catch { /* fall through to legacy */ }
   }
@@ -64,12 +65,18 @@ export async function addChallenger(skillDir: string, skillId: string, body: str
   return updatedManifest;
 }
 
-/** Record one execution outcome for the routed version, then try to converge the A/B test. */
-export async function recordOutcomeAndConverge(skillDir: string, version: string, outcome: { success: boolean; tokens?: number }): Promise<ChampionDecision | null> {
+/** Record one execution outcome (success + tokens + duration) for the routed version, then
+ *  try to converge the A/B test on the composite reward. */
+export async function recordOutcomeAndConverge(
+  skillDir: string,
+  version: string,
+  outcome: { success: boolean; tokens?: number; durationMs?: number },
+  opts: { minExecutions?: number; margin?: number } = {},
+): Promise<ChampionDecision | null> {
   const m = await readManifest(skillDir);
   if (!m) return null;
   const afterStats = recordVersionExecution(m, version, outcome);
-  const decision = decideChampion(afterStats);
+  const decision = decideChampion(afterStats, opts);
   await writeManifest(skillDir, decision.manifest);
   return decision;
 }
diff --git a/test/skill-versioning-ucb.test.ts b/test/skill-versioning-ucb.test.ts
new file mode 100644
index 0000000..e6f57ce
--- /dev/null
+++ b/test/skill-versioning-ucb.test.ts
@@ -0,0 +1,76 @@
+import { describe, it, expect } from 'vitest';
+import {
+  initManifest, createNextVersion, recordVersionExecution, routeSkillVersion, decideChampion,
+  compositeReward, ucbScores, type SkillManifest, type VersionDetail,
+} from '../src/skills/learning/skill-versioning.js';
+
+const challenger = () => createNextVersion(initManifest('s', 'machine', 50, '').manifest, 'machine').updatedManifest;
+function feed(m: SkillManifest, v: string, runs: Array<{ success: boolean; tokens?: number; durationMs?: number }>): SkillManifest {
+  for (const r of runs) m = recordVersionExecution(m, v, r);
+  return m;
+}
+const ten = (success: boolean, tokens = 0, durationMs = 0) => Array(10).fill(0).map(() => ({ success, tokens, durationMs }));
+
+describe('#5 champion-only — UCB OFF for sensitive skills', () => {
+  it('always routes the champion regardless of a strong challenger', () => {
+    let m = challenger();
+    m = { ...m, routingStrategy: 'champion-only' };
+    m = feed(m, 'v2', ten(true));   // perfect challenger
+    expect(routeSkillVersion(m)).toBe(m.activeVersion);
+  });
+});
+
+describe('#3 minChallengerTrials — force exploration before serious judgment', () => {
+  it('routes the challenger until it clears the floor, even if it looks bad early', () => {
+    let m = challenger();
+    m = feed(m, 'v1', ten(true));            // champion strong
+    m = feed(m, 'v2', [{ success: false }, { success: false }]); // 2 bad trials
+    expect(routeSkillVersion(m, { minChallengerTrials: 5 })).toBe('v2'); // still forced
+    m = feed(m, 'v2', [{ success: false }, { success: false }, { success: false }]); // now 5
+    expect(routeSkillVersion(m, { minChallengerTrials: 5 })).toBe(m.activeVersion);   // UCB takes over → champion
+  });
+});
+
+describe('#1 ucbExplorationFactor — tunes explore vs exploit', () => {
+  it('a higher c keeps a barely-tested challenger in play longer', () => {
+    let m = challenger();
+    m = feed(m, 'v1', ten(true));                              // champion 100% over 10
+    m = feed(m, 'v2', [...ten(true).slice(0, 5), { success: false }]); // challenger ~83% over 6
+    // tiny c → exploit champion; large c → explore challenger
+    expect(routeSkillVersion(m, { explorationFactor: 0.1, minChallengerTrials: 5 })).toBe(m.activeVersion);
+    expect(routeSkillVersion(m, { explorationFactor: 5, minChallengerTrials: 5 })).toBe('v2');
+  });
+});
+
+describe('#2 composite reward — success + token + time efficiency', () => {
+  it('between two EQUALLY-successful versions, the cheaper + faster one scores higher', () => {
+    let m = challenger();
+    m = feed(m, 'v1', ten(true, 1000, 2000));  // champion: 100% but expensive/slow
+    m = feed(m, 'v2', ten(true, 200, 400));    // challenger: 100% but cheap/fast
+    const champReward = compositeReward(m.versions.v1!, { maxTokensPerExec: 1000, maxMsPerExec: 2000 });
+    const chalReward = compositeReward(m.versions.v2!, { maxTokensPerExec: 1000, maxMsPerExec: 2000 });
+    expect(chalReward).toBeGreaterThan(champReward);
+    // and decideChampion promotes the more efficient one
+    expect(decideChampion(m, { minExecutions: 8 }).action).toBe('promote');
+  });
+  it('success still dominates — a cheap FAILURE never beats an expensive success', () => {
+    const norm = { maxTokensPerExec: 1000, maxMsPerExec: 2000 };
+    const good: VersionDetail = { version: 'a', createdAt: '', author: 'machine', confidence: 50, stats: { executions: 10, successes: 9, totalTokensUsed: 10000, totalDurationMs: 20000 } };
+    const cheapFail: VersionDetail = { version: 'b', createdAt: '', author: 'machine', confidence: 50, stats: { executions: 10, successes: 2, totalTokensUsed: 100, totalDurationMs: 100 } };
+    expect(compositeReward(good, norm)).toBeGreaterThan(compositeReward(cheapFail, norm));
+  });
+});
+
+describe('#4 ucbScores — debugging/analysis snapshot', () => {
+  it('exposes reward + bonus + ucb per arm', () => {
+    let m = challenger();
+    m = feed(m, 'v1', ten(true));
+    m = feed(m, 'v2', ten(false));
+    const scores = ucbScores(m);
+    expect(scores.map(s => s.version).sort()).toEqual(['v1', 'v2']);
+    const v1 = scores.find(s => s.version === 'v1')!;
+    expect(v1.reward).toBeGreaterThan(0);
+    expect(v1.ucb).toBeCloseTo(v1.reward + v1.bonus, 5);
+    expect(v1.executions).toBe(10);
+  });
+});
diff --git a/test/skill-versioning.test.ts b/test/skill-versioning.test.ts
index 69d807a..6813568 100644
--- a/test/skill-versioning.test.ts
+++ b/test/skill-versioning.test.ts
@@ -63,9 +63,9 @@ describe('routeSkillVersion — UCB1 explore/exploit', () => {
 describe('recordVersionExecution — pure stat update', () => {
   it('increments executions/successes/tokens', () => {
     let m = initManifest('s', 'machine', 50, '').manifest;
-    m = recordVersionExecution(m, 'v1', { success: true, tokens: 100 });
-    m = recordVersionExecution(m, 'v1', { success: false, tokens: 50 });
-    expect(m.versions.v1!.stats).toEqual({ executions: 2, successes: 1, totalTokensUsed: 150 });
+    m = recordVersionExecution(m, 'v1', { success: true, tokens: 100, durationMs: 1000 });
+    m = recordVersionExecution(m, 'v1', { success: false, tokens: 50, durationMs: 500 });
+    expect(m.versions.v1!.stats).toEqual({ executions: 2, successes: 1, totalTokensUsed: 150, totalDurationMs: 1500 });
   });
   it('unknown version is a no-op', () => {
     const m = initManifest('s', 'machine', 50, '').manifest;