QodeXcli · QodeXcli · Jun 26, 2026 · Jun 26, 2026
diff --git a/README.md b/README.md
@@ -133,6 +133,35 @@ qodex skill snapshots           # rollback points;  qodex skill restore <archive
 
 > Every successful task can also be exported as a **ShareGPT JSONL** corpus (`flywheel.datasetExport: true` → `~/.qodex/dataset/`) — a ready-to-use dataset for a future zero-cost local fine-tune. Strictly local; nothing is uploaded.
 
+### Skill versioning & A/B testing (UCB1)
+
+A skill keeps its whole history in **one flat directory** — `manifest.json` + `SKILL.v1.md`, `SKILL.v2.md`, … — no symlinks, identical on every OS. When a new candidate is captured for an existing skill it becomes a **challenger** to the stable **champion**, and QodeX routes traffic between them with the **UCB1 adaptive bandit** instead of a fixed split: it explores the challenger enough to get signal, then favours whichever has the higher score — and a challenger that turns out worse has its traffic driven to **zero automatically**.
+
+The score is a **composite reward**, not just win-rate: *success* dominates, but *token-* and *time-efficiency* (normalized **relative to the champion**) break ties — so between two equally-correct versions, the **cheaper, faster** one wins.
+
+```yaml
+learning:
+  versioning:
+    strategy: ucb1                 # or 'champion-only' to freeze a sensitive skill (UCB off)
+    ucbExplorationFactor: 1.41     # √2 — higher explores challengers more
+    minChallengerTrials: 5         # force a challenger ≥5 runs before judging it
+    rewardWeights: { success: 0.7, token: 0.15, time: 0.15 }
+```
+
+```text
+$ qodex skill versions git-commit-expert
+Skill "git-commit-expert"  ·  strategy: ucb1  ·  routed this turn → v2
+
+  v1  [human]   ★ champion
+      success: 88% over 40  ·  tokens: 60000  ·  1900ms/run  ·  confidence: 75
+      UCB: reward 0.766 + bonus 0.443 = 1.209
+  v2  [machine] ⚡ challenger
+      success: 92% over 12  ·  tokens: 41000  ·  1300ms/run  ·  confidence: 60
+      UCB: reward 0.841 + bonus 0.809 = 1.650     ← higher → gets this turn
+
+$ qodex skill rollback git-commit-expert v1     # snap the champion back to v1 anytime
+```
+
 ## Install
 
 **Prerequisites:** **Node 20+** (Node 22 LTS recommended) and **Git**. `dist/` is built locally (not committed), so the `npm run build` step is **required** on every platform. The build links two commands — `qodex` and the short alias `qx`.

diff --git a/src/cli/skill-command.ts b/src/cli/skill-command.ts
@@ -222,6 +222,25 @@ export function buildSkillCommand(): Command {
       }
     });
 
+  cmd
+    .command('rollback <name> <version>')
+    .description('Roll a versioned skill\'s champion back to an earlier version (e.g. v1) — drops any challenger')
+    .action(async (name: string, version: string) => {
+      const { loadSkillByName } = await import('../skills/loader.js'); // dynamic import: loaded only when this subcommand runs
+      const { rollbackToVersion } = await import('../skills/learning/versioned-store.js');
+      const spec = await loadSkillByName(name, process.cwd()); // falsy result → unknown skill, handled on the next line
+      if (!spec) { console.error(`✗ no skill named "${name}"`); process.exit(1); }
+      const ver = version.startsWith('v') ? version : `v${version}`; // accept both "1" and "v1"
+      const ok = await rollbackToVersion(spec.dir, ver); // false → no manifest, or the manifest has no such version
+      if (ok) {
+        console.log(`✓ "${name}" rolled back — champion is now ${ver}.`);
+        await refreshSkillRegistry(); // NOTE(review): presumably reloads skills so the new champion takes effect — confirm
+      } else {
+        console.error(`✗ "${name}" has no version ${ver} (or isn't versioned). Run \`qodex skill versions ${name}\`.`);
+        process.exit(1); // non-zero exit so scripts can detect the failed rollback
+      }
+    });
+
   cmd
     .command('lessons')
     .description('Show "learned cautions" mined from your RECURRING tool failures (failure-driven learning)')

diff --git a/src/skills/learning/skill-versioning.ts b/src/skills/learning/skill-versioning.ts
@@ -120,28 +120,36 @@ export function createNextVersion(
 const clamp01 = (x: number) => Math.max(0, Math.min(1, x));
 const perExec = (total: number, exec: number) => (exec ? total / exec : 0);
 
-interface RewardNorm { maxTokensPerExec: number; maxMsPerExec: number }
-function rewardNorm(arms: VersionDetail[]): RewardNorm {
-  let maxTokensPerExec = 0, maxMsPerExec = 0;
-  for (const v of arms) {
-    maxTokensPerExec = Math.max(maxTokensPerExec, perExec(v.stats.totalTokensUsed, v.stats.executions));
-    maxMsPerExec = Math.max(maxMsPerExec, perExec(v.stats.totalDurationMs ?? 0, v.stats.executions));
-  }
-  return { maxTokensPerExec, maxMsPerExec };
+/** Baseline for efficiency scoring: the champion's average tokens and milliseconds per
+ *  execution (both are 0 while the champion has no executions). */
+export interface RewardRef { champTokensPerExec: number; champMsPerExec: number }
+export function championRef(champion: VersionDetail): RewardRef {
+  const { executions, totalTokensUsed, totalDurationMs } = champion.stats;
+  const champTokensPerExec = perExec(totalTokensUsed, executions);
+  const champMsPerExec = perExec(totalDurationMs ?? 0, executions); // duration may be unrecorded → 0
+  return { champTokensPerExec, champMsPerExec };
+}
+
+/** Scores a per-execution cost against the champion's, clamped to [0,1]: same cost → 0.5,
+ *  zero cost → 1.0, double the cost (or more) → 0.0; 0.5 whenever the champion's cost is ≤ 0. */
+function efficiency(vPerExec: number, champPerExec: number): number {
+  const costRatio = champPerExec <= 0 ? 1 : vPerExec / champPerExec; // no baseline → score as parity
+  return clamp01(1 - 0.5 * costRatio);
 }
 
 /**
- * COMPOSITE reward in [0,1]: success rate dominates, with token- and time-EFFICIENCY
- * nudges (cheaper / faster relative to the other arm scores higher). Efficiency terms are
- * neutral (0.5) when there's no scale to normalize against. PURE.
+ * COMPOSITE reward in [0,1]: success rate dominates, with token- and time-EFFICIENCY nudges
+ * measured RELATIVE TO THE CHAMPION (the stable version is the baseline a challenger must
+ * beat). A version cheaper/faster than the champion scores above the 0.5 efficiency
+ * baseline; one twice as costly scores 0. PURE.
  */
-export function compositeReward(v: VersionDetail, norm: RewardNorm, weights: RewardWeights = DEFAULT_WEIGHTS): number {
+export function compositeReward(v: VersionDetail, ref: RewardRef, weights: RewardWeights = DEFAULT_WEIGHTS): number {
   if (v.stats.executions === 0) return 0;
   const successRate = v.stats.successes / v.stats.executions;
-  const tokScore = norm.maxTokensPerExec > 0 ? 1 - perExec(v.stats.totalTokensUsed, v.stats.executions) / norm.maxTokensPerExec : 0.5;
-  const timeScore = norm.maxMsPerExec > 0 ? 1 - perExec(v.stats.totalDurationMs ?? 0, v.stats.executions) / norm.maxMsPerExec : 0.5;
+  const tokScore = efficiency(perExec(v.stats.totalTokensUsed, v.stats.executions), ref.champTokensPerExec);
+  const timeScore = efficiency(perExec(v.stats.totalDurationMs ?? 0, v.stats.executions), ref.champMsPerExec);
   const w = weights, denom = w.success + w.token + w.time || 1;
-  return (w.success * successRate + w.token * clamp01(tokScore) + w.time * clamp01(timeScore)) / denom;
+  return (w.success * successRate + w.token * tokScore + w.time * timeScore) / denom;
 }
 
 export interface UcbScore { version: string; reward: number; bonus: number; ucb: number; executions: number }
@@ -152,10 +160,10 @@ export function ucbScores(manifest: SkillManifest, opts: RouteOptions = {}): Ucb
   const c = opts.explorationFactor ?? Math.sqrt(2);
   const arms = [manifest.activeVersion, manifest.challengerVersion]
     .filter((x): x is string => !!x).map(v => manifest.versions[v]).filter((v): v is VersionDetail => !!v);
-  const norm = rewardNorm(arms);
+  const ref = championRef(manifest.versions[manifest.activeVersion]!);
   const N = arms.reduce((s, v) => s + v.stats.executions, 0);
   return arms.map(v => {
-    const reward = compositeReward(v, norm, opts.weights);
+    const reward = compositeReward(v, ref, opts.weights);
     const bonus = v.stats.executions === 0 ? Infinity : c * Math.sqrt(Math.log(Math.max(1, N)) / v.stats.executions);
     return { version: v.version, reward, bonus, ucb: reward + bonus, executions: v.stats.executions };
   });
@@ -237,8 +245,8 @@ export function decideChampion(manifest: SkillManifest, opts: { minExecutions?:
   const chalV = manifest.versions[chal]!;
   if (chalV.stats.executions < minExec) return { manifest, action: 'keep-testing', reason: `challenger has ${chalV.stats.executions}/${minExec} executions` };
 
-  const norm = rewardNorm([champV, chalV]);
-  const cr = compositeReward(chalV, norm, opts.weights), pr = compositeReward(champV, norm, opts.weights);
+  const ref = championRef(champV);
+  const cr = compositeReward(chalV, ref, opts.weights), pr = compositeReward(champV, ref, opts.weights);
   if (cr >= pr + margin) {
     const m: SkillManifest = { ...manifest, activeVersion: chal, challengerVersion: undefined };
     return { manifest: m, action: 'promote', reason: `challenger ${(cr * 100).toFixed(0)}% beat champion ${(pr * 100).toFixed(0)}% by ≥${margin * 100}%` };

diff --git a/src/skills/learning/versioned-store.ts b/src/skills/learning/versioned-store.ts
@@ -65,6 +65,22 @@ export async function addChallenger(skillDir: string, skillId: string, body: str
   return updatedManifest;
 }
 
+/** Roll a skill's champion back to an earlier version: set it active, drop any challenger,
+ *  and un-retire it if it had lost a past A/B. Returns false if the version doesn't exist. */
+export async function rollbackToVersion(skillDir: string, version: string): Promise<boolean> {
+  const m = await readManifest(skillDir);
+  if (!m || !m.versions[version]) return false; // no manifest, or unknown version → nothing written
+  const v = { ...m.versions[version]!, retired: false };
+  const updated: SkillManifest = {
+    ...m,
+    activeVersion: version,
+    challengerVersion: undefined, // a rollback ends the running A/B: drop ANY challenger, not only the target
+    versions: { ...m.versions, [version]: v },
+  };
+  await writeManifest(skillDir, updated);
+  return true;
+}
+
 /** Record one execution outcome (success + tokens + duration) for the routed version, then
  *  try to converge the A/B test on the composite reward. */
 export async function recordOutcomeAndConverge(

diff --git a/test/skill-versioning-ucb.test.ts b/test/skill-versioning-ucb.test.ts
@@ -1,7 +1,7 @@
 import { describe, it, expect } from 'vitest';
 import {
   initManifest, createNextVersion, recordVersionExecution, routeSkillVersion, decideChampion,
-  compositeReward, ucbScores, type SkillManifest, type VersionDetail,
+  compositeReward, championRef, ucbScores, type SkillManifest, type VersionDetail,
 } from '../src/skills/learning/skill-versioning.js';
 
 const challenger = () => createNextVersion(initManifest('s', 'machine', 50, '').manifest, 'machine').updatedManifest;
@@ -42,22 +42,28 @@ describe('#1 ucbExplorationFactor — tunes explore vs exploit', () => {
   });
 });
 
-describe('#2 composite reward — success + token + time efficiency', () => {
-  it('between two EQUALLY-successful versions, the cheaper + faster one scores higher', () => {
+describe('#2 composite reward — efficiency normalized RELATIVE TO THE CHAMPION', () => {
+  it('between two EQUALLY-successful versions, the cheaper + faster challenger beats the champion', () => {
     let m = challenger();
     m = feed(m, 'v1', ten(true, 1000, 2000));  // champion: 100% but expensive/slow
     m = feed(m, 'v2', ten(true, 200, 400));    // challenger: 100% but cheap/fast
-    const champReward = compositeReward(m.versions.v1!, { maxTokensPerExec: 1000, maxMsPerExec: 2000 });
-    const chalReward = compositeReward(m.versions.v2!, { maxTokensPerExec: 1000, maxMsPerExec: 2000 });
-    expect(chalReward).toBeGreaterThan(champReward);
-    // and decideChampion promotes the more efficient one
-    expect(decideChampion(m, { minExecutions: 8 }).action).toBe('promote');
+    const ref = championRef(m.versions.v1!);   // normalize vs the champion
+    expect(compositeReward(m.versions.v1!, ref)).toBeCloseTo(0.85, 2); // champion → 0.5 efficiency baseline
+    expect(compositeReward(m.versions.v2!, ref)).toBeGreaterThan(compositeReward(m.versions.v1!, ref));
+    expect(decideChampion(m, { minExecutions: 8 }).action).toBe('promote'); // cheaper wins
+  });
+  it('a challenger TWICE the champion cost is penalized (efficiency → 0)', () => {
+    let m = challenger();
+    m = feed(m, 'v1', ten(true, 500, 1000));
+    m = feed(m, 'v2', ten(true, 1000, 2000)); // 2× the champion's cost, same success
+    const ref = championRef(m.versions.v1!);
+    expect(compositeReward(m.versions.v2!, ref)).toBeLessThan(compositeReward(m.versions.v1!, ref));
   });
   it('success still dominates — a cheap FAILURE never beats an expensive success', () => {
-    const norm = { maxTokensPerExec: 1000, maxMsPerExec: 2000 };
     const good: VersionDetail = { version: 'a', createdAt: '', author: 'machine', confidence: 50, stats: { executions: 10, successes: 9, totalTokensUsed: 10000, totalDurationMs: 20000 } };
     const cheapFail: VersionDetail = { version: 'b', createdAt: '', author: 'machine', confidence: 50, stats: { executions: 10, successes: 2, totalTokensUsed: 100, totalDurationMs: 100 } };
-    expect(compositeReward(good, norm)).toBeGreaterThan(compositeReward(cheapFail, norm));
+    const ref = championRef(good); // champion is the good one
+    expect(compositeReward(good, ref)).toBeGreaterThan(compositeReward(cheapFail, ref));
   });
 });