Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,35 @@ qodex skill snapshots # rollback points; qodex skill restore <archive

> Every successful task can also be exported as a **ShareGPT JSONL** corpus (`flywheel.datasetExport: true` → `~/.qodex/dataset/`) — a ready-to-use dataset for a future zero-cost local fine-tune. Strictly local; nothing is uploaded.

### Skill versioning & A/B testing (UCB1)

A skill keeps its whole history in **one flat directory** — `manifest.json` + `SKILL.v1.md`, `SKILL.v2.md`, … — no symlinks, identical on every OS. When a new candidate is captured for an existing skill it becomes a **challenger** to the stable **champion**, and QodeX routes traffic between them with the **UCB1 adaptive bandit** instead of a fixed split: it explores the challenger enough to get signal, then favours whichever has the higher score — and a challenger that turns out worse has its traffic driven to **zero automatically**.

The score is a **composite reward**, not just win-rate: *success* carries most of the weight (0.7 in the configuration below), while *token-* and *time-efficiency* (each normalized **relative to the champion**, 0.15 apiece) make up the rest — so between two equally-correct versions, the **cheaper, faster** one wins.

```yaml
learning:
  versioning:
    strategy: ucb1                # or 'champion-only' to freeze a sensitive skill (UCB off)
    ucbExplorationFactor: 1.41    # √2 — higher explores challengers more
    minChallengerTrials: 5        # force a challenger ≥5 runs before judging it
    rewardWeights: { success: 0.7, token: 0.15, time: 0.15 }
```

```text
$ qodex skill versions git-commit-expert
Skill "git-commit-expert" · strategy: ucb1 · routed this turn → v2

v1 [human] ★ champion
success: 88% over 40 · tokens: 60000 · 1900ms/run · confidence: 75
UCB: reward 0.838 + bonus 0.214 = 1.052
v2 [machine] ⚡ challenger
success: 92% over 12 · tokens: 41000 · 1300ms/run · confidence: 60
UCB: reward 0.921 + bonus 0.391 = 1.312 ← higher → gets this turn

$ qodex skill rollback git-commit-expert v1 # snap the champion back to v1 anytime
```

## Install

**Prerequisites:** **Node 20+** (Node 22 LTS recommended) and **Git**. `dist/` is built locally (not committed), so the `npm run build` step is **required** on every platform. The build links two commands — `qodex` and the short alias `qx`.
Expand Down
19 changes: 19 additions & 0 deletions src/cli/skill-command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,25 @@ export function buildSkillCommand(): Command {
}
});

// `skill rollback <name> <version>` — re-point a versioned skill's champion at an
// earlier version and refresh the in-memory skill registry on success.
// NOTE(review): the help text promises that any challenger is dropped — confirm this
// matches what rollbackToVersion actually does with a challenger other than <version>.
cmd
  .command('rollback <name> <version>')
  .description('Roll a versioned skill\'s champion back to an earlier version (e.g. v1) — drops any challenger')
  .action(async (name: string, version: string) => {
    // Both modules are imported dynamically, i.e. only when this subcommand runs.
    const { loadSkillByName } = await import('../skills/loader.js');
    const { rollbackToVersion } = await import('../skills/learning/versioned-store.js');

    const skill = await loadSkillByName(name, process.cwd());
    if (!skill) {
      console.error(`✗ no skill named "${name}"`);
      process.exit(1);
    }

    // Accept both "v1" and a bare "1" on the command line.
    let target = version;
    if (!target.startsWith('v')) {
      target = `v${target}`;
    }

    const rolledBack = await rollbackToVersion(skill.dir, target);
    if (!rolledBack) {
      console.error(`✗ "${name}" has no version ${target} (or isn't versioned). Run \`qodex skill versions ${name}\`.`);
      process.exit(1);
    } else {
      console.log(`✓ "${name}" rolled back — champion is now ${target}.`);
      await refreshSkillRegistry();
    }
  });

cmd
.command('lessons')
.description('Show "learned cautions" mined from your RECURRING tool failures (failure-driven learning)')
Expand Down
46 changes: 27 additions & 19 deletions src/skills/learning/skill-versioning.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,28 +120,36 @@ export function createNextVersion(
const clamp01 = (x: number) => Math.max(0, Math.min(1, x));
const perExec = (total: number, exec: number) => (exec ? total / exec : 0);

interface RewardNorm { maxTokensPerExec: number; maxMsPerExec: number }
function rewardNorm(arms: VersionDetail[]): RewardNorm {
let maxTokensPerExec = 0, maxMsPerExec = 0;
for (const v of arms) {
maxTokensPerExec = Math.max(maxTokensPerExec, perExec(v.stats.totalTokensUsed, v.stats.executions));
maxMsPerExec = Math.max(maxMsPerExec, perExec(v.stats.totalDurationMs ?? 0, v.stats.executions));
}
return { maxTokensPerExec, maxMsPerExec };
/**
 * The CHAMPION's per-execution cost/latency — the reference everything is normalized
 * against, so efficiency means "vs the stable version".
 */
export interface RewardRef {
  /** Champion's total tokens divided by its executions (0 when it has none). */
  champTokensPerExec: number;
  /** Champion's total duration in ms divided by its executions (0 when it has none). */
  champMsPerExec: number;
}

/**
 * Build the normalization reference from the champion's accumulated stats.
 *
 * @param champion The champion's version record. `undefined` is tolerated because callers
 *   look the champion up by key with a non-null assertion; a manifest whose active version
 *   has no matching entry would otherwise throw here.
 * @returns Per-execution tokens and milliseconds. Both are 0 when the champion is missing
 *   or has no executions; `efficiency` treats a non-positive reference as "no scale"
 *   and scores it neutrally.
 */
export function championRef(champion: VersionDetail | undefined): RewardRef {
  const stats = champion?.stats;
  if (!stats) return { champTokensPerExec: 0, champMsPerExec: 0 };
  return {
    champTokensPerExec: perExec(stats.totalTokensUsed, stats.executions),
    // totalDurationMs may be absent on a record; count it as 0 ms.
    champMsPerExec: perExec(stats.totalDurationMs ?? 0, stats.executions),
  };
}

/**
 * Efficiency in [0,1], normalized against the champion's per-execution cost.
 *
 * - at the champion's cost → 0.5 (baseline)
 * - free (cost 0)          → 1.0
 * - twice the champion's cost or more → 0.0 (clamped)
 *
 * Returns the neutral 0.5 when there is no usable scale: the champion reference is zero,
 * negative or NaN, or the version's own cost is NaN. Without the NaN guard the value would
 * pass straight through the clamp (Math.max/Math.min return NaN for a NaN argument) and
 * turn the whole composite reward into NaN.
 *
 * @param vPerExec     Per-execution cost (tokens or ms) of the version being scored.
 * @param champPerExec Per-execution cost of the champion, in the same unit.
 */
function efficiency(vPerExec: number, champPerExec: number): number {
  // `!(x > 0)` is true for zero, negatives AND NaN, unlike `x <= 0`.
  if (!(champPerExec > 0) || Number.isNaN(vPerExec)) return 0.5;
  return clamp01(1 - 0.5 * (vPerExec / champPerExec));
}

/**
* COMPOSITE reward in [0,1]: success rate dominates, with token- and time-EFFICIENCY
* nudges (cheaper / faster relative to the other arm scores higher). Efficiency terms are
* neutral (0.5) when there's no scale to normalize against. PURE.
* COMPOSITE reward in [0,1]: success rate dominates, with token- and time-EFFICIENCY nudges
* measured RELATIVE TO THE CHAMPION (the stable version is the baseline a challenger must
* beat). A version cheaper/faster than the champion scores above the 0.5 efficiency
* baseline; one twice as costly scores 0. PURE.
*/
export function compositeReward(v: VersionDetail, norm: RewardNorm, weights: RewardWeights = DEFAULT_WEIGHTS): number {
export function compositeReward(v: VersionDetail, ref: RewardRef, weights: RewardWeights = DEFAULT_WEIGHTS): number {
if (v.stats.executions === 0) return 0;
const successRate = v.stats.successes / v.stats.executions;
const tokScore = norm.maxTokensPerExec > 0 ? 1 - perExec(v.stats.totalTokensUsed, v.stats.executions) / norm.maxTokensPerExec : 0.5;
const timeScore = norm.maxMsPerExec > 0 ? 1 - perExec(v.stats.totalDurationMs ?? 0, v.stats.executions) / norm.maxMsPerExec : 0.5;
const tokScore = efficiency(perExec(v.stats.totalTokensUsed, v.stats.executions), ref.champTokensPerExec);
const timeScore = efficiency(perExec(v.stats.totalDurationMs ?? 0, v.stats.executions), ref.champMsPerExec);
const w = weights, denom = w.success + w.token + w.time || 1;
return (w.success * successRate + w.token * clamp01(tokScore) + w.time * clamp01(timeScore)) / denom;
return (w.success * successRate + w.token * tokScore + w.time * timeScore) / denom;
}

export interface UcbScore { version: string; reward: number; bonus: number; ucb: number; executions: number }
Expand All @@ -152,10 +160,10 @@ export function ucbScores(manifest: SkillManifest, opts: RouteOptions = {}): Ucb
const c = opts.explorationFactor ?? Math.sqrt(2);
const arms = [manifest.activeVersion, manifest.challengerVersion]
.filter((x): x is string => !!x).map(v => manifest.versions[v]).filter((v): v is VersionDetail => !!v);
const norm = rewardNorm(arms);
const ref = championRef(manifest.versions[manifest.activeVersion]!);
const N = arms.reduce((s, v) => s + v.stats.executions, 0);
return arms.map(v => {
const reward = compositeReward(v, norm, opts.weights);
const reward = compositeReward(v, ref, opts.weights);
const bonus = v.stats.executions === 0 ? Infinity : c * Math.sqrt(Math.log(Math.max(1, N)) / v.stats.executions);
return { version: v.version, reward, bonus, ucb: reward + bonus, executions: v.stats.executions };
});
Expand Down Expand Up @@ -237,8 +245,8 @@ export function decideChampion(manifest: SkillManifest, opts: { minExecutions?:
const chalV = manifest.versions[chal]!;
if (chalV.stats.executions < minExec) return { manifest, action: 'keep-testing', reason: `challenger has ${chalV.stats.executions}/${minExec} executions` };

const norm = rewardNorm([champV, chalV]);
const cr = compositeReward(chalV, norm, opts.weights), pr = compositeReward(champV, norm, opts.weights);
const ref = championRef(champV);
const cr = compositeReward(chalV, ref, opts.weights), pr = compositeReward(champV, ref, opts.weights);
if (cr >= pr + margin) {
const m: SkillManifest = { ...manifest, activeVersion: chal, challengerVersion: undefined };
return { manifest: m, action: 'promote', reason: `challenger ${(cr * 100).toFixed(0)}% beat champion ${(pr * 100).toFixed(0)}% by ≥${margin * 100}%` };
Expand Down
16 changes: 16 additions & 0 deletions src/skills/learning/versioned-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ export async function addChallenger(skillDir: string, skillId: string, body: str
return updatedManifest;
}

/**
 * Roll a skill's champion back to an earlier version.
 *
 * Reads the manifest in `skillDir`, makes `version` the active version, clears its
 * `retired` flag (so a version that lost a past A/B can serve again) and writes the
 * manifest back. The challenger slot is cleared only when the target version itself was
 * the challenger; a challenger that is a different version is left in place.
 *
 * NOTE(review): the `skill rollback` help text says rollback "drops any challenger" —
 * confirm whether a challenger other than `version` should be cleared here as well.
 *
 * @param skillDir Directory holding the skill's manifest.
 * @param version  Version key to promote, e.g. "v1".
 * @returns `false` when there is no manifest or `version` is not one of its own version
 *   keys (nothing is written); `true` once the updated manifest has been written.
 */
export async function rollbackToVersion(skillDir: string, version: string): Promise<boolean> {
  const manifest = await readManifest(skillDir);
  if (!manifest) return false;

  // Own-property check: on a plain-object map a bare `versions[version]` lookup also
  // "finds" inherited members such as "valueOf" or "constructor", which would be written
  // back as the active version and corrupt the manifest.
  if (!Object.prototype.hasOwnProperty.call(manifest.versions, version)) return false;
  const target = manifest.versions[version];
  if (!target) return false;

  const updated: SkillManifest = {
    ...manifest,
    activeVersion: version,
    // A version cannot be champion and challenger at once.
    challengerVersion: manifest.challengerVersion === version ? undefined : manifest.challengerVersion,
    versions: { ...manifest.versions, [version]: { ...target, retired: false } },
  };
  await writeManifest(skillDir, updated);
  return true;
}

/** Record one execution outcome (success + tokens + duration) for the routed version, then
* try to converge the A/B test on the composite reward. */
export async function recordOutcomeAndConverge(
Expand Down
26 changes: 16 additions & 10 deletions test/skill-versioning-ucb.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { describe, it, expect } from 'vitest';
import {
initManifest, createNextVersion, recordVersionExecution, routeSkillVersion, decideChampion,
compositeReward, ucbScores, type SkillManifest, type VersionDetail,
compositeReward, championRef, ucbScores, type SkillManifest, type VersionDetail,
} from '../src/skills/learning/skill-versioning.js';

const challenger = () => createNextVersion(initManifest('s', 'machine', 50, '').manifest, 'machine').updatedManifest;
Expand Down Expand Up @@ -42,22 +42,28 @@ describe('#1 ucbExplorationFactor — tunes explore vs exploit', () => {
});
});

describe('#2 composite reward — success + token + time efficiency', () => {
it('between two EQUALLY-successful versions, the cheaper + faster one scores higher', () => {
describe('#2 composite reward — efficiency normalized RELATIVE TO THE CHAMPION', () => {
it('between two EQUALLY-successful versions, the cheaper + faster challenger beats the champion', () => {
let m = challenger();
m = feed(m, 'v1', ten(true, 1000, 2000)); // champion: 100% but expensive/slow
m = feed(m, 'v2', ten(true, 200, 400)); // challenger: 100% but cheap/fast
const champReward = compositeReward(m.versions.v1!, { maxTokensPerExec: 1000, maxMsPerExec: 2000 });
const chalReward = compositeReward(m.versions.v2!, { maxTokensPerExec: 1000, maxMsPerExec: 2000 });
expect(chalReward).toBeGreaterThan(champReward);
// and decideChampion promotes the more efficient one
expect(decideChampion(m, { minExecutions: 8 }).action).toBe('promote');
const ref = championRef(m.versions.v1!); // normalize vs the champion
expect(compositeReward(m.versions.v1!, ref)).toBeCloseTo(0.85, 2); // champion → 0.5 efficiency baseline
expect(compositeReward(m.versions.v2!, ref)).toBeGreaterThan(compositeReward(m.versions.v1!, ref));
expect(decideChampion(m, { minExecutions: 8 }).action).toBe('promote'); // cheaper wins
});
it('a challenger TWICE the champion cost is penalized (efficiency → 0)', () => {
let m = challenger();
m = feed(m, 'v1', ten(true, 500, 1000));
m = feed(m, 'v2', ten(true, 1000, 2000)); // 2× the champion's cost, same success
const ref = championRef(m.versions.v1!);
expect(compositeReward(m.versions.v2!, ref)).toBeLessThan(compositeReward(m.versions.v1!, ref));
});
it('success still dominates — a cheap FAILURE never beats an expensive success', () => {
const norm = { maxTokensPerExec: 1000, maxMsPerExec: 2000 };
const good: VersionDetail = { version: 'a', createdAt: '', author: 'machine', confidence: 50, stats: { executions: 10, successes: 9, totalTokensUsed: 10000, totalDurationMs: 20000 } };
const cheapFail: VersionDetail = { version: 'b', createdAt: '', author: 'machine', confidence: 50, stats: { executions: 10, successes: 2, totalTokensUsed: 100, totalDurationMs: 100 } };
expect(compositeReward(good, norm)).toBeGreaterThan(compositeReward(cheapFail, norm));
const ref = championRef(good); // champion is the good one
expect(compositeReward(good, ref)).toBeGreaterThan(compositeReward(cheapFail, ref));
});
});

Expand Down