Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions src/cli/skill-command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,18 +196,29 @@ export function buildSkillCommand(): Command {
.action(async (name: string) => {
const { loadSkillByName } = await import('../skills/loader.js');
const { readManifest } = await import('../skills/learning/versioned-store.js');
const { routeSkillVersion } = await import('../skills/learning/skill-versioning.js');
const { routeSkillVersion, ucbScores } = await import('../skills/learning/skill-versioning.js');
const { loadConfig } = await import('../config/loader.js');
const spec = await loadSkillByName(name, process.cwd());
if (!spec) { console.error(`✗ no skill named "${name}"`); process.exit(1); }
const m = await readManifest(spec.dir);
if (!m) { console.log(`"${name}" is a single-version (legacy) skill — no version history yet.`); return; }
const routed = routeSkillVersion(m);
const vcfg = (await loadConfig(process.cwd()) as any).learning?.versioning ?? {};
const opts = {
explorationFactor: vcfg.ucbExplorationFactor,
minChallengerTrials: vcfg.minChallengerTrials,
weights: vcfg.rewardWeights ? { success: vcfg.rewardWeights.success ?? 0.7, token: vcfg.rewardWeights.token ?? 0.15, time: vcfg.rewardWeights.time ?? 0.15 } : undefined,
};
const routed = routeSkillVersion(m, opts);
const scores = new Map(ucbScores(m, opts).map(s => [s.version, s]));
console.log(`Skill "${m.skillId}" · strategy: ${m.routingStrategy} · routed this turn → ${routed}\n`);
for (const v of Object.values(m.versions).sort((a, b) => a.version.localeCompare(b.version, undefined, { numeric: true }))) {
const tag = v.version === m.activeVersion ? '★ champion' : v.version === m.challengerVersion ? '⚡ challenger' : v.retired ? '✗ retired' : '';
const rate = v.stats.executions ? `${Math.round((v.stats.successes / v.stats.executions) * 100)}% over ${v.stats.executions}` : 'untested';
const avgMs = v.stats.executions && v.stats.totalDurationMs ? ` · ${Math.round(v.stats.totalDurationMs / v.stats.executions)}ms/run` : '';
console.log(` ${v.version} [${v.author}] ${tag}`);
console.log(` success: ${rate} · tokens: ${v.stats.totalTokensUsed} · confidence: ${v.confidence}`);
console.log(` success: ${rate} · tokens: ${v.stats.totalTokensUsed}${avgMs} · confidence: ${v.confidence}`);
const s = scores.get(v.version);
if (s) console.log(` UCB: reward ${s.reward.toFixed(3)} + bonus ${s.bonus === Infinity ? '∞' : s.bonus.toFixed(3)} = ${s.ucb === Infinity ? '∞' : s.ucb.toFixed(3)}`);
}
});

Expand Down
14 changes: 14 additions & 0 deletions src/config/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,20 @@ export interface QodexConfig {
* judge is unsure (grey-zone average or high cross-dimension variance). Must differ from
* defaults.model and judgeModel. Unset ⇒ no escalation (Tier-1 verdict stands). */
judgeModelTier2?: string;
/** Skill versioning + UCB1 adaptive-bandit routing knobs. */
versioning?: {
/** UCB1 exploration factor `c` — higher explores challengers more. Default √2 (~1.41). */
ucbExplorationFactor?: number;
/** Force-route a challenger at least this many times before UCB1 can starve it, so a
* decision is never made on too little signal. Default 5. */
minChallengerTrials?: number;
/** Composite-reward weights (success + token-efficiency + time-efficiency). Defaults
* { success: 0.7, token: 0.15, time: 0.15 }. */
rewardWeights?: { success?: number; token?: number; time?: number };
/** Routing strategy when a manifest doesn't pin one: 'ucb1' (default), 'static', or
* 'champion-only' (UCB OFF — always the stable version, for sensitive skills). */
strategy?: 'ucb1' | 'static' | 'champion-only';
};
/** When auto-promoting, require at least this confidence (0–100). Default 0 (the
* judge's pass is sufficient); raise it to gate low-confidence captures. */
autoPromoteMinConfidence?: number;
Expand Down
119 changes: 93 additions & 26 deletions src/skills/learning/skill-versioning.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,29 @@ export interface VersionStats {
executions: number;
successes: number;
totalTokensUsed: number;
/** Total wall-clock across executions (ms) — fuels the time term of the composite reward.
* Optional for backward-compat with manifests written before reward weighting. */
totalDurationMs?: number;
}

/** Relative importance of each term that makes up the composite reward. */
export interface RewardWeights {
  /** Share given to the success rate — the dominant term. */
  success: number;
  /** Share given to token efficiency; spending fewer tokens scores better. */
  token: number;
  /** Share given to time efficiency; finishing faster scores better. */
  time: number;
}

/** Weights used when none are supplied: success 0.7, token 0.15, time 0.15. */
export const DEFAULT_WEIGHTS: RewardWeights = {
  success: 0.7,
  token: 0.15,
  time: 0.15,
};

/** Optional tuning knobs for version routing; every field may be omitted. */
export interface RouteOptions {
  /** The UCB1 exploration constant `c`; larger values explore more. Default √2. */
  explorationFactor?: number;
  /** Minimum number of trials a challenger must reach. Until then it is force-routed,
   *  so UCB1 cannot starve it and no decision rests on too little signal. Default 5. */
  minChallengerTrials?: number;
  /** Weights for the composite reward. */
  weights?: RewardWeights;
}

export interface VersionDetail {
Expand All @@ -40,13 +63,15 @@ export interface SkillManifest {
skillId: string;
activeVersion: string; // the stable champion
challengerVersion?: string; // the version under test
routingStrategy: 'static' | 'ucb1';
/** 'ucb1' (adaptive bandit), 'static' (~25% challenger), or 'champion-only' (UCB OFF —
* always the stable version; for sensitive skills you don't want experimented on). */
routingStrategy: 'static' | 'ucb1' | 'champion-only';
versions: Record<string, VersionDetail>;
}

/** A fresh manifest for a brand-new skill (its first version is v1, the champion). */
export function initManifest(skillId: string, author: 'human' | 'machine', confidence = 50, nowIso = ''): { manifest: SkillManifest; fileName: string } {
const v: VersionDetail = { version: 'v1', createdAt: nowIso, author, confidence, stats: { executions: 0, successes: 0, totalTokensUsed: 0 } };
const v: VersionDetail = { version: 'v1', createdAt: nowIso, author, confidence, stats: { executions: 0, successes: 0, totalTokensUsed: 0, totalDurationMs: 0 } };
return {
manifest: { skillId, activeVersion: 'v1', routingStrategy: 'ucb1', versions: { v1: v } },
fileName: 'SKILL.v1.md',
Expand Down Expand Up @@ -92,31 +117,72 @@ export function createNextVersion(
};
}

/** UCB1 score for a version given the total trials N. */
function ucb1(v: VersionDetail, N: number, c: number): number {
if (v.stats.executions === 0) return Infinity; // always try an unsampled arm first
const mean = v.stats.successes / v.stats.executions;
return mean + c * Math.sqrt(Math.log(Math.max(1, N)) / v.stats.executions);
/** Confine `x` to the closed interval [0, 1]. */
function clamp01(x: number): number {
  return Math.min(1, Math.max(0, x));
}

/** Average of `total` per execution; 0 when `exec` is falsy (nothing has run yet). */
function perExec(total: number, exec: number): number {
  if (!exec) return 0;
  return total / exec;
}

/** Normalization scale: the largest per-execution token and time cost among a set of arms. */
interface RewardNorm {
  maxTokensPerExec: number;
  maxMsPerExec: number;
}

/**
 * Fold over `arms` and keep the highest average tokens-per-run and ms-per-run.
 * Both maxima start at 0, so an empty list yields a zero scale. A missing
 * `totalDurationMs` is read as 0.
 */
function rewardNorm(arms: VersionDetail[]): RewardNorm {
  return arms.reduce<RewardNorm>(
    (acc, { stats }) => ({
      maxTokensPerExec: Math.max(acc.maxTokensPerExec, perExec(stats.totalTokensUsed, stats.executions)),
      maxMsPerExec: Math.max(acc.maxMsPerExec, perExec(stats.totalDurationMs ?? 0, stats.executions)),
    }),
    { maxTokensPerExec: 0, maxMsPerExec: 0 },
  );
}

/**
 * Blend a version's success rate with its token- and time-efficiency into one score.
 *
 * Each efficiency term is `1 − (this arm's average cost ÷ the largest average cost in
 * `norm`)`, clamped to [0, 1], or a neutral 0.5 when that largest cost is not positive.
 * The weighted sum is divided by the sum of the weights (or by 1 if that sum is 0).
 * A version with no executions scores 0. PURE.
 *
 * NOTE(review): a missing `totalDurationMs` is read as 0 ms, which scores as maximally
 * time-efficient whenever `norm.maxMsPerExec` is positive — confirm this is the intended
 * treatment for manifests written before durations were recorded.
 *
 * @param v       The version whose stats are scored.
 * @param norm    Per-execution maxima used as the normalization scale.
 * @param weights Term weights; defaults to `DEFAULT_WEIGHTS`.
 * @returns The weighted, normalized reward.
 */
export function compositeReward(v: VersionDetail, norm: RewardNorm, weights: RewardWeights = DEFAULT_WEIGHTS): number {
  const { executions, successes, totalTokensUsed, totalDurationMs } = v.stats;
  if (executions === 0) return 0;

  // Cheaper than the most expensive arm → closer to 1; no usable scale → neutral 0.5.
  const efficiency = (total: number, scale: number): number =>
    scale > 0 ? 1 - perExec(total, executions) / scale : 0.5;

  const weighted =
    weights.success * (successes / executions) +
    weights.token * clamp01(efficiency(totalTokensUsed, norm.maxTokensPerExec)) +
    weights.time * clamp01(efficiency(totalDurationMs ?? 0, norm.maxMsPerExec));

  // `|| 1` guards against dividing by a zero weight sum.
  const weightSum = weights.success + weights.token + weights.time || 1;
  return weighted / weightSum;
}

/** One arm's UCB breakdown: composite reward, exploration bonus, their sum, and trial count. */
export interface UcbScore {
  version: string;
  reward: number;
  bonus: number;
  ucb: number;
  executions: number;
}

/**
 * Per-arm UCB breakdown (reward + exploration bonus) for the active and challenger
 * versions — a pure snapshot for debugging / `qodex skill versions` / analysis.
 *
 * Arms are the active version followed by the challenger, skipping ids that are unset
 * or absent from `manifest.versions`. If the challenger id equals the active id, the arm
 * is listed once, so its executions are not double-counted in the total `N`.
 *
 * @param manifest The skill manifest to score.
 * @param opts     Optional exploration factor (default √2) and reward weights.
 * @returns One entry per distinct arm; an unsampled arm has `bonus` and `ucb` of Infinity.
 */
export function ucbScores(manifest: SkillManifest, opts: RouteOptions = {}): UcbScore[] {
  const c = opts.explorationFactor ?? Math.sqrt(2);

  // Keep only set ids, and only the first occurrence of each: a manifest whose
  // challenger equals its champion must contribute a single arm.
  const ids = [manifest.activeVersion, manifest.challengerVersion]
    .filter((id, i, all): id is string => !!id && all.indexOf(id) === i);
  const arms = ids
    .map(id => manifest.versions[id])
    .filter((v): v is VersionDetail => !!v);

  const norm = rewardNorm(arms);
  const N = arms.reduce((sum, v) => sum + v.stats.executions, 0);

  return arms.map(v => {
    const reward = compositeReward(v, norm, opts.weights);
    // An unsampled arm gets an infinite bonus; otherwise c·sqrt(ln N / n).
    const bonus = v.stats.executions === 0
      ? Infinity
      : c * Math.sqrt(Math.log(Math.max(1, N)) / v.stats.executions);
    return { version: v.version, reward, bonus, ucb: reward + bonus, executions: v.stats.executions };
  });
}

/**
* Choose which version to inject this turn. No challenger → the champion. UCB1 balances
* exploring the challenger against exploiting the better arm; an unsampled version is
* tried first (Infinity), and a challenger whose success rate collapses loses traffic.
* Choose which version to inject this turn:
* - no challenger / champion-only strategy → the champion (UCB OFF; sensitive skills),
* - static → ~25% challenger,
* - ucb1 → force-explore the challenger until it clears `minChallengerTrials` (so we never
* judge on too little signal), then pick the higher UCB (composite reward + bonus);
* ties go to the champion (don't disturb the stable version).
*/
export function routeSkillVersion(manifest: SkillManifest, c: number = Math.sqrt(2)): string {
export function routeSkillVersion(manifest: SkillManifest, opts: RouteOptions = {}): string {
const champ = manifest.activeVersion;
const chal = manifest.challengerVersion;
if (!chal || !manifest.versions[chal] || chal === champ) return champ;
if (manifest.routingStrategy === 'champion-only') return champ;
if (manifest.routingStrategy === 'static') return deterministicStatic(manifest) ? chal : champ;

if (manifest.routingStrategy === 'static') {
return deterministicStatic(manifest) ? chal : champ;
}
const v1 = manifest.versions[champ]!;
const v2 = manifest.versions[chal]!;
const N = v1.stats.executions + v2.stats.executions;
// Tie → champion (conservative: don't disturb the stable version on a tie).
return ucb1(v2, N, c) > ucb1(v1, N, c) ? chal : champ;
const minTrials = opts.minChallengerTrials ?? 5;
if (manifest.versions[chal]!.stats.executions < minTrials) return chal; // exploration floor

const scores = ucbScores(manifest, opts);
const champU = scores.find(s => s.version === champ)?.ucb ?? -Infinity;
const chalU = scores.find(s => s.version === chal)?.ucb ?? -Infinity;
return chalU > champU ? chal : champ;
}

/** Deterministic ~25% challenger pick for the 'static' strategy (no Math.random — keeps the
Expand All @@ -131,7 +197,7 @@ function deterministicStatic(manifest: SkillManifest): boolean {
export function recordVersionExecution(
manifest: SkillManifest,
version: string,
outcome: { success: boolean; tokens?: number },
outcome: { success: boolean; tokens?: number; durationMs?: number },
): SkillManifest {
const v = manifest.versions[version];
if (!v) return manifest;
Expand All @@ -141,6 +207,7 @@ export function recordVersionExecution(
executions: v.stats.executions + 1,
successes: v.stats.successes + (outcome.success ? 1 : 0),
totalTokensUsed: v.stats.totalTokensUsed + (outcome.tokens ?? 0),
totalDurationMs: (v.stats.totalDurationMs ?? 0) + (outcome.durationMs ?? 0),
},
};
return { ...manifest, versions: { ...manifest.versions, [version]: updated } };
Expand All @@ -152,16 +219,15 @@ export interface ChampionDecision {
reason: string;
}

const rate = (v: VersionDetail) => (v.stats.executions ? v.stats.successes / v.stats.executions : 0);

/**
* Converge the A/B test. Once the challenger has enough samples:
* Converge the A/B test on the COMPOSITE reward (success + token + time), once the
* challenger has enough samples:
* - clearly BETTER than the champion (by `margin`) → PROMOTE it to active.
* - clearly WORSE → RETIRE it (drop the challenger, keep the champion).
* - clearly WORSE → RETIRE it (kept in history, marked retired).
* - otherwise keep testing. Below `minExecutions` we never decide (too little signal).
* PURE.
*/
export function decideChampion(manifest: SkillManifest, opts: { minExecutions?: number; margin?: number } = {}): ChampionDecision {
export function decideChampion(manifest: SkillManifest, opts: { minExecutions?: number; margin?: number; weights?: RewardWeights } = {}): ChampionDecision {
const minExec = opts.minExecutions ?? 8;
const margin = opts.margin ?? 0.1;
const chal = manifest.challengerVersion;
Expand All @@ -171,7 +237,8 @@ export function decideChampion(manifest: SkillManifest, opts: { minExecutions?:
const chalV = manifest.versions[chal]!;
if (chalV.stats.executions < minExec) return { manifest, action: 'keep-testing', reason: `challenger has ${chalV.stats.executions}/${minExec} executions` };

const cr = rate(chalV), pr = rate(champV);
const norm = rewardNorm([champV, chalV]);
const cr = compositeReward(chalV, norm, opts.weights), pr = compositeReward(champV, norm, opts.weights);
if (cr >= pr + margin) {
const m: SkillManifest = { ...manifest, activeVersion: chal, challengerVersion: undefined };
return { manifest: m, action: 'promote', reason: `challenger ${(cr * 100).toFixed(0)}% beat champion ${(pr * 100).toFixed(0)}% by ≥${margin * 100}%` };
Expand Down
21 changes: 14 additions & 7 deletions src/skills/learning/versioned-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import { promises as fs } from 'fs';
import * as path from 'path';
import {
type SkillManifest, type ChampionDecision,
type SkillManifest, type ChampionDecision, type RouteOptions,
initManifest, createNextVersion, routeSkillVersion, recordVersionExecution, decideChampion, versionFileName,
} from './skill-versioning.js';

Expand All @@ -29,11 +29,12 @@ async function writeManifest(skillDir: string, m: SkillManifest): Promise<void>
await fs.rename(tmp, manifestPath(skillDir)); // atomic
}

/** The skill body to inject this turn + which version it is. Falls back to legacy SKILL.md. */
export async function routedSkillBody(skillDir: string): Promise<{ version: string; body: string } | null> {
/** The skill body to inject this turn + which version it is. Falls back to legacy SKILL.md.
* `opts` carries the configurable UCB knobs (exploration factor, min trials, weights). */
export async function routedSkillBody(skillDir: string, opts: RouteOptions = {}): Promise<{ version: string; body: string } | null> {
const m = await readManifest(skillDir);
if (m) {
const version = routeSkillVersion(m);
const version = routeSkillVersion(m, opts);
try { return { version, body: await fs.readFile(path.join(skillDir, versionFileName(version)), 'utf-8') }; }
catch { /* fall through to legacy */ }
}
Expand Down Expand Up @@ -64,12 +65,18 @@ export async function addChallenger(skillDir: string, skillId: string, body: str
return updatedManifest;
}

/** Record one execution outcome for the routed version, then try to converge the A/B test. */
export async function recordOutcomeAndConverge(skillDir: string, version: string, outcome: { success: boolean; tokens?: number }): Promise<ChampionDecision | null> {
/** Record one execution outcome (success + tokens + duration) for the routed version, then
* try to converge the A/B test on the composite reward. */
export async function recordOutcomeAndConverge(
skillDir: string,
version: string,
outcome: { success: boolean; tokens?: number; durationMs?: number },
opts: { minExecutions?: number; margin?: number } = {},
): Promise<ChampionDecision | null> {
const m = await readManifest(skillDir);
if (!m) return null;
const afterStats = recordVersionExecution(m, version, outcome);
const decision = decideChampion(afterStats);
const decision = decideChampion(afterStats, opts);
await writeManifest(skillDir, decision.manifest);
return decision;
}
Loading