KodingDev · KodingDev · Jun 25, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
@@ -2,7 +2,7 @@
   "$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
   "name": "meridian",
   "description": "Research-first workflows, ruthless code review, orchestrator-led reasoning, and opaque subagent isolation for the entire development lifecycle.",
-  "version": "0.10.9",
+  "version": "0.11.0",
   "author": {
     "name": "KodingDev"
   },

diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json
@@ -2,7 +2,7 @@
   "$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
   "name": "meridian",
   "description": "Research-first workflows, ruthless code review, orchestrator-led reasoning, and opaque subagent isolation for the entire development lifecycle.",
-  "version": "0.10.8",
+  "version": "0.11.0",
   "author": {
     "name": "KodingDev"
   },

diff --git a/.github/scripts/eval-summary.mjs b/.github/scripts/eval-summary.mjs
@@ -0,0 +1,42 @@
+#!/usr/bin/env node
+// Render a promptfoo result JSON as a Markdown table for the GitHub Actions job
+// summary (promptfoo has no markdown output format). Reads the path in argv[2],
+// writes the table to stdout. When the result file is missing or is not valid
+// JSON, writes a short notice to stdout instead and exits non-zero, so the
+// failure is readable in the summary rather than only as a stack trace.
+import { readFileSync } from "node:fs";
+
+const MAX_PROMPT_LENGTH = 80;
+
+// Keep a value on one table row: collapse whitespace (including newlines) and
+// escape pipes, which would otherwise end the cell early.
+const cell = (value) => String(value).replace(/\s+/g, " ").replace(/\|/g, "\\|");
+
+let result;
+try {
+  result = JSON.parse(readFileSync(process.argv[2], "utf8"));
+} catch (err) {
+  console.log(`## Routing eval — no results\n\nCould not load the result file: ${err.message}`);
+  process.exit(1);
+}
+
+// Rows may sit at results.results or directly at results; any other shape is
+// treated as "no rows" instead of failing on a non-iterable.
+const rows = [result?.results?.results, result?.results].find(Array.isArray) ?? [];
+
+let passed = 0;
+const lines = ["| | Prompt | Routed to | Expected |", "|---|---|---|---|"];
+for (const row of rows) {
+  // Truncate before escaping so the cut cannot split a "\|" escape pair.
+  const prompt = cell(String(row.vars?.prompt ?? "").slice(0, MAX_PROMPT_LENGTH));
+  const got = cell((row.response?.metadata?.skillCalls ?? []).map((s) => s.name).join(", ") || "(none)");
+  const firstAssert = row.testCase?.assert?.[0];
+  // Only a skill-used assertion names an expected skill; any other assertion
+  // type is shown as "(none)", and a row without assertions as "?".
+  const want = cell(firstAssert ? (firstAssert.type === "skill-used" ? firstAssert.value : "(none)") : "?");
+  if (row.success) passed++;
+  lines.push(`| ${row.success ? "✅" : "❌"} | ${prompt} | \`${got}\` | \`${want}\` |`);
+}
+
+console.log(`## Routing eval — ${passed}/${rows.length} passed\n`);
+console.log(lines.join("\n"));
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -63,4 +63,4 @@ jobs:
         with:
           node-version: 22
       - name: Test hooks
-        run: node --test test/meridian-hooks.test.mjs test/meridian-lib.test.mjs
+        run: node --test "test/*.test.mjs"
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
@@ -0,0 +1,39 @@
+name: Routing eval
+
+# On-demand only — never on push/PR. Needs the ANTHROPIC_API_KEY secret and makes
+# paid API calls, so it is not a merge gate.
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  eval:
+    name: Skill-routing eval
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+      - name: Install dev tooling
+        run: pnpm install --frozen-lockfile
+      - name: Run routing eval
+        id: eval
+        continue-on-error: true
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: pnpm exec promptfoo eval -c promptfooconfig.yaml --output eval-report.html eval-result.json
+      - name: Write the matrix to the job summary
+        if: always()
+        run: node .github/scripts/eval-summary.mjs eval-result.json >> "$GITHUB_STEP_SUMMARY"
+      - name: Upload the HTML report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: routing-eval-report
+          path: eval-report.html
+          if-no-files-found: warn
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 .DS_Store
 skills-workspace/
 node_modules/
+.env
+.promptfoo/
diff --git a/.plugin/plugin.json b/.plugin/plugin.json
@@ -1,4 +1,5 @@
 {
   "name": "meridian",
+  "version": "0.11.0",
   "hooks": "./hooks/hooks-copilot.json"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,42 @@
+# Changelog
+
+All notable changes to Meridian are recorded here. The format follows
+[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and the project adheres
+to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). History before
+0.11.0 lives in the git log.
+
+## [0.11.0] - 2026-06-25
+
+### Added
+
+- A `PreToolUse` guard (Claude) that blocks `git` commits carrying AI attribution
+  (`Co-Authored-By: Claude`, "Generated with Claude", `claude.ai/code`, or a
+  `Claude-Session` trailer) and prevents staging the gitignored `.meridian/`
+  working artifacts, turning two output-style principles into enforced gates.
+- Consistency guards in the test suite: the per-host manifest versions must agree,
+  every `meridian:<skill>`/`meridian:<agent>` reference must resolve to something
+  that exists, and each skill's frontmatter name must match its directory.
+- A skill-routing eval harness (`eval/`, promptfoo + the `anthropic:claude-agent-sdk`
+  provider) that checks prompts route to the correct skill against the real plugin on
+  Sonnet. On-demand dev tooling (`pnpm eval`); not part of the offline CI gates.
+
+### Changed
+
+- The Craft & Simplicity review lens judges comments by value rather than count,
+  explicitly flagging chain-of-thought narrated as comments, self-evident
+  restatement, and oversized comment blocks.
+- The README lists the `sketch` workflow and points to the composing `meridian`,
+  `triangulate`, and `auto` skills.
+- The test runner discovers `test/*.test.mjs` by glob, so new suites need no
+  package.json or CI edit.
+
+### Fixed
+
+- Aligned the Claude, Cursor, and Copilot manifest versions, which had drifted (0.10.9,
+  0.10.8, and unset) because manifest validation only inspects the Claude manifest.
+- Hooks exit cleanly when no plugin-root environment variable is set, wrap their
+  filesystem calls so an I/O error degrades to a no-op, and still match a failure
+  signal typed with an accidental double-space.
+- Post-compaction orientation re-injection is regression-tested, keeping the
+  routing table alive when context compaction drops it.
+- Removed stray template markup from the entry-point routing skill.
diff --git a/README.md b/README.md
@@ -24,6 +24,7 @@ Hard-won knowledge dies with the session. You debug something for two hours, nai
 |-------|------|
 | `research` | Verify APIs/libs against live docs before implementing |
 | `brainstorm` | Design exploration -> spec through conversation |
+| `sketch` | Lightweight spec for a small, well-scoped fix |
 | `execute` | Implement from spec with verification gates |
 | `delegate` | Dispatch subagents with clean context isolation |
 | `debug` | Root-cause investigation, no fixes without understanding |
@@ -32,6 +33,8 @@ Hard-won knowledge dies with the session. You debug something for two hours, nai
 | `commit` | Clean git commits, no AI attribution |
 | `document` | Human-readable docs from resolved work |
 
+Three more compose with these rather than standing alone: `meridian` (the routing reference, for when it's unclear which skill fits), `triangulate` (a verification lens that grounds specific-value claims against their source), and `auto` (a modifier that runs any task autonomously when you step away).
+
 ## What gets installed
 
 - A `Meridian` output style applied automatically while the plugin is enabled (overrides any `/output-style` selection while loaded). It carries the durable principles — three pillars, voice, commit-attribution override, the challenge protocol — directly in the system prompt rather than relying on per-turn reminders.

diff --git a/eval/README.md b/eval/README.md
@@ -0,0 +1,65 @@
+# Skill-routing eval
+
+Verifies that representative prompts route to the correct Meridian skill (or none),
+against the real plugin, on **Sonnet** (`claude-sonnet-4-6`) — the routing baseline:
+if Sonnet can't route a prompt, the design is moot. This is **on-demand dev tooling,
+not a CI gate** — it makes live, paid API calls (a full run is a few turns of the
+agent per scenario).
+
+## Prerequisites
+
+- Dev dependencies installed: `pnpm install`.
+- An Anthropic API key in a gitignored `.env` at the repo root:
+
+  ```
+  ANTHROPIC_API_KEY=sk-ant-...
+  ```
+
+  promptfoo loads `.env` automatically. `.env` is gitignored — never commit it.
+  Alternatively, set `apiKeyRequired: false` in `promptfooconfig.yaml` to run against
+  a local Claude Code login.
+
+## Run
+
+```
+pnpm eval        # run the corpus on Sonnet
+pnpm eval:view   # open the pass/fail matrix
+```
+
+## What it checks
+
+- `scenarios/positive.yaml` — one prompt per routable skill; asserts
+  `skill-used: meridian:<skill>`.
+- `scenarios/negative.yaml` — trivial prompts; asserts no skill fired.
+
+Self-contained prompts (where the intent is fully in the message) route reliably. A
+scenario whose correct route depends on context the prompt alone doesn't carry — a
+prior failed fix for a reroute, an existing spec for `execute` — is not a meaningful
+single-turn test; see [Known gap](#known-gap) below.
+
+## Adding a scenario
+
+Append to the matching file:
+
+```yaml
+- vars:
+    prompt: "<the user message>"
+  assert:
+    - type: skill-used
+      value: meridian:<skill> # or a javascript skillCalls.length===0 check for "none"
+  description: "<why this is the correct route>"
+```
+
+Every expected route must trace to a documented routing rule (the orientation table
+or a skill description), not intuition. When a real misroute surfaces, add it here.
+
+## Known gap
+
+The **failure-signal reroute** (a terse "still broken" routing to `debug`) is only
+meaningful _mid-flow_, after an actual failed fix — a cold first-message "still broken"
+has no prior failure to debug, so the model rightly declines. That needs prior
+conversation turns, which the `anthropic:claude-agent-sdk` provider models via session
+`resume`/`continue` rather than a declarative fixture, so it is deferred. The hook's
+_firing_ is already covered deterministically by `test/meridian-lib.test.mjs`
+(`isFailureSignal`) and `test/meridian-hooks.test.mjs`; what's deferred is the
+model-level test of whether the model obeys the injected reroute.
diff --git a/eval/scenarios/negative.yaml b/eval/scenarios/negative.yaml
@@ -0,0 +1,19 @@
+# Prompts that should trigger NO skill — the orientation "just do it" row.
+# skill-used has no negation form; assert skillCalls is empty via javascript.
+- vars:
+    prompt: "What does the parseConfig function do?"
+  assert:
+    - type: javascript
+      value: |
+        const skillCalls = context.providerResponse?.metadata?.skillCalls || [];
+        return skillCalls.length === 0;
+  description: "trivial question → just answer, no skill"
+
+- vars:
+    prompt: "Rename the getUser function to fetchUser everywhere it's used."
+  assert:
+    - type: javascript
+      value: |
+        const skillCalls = context.providerResponse?.metadata?.skillCalls || [];
+        return skillCalls.length === 0;
+  description: "trivial rename → just do it (orientation 'rename X to Y'), not sketch"
diff --git a/eval/scenarios/positive.yaml b/eval/scenarios/positive.yaml
@@ -0,0 +1,71 @@
+# One representative prompt per routable skill in the orientation table.
+# Each expected value traces to that skill's description / orientation example.
+- vars:
+    prompt: "Fix the hover state on the navbar links — they don't change colour."
+  assert:
+    - type: skill-used
+      value: meridian:sketch
+  description: "small, well-scoped UI fix → sketch"
+
+- vars:
+    prompt: "I want to build a notification system with email and in-app delivery."
+  assert:
+    - type: skill-used
+      value: meridian:brainstorm
+  description: "new multi-subsystem feature → brainstorm"
+
+- vars:
+    prompt: "I'm getting 'TypeError: cannot read properties of undefined (reading id)' when the dashboard loads."
+  assert:
+    - type: skill-used
+      value: meridian:debug
+  description: "bug with a stack trace → debug"
+
+- vars:
+    prompt: "I'm about to write the payment retry logic against the Stripe PaymentIntents API — verify how its idempotency keys actually behave before I code against them."
+  assert:
+    - type: skill-used
+      value: meridian:research
+  description: "about to write code against an external API → research (verify before building)"
+
+- vars:
+    prompt: "I just finished the auth refactor — review it before I merge."
+  assert:
+    - type: skill-used
+      value: meridian:review
+  description: "post-implementation quality check → review"
+
+- vars:
+    prompt: "Here's the reviewer's PR feedback to work through: extract the validation helper, and the retry loop has an off-by-one."
+  assert:
+    - type: skill-used
+      value: meridian:respond
+  description: "triaging reviewer feedback → respond"
+
+- vars:
+    prompt: "Commit this."
+  assert:
+    - type: skill-used
+      value: meridian:commit
+  description: "explicit commit request → commit"
+
+- vars:
+    prompt: "Write up what we learned debugging that race condition so the next session doesn't repeat it."
+  assert:
+    - type: skill-used
+      value: meridian:document
+  description: "capture hard-won knowledge → document"
+
+- vars:
+    prompt: "Requirements are locked and the design is approved — no planning needed, go implement the token-bucket rate limiter (100 req/min) as Express middleware now."
+  assert:
+    - type: skill-used
+      value: meridian:execute
+  description: "clear approved requirements, ready to build → execute"
+
+- vars:
+    prompt: "Do these three independent jobs in parallel: bump the lint config, split the utils file, and regenerate the API types."
+  assert:
+    - type: skill-used
+      value: meridian:delegate
+  description: "2+ independent tasks at once → delegate"
diff --git a/hooks/hooks.json b/hooks/hooks.json
@@ -6,7 +6,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node -e \"let r=process.env.CLAUDE_PLUGIN_ROOT||process.env.PLUGIN_ROOT,cp=require('child_process');if(process.platform==='linux'&&/^[a-zA-Z]:/.test(r)){try{r=cp.execFileSync('wslpath',['-u',r],{encoding:'utf8'}).trim()}catch{r='/mnt/'+r[0].toLowerCase()+r.slice(2).split(String.fromCharCode(92)).join('/')}}cp.execFileSync(process.execPath,[r+'/hooks/session-start.mjs'],{stdio:'inherit'})\"",
+            "command": "node -e \"let r=process.env.CLAUDE_PLUGIN_ROOT||process.env.PLUGIN_ROOT;if(!r)process.exit(0);let cp=require('child_process');if(process.platform==='linux'&&/^[a-zA-Z]:/.test(r)){try{r=cp.execFileSync('wslpath',['-u',r],{encoding:'utf8'}).trim()}catch{r='/mnt/'+r[0].toLowerCase()+r.slice(2).split(String.fromCharCode(92)).join('/')}}cp.execFileSync(process.execPath,[r+'/hooks/session-start.mjs'],{stdio:'inherit'})\"",
             "async": false
           }
         ]
@@ -18,7 +18,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node -e \"let r=process.env.CLAUDE_PLUGIN_ROOT||process.env.PLUGIN_ROOT,cp=require('child_process');if(process.platform==='linux'&&/^[a-zA-Z]:/.test(r)){try{r=cp.execFileSync('wslpath',['-u',r],{encoding:'utf8'}).trim()}catch{r='/mnt/'+r[0].toLowerCase()+r.slice(2).split(String.fromCharCode(92)).join('/')}}cp.execFileSync(process.execPath,[r+'/hooks/user-prompt-submit.mjs'],{stdio:'inherit'})\"",
+            "command": "node -e \"let r=process.env.CLAUDE_PLUGIN_ROOT||process.env.PLUGIN_ROOT;if(!r)process.exit(0);let cp=require('child_process');if(process.platform==='linux'&&/^[a-zA-Z]:/.test(r)){try{r=cp.execFileSync('wslpath',['-u',r],{encoding:'utf8'}).trim()}catch{r='/mnt/'+r[0].toLowerCase()+r.slice(2).split(String.fromCharCode(92)).join('/')}}cp.execFileSync(process.execPath,[r+'/hooks/user-prompt-submit.mjs'],{stdio:'inherit'})\"",
             "async": false
           }
         ]
@@ -30,7 +30,19 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node -e \"let r=process.env.CLAUDE_PLUGIN_ROOT||process.env.PLUGIN_ROOT,cp=require('child_process');if(process.platform==='linux'&&/^[a-zA-Z]:/.test(r)){try{r=cp.execFileSync('wslpath',['-u',r],{encoding:'utf8'}).trim()}catch{r='/mnt/'+r[0].toLowerCase()+r.slice(2).split(String.fromCharCode(92)).join('/')}}cp.execFileSync(process.execPath,[r+'/hooks/session-end.mjs'],{stdio:'inherit'})\"",
+            "command": "node -e \"let r=process.env.CLAUDE_PLUGIN_ROOT||process.env.PLUGIN_ROOT;if(!r)process.exit(0);let cp=require('child_process');if(process.platform==='linux'&&/^[a-zA-Z]:/.test(r)){try{r=cp.execFileSync('wslpath',['-u',r],{encoding:'utf8'}).trim()}catch{r='/mnt/'+r[0].toLowerCase()+r.slice(2).split(String.fromCharCode(92)).join('/')}}cp.execFileSync(process.execPath,[r+'/hooks/session-end.mjs'],{stdio:'inherit'})\"",
+            "async": false
+          }
+        ]
+      }
+    ],
+    "PreToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node -e \"let r=process.env.CLAUDE_PLUGIN_ROOT||process.env.PLUGIN_ROOT;if(!r)process.exit(0);let cp=require('child_process');if(process.platform==='linux'&&/^[a-zA-Z]:/.test(r)){try{r=cp.execFileSync('wslpath',['-u',r],{encoding:'utf8'}).trim()}catch{r='/mnt/'+r[0].toLowerCase()+r.slice(2).split(String.fromCharCode(92)).join('/')}}cp.execFileSync(process.execPath,[r+'/hooks/pre-tool-use.mjs'],{stdio:'inherit'})\"",
             "async": false
           }
         ]

diff --git a/hooks/lib/signals.mjs b/hooks/lib/signals.mjs
@@ -54,7 +54,11 @@ export function isFailureSignal(prompt) {
   if (typeof prompt !== "string") return false;
   const trimmed = prompt.trim();
   if (!trimmed || trimmed.length > MAX_SIGNAL_LENGTH) return false;
-  const normalized = trimmed.toLowerCase().replace(/[\s.!?:;,~]+$/, "");
+  // Collapse internal whitespace so an accidental double-space ("still  broken") still matches.
+  const normalized = trimmed
+    .toLowerCase()
+    .replace(/[\s.!?:;,~]+$/, "")
+    .replace(/\s+/g, " ");
   return FAILURE_SIGNAL.test(normalized);
 }
 

diff --git a/hooks/lib/state.mjs b/hooks/lib/state.mjs
@@ -40,11 +40,11 @@ export function sessionDir(host, id) {
  */
 export function touch(host, id) {
   const dir = sessionDir(host, id);
-  mkdirSync(dir, { recursive: true });
   try {
+    mkdirSync(dir, { recursive: true });
     utimesSync(dir, new Date(), new Date());
   } catch {
-    // best-effort
+    // best-effort: a failed touch only risks an early prune of an idle session
   }
 }
 
@@ -107,5 +107,9 @@ export function pruneStale(host, currentId, maxAgeMs) {
  * @param {string} id
  */
 export function clear(host, id) {
-  rmSync(sessionDir(host, id), { recursive: true, force: true });
+  try {
+    rmSync(sessionDir(host, id), { recursive: true, force: true });
+  } catch {
+    // best-effort: cleanup only; a left-behind dir is pruned later by age
+  }
 }