Wolfvin · Wolfvin · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026
diff --git a/references/rule-schema.json b/references/rule-schema.json
@@ -0,0 +1,161 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://github.com/Wolfvin/CodeLens/blob/main/references/rule-schema.json",
+  "title": "CodeLens Rule YAML Schema",
+  "description": "JSON Schema for CodeLens rule YAML files. Used by `codelens rule-validate` to catch typos, unknown keys, missing required fields, and type mismatches before a rule is loaded by the engine. The schema describes the superset of taint-style (sources/sinks/sanitizers) and pattern-style (pattern/patterns) rules; cross-field constraints (pattern vs patterns mutually exclusive, fix requires pattern) are enforced separately by the validator because JSON Schema cannot express them cleanly.",
+  "type": "object",
+  "required": ["rules"],
+  "additionalProperties": false,
+  "properties": {
+    "rules": {
+      "type": "array",
+      "minItems": 1,
+      "items": {"$ref": "#/$defs/rule"}
+    }
+  },
+  "$defs": {
+    "rule": {
+      "type": "object",
+      "required": ["id", "message", "severity", "language"],
+      "additionalProperties": false,
+      "properties": {
+        "id": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Stable unique rule identifier, e.g. 'py/sql-injection' or 'owasp/A01/broken-access-control'. Used in findings, SARIF, and # ruleid: test markers."
+        },
+        "name": {
+          "type": "string",
+          "description": "Human-readable rule name shown in reports."
+        },
+        "message": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Finding message displayed when the rule fires."
+        },
+        "severity": {
+          "type": "string",
+          "enum": ["critical", "high", "medium", "low", "info"],
+          "description": "Severity level. Maps to SARIF result level: critical/high -> error, medium -> warning, low/info -> note."
+        },
+        "language": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Target language. CodeLens tree-sitter-supported: python, javascript, typescript, tsx, rust, html, css. Other languages are accepted but pattern parseability is skipped."
+        },
+        "cwe": {
+          "type": "string",
+          "description": "CWE identifier, e.g. 'CWE-89'. Optional metadata."
+        },
+        "owasp": {
+          "type": "string",
+          "description": "OWASP Top 10 category, e.g. 'A01:2021'. Optional metadata."
+        },
+        "sources": {
+          "type": "array",
+          "items": {"type": "string"},
+          "description": "Taint sources — where untrusted data enters (e.g. 'flask.request.args'). Used by taint-style rules."
+        },
+        "sinks": {
+          "type": "array",
+          "items": {"type": "string"},
+          "description": "Taint sinks — where untrusted data becomes dangerous (e.g. 'cursor.execute'). Used by taint-style rules."
+        },
+        "sanitizers": {
+          "type": "array",
+          "items": {"type": "string"},
+          "description": "Sanitizers — functions that make data safe (e.g. 'parameterized_query'). Used by taint-style rules."
+        },
+        "pattern": {
+          "type": "string",
+          "description": "Pattern-style rule: a single AST pattern (Semgrep-compatible subset). Mutually exclusive with 'patterns'."
+        },
+        "patterns": {
+          "type": "array",
+          "items": {"type": ["string", "object"]},
+          "description": "Pattern-style rule: list of patterns (all must match). Mutually exclusive with 'pattern'."
+        },
+        "pattern-either": {
+          "type": "array",
+          "items": {"type": "object"},
+          "description": "Pattern-style rule: any of these patterns matches."
+        },
+        "pattern-not": {
+          "type": ["string", "object"],
+          "description": "Pattern-style rule: this pattern must NOT match."
+        },
+        "pattern-inside": {
+          "type": ["string", "object"],
+          "description": "Pattern-style rule: match must be inside this pattern."
+        },
+        "pattern-not-inside": {
+          "type": ["string", "object"],
+          "description": "Pattern-style rule: match must NOT be inside this pattern."
+        },
+        "pattern-regex": {
+          "type": "string",
+          "description": "Pattern-style rule: regex pattern (matched against source text, not AST)."
+        },
+        "metavariable-regex": {
+          "type": "object",
+          "description": "Constrain a metavariable by regex."
+        },
+        "metavariable-comparison": {
+          "type": "object",
+          "description": "Constrain a metavariable by Python expression."
+        },
+        "fix": {
+          "type": "string",
+          "description": "Autofix replacement string (may reference metavariables). Requires 'pattern', 'patterns', or 'pattern-either'."
+        },
+        "fix-regex": {
+          "type": "object",
+          "description": "Regex-based autofix. Requires 'pattern', 'patterns', or 'pattern-either'.",
+          "properties": {
+            "regex": {"type": "string"},
+            "replacement": {"type": "string"},
+            "count": {"type": "integer", "minimum": 0}
+          },
+          "required": ["regex", "replacement"]
+        },
+        "paths": {
+          "type": "object",
+          "description": "Per-rule path filter (gitignore-style globs).",
+          "properties": {
+            "include": {"type": "array", "items": {"type": "string"}},
+            "exclude": {"type": "array", "items": {"type": "string"}}
+          }
+        },
+        "metadata": {
+          "type": "object",
+          "description": "Free-form metadata dict. Serialized to SARIF 'properties' and JSON output 'metadata'."
+        },
+        "options": {
+          "type": "object",
+          "description": "Per-rule engine options (constant_propagation, symbolic_propagation, taint_intrafile, etc.)."
+        },
+        "timeout": {
+          "type": "number",
+          "minimum": 0,
+          "description": "Per-rule timeout in seconds (overrides global --timeout). Requires --allow-rule-timeout-control."
+        },
+        "max-match-per-file": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Per-rule cap on reported matches per file."
+        },
+        "project-depends-on": {
+          "type": "array",
+          "description": "SCA rule: only match if project depends on the specified package(s).",
+          "items": {
+            "type": "object",
+            "properties": {
+              "namespace": {"type": "string"},
+              "package": {"type": "string"},
+              "version": {"type": "string"}
+            },
+            "required": ["namespace", "package", "version"]
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/scripts/commands/registry_validate.py b/scripts/commands/registry_validate.py
@@ -0,0 +1,44 @@
+"""registry-validate command — Validate registry against file system.
+
+Renamed from `validate` in v8.x to make room for `rule-validate` (rule YAML
+validation). The old `validate` command name still works as a deprecated alias
+(see ``scripts/commands/validate.py``) but prints a one-line stderr warning
+and will be removed in a future release.
+"""
+
+import sys
+
+from validate_engine import validate_registry
+from commands import register_command
+
+
+def add_args(parser):
+    """Register registry-validate arguments."""
+    parser.add_argument(
+        "workspace",
+        nargs="?",
+        default=None,
+        help="Path to workspace root (auto-detected if omitted)",
+    )
+
+
+def execute(args, workspace):
+    """Execute the registry-validate command.
+
+    Args:
+        args: Parsed argparse namespace with ``workspace``.
+        workspace: Resolved workspace root path.
+
+    Returns:
+        Dict with the registry validation result (``validate_registry``
+        return shape).
+    """
+    return validate_registry(workspace)
+
+
+register_command(
+    "registry-validate",  # command name (see module docstring: replaces `validate`)
+    "Validate registry against file system (renamed from `validate`)",  # one-line description
+    add_args,  # argument-setup function defined in this module
+    execute,  # command body defined in this module
+)
diff --git a/scripts/commands/rule_test.py b/scripts/commands/rule_test.py
@@ -0,0 +1,183 @@
+"""rule-test command — snapshot testing for rule YAML files.
+
+Runs a rule against positive/negative code samples (``.test.yaml``) and
+verifies the rule fires (or doesn't fire) where expected via inline
+``# ruleid: <id>`` / ``# ok`` markers. All logic lives in
+``scripts/rule_test_runner.py``; this file is the thin CLI wrapper.
+
+Usage::
+
+    codelens rule-test tests/rule_fixtures/py_sql_injection.yaml
+    codelens rule-test tests/rule_fixtures/         # run all rules in a dir
+    codelens rule-test --json tests/rule_fixtures/
+    codelens rule-test --test-ignore-todo tests/rule_fixtures/
+
+Exit codes:
+    0 — all tests pass (or no tests ran)
+    1 — at least one test failed or errored
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, List
+
+from commands import register_command
+from rule_test_runner import (
+    TestResult,
+    determine_exit_code,
+    run_tests,
+    run_tests_recursive,
+)
+
+
+def add_args(parser):
+    """Register rule-test CLI arguments."""
+    parser.add_argument(
+        "rule_path",
+        help="Path to a rule YAML file or a directory of rule files",
+    )
+    parser.add_argument(
+        "--test-ignore-todo",
+        action="store_true",
+        default=False,
+        help="Skip '# todoruleid:' markers (staged rules not yet enforced)",
+    )
+    parser.add_argument(
+        "--json",
+        dest="json_output",
+        action="store_true",
+        default=False,
+        help="Output machine-readable JSON instead of human-readable text",
+    )
+
+
+def _format_human(results: List[TestResult]) -> str:
+    """Render test results as human-readable text.
+
+    One block per rule: ``<rule-id>: PASS (3/3 samples)`` or fail with
+    a per-failure diff. Ends with a summary line.
+    """
+    lines: List[str] = []
+    total_pass = sum(1 for r in results if r.is_pass)
+    total_fail = sum(1 for r in results if not r.is_pass)
+    total_samples = sum(r.total for r in results)
+    total_passed_samples = sum(r.passed for r in results)
+    total_skipped = sum(r.skipped for r in results)
+
+    for result in results:
+        rule_id = result.rule_id or Path(result.rule_path).stem
+        if result.error:
+            lines.append(f"\n{rule_id}: ERROR — {result.error}")
+            continue
+
+        if result.total == 0:
+            lines.append(f"\n{rule_id}: SKIP (no samples)")
+            continue
+
+        # Per-rule verdict line — the most important line for CI parsers.
+        verdict = "PASS" if result.is_pass else "FAIL"
+        sample_summary = f"{result.passed}/{result.total} samples"
+        if result.skipped:
+            sample_summary += f" ({result.skipped} skipped)"
+        lines.append(f"\n{rule_id}: {verdict} ({sample_summary})")
+
+        # Per-failure detail so authors can fix the rule.
+        for failure in result.failures:
+            lines.append(f"  ✗ {failure.sample_name} line {failure.line}: {failure.message}")
+
+    # Summary line.
+    lines.append("\n" + "=" * 60)
+    if total_fail > 0:
+        lines.append(
+            f"FAIL: {total_fail}/{len(results)} rule(s) failed, "
+            f"{total_passed_samples}/{total_samples} samples passed "
+            f"({total_skipped} skipped)"
+        )
+    else:
+        lines.append(
+            f"PASS: {total_pass}/{len(results)} rule(s), "
+            f"{total_passed_samples}/{total_samples} samples passed "
+            f"({total_skipped} skipped)"
+        )
+
+    return "\n".join(lines)
+
+
+def _format_json(results: List[TestResult]) -> str:
+    """Render test results as JSON for CI / programmatic consumers."""
+    payload: Dict[str, Any] = {
+        "status": "ok" if all(r.is_pass for r in results) else "fail",
+        "exit_code": determine_exit_code(results),
+        "total_rules": len(results),
+        "total_pass": sum(1 for r in results if r.is_pass),
+        "total_fail": sum(1 for r in results if not r.is_pass),
+        "total_samples": sum(r.total for r in results),
+        "total_passed_samples": sum(r.passed for r in results),
+        "total_skipped": sum(r.skipped for r in results),
+        "results": [r.to_dict() for r in results],
+    }
+    return json.dumps(payload, indent=2)
+
+
+def execute(args, workspace):
+    """Execute the rule-test command.
+
+    Returns a dict (so the result flows through the standard CodeLens
+    output formatter) AND sets the process exit code via ``sys.exit`` so
+    CI pipelines get the correct 0/1 signal.
+
+    Args:
+        args: Parsed argparse namespace with ``rule_path``, ``test_ignore_todo``,
+            and ``json_output``.
+        workspace: Workspace root (unused — rule-test is path-based).
+
+    Returns:
+        Dict with ``status``, ``exit_code``, ``results``, and the rendered
+        ``output`` string (human or JSON).
+    """
+    raw_path = os.path.expanduser(args.rule_path)
+    path = Path(raw_path).resolve()
+
+    if not path.exists():
+        # Surface a clear error rather than crashing — the path may be a
+        # typo, and the user benefits from an actionable message.
+        print(f"Error: path does not exist: {path}", file=sys.stderr)
+        sys.exit(1)
+
+    # A single file → run tests for that one rule. A directory → walk and
+    # run tests for every rule with a ``.test.yaml`` companion.
+    if path.is_file():
+        results = [run_tests(path, ignore_todo=args.test_ignore_todo)]
+    else:
+        results = run_tests_recursive(path, ignore_todo=args.test_ignore_todo)
+
+    exit_code = determine_exit_code(results)
+
+    if args.json_output:
+        output = _format_json(results)
+    else:
+        output = _format_human(results)
+
+    print(output)
+    sys.exit(exit_code)
+
+    # Unreachable, but keeps the return-type contract honest for callers
+    # that import ``execute`` directly (e.g., tests).
+    return {
+        "status": "ok" if exit_code == 0 else "fail",
+        "exit_code": exit_code,
+        "results": [r.to_dict() for r in results],
+        "output": output,
+    }
+
+
+register_command(
+    "rule-test",  # command name (matches the usage examples in the module docstring)
+    "Run snapshot tests for rule YAML files (inline # ruleid: / # ok markers)",  # one-line description
+    add_args,  # argument-setup function defined in this module
+    execute,  # command body defined in this module
+)