Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions references/rule-schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://github.com/Wolfvin/CodeLens/blob/main/references/rule-schema.json",
"title": "CodeLens Rule YAML Schema",
"description": "JSON Schema for CodeLens rule YAML files. Used by `codelens rule-validate` to catch typos, unknown keys, missing required fields, and type mismatches before a rule is loaded by the engine. The schema describes the superset of taint-style (sources/sinks/sanitizers) and pattern-style (pattern/patterns) rules; cross-field constraints ('pattern' and 'patterns' are mutually exclusive; 'fix' and 'fix-regex' require 'pattern', 'patterns', or 'pattern-either') are enforced separately by the validator because JSON Schema cannot express them cleanly.",
"type": "object",
"required": ["rules"],
"additionalProperties": false,
"properties": {
"rules": {
"type": "array",
"minItems": 1,
"items": {"$ref": "#/$defs/rule"}
}
},
"$defs": {
"rule": {
"type": "object",
"required": ["id", "message", "severity", "language"],
"additionalProperties": false,
"properties": {
"id": {
"type": "string",
"minLength": 1,
"description": "Stable unique rule identifier, e.g. 'py/sql-injection' or 'owasp/A01/broken-access-control'. Used in findings, SARIF, and # ruleid: test markers."
},
"name": {
"type": "string",
"description": "Human-readable rule name shown in reports."
},
"message": {
"type": "string",
"minLength": 1,
"description": "Finding message displayed when the rule fires."
},
"severity": {
"type": "string",
"enum": ["critical", "high", "medium", "low", "info"],
"description": "Severity level. Maps to SARIF result level: critical/high -> error, medium -> warning, low/info -> note."
},
"language": {
"type": "string",
"minLength": 1,
"description": "Target language. CodeLens tree-sitter-supported: python, javascript, typescript, tsx, rust, html, css. Other languages are accepted but pattern parseability is skipped."
},
"cwe": {
"type": "string",
"description": "CWE identifier, e.g. 'CWE-89'. Optional metadata."
},
"owasp": {
"type": "string",
"description": "OWASP Top 10 category, e.g. 'A01:2021'. Optional metadata."
},
"sources": {
"type": "array",
"items": {"type": "string"},
"description": "Taint sources — where untrusted data enters (e.g. 'flask.request.args'). Used by taint-style rules."
},
"sinks": {
"type": "array",
"items": {"type": "string"},
"description": "Taint sinks — where untrusted data becomes dangerous (e.g. 'cursor.execute'). Used by taint-style rules."
},
"sanitizers": {
"type": "array",
"items": {"type": "string"},
"description": "Sanitizers — functions that make data safe (e.g. 'parameterized_query'). Used by taint-style rules."
},
"pattern": {
"type": "string",
"description": "Pattern-style rule: a single AST pattern (Semgrep-compatible subset). Mutually exclusive with 'patterns'."
},
"patterns": {
"type": "array",
"items": {"type": ["string", "object"]},
"description": "Pattern-style rule: list of patterns (all must match). Mutually exclusive with 'pattern'."
},
"pattern-either": {
"type": "array",
"items": {"type": "object"},
"description": "Pattern-style rule: any of these patterns matches."
},
"pattern-not": {
"type": ["string", "object"],
"description": "Pattern-style rule: this pattern must NOT match."
},
"pattern-inside": {
"type": ["string", "object"],
"description": "Pattern-style rule: match must be inside this pattern."
},
"pattern-not-inside": {
"type": ["string", "object"],
"description": "Pattern-style rule: match must NOT be inside this pattern."
},
"pattern-regex": {
"type": "string",
"description": "Pattern-style rule: regex pattern (matched against source text, not AST)."
},
"metavariable-regex": {
"type": "object",
"description": "Constrain a metavariable by regex."
},
"metavariable-comparison": {
"type": "object",
"description": "Constrain a metavariable by Python expression."
},
"fix": {
"type": "string",
"description": "Autofix replacement string (may reference metavariables). Requires 'pattern', 'patterns', or 'pattern-either'."
},
"fix-regex": {
"type": "object",
"description": "Regex-based autofix. Requires 'pattern', 'patterns', or 'pattern-either'.",
"properties": {
"regex": {"type": "string"},
"replacement": {"type": "string"},
"count": {"type": "integer", "minimum": 0}
},
"required": ["regex", "replacement"]
},
"paths": {
"type": "object",
"description": "Per-rule path filter (gitignore-style globs).",
"properties": {
"include": {"type": "array", "items": {"type": "string"}},
"exclude": {"type": "array", "items": {"type": "string"}}
}
},
"metadata": {
"type": "object",
"description": "Free-form metadata dict. Serialized to SARIF 'properties' and JSON output 'metadata'."
},
"options": {
"type": "object",
"description": "Per-rule engine options (constant_propagation, symbolic_propagation, taint_intrafile, etc.)."
},
"timeout": {
"type": ["integer", "number"],
"minimum": 0,
"description": "Per-rule timeout in seconds (overrides global --timeout). Requires --allow-rule-timeout-control."
},
"max-match-per-file": {
"type": "integer",
"minimum": 0,
"description": "Per-rule cap on reported matches per file."
},
"project-depends-on": {
  "type": "array",
  "items": {
    "type": "object",
    "properties": {
      "namespace": {"type": "string"},
      "package": {"type": "string"},
      "version": {"type": "string"}
    },
    "required": ["namespace", "package", "version"]
  },
  "description": "SCA rule: only match if project depends on the specified package(s). Each entry is an object with string 'namespace', 'package', and 'version' fields."
}
}
}
}
}
44 changes: 44 additions & 0 deletions scripts/commands/registry_validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""registry-validate command — Validate registry against file system.

Renamed from `validate` in v8.x to make room for `rule-validate` (rule YAML
validation). The old `validate` command name still works as a deprecated alias
(see ``scripts/commands/validate.py``) but prints a one-line stderr warning
and will be removed in a future release.
"""

import sys

from validate_engine import validate_registry
from commands import register_command


def add_args(parser):
    """Declare the single optional ``workspace`` positional on ``parser``.

    Args:
        parser: Parser object exposing ``add_argument`` (argparse-style).
    """
    # ``nargs="?"`` makes the positional optional; when it is omitted the
    # parsed value is ``None``.
    workspace_options = {
        "help": "Path to workspace root (auto-detected if omitted)",
        "default": None,
        "nargs": "?",
    }
    parser.add_argument("workspace", **workspace_options)


def execute(args, workspace):
    """Run registry validation for the given workspace.

    Args:
        args: Parsed argparse namespace (not read by this function).
        workspace: Resolved workspace root path, forwarded unchanged to
            ``validate_registry``.

    Returns:
        Whatever ``validate_registry(workspace)`` returns.
    """
    registry_report = validate_registry(workspace)
    return registry_report


# Module-level call: it runs when this module is imported and hands the
# command name, its one-line description, and the ``add_args`` / ``execute``
# callables defined above to ``register_command`` (imported from ``commands``).
register_command(
    "registry-validate",
    "Validate registry against file system (renamed from `validate`)",
    add_args,
    execute,
)
183 changes: 183 additions & 0 deletions scripts/commands/rule_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""rule-test command — snapshot testing for rule YAML files.

Runs a rule against positive/negative code samples (``.test.yaml``) and
verifies the rule fires (or doesn't fire) where expected via inline
``# ruleid: <id>`` / ``# ok`` markers. All logic lives in
``scripts/rule_test_runner.py``; this file is the thin CLI wrapper.

Usage::

codelens rule-test tests/rule_fixtures/py_sql_injection.yaml
codelens rule-test tests/rule_fixtures/ # run all rules in a dir
codelens rule-test --json tests/rule_fixtures/
codelens rule-test --test-ignore-todo tests/rule_fixtures/

Exit codes:
0 — all tests pass (or no tests ran)
1 — at least one test failed or errored
"""

from __future__ import annotations

import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, List

from commands import register_command
from rule_test_runner import (
TestResult,
determine_exit_code,
run_tests,
run_tests_recursive,
)


def add_args(parser):
    """Attach the rule-test positional and its two boolean flags to ``parser``.

    Args:
        parser: Parser object exposing ``add_argument`` (argparse-style).
    """
    parser.add_argument(
        "rule_path",
        help="Path to a rule YAML file or a directory of rule files",
    )

    # Both switches are plain on/off flags that default to off; only the
    # option string, destination, and help text differ between them.
    switches = (
        (
            "--test-ignore-todo",
            {"help": "Skip '# todoruleid:' markers (staged rules not yet enforced)"},
        ),
        (
            "--json",
            {
                "dest": "json_output",
                "help": "Output machine-readable JSON instead of human-readable text",
            },
        ),
    )
    for option, extras in switches:
        parser.add_argument(option, action="store_true", default=False, **extras)


def _format_human(results: List[TestResult]) -> str:
"""Render test results as human-readable text.

One block per rule: ``<rule-id>: PASS (3/3 samples)`` or fail with
a per-failure diff. Ends with a summary line.
"""
lines: List[str] = []
total_pass = sum(1 for r in results if r.is_pass)
total_fail = sum(1 for r in results if not r.is_pass)
total_samples = sum(r.total for r in results)
total_passed_samples = sum(r.passed for r in results)
total_skipped = sum(r.skipped for r in results)

for result in results:
rule_id = result.rule_id or Path(result.rule_path).stem
if result.error:
lines.append(f"\n{rule_id}: ERROR — {result.error}")
continue

if result.total == 0:
lines.append(f"\n{rule_id}: SKIP (no samples)")
continue

# Per-rule verdict line — the most important line for CI parsers.
verdict = "PASS" if result.is_pass else "FAIL"
sample_summary = f"{result.passed}/{result.total} samples"
if result.skipped:
sample_summary += f" ({result.skipped} skipped)"
lines.append(f"\n{rule_id}: {verdict} ({sample_summary})")

# Per-failure detail so authors can fix the rule.
for failure in result.failures:
lines.append(f" ✗ {failure.sample_name} line {failure.line}: {failure.message}")

# Summary line.
lines.append("\n" + "=" * 60)
if total_fail > 0:
lines.append(
f"FAIL: {total_fail}/{len(results)} rule(s) failed, "
f"{total_passed_samples}/{total_samples} samples passed "
f"({total_skipped} skipped)"
)
else:
lines.append(
f"PASS: {total_pass}/{len(results)} rule(s), "
f"{total_passed_samples}/{total_samples} samples passed "
f"({total_skipped} skipped)"
)

return "\n".join(lines)


def _format_json(results: List[TestResult]) -> str:
    """Serialize rule test results as an indented JSON document.

    Args:
        results: Test results to summarize; each must expose ``is_pass``,
            ``total``, ``passed``, ``skipped`` and ``to_dict()``.

    Returns:
        A JSON string (2-space indent) with aggregate counters, the exit
        code from ``determine_exit_code``, and one ``to_dict()`` entry per
        result under ``results``.
    """
    # A run is "ok" only when no result reports a failure.
    failing = [res for res in results if not res.is_pass]
    overall = "fail" if failing else "ok"

    summary: Dict[str, Any] = {
        "status": overall,
        "exit_code": determine_exit_code(results),
        "total_rules": len(results),
        "total_pass": sum(1 for res in results if res.is_pass),
        "total_fail": len(failing),
        "total_samples": sum(res.total for res in results),
        "total_passed_samples": sum(res.passed for res in results),
        "total_skipped": sum(res.skipped for res in results),
        "results": [res.to_dict() for res in results],
    }
    return json.dumps(summary, indent=2)


def execute(args, workspace):
    """Run rule snapshot tests for ``args.rule_path`` and exit with the result.

    The rendered report is printed to stdout and the process exit code is
    then set through ``sys.exit``. Because ``sys.exit`` raises
    ``SystemExit``, this function does not return under normal operation;
    callers that invoke it directly must expect that exception rather than
    a return value.

    Args:
        args: Parsed argparse namespace with ``rule_path``, ``test_ignore_todo``,
            and ``json_output``.
        workspace: Workspace root (unused — rule-test is path-based).

    Returns:
        Only reached when ``sys.exit`` has been replaced by a non-raising
        stub (e.g. in a test): a dict with ``status``, ``exit_code``,
        ``results``, and the rendered ``output`` string (human or JSON).

    Raises:
        SystemExit: Always, in normal operation. The code is ``1`` when the
            path does not exist, otherwise the value returned by
            ``determine_exit_code`` for the collected results.
    """
    path = Path(os.path.expanduser(args.rule_path)).resolve()

    if not path.exists():
        # Surface a clear error rather than crashing — the path may be a
        # typo, and the user benefits from an actionable message.
        print(f"Error: path does not exist: {path}", file=sys.stderr)
        sys.exit(1)

    # A single file is tested on its own; anything else is handed to the
    # recursive runner as a directory.
    if path.is_file():
        results = [run_tests(path, ignore_todo=args.test_ignore_todo)]
    else:
        results = run_tests_recursive(path, ignore_todo=args.test_ignore_todo)

    exit_code = determine_exit_code(results)
    output = _format_json(results) if args.json_output else _format_human(results)

    print(output)
    sys.exit(exit_code)

    # Not reached while ``sys.exit`` raises. Kept so that a caller which
    # stubs ``sys.exit`` still receives a structured result.
    return {
        "status": "ok" if exit_code == 0 else "fail",
        "exit_code": exit_code,
        "results": [r.to_dict() for r in results],
        "output": output,
    }


# Module-level call: it runs when this module is imported and hands the
# command name, its one-line description, and the ``add_args`` / ``execute``
# callables defined above to ``register_command`` (imported from ``commands``).
register_command(
    "rule-test",
    "Run snapshot tests for rule YAML files (inline # ruleid: / # ok markers)",
    add_args,
    execute,
)
Loading
Loading