codellm-devkit · rahlk · Jul 2, 2026 · Jul 2, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -29,9 +29,14 @@ it first; everything else is a stage it calls, in order:
    JSDoc, with precise source spans.
 3. **call graph** (`src/semantic_analysis`) — `selectProvider()` picks tsc / jelly /
    union; each provider returns edges + external (phantom) symbols.
-4. **cache** (`src/utils/cache.ts`) — content-hash cache under `.codeanalyzer/`, so
-   re-analysis only touches what changed.
-5. **output** (`src/build`, `src/build/neo4j`) — `analysis.json`, a self-contained
+4. **program graphs** (`src/dataflow`) — level 3 only (`-a 3`): CFG → post-dominance/CDG →
+   access-path def-use → PDG → SCC-condensed bottom-up summaries → SDG, emitted as the
+   `program_graphs` section keyed by `(signature, node_id)`. Decisions: the "Level 3"
+   half of `.claude/SCHEMA_DECISIONS.md`; contract + staged follow-ups: issue #2.
+5. **cache** (`src/utils/cache.ts`) — content-hash cache under `.codeanalyzer/`, so
+   re-analysis only touches what changed (level 3 also records summaries +
+   dependency edges in `graphs_summaries.json`).
+6. **output** (`src/build`, `src/build/neo4j`) — `analysis.json`, a self-contained
    `graph.cypher` snapshot, or an incremental Bolt push to a live database.
 
 The shape of everything is the **schema** in `src/schema` (`TSApplication` is the top
@@ -47,10 +52,11 @@ a contract.
 | `src/options` | Parsed CLI options / `AnalysisOptions` |
 | `src/syntactic_analysis` | Symbol table (ts-morph traversal) |
 | `src/semantic_analysis` | Call-graph providers (tsc, jelly, union), phantoms |
-| `src/schema` | `TSApplication` types + signatures (the output contract) |
+| `src/dataflow` | Level-3 program graphs: CFG, dominance/CDG, def-use, summaries, SDG, slicing |
+| `src/schema` | `TSApplication` types + signatures + `program_graphs` (the output contract) |
 | `src/build` | Dep materialization + output; `build/neo4j` = graph projection |
 | `src/utils` | fs, caching, logging, serialization, version |
-| `test` | Bun tests + `fixtures/sample-app` |
+| `test` | Bun tests + `fixtures/sample-app` (levels 1–2) + `fixtures/dataflow-app` (level-3 gates) |
 
 ## Commands
 

diff --git a/README.md b/README.md
@@ -158,8 +158,17 @@ Options:
                                  (default: "neo4j", env: NEO4J_PASSWORD)
   --neo4j-database <db>          Neo4j database name (env: NEO4J_DATABASE)
   -a, --analysis-level <n>       analysis depth: 1 = symbol table + tsc resolver
-                                 call graph + RTA (default); 2 = call graph
-                                 (default: "1")
+                                 call graph + RTA (default); 2 = call graph; 3 =
+                                 + program graphs (CFG/PDG/SDG) (default: "1")
+  --graphs <list>                level-3 graph sections to emit,
+                                 comma-separated: cfg | dfg | pdg | sdg
+                                 (default: all; requires -a 3)
+  --graph-field-depth <k>        access-path depth bound (k-limit) for level-3
+                                 dataflow (default: "3")
+  -j, --jobs <n>                 worker parallelism for level-3 graphs (default:
+                                 sequential; opt in with N ≥ 2 on large projects
+                                 — each worker loads its own copy of the
+                                 program)
   -t, --target-files <paths...>  restrict analysis to specific files
                                  (incremental)
   --skip-tests                   skip test trees (default)
@@ -213,6 +222,12 @@ Options:
    cants --input ./my-ts-project --eager --cache-dir /path/to/custom-cache
    ```
 
+6. **Program graphs (level 3): CFG/PDG/SDG in `analysis.json`:**
+   ```sh
+   cants --input ./my-ts-project -a 3                    # full program_graphs section
+   cants --input ./my-ts-project -a 3 --graphs cfg,pdg   # scope the emitted graphs
+   ```
+
 ## Output targets
 
 `cants` builds one analysis in memory and can emit it three ways (`--emit`):
@@ -234,6 +249,56 @@ A `TSApplication` document — the canonical CLDK contract the Python SDK parses
 Caller- and callee-side identifiers come from a single signature canonicalizer, so call-graph
 `source`/`target` values byte-match the corresponding `symbol_table` / `external_symbols` keys.
 
+### Program graphs (`-a 3`)
+
+`--analysis-level 3` adds a `program_graphs` section to `analysis.json` — native, whole-program
+dependence graphs built in-process from the same ts-morph AST (no external engine), per the CLDK
+level-3 dataflow contract:
+
+```jsonc
+{
+  "program_graphs": {
+    "schema_version": "1.0.0",
+    "k_limit": 3,                    // access-path depth bound (--graph-field-depth)
+    "functions": {
+      "<signature>": {
+        "cfg": { "nodes": [...], "edges": [...] },   // exceptional control-flow graph
+        "pdg": { "edges": [...] }                    // CDG (control) + DDG (data) dependence
+      }
+    },
+    "sdg_edges": [ /* cross-function CALL / PARAM_IN / PARAM_OUT / SUMMARY edges */ ]
+  }
+}
+```
+
+Every graph node is keyed by `(signature, node_id)`, with the signature produced by the same
+canonicalizer as the symbol table and call graph, so graphs, call edges, and callables all join.
+`--graphs cfg,dfg,pdg,sdg` scopes the emitted sections (default: all).
+
+**Substrate (locked in [issue #2](https://github.com/codellm-devkit/codeanalyzer-typescript/issues/2)):**
+the CFG and reaching-definitions are hand-built from the ts-morph AST; the call-graph oracle is
+the existing provenance-merged tsc ∪ Jelly graph; aliasing is a flow-insensitive copy-alias MVP
+(Jelly points-to-backed propagation is a staged upgrade). Function summaries are composed
+bottom-up over the SCC condensation of the call graph, with k-limited access paths; module
+globals ride the SDG as extra parameters. The analysis is deliberately sound-leaning and
+over-approximate; known unsoundness (dynamic `eval`, reflection/monkey-patching, npm-internal
+effects) is recorded in `.claude/SCHEMA_DECISIONS.md`. Backward slicing and taint run as queries
+over the SDG (slicing ships now inside the analyzer; the configurable taint pack is staged).
+
+**Parallelism (`-j/--jobs`).** The pipeline implements the level-3 parallel execution model:
+stage-1–4 extraction fans out per callable over a Bun worker pool (partitioned by file) and is
+posted *before* the call-graph solve so the two overlap; summary composition runs as a
+Kahn-style ready-queue wavefront over the SCC condensation DAG (the SCC is the atomic unit).
+`--jobs N` output is **byte-identical** to `--jobs 1` (node ids are span-ordered, all edge lists
+are collect-then-sorted, and the SCC fixpoint is a pure function of its inputs) — enforced by a
+differential test. Parallelism is off by default and worth opting into only on large codebases:
+ts-morph ASTs cannot cross the worker boundary, so each extraction worker loads its own copy of
+the program, and that load cost dominates the parallelizable graph math on small/mid repos
+(self-analysis runs 2.5× slower at `-j 14` than sequentially). Worker failure at any stage
+degrades to the sequential path with a warning — never to wrong or missing output.
+
+Levels 1/2 are unaffected: nothing in level 3 runs unless `-a 3` is requested.
+
 ### Neo4j graph
 
 `--emit neo4j` projects the same analysis into a labeled property graph (declarations keyed by

diff --git a/package.json b/package.json
@@ -9,7 +9,7 @@
   },
   "scripts": {
     "start": "bun run src/index.ts",
-    "build": "bun build ./src/main.ts --compile --external @babel/preset-typescript --outfile dist/cants",
+    "build": "bun build ./src/main.ts ./src/dataflow/worker.ts --compile --external @babel/preset-typescript --outfile dist/cants",
     "gen:schema": "bun run src/index.ts --emit schema > schema.neo4j.json",
     "gen:readme": "bun run scripts/update-readme.ts",
     "test:container": "RUN_CONTAINER_TESTS=1 bun test test/neo4j-bolt.test.ts",

diff --git a/src/cli.ts b/src/cli.ts
@@ -1,6 +1,7 @@
 import * as path from "node:path";
 import { Command, Option } from "commander";
 import type { AnalysisOptions, CallGraphProviderName, EmitTarget } from "./options";
+import { ALL_GRAPHS, type GraphSelector } from "./schema";
 
 /**
  * Build the commander program. Shared by parseArgs and by the README generator
@@ -35,7 +36,20 @@ export function buildProgram(): Command {
         .default("neo4j"),
     )
     .addOption(new Option("--neo4j-database <db>", "Neo4j database name").env("NEO4J_DATABASE"))
-    .option("-a, --analysis-level <n>", "analysis depth: 1 = symbol table + tsc resolver call graph + RTA (default); 2 = call graph", "1")
+    .option(
+      "-a, --analysis-level <n>",
+      "analysis depth: 1 = symbol table + tsc resolver call graph + RTA (default); 2 = call graph; 3 = + program graphs (CFG/PDG/SDG)",
+      "1",
+    )
+    .option(
+      "--graphs <list>",
+      "level-3 graph sections to emit, comma-separated: cfg | dfg | pdg | sdg (default: all; requires -a 3)",
+    )
+    .option("--graph-field-depth <k>", "access-path depth bound (k-limit) for level-3 dataflow", "3")
+    .option(
+      "-j, --jobs <n>",
+      "worker parallelism for level-3 graphs (default: sequential; opt in with N ≥ 2 on large projects — each worker loads its own copy of the program)",
+    )
     .option("-t, --target-files <paths...>", "restrict analysis to specific files (incremental)")
     .option("--skip-tests", "skip test trees (default)")
     .option("--include-tests", "include test trees")
@@ -61,7 +75,45 @@ export function parseArgs(argv: string[]): AnalysisOptions {
   program.parse(argv, { from: "user" });
   const o = program.opts();
 
-  const level = String(o.analysisLevel) === "2" ? 2 : 1;
+  const levelStr = String(o.analysisLevel);
+  if (!["1", "2", "3"].includes(levelStr)) {
+    program.error(`error: invalid --analysis-level '${levelStr}' (expected 1, 2, or 3)`);
+  }
+  const level = Number(levelStr) as 1 | 2 | 3;
+
+  // --graphs: strict validation (never a silent fallback), and only meaningful at -a 3.
+  let graphs: GraphSelector[] = [...ALL_GRAPHS];
+  if (o.graphs !== undefined) {
+    if (level !== 3) program.error("error: --graphs requires --analysis-level 3");
+    const requested = String(o.graphs)
+      .split(",")
+      .map((g) => g.trim())
+      .filter((g) => g.length > 0);
+    if (!requested.length) program.error("error: --graphs requires at least one of: cfg, dfg, pdg, sdg");
+    for (const g of requested) {
+      if (!(ALL_GRAPHS as string[]).includes(g)) {
+        program.error(`error: unknown --graphs value '${g}' (expected: cfg, dfg, pdg, sdg)`);
+      }
+    }
+    graphs = [...new Set(requested)] as GraphSelector[];
+  }
+
+  const kStr = String(o.graphFieldDepth);
+  const k = Number(kStr);
+  if (!Number.isInteger(k) || k < 1) {
+    program.error(`error: invalid --graph-field-depth '${kStr}' (expected a positive integer)`);
+  }
+
+  // -j/--jobs: explicit value must be a positive integer; omitted ⇒ 0 = unset, which the help
+  // text documents as sequential. NOTE(review): confirm startExtraction maps 0 to sequential.
+  let jobs = 0;
+  if (o.jobs !== undefined) {
+    const j = Number(String(o.jobs));
+    if (!Number.isInteger(j) || j < 1) {
+      program.error(`error: invalid --jobs '${String(o.jobs)}' (expected a positive integer)`);
+    }
+    jobs = j;
+  }
   const emit: EmitTarget = o.emit === "neo4j" ? "neo4j" : o.emit === "schema" ? "schema" : "json";
   // --emit schema is a static artifact and needs no project; every other target requires -i.
   if (emit !== "schema" && !o.input) program.error("required option '-i, --input <path>' not specified");
@@ -94,6 +146,9 @@ export function parseArgs(argv: string[]): AnalysisOptions {
     neo4jPassword: String(o.neo4jPassword),
     neo4jDatabase: o.neo4jDatabase ? String(o.neo4jDatabase) : null,
     analysisLevel: level,
+    graphs,
+    graphFieldDepth: k,
+    jobs,
     targetFiles: targets,
     skipTests: o.includeTests ? false : true,
     eager: Boolean(o.eager),

diff --git a/src/core.ts b/src/core.ts
@@ -1,4 +1,5 @@
 import * as path from "node:path";
+import { buildProgramGraphs, startExtraction } from "./dataflow";
 import { selectProvider } from "./semantic_analysis";
 import { loadCache, saveCache } from "./utils";
 import { materialize } from "./build";
@@ -11,7 +12,7 @@ import { Logger } from "./utils";
  * The orchestrator. Order mirrors the reference analyzers: materialize deps → build the symbol
  * table → build the resolver call graph → cache the base → return the Application.
  */
-export function analyze(opts: AnalysisOptions): TSApplication {
+export async function analyze(opts: AnalysisOptions): Promise<TSApplication> {
   const log = new Logger(opts.verbosity);
   log.info(`analyzing ${opts.input} (level ${opts.analysisLevel})`);
   const cacheDir = opts.cacheDir ?? path.join(opts.input, ".codeanalyzer");
@@ -22,6 +23,11 @@ export function analyze(opts: AnalysisOptions): TSApplication {
   const cached = opts.eager ? null : loadCache(cacheDir);
   const { project, symbol_table } = buildSymbolTable(opts, mat, cached?.symbol_table ?? null, log);
 
+  // Level 3: post stage-1–4 graph extraction to the worker pool BEFORE the call-graph solve —
+  // extraction doesn't need callee resolution, so the two run concurrently (the contract's
+  // "points-to solve runs concurrently with stages 1–4") and join in buildProgramGraphs.
+  const extraction = opts.analysisLevel === 3 ? startExtraction(project, symbol_table, mat.tsConfigFilePath, opts, log) : null;
+
   // Call graph via the selected provider (union of tsc+jelly by default; --tsc-only / jelly opt-in).
   const provider = selectProvider(opts.callGraphProvider);
   log.info(`call graph provider: ${provider.name}`);
@@ -34,6 +40,13 @@ export function analyze(opts: AnalysisOptions): TSApplication {
     external_symbols: cg.external_symbols,
     synthesized_callables: cg.synthesized_callables,
   };
+
+  // Level 3 join: stages 5–7 (summary wavefront + SDG) consume the extraction AND the
+  // provider-backfilled callee signatures. Strictly flag-gated so -a 1/-a 2 cost nothing.
+  if (extraction) {
+    app.program_graphs = await buildProgramGraphs(extraction, symbol_table, opts, log);
+  }
+
   saveCache(cacheDir, { symbol_table, call_graph });
   return app;
 }