From 1e6d5cacc1624af116979ea9aa099e7143677193 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Wed, 17 Jun 2026 21:56:00 -0400 Subject: [PATCH 01/21] bugc: flatten TCO back-edge JUMP context (#214) The TCO back-edge JUMP previously emitted a gather wrapper around its invoke and return contexts. Multiple discriminator keys can coexist on a single context object without gather wrapping, so the JUMP now carries a flat combined context with both `invoke` and `return` keys directly. Updates the countCallSites helper in optimizer-contexts.test to check invoke and return independently rather than as an either/or, so flat multi-discriminator contexts get counted in both buckets. The TCO-specific assertion now finds the back-edge JUMP by the presence of both discriminators rather than by a gather wrapper. --- .../generation/control-flow/terminator.ts | 15 ++---- .../src/evmgen/optimizer-contexts.test.ts | 49 +++++++++++-------- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/packages/bugc/src/evmgen/generation/control-flow/terminator.ts b/packages/bugc/src/evmgen/generation/control-flow/terminator.ts index 0bb5b5939..21284ac1b 100644 --- a/packages/bugc/src/evmgen/generation/control-flow/terminator.ts +++ b/packages/bugc/src/evmgen/generation/control-flow/terminator.ts @@ -411,7 +411,8 @@ function generateReturnEpilogue( /** * Build JUMP instruction options for a TCO-replaced tail call. * - * The JUMP carries BOTH contexts in a gather: + * The JUMP carries BOTH discriminators on a single flat + * context object: * - return: the previous iteration's return * - invoke: the new iteration's call * @@ -441,14 +442,12 @@ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { } : undefined; - const returnCtx: Format.Program.Context.Return = { + const combined: Format.Program.Context.Return & + Format.Program.Context.Invoke = { return: { identifier: tailCall.function, ...(declaration ? 
{ declaration } : {}), }, - }; - - const invoke: Format.Program.Context.Invoke = { invoke: { jump: true as const, identifier: tailCall.function, @@ -463,11 +462,7 @@ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { }, }; - const gather: Format.Program.Context.Gather = { - gather: [returnCtx, invoke], - }; - - return { debug: { context: gather as Format.Program.Context } }; + return { debug: { context: combined as Format.Program.Context } }; } /** PUSH an integer as the smallest PUSHn. */ diff --git a/packages/bugc/src/evmgen/optimizer-contexts.test.ts b/packages/bugc/src/evmgen/optimizer-contexts.test.ts index 4fe92325f..f22e46def 100644 --- a/packages/bugc/src/evmgen/optimizer-contexts.test.ts +++ b/packages/bugc/src/evmgen/optimizer-contexts.test.ts @@ -13,9 +13,9 @@ * verifies the expected invoke/return contexts are present * with the right identifiers. TCO is a special case: the * back-edge JUMP that replaces the recursive call carries a - * gather context with BOTH the previous iteration's return - * and the new iteration's invoke, so frame depth stays - * constant across the optimization. + * single flat context with BOTH the previous iteration's + * return and the new iteration's invoke discriminators, so + * frame depth stays constant across the optimization. */ import { describe, it, expect } from "vitest"; @@ -64,7 +64,7 @@ interface CallSiteCounts { /** * JUMP carrying a return context (TCO back-edge, where * the previous iteration's return is paired with the new - * iteration's invoke in a gather). + * iteration's invoke on a single flat context). */ returnJump: Record; } @@ -82,9 +82,13 @@ function unwrapLeaves(ctx: Format.Program.Context): Format.Program.Context[] { /** * Scan a program and count invoke/return contexts by - * instruction type and function identifier. Handles gather - * contexts so TCO's (return + invoke) JUMPs get counted in - * both the invokeJump and returnJump buckets. + * instruction type and function identifier. 
Each leaf is + * checked for invoke and return independently (not as an + * either/or) so a flat multi-discriminator context — like + * the TCO back-edge JUMP carrying both `invoke` and + * `return` — gets counted in both buckets. Enclosing + * gather wrappers are still unwrapped for defensive + * coverage. */ function countCallSites(program: Format.Program): CallSiteCounts { const counts: CallSiteCounts = { @@ -108,7 +112,8 @@ function countCallSites(program: Format.Program): CallSiteCounts { } else if (mn === "JUMPDEST") { counts.invokeJumpdest[id] = (counts.invokeJumpdest[id] ?? 0) + 1; } - } else if (Context.isReturn(leaf)) { + } + if (Context.isReturn(leaf)) { const id = leaf.return.identifier ?? "?"; if (mn === "JUMPDEST") { counts.returnJumpdest[id] = (counts.returnJumpdest[id] ?? 0) + 1; @@ -409,7 +414,7 @@ code { r = check(3, 4); }`; // `count` is tail-recursive: the recursive call is in // return position. At levels 2 and 3, TCO rewrites the // recursive call into a back-edge JUMP. That JUMP - // carries a gather context with BOTH: + // carries a single flat context with BOTH discriminators: // - return: previous iteration's return // - invoke: new iteration's call // @@ -472,26 +477,28 @@ code { r = count(0, 5); }`; // The TCO back-edge JUMP additionally carries a // return context for `count` (the previous - // iteration's return), paired with its invoke in - // a gather. This keeps the debugger's logical - // frame depth constant across the back-edge. + // iteration's return), paired with its invoke on + // a single flat context. This keeps the debugger's + // logical frame depth constant across the + // back-edge. expect(counts.returnJump).toEqual({ count: 1 }); - // The invoke target inside the gather must be - // patched to the actual count entry, not left as - // the placeholder offset 0. This guards against - // patchInvokeTarget failing to walk into gather. 
+ // The TCO back-edge JUMP is the one carrying both + // invoke and return discriminators on the same + // context object. Its invoke target must be patched + // to the actual count entry, not left as the + // placeholder offset 0 — this guards against + // patchInvokeTarget missing flat combined contexts. const tcoJump = program.instructions.find( (instr) => instr.operation?.mnemonic === "JUMP" && instr.context !== undefined && - Context.isGather(instr.context), + Context.isInvoke(instr.context) && + Context.isReturn(instr.context), ); expect(tcoJump).toBeDefined(); - const gather = tcoJump!.context as Format.Program.Context.Gather; - const invokeLeaf = gather.gather.find(Context.isInvoke); - expect(invokeLeaf).toBeDefined(); - const invocation = invokeLeaf!.invoke; + const ctx = tcoJump!.context as Format.Program.Context.Invoke; + const invocation = ctx.invoke; expect(Invocation.isInternalCall(invocation)).toBe(true); const internalCall = invocation as Format.Program.Context.Invoke.Invocation.InternalCall; From 3518f4903f541c9d270494488d1425fd567e3475 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Wed, 17 Jun 2026 22:31:46 -0400 Subject: [PATCH 02/21] format: make invoke.target optional for internal calls (#213) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * format: make invoke.target optional for internal calls Internal calls via JUMP normally carry a code pointer to the callee's entry point. When the compiler inlines a function, the JUMP is elided — there is no physical call instruction and no code target to point at. The callee identity (identifier, declaration, type) remains meaningful, but the target pointer does not. Same pattern as #211 (making return.data optional). Unblocks inlining: bugc can emit invoke contexts on inlined first instructions without fabricating a target pointer. 
- Schema: drop target from InternalCall.required, expand description, add worked example for inlined case - TS types: mark target optional; guard relaxed - Spec page: document optionality and point at transform + gather for inlining annotation - bugc: guard target access in patchInvokeInContext; tests assert target defined before dereferencing * format: prefer flat form for invoke + transform composition Pair with #212's flat-form guidance: when an inlined body's first instruction carries both an invoke and a transform, those belong as sibling keys on a single context — gather isn't needed because `invoke` and `transform` don't collide. --- .../bugc/src/evmgen/call-contexts.test.ts | 6 ++-- .../bugc/src/evmgen/generation/function.ts | 2 ++ .../src/evmgen/optimizer-contexts.test.ts | 3 +- packages/format/src/types/program/context.ts | 5 ++-- .../spec/program/context/function/invoke.mdx | 9 ++++++ .../context/function/invoke.schema.yaml | 29 ++++++++++++++++++- 6 files changed, 47 insertions(+), 7 deletions(-) diff --git a/packages/bugc/src/evmgen/call-contexts.test.ts b/packages/bugc/src/evmgen/call-contexts.test.ts index c05f58708..c670d1660 100644 --- a/packages/bugc/src/evmgen/call-contexts.test.ts +++ b/packages/bugc/src/evmgen/call-contexts.test.ts @@ -97,7 +97,8 @@ code { expect(typeof invoke.declaration!.range!.length).toBe("number"); // Target should be a code pointer (not stack) - expect(Pointer.Region.isCode(call.target.pointer)).toBe(true); + expect(call.target).toBeDefined(); + expect(Pointer.Region.isCode(call.target!.pointer)).toBe(true); // Caller JUMP should NOT have argument pointers // (args live on the callee JUMPDEST invoke context) @@ -156,7 +157,8 @@ code { expect(call.identifier).toBe("add"); // Target should be a code pointer - expect(Pointer.Region.isCode(call.target.pointer)).toBe(true); + expect(call.target).toBeDefined(); + expect(Pointer.Region.isCode(call.target!.pointer)).toBe(true); // Should have argument pointers matching // 
function parameters diff --git a/packages/bugc/src/evmgen/generation/function.ts b/packages/bugc/src/evmgen/generation/function.ts index 5b1944b1e..8e7230155 100644 --- a/packages/bugc/src/evmgen/generation/function.ts +++ b/packages/bugc/src/evmgen/generation/function.ts @@ -537,6 +537,8 @@ function patchInvokeInContext( const offset = functionRegistry[invoke.identifier]; if (offset === undefined) return; + if (!invoke.target) return; + const ptr = invoke.target.pointer; if (Format.Pointer.Region.isCode(ptr)) { ptr.offset = `0x${offset.toString(16)}`; diff --git a/packages/bugc/src/evmgen/optimizer-contexts.test.ts b/packages/bugc/src/evmgen/optimizer-contexts.test.ts index f22e46def..d13cdfa33 100644 --- a/packages/bugc/src/evmgen/optimizer-contexts.test.ts +++ b/packages/bugc/src/evmgen/optimizer-contexts.test.ts @@ -502,7 +502,8 @@ code { r = count(0, 5); }`; expect(Invocation.isInternalCall(invocation)).toBe(true); const internalCall = invocation as Format.Program.Context.Invoke.Invocation.InternalCall; - const invokeTarget = internalCall.target.pointer; + expect(internalCall.target).toBeDefined(); + const invokeTarget = internalCall.target!.pointer; expect(invokeTarget).toBeDefined(); expect( "offset" in invokeTarget ? 
invokeTarget.offset : undefined, diff --git a/packages/format/src/types/program/context.ts b/packages/format/src/types/program/context.ts index 104f27196..5c9bfb7c9 100644 --- a/packages/format/src/types/program/context.ts +++ b/packages/format/src/types/program/context.ts @@ -171,7 +171,7 @@ export namespace Context { export namespace Invocation { export interface InternalCall extends Function.Identity { jump: true; - target: Function.PointerRef; + target?: Function.PointerRef; arguments?: Function.PointerRef; } @@ -180,8 +180,7 @@ export namespace Context { !!value && "jump" in value && value.jump === true && - "target" in value && - Function.isPointerRef(value.target) && + (!("target" in value) || Function.isPointerRef(value.target)) && (!("arguments" in value) || Function.isPointerRef(value.arguments)); export interface ExternalCall extends Function.Identity { diff --git a/packages/web/spec/program/context/function/invoke.mdx b/packages/web/spec/program/context/function/invoke.mdx index b249a1a0a..af1c1da7a 100644 --- a/packages/web/spec/program/context/function/invoke.mdx +++ b/packages/web/spec/program/context/function/invoke.mdx @@ -51,6 +51,15 @@ caller's JUMP has already consumed the destination from the stack, so pointer slot values reflect the post-JUMP layout. The target points to a code location and arguments are passed on the stack. +The `target` field is optional. It may be omitted when there is no +meaningful code pointer to record — most notably at the first +instruction of an inlined function body, where the inlining pass +has elided the JUMP that would normally carry the target. The +callee identity (`identifier`, `declaration`, `type`) remains +meaningful in this case; a sibling `transform: ["inline"]` key +on the same context indicates that the call was inlined rather +than physically invoked. 
+ Date: Wed, 1 Jul 2026 22:17:33 -0400 Subject: [PATCH 03/21] format: add transform context for compiler optimizations (#212) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * format: add transform context for compiler optimizations Adds a new context type annotating instructions with the compiler transformations that produced them. The value is an array of short identifiers; the list may repeat the same identifier when the transformation has been applied multiple times (e.g., ["inline", "inline"] for doubly-inlined code). Transform is *additional* annotation. The invoke/return contexts for the logical call are still emitted at the call boundary so debuggers see the source-level call stack; the transform context tells debuggers how the call was physically realized. Consumers that ignore transform contexts get a sound source-level view from the semantic contexts alone. v1 identifiers: - "inline": marked instruction is part of an inlined function body; surrounding invoke/return contexts name the inlined callee. - "tailcall": marked instruction is a tail-call-optimized back-edge JUMP or continuation, where the call was realized without pushing/popping a full activation. The identifier set is extensible. Debuggers unfamiliar with a given identifier should preserve it as an opaque label. Order in the array is not semantically significant — the multiset is what matters. Unblocks the final shape of TCO back-edge annotations in bugc (#210): a tail-call-optimized JUMP can now carry `gather: [return, invoke, transform: ["tailcall"]]`. Includes: - schemas/program/context/transform.schema.yaml - schemas/program/context.schema.yaml: wire into the if/$ref union. - packages/format/src/types/program/context.ts: Context.Transform interface, isTransform guard, and Transform.Identifier union preserving autocomplete for known values. 
- packages/format/src/types/program/context.test.ts: register Context.isTransform with the schema guard test harness. - packages/web/spec/program/context/transform.mdx: spec page covering role, v1 identifiers, repetition/composition, and interaction with gather. * format: expand transform v1 vocabulary with fold and coalesce Adds two more identifiers to the v1 transform context vocabulary, based on bugc optimizer's audit of transformations the compiler currently performs or will perform: - "fold" — compile-time constant folding. The marked instruction carries the result (typically a PUSH) replacing a compute sequence that appeared in source. - "coalesce" — read-write merging. The marked instruction is part of a SHL/OR sequence (or similar) introduced by the compiler to combine adjacent source-level reads or writes, such as packing narrower fields into a single storage slot. Together with the previously-defined "inline" and "tailcall", this covers the four transformations bugc emits today or will emit in the near term (inline once a function inlining pass lands). Propagate was considered for v1 and deferred as borderline. Updates: - transform.schema.yaml: description enumerates the four v1 identifiers; examples include single-identifier cases for each plus combinations ["inline", "fold"], ["coalesce", "coalesce"]. - context.ts: Transform.Identifier union extended with "fold" and "coalesce" (still keeps `string & {}` for extensibility and autocomplete). - transform.mdx: subsection for each identifier with a concrete EVM-level example, updated repetition/composition section with new combinations. * format: prefer flat context composition, document gather scope The context schema's discriminator keys combine via allOf of if/then rules, so a single context object can carry multiple keys at once (e.g., `invoke`, `return`, and `transform` all side by side). Use gather only when two contexts would collide on the same key. 
- transform spec: switch the TCO back-edge example from gather to the flat form; revise the tailcall bullet accordingly - transform schema: note in the description that flat composition is preferred; gather is for key collisions - gather spec: add a "When to use" section flagging the flat form as the default and listing the canonical collision cases (multiple frames, multiple variables blocks) --- .../format/src/types/program/context.test.ts | 4 + packages/format/src/types/program/context.ts | 30 ++++- packages/web/spec/program/context/gather.mdx | 28 +++++ .../web/spec/program/context/transform.mdx | 119 ++++++++++++++++++ schemas/program/context.schema.yaml | 8 ++ schemas/program/context/transform.schema.yaml | 79 ++++++++++++ 6 files changed, 267 insertions(+), 1 deletion(-) create mode 100644 packages/web/spec/program/context/transform.mdx create mode 100644 schemas/program/context/transform.schema.yaml diff --git a/packages/format/src/types/program/context.test.ts b/packages/format/src/types/program/context.test.ts index 4470a322d..a36e1e4c7 100644 --- a/packages/format/src/types/program/context.test.ts +++ b/packages/format/src/types/program/context.test.ts @@ -46,4 +46,8 @@ testSchemaGuards("ethdebug/format/program/context", [ schema: "schema:ethdebug/format/program/context/function/revert", guard: Context.isRevert, }, + { + schema: "schema:ethdebug/format/program/context/transform", + guard: Context.isTransform, + }, ] as const); diff --git a/packages/format/src/types/program/context.ts b/packages/format/src/types/program/context.ts index 5c9bfb7c9..e943f1ec2 100644 --- a/packages/format/src/types/program/context.ts +++ b/packages/format/src/types/program/context.ts @@ -11,7 +11,8 @@ export type Context = | Context.Frame | Context.Invoke | Context.Return - | Context.Revert; + | Context.Revert + | Context.Transform; export const isContext = (value: unknown): value is Context => [ @@ -24,6 +25,7 @@ export const isContext = (value: unknown): value is Context 
=> Context.isInvoke, Context.isReturn, Context.isRevert, + Context.isTransform, ].some((guard) => guard(value)); export namespace Context { @@ -273,4 +275,30 @@ export namespace Context { (!("reason" in value) || Function.isPointerRef(value.reason)) && (!("panic" in value) || typeof value.panic === "number"); } + + export interface Transform { + transform: Transform.Identifier[]; + } + + export const isTransform = (value: unknown): value is Transform => + typeof value === "object" && + !!value && + "transform" in value && + Array.isArray(value.transform) && + value.transform.length > 0 && + value.transform.every( + (item) => typeof item === "string" && item.length > 0, + ); + + export namespace Transform { + // Recognized v1 identifiers. Unknown strings are permitted + // (the identifier set is extensible); the union preserves + // autocomplete for known values. + export type Identifier = + | "inline" + | "tailcall" + | "fold" + | "coalesce" + | (string & {}); + } } diff --git a/packages/web/spec/program/context/gather.mdx b/packages/web/spec/program/context/gather.mdx index eb9cc3613..0501ed70d 100644 --- a/packages/web/spec/program/context/gather.mdx +++ b/packages/web/spec/program/context/gather.mdx @@ -6,6 +6,34 @@ import SchemaViewer from "@site/src/components/SchemaViewer"; # Gather multiple contexts +A `gather` context asserts that every one of its child contexts +holds at the marked instruction. It is the tool for composing +multiple context facts that cannot coexist as sibling keys on a +single object. + + +## When to use `gather` + +The context schema is compositional: one context object may +carry any number of the defined discriminator keys together — +`code`, `variables`, `invoke`, `return`, `transform`, and so on +all compose as siblings on the same object. Prefer the flat +form when it works. + +Reach for `gather` only when two or more facts would collide on +the same key. 
The canonical cases are: + +- **Multiple `frame`s** — an instruction that maps + simultaneously to an IR step and a source step needs one + entry per frame, each with its own `code` range. +- **Multiple `variables` blocks** — when separate pipeline + passes each contribute variable information (e.g., one + names the variable, the other supplies its pointer), each + set lives in its own context. + +If every child context uses a different discriminator key, a +`gather` can be collapsed into a single flat object with the +same meaning — and that flat form is the preferred style. diff --git a/packages/web/spec/program/context/transform.mdx b/packages/web/spec/program/context/transform.mdx new file mode 100644 index 000000000..427e42eb6 --- /dev/null +++ b/packages/web/spec/program/context/transform.mdx @@ -0,0 +1,119 @@ +--- +sidebar_position: 8 +--- + +import SchemaViewer from "@site/src/components/SchemaViewer"; + +# Transform contexts + +A transform context annotates an instruction with the compiler +transformations that produced it. The value is a list of short +identifiers; the list may repeat the same identifier when the +transformation has been applied multiple times—for example, +doubly-inlined code carries `transform: ["inline", "inline"]`. + + + +## Role: additional annotation + +A transform context does not replace semantic contexts. When the +compiler inlines a function, the caller's debug info should still +carry invoke/return contexts naming the inlined callee at the +call boundary—so the debugger's logical call stack reflects the +source-level structure. The transform context is _additional_ +information telling the debugger **how** the call was realized. + +Consumers are free to ignore transform contexts entirely; the +invoke/return contexts alone always give a sound source-level +view. 
Consumers that understand transform contexts can offer +optimization-aware presentations: + +- Render inlined code as a collapsible block tied to the + original callee's source location. +- Show which call sites were tail-call-optimized vs. realized as + full call/return sequences. +- Explain apparent anomalies in the trace (e.g., a JUMP that + carries an invoke context is a TCO back-edge). + +## v1 identifiers + +Four identifiers are recognized in v1: + +- **`"inline"`** — the marked instruction is part of an inlined + function body. Surrounding invoke/return contexts name the + inlined callee; this marker tells the debugger the physical + code does not correspond to a separate activation record. +- **`"tailcall"`** — the marked instruction is a + tail-call-optimized back-edge JUMP or continuation, where the + call was realized without pushing/popping a full activation. + A JUMP carrying a `tailcall` transform typically sits on a + context that also carries both a `return` (from the previous + iteration) and an `invoke` (of the new iteration). +- **`"fold"`** — the marked instruction carries the result of + a compile-time constant fold. Typically a PUSH of the folded + value replacing a compute sequence (e.g., `ADD` over two + known constants) that appeared in source. The instruction's + surrounding `code` context, if present, points to the + original expression. +- **`"coalesce"`** — the marked instruction is part of a + read-write merging sequence the compiler introduced to + combine adjacent source-level reads or writes. Common + examples include SHL/OR sequences that pack narrower fields + into a single storage slot, or wider loads split into + narrower field extractions. The user did not write these + instructions directly; the `coalesce` marker lets a debugger + present the sequence as one source-level operation rather + than stepping through each byte-shuffling opcode. + +The identifier set is extensible. 
Compilers may emit additional +identifiers for optimizations not yet standardized; debuggers +should preserve unfamiliar identifiers as opaque labels rather +than rejecting them. + +## Repetition and composition + +Identifiers may repeat. A function inlined into another inlined +function produces `transform: ["inline", "inline"]`. A coalesce +sequence nested inside another coalesced region produces +`transform: ["coalesce", "coalesce"]`. + +Different transformations compose: +`transform: ["inline", "tailcall"]` marks an instruction inside +an inlined body that was itself a TCO back-edge in the callee; +`transform: ["inline", "fold"]` marks a constant-folded PUSH +sitting inside an inlined body. + +Order in the array is not semantically significant—only the +multiset of identifiers matters. + +## Composing with other contexts + +A context object can carry several discriminator keys at once — +`code`, `variables`, `invoke`, `return`, `transform`, and so on +all live in the same object. A TCO back-edge JUMP, for example, +typically combines three facts as sibling keys on a single +context: + +```yaml +return: + identifier: "fact" + declaration: { ... } +invoke: + jump: true + identifier: "fact" + target: { pointer: { location: code, offset: ... } } +transform: ["tailcall"] +``` + +The `return` and `invoke` state the source-level facts +(iteration N returned, iteration N+1 was invoked); the +`transform` explains how the compiler realized that pair as a +single JUMP. + +Reach for [`gather`](/spec/program/context/gather) only when +two contexts would collide on the same key — e.g., two +independent `variables` blocks or two `frame`s from different +pipeline stages. When keys don't collide, the flat form is +preferred. 
diff --git a/schemas/program/context.schema.yaml b/schemas/program/context.schema.yaml index a57fce654..1a82e76df 100644 --- a/schemas/program/context.schema.yaml +++ b/schemas/program/context.schema.yaml @@ -89,6 +89,14 @@ allOf: description: | Indicates association with a function revert. $ref: "schema:ethdebug/format/program/context/function/revert" + - if: + required: ["transform"] + then: + description: | + Compiler transformations applied to produce this instruction + (e.g., inlining, tail-call optimization). Additional + annotation — does not replace semantic contexts. + $ref: "schema:ethdebug/format/program/context/transform" unevaluatedProperties: false diff --git a/schemas/program/context/transform.schema.yaml b/schemas/program/context/transform.schema.yaml new file mode 100644 index 000000000..8951a00ca --- /dev/null +++ b/schemas/program/context/transform.schema.yaml @@ -0,0 +1,79 @@ +$schema: "https://json-schema.org/draft/2020-12/schema" +$id: "schema:ethdebug/format/program/context/transform" + +title: ethdebug/format/program/context/transform +description: | + Annotates an instruction with compiler transformations that + produced it. The value is a list of short identifiers naming + each transformation; the list may repeat an identifier when + the same transformation has been applied more than once (e.g., + `["inline", "inline"]` for doubly-inlined code). + + A transform context is *additional* annotation — it does not + replace semantic contexts. When the compiler inlines a + function, the invoke/return contexts for the logical call + should still be emitted at the call boundary so the debugger's + source-level call stack remains coherent. The transform + context tells debuggers **how** the call was realized. + + Combine a transform with other discriminator keys (`invoke`, + `return`, `code`, etc.) by placing them side-by-side on the + same context object — `gather` is only needed when two + contexts would collide on the same key. 
+ + Consumers that ignore transform contexts still get a sound + source-level view from the invoke/return contexts alone. + Consumers that understand transform contexts can offer + optimization-aware presentations — e.g., rendering inlined + code as a collapsible block, or reconciling tail-call-optimized + back-edges with the logical call stack. + + The identifier set is extensible. v1 defines: + + - `"inline"` — the marked instruction is part of an inlined + function body. Surrounding invoke/return contexts name the + inlined callee. + - `"tailcall"` — the marked instruction is a + tail-call-optimized back-edge JUMP or continuation, where + the call was realized as a direct jump (or reuse of the + caller's frame) rather than a standard call/return sequence. + - `"fold"` — the marked instruction carries the result of a + compile-time constant fold. Typically a PUSH of the folded + value, replacing a compute sequence that appeared in source. + - `"coalesce"` — the marked instruction is part of a + read-write merging sequence (e.g., SHL/OR sequences packing + narrower fields into a wider word) that the user did not + explicitly write; the compiler introduced it to combine + adjacent source-level reads or writes. + + Debuggers unfamiliar with a given identifier should preserve + it as an opaque label. + + Order in the array is not semantically significant — only the + multiset of identifiers matters. + +type: object +properties: + transform: + title: Applied transformations + description: | + List of transformation identifiers. Identifiers may + repeat; order is not semantically significant. 
+ type: array + items: + type: string + minLength: 1 + minItems: 1 + +required: + - transform + +examples: + - transform: ["inline"] + - transform: ["tailcall"] + - transform: ["fold"] + - transform: ["coalesce"] + - transform: ["inline", "inline"] + - transform: ["inline", "tailcall"] + - transform: ["inline", "fold"] + - transform: ["coalesce", "coalesce"] From 314b42cdfdb9ca016b35c910f1cd4cf48e9a23a9 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Wed, 1 Jul 2026 23:00:28 -0400 Subject: [PATCH 04/21] bugc: emit tailcall transform context on TCO back-edge (#217) The TCO back-edge JUMP already carries a flat context with both invoke (the new iteration's call) and return (the previous iteration's return). Add a third sibling key, transform: ["tailcall"], marking the instruction as a tail-call-optimized back-edge. This is an additive annotation: it does not replace the invoke/return pair (which state the source-level facts) but tells debuggers the pair was realized as a TCO back-edge rather than a real frame push/pop, so they can avoid inventing a spurious frame. Consumers that ignore transform contexts still get a sound source-level view from invoke/return alone. Widens the emitted context type to Return & Invoke & Transform and extends the optimizer-contexts test to assert the back-edge JUMP carries transform containing "tailcall". --- .../evmgen/generation/control-flow/terminator.ts | 16 +++++++++++++--- .../bugc/src/evmgen/optimizer-contexts.test.ts | 11 +++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/packages/bugc/src/evmgen/generation/control-flow/terminator.ts b/packages/bugc/src/evmgen/generation/control-flow/terminator.ts index 21284ac1b..94cbf9caa 100644 --- a/packages/bugc/src/evmgen/generation/control-flow/terminator.ts +++ b/packages/bugc/src/evmgen/generation/control-flow/terminator.ts @@ -411,10 +411,11 @@ function generateReturnEpilogue( /** * Build JUMP instruction options for a TCO-replaced tail call. 
* - * The JUMP carries BOTH discriminators on a single flat - * context object: + * The JUMP carries three keys on a single flat context + * object: * - return: the previous iteration's return * - invoke: the new iteration's call + * - transform: ["tailcall"] * * Semantically the debugger sees frame depth stay constant * across the back-edge JUMP: the previous frame pops, the @@ -422,6 +423,13 @@ function generateReturnEpilogue( * terminal RETURN (elsewhere) emits a return context * normally, popping the final iteration's frame. * + * The `transform: ["tailcall"]` key is an additive + * annotation: it does not replace the invoke/return pair + * (which state the source-level facts) but tells debuggers + * the pair was realized as a TCO back-edge rather than a + * real frame push/pop, so they can avoid inventing a + * spurious frame. + * * The invoke mirrors the normal caller-JUMP invoke * (identity + declaration + code target, no argument * pointers). The return omits `data` because TCO does not @@ -443,7 +451,8 @@ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { : undefined; const combined: Format.Program.Context.Return & - Format.Program.Context.Invoke = { + Format.Program.Context.Invoke & + Format.Program.Context.Transform = { return: { identifier: tailCall.function, ...(declaration ? 
{ declaration } : {}), @@ -460,6 +469,7 @@ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { }, }, }, + transform: ["tailcall"], }; return { debug: { context: combined as Format.Program.Context } }; diff --git a/packages/bugc/src/evmgen/optimizer-contexts.test.ts b/packages/bugc/src/evmgen/optimizer-contexts.test.ts index d13cdfa33..f471408e9 100644 --- a/packages/bugc/src/evmgen/optimizer-contexts.test.ts +++ b/packages/bugc/src/evmgen/optimizer-contexts.test.ts @@ -497,6 +497,17 @@ code { r = count(0, 5); }`; Context.isReturn(instr.context), ); expect(tcoJump).toBeDefined(); + + // The same back-edge JUMP additionally carries a + // `transform: ["tailcall"]` context. This is an + // additive annotation telling debuggers the + // invoke/return pair was realized as a TCO + // back-edge rather than a real frame push/pop. + expect(Context.isTransform(tcoJump!.context)).toBe(true); + expect( + (tcoJump!.context as Format.Program.Context.Transform).transform, + ).toContain("tailcall"); + const ctx = tcoJump!.context as Format.Program.Context.Invoke; const invocation = ctx.invoke; expect(Invocation.isInternalCall(invocation)).toBe(true); From 3d5b461dbcf7c2da7b1f2eac4796cfd544b6efb6 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Wed, 1 Jul 2026 23:34:08 -0400 Subject: [PATCH 05/21] programs-react: render tailcall transform + fix TCO call stack (#218) Add tailcall (transform context) support to the trace widgets: - extractTransformFromInstruction: gather/pick-aware collector for compiler transform identifiers (duck-typed until #212's guard lands) - extractCallInfoFromInstruction: attach isTailCall when a tailcall transform is present alongside the invoke/return - buildCallStack: a TCO back-edge carries both return and invoke on one instruction; replace the top frame in place (reuse) instead of popping to empty, and mark it isTailCall. Fixes a real call-stack correctness bug for tail-recursive loops. 
- CallStackDisplay: tail-call chip on the reused frame - CallInfoPanel: tail-call banner variant - Propagate isTailCall through ResolvedCallFrame / ResolvedCallInfo - CSS (+ web theme copies) for the transform/tailcall styling Tested: 9 new unit tests in mockTrace.test.ts covering extraction, the isTailCall flag, and frame replacement. Does not touch the docs TraceDrawer opt level or examples (held for product decisions). --- .../src/components/CallInfoPanel.css | 6 + .../src/components/CallInfoPanel.tsx | 17 ++- .../src/components/CallStackDisplay.css | 12 ++ .../src/components/CallStackDisplay.tsx | 8 ++ .../src/components/TraceContext.tsx | 6 + packages/programs-react/src/index.ts | 1 + packages/programs-react/src/utils/index.ts | 1 + .../src/utils/mockTrace.test.ts | 117 ++++++++++++++++++ .../programs-react/src/utils/mockTrace.ts | 98 ++++++++++++++- .../theme/ProgramExample/CallInfoPanel.css | 6 + .../theme/ProgramExample/CallStackDisplay.css | 12 ++ 11 files changed, 276 insertions(+), 8 deletions(-) create mode 100644 packages/programs-react/src/utils/mockTrace.test.ts diff --git a/packages/programs-react/src/components/CallInfoPanel.css b/packages/programs-react/src/components/CallInfoPanel.css index 75cd06511..b2835861c 100644 --- a/packages/programs-react/src/components/CallInfoPanel.css +++ b/packages/programs-react/src/components/CallInfoPanel.css @@ -27,6 +27,12 @@ border-left: 3px solid var(--programs-revert-accent, #cf222e); } +.call-info-banner-tailcall { + background: var(--programs-transform-bg, #f3ecff); + color: var(--programs-transform-text, #8250df); + border-left: 3px solid var(--programs-transform-accent, #a475f9); +} + .call-info-refs { display: flex; flex-direction: column; diff --git a/packages/programs-react/src/components/CallInfoPanel.tsx b/packages/programs-react/src/components/CallInfoPanel.tsx index 09da2b3d7..e0e10ed2b 100644 --- a/packages/programs-react/src/components/CallInfoPanel.tsx +++ 
b/packages/programs-react/src/components/CallInfoPanel.tsx @@ -29,6 +29,10 @@ function formatBanner(info: ResolvedCallInfo): string { ? `(${info.argumentNames.join(", ")})` : "()"; + if (info.isTailCall) { + return `Tail call: ${name} (frame reused)`; + } + if (info.kind === "invoke") { const prefix = info.callType === "external" @@ -50,11 +54,14 @@ function formatBanner(info: ResolvedCallInfo): string { return `Reverted in ${name}()`; } -function bannerClassName(kind: ResolvedCallInfo["kind"]): string { - if (kind === "invoke") { +function bannerClassName(info: ResolvedCallInfo): string { + if (info.isTailCall) { + return "call-info-banner-tailcall"; + } + if (info.kind === "invoke") { return "call-info-banner-invoke"; } - if (kind === "return") { + if (info.kind === "return") { return "call-info-banner-return"; } return "call-info-banner-revert"; @@ -76,9 +83,7 @@ export function CallInfoPanel({ return (
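-      <div
-        className={`call-info-banner ${bannerClassName(currentCallInfo.kind)}`}
-      >
+      <div className={`call-info-banner ${bannerClassName(currentCallInfo)}`}>
         {formatBanner(currentCallInfo)}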
diff --git a/packages/programs-react/src/components/CallStackDisplay.css b/packages/programs-react/src/components/CallStackDisplay.css index 9143b8d76..90afee044 100644 --- a/packages/programs-react/src/components/CallStackDisplay.css +++ b/packages/programs-react/src/components/CallStackDisplay.css @@ -48,3 +48,15 @@ .call-stack-parens { color: var(--programs-text-muted, #888); } + +.call-stack-tailcall { + margin-left: 4px; + padding: 0 5px; + border-radius: 8px; + font-size: 0.8em; + font-weight: 500; + white-space: nowrap; + background: var(--programs-transform-bg, #f3ecff); + color: var(--programs-transform-text, #8250df); + border: 1px solid var(--programs-transform-accent, #a475f9); +} diff --git a/packages/programs-react/src/components/CallStackDisplay.tsx b/packages/programs-react/src/components/CallStackDisplay.tsx index 09e2bf7aa..983f75be1 100644 --- a/packages/programs-react/src/components/CallStackDisplay.tsx +++ b/packages/programs-react/src/components/CallStackDisplay.tsx @@ -94,6 +94,14 @@ export function CallStackDisplay({ ({formatArgs(frame, resolvedCallStack)}) + {frame.isTailCall && ( + + ⮌ tail call + + )} ))} diff --git a/packages/programs-react/src/components/TraceContext.tsx b/packages/programs-react/src/components/TraceContext.tsx index 581b72143..795f3f2f4 100644 --- a/packages/programs-react/src/components/TraceContext.tsx +++ b/packages/programs-react/src/components/TraceContext.tsx @@ -118,6 +118,8 @@ export interface ResolvedCallInfo { panic?: number; /** Resolved pointer refs */ pointerRefs: ResolvedPointerRef[]; + /** True when a tailcall transform is present (TCO). */ + isTailCall?: boolean; } /** @@ -136,6 +138,8 @@ export interface ResolvedCallFrame { value?: string; error?: string; }>; + /** True when this frame was (re)entered via a tail call. 
*/ + isTailCall?: boolean; } /** @@ -382,6 +386,7 @@ export function TraceProvider({ identifier: frame.identifier, stepIndex: frame.stepIndex, callType: frame.callType, + isTailCall: frame.isTailCall, resolvedArgs: argCacheRef.current.get(frame.stepIndex), })); setResolvedCallStack(initial); @@ -477,6 +482,7 @@ export function TraceProvider({ callType: extractedCallInfo.callType, argumentNames: extractedCallInfo.argumentNames, panic: extractedCallInfo.panic, + isTailCall: extractedCallInfo.isTailCall, pointerRefs: extractedCallInfo.pointerRefs.map((ref) => ({ label: ref.label, pointer: ref.pointer, diff --git a/packages/programs-react/src/index.ts b/packages/programs-react/src/index.ts index 6253933c2..8d8610771 100644 --- a/packages/programs-react/src/index.ts +++ b/packages/programs-react/src/index.ts @@ -59,6 +59,7 @@ export { findInstructionAtPc, extractVariablesFromInstruction, extractCallInfoFromInstruction, + extractTransformFromInstruction, buildPcToInstructionMap, buildCallStack, type CallInfo, diff --git a/packages/programs-react/src/utils/index.ts b/packages/programs-react/src/utils/index.ts index a79f07b08..e6dfdbefb 100644 --- a/packages/programs-react/src/utils/index.ts +++ b/packages/programs-react/src/utils/index.ts @@ -18,6 +18,7 @@ export { findInstructionAtPc, extractVariablesFromInstruction, extractCallInfoFromInstruction, + extractTransformFromInstruction, buildPcToInstructionMap, buildCallStack, type TraceStep, diff --git a/packages/programs-react/src/utils/mockTrace.test.ts b/packages/programs-react/src/utils/mockTrace.test.ts new file mode 100644 index 000000000..49ea5e568 --- /dev/null +++ b/packages/programs-react/src/utils/mockTrace.test.ts @@ -0,0 +1,117 @@ +/** + * Tests for trace context extraction, transform (tailcall) + * detection, and call-stack construction. 
+ */ + +import { describe, it, expect } from "vitest"; +import type { Program } from "@ethdebug/format"; +import { + extractTransformFromInstruction, + extractCallInfoFromInstruction, + buildCallStack, + buildPcToInstructionMap, + type TraceStep, +} from "./mockTrace.js"; + +/** Build a minimal instruction with a context at an offset. */ +function instr(offset: number, context: unknown): Program.Instruction { + return { + offset, + operation: { mnemonic: "JUMPDEST", arguments: [] }, + context, + } as unknown as Program.Instruction; +} + +describe("extractTransformFromInstruction", () => { + it("returns identifiers from a direct transform context", () => { + const i = instr(0, { transform: ["tailcall"] }); + expect(extractTransformFromInstruction(i)).toEqual(["tailcall"]); + }); + + it("finds transform identifiers nested inside a gather", () => { + const i = instr(0, { + gather: [ + { return: { identifier: "sum" } }, + { invoke: { jump: true, identifier: "sum" } }, + { transform: ["tailcall"] }, + ], + }); + expect(extractTransformFromInstruction(i)).toEqual(["tailcall"]); + }); + + it("collects multiple identifiers across nested contexts", () => { + const i = instr(0, { + gather: [{ transform: ["inline"] }, { transform: ["tailcall"] }], + }); + expect(extractTransformFromInstruction(i).sort()).toEqual([ + "inline", + "tailcall", + ]); + }); + + it("returns an empty array when no transform is present", () => { + const i = instr(0, { invoke: { jump: true, identifier: "sum" } }); + expect(extractTransformFromInstruction(i)).toEqual([]); + }); +}); + +describe("extractCallInfoFromInstruction tailcall flag", () => { + it("marks isTailCall when a tailcall transform is present", () => { + const i = instr(0, { + gather: [ + { return: { identifier: "sum" } }, + { invoke: { jump: true, identifier: "sum" } }, + { transform: ["tailcall"] }, + ], + }); + const info = extractCallInfoFromInstruction(i); + expect(info?.isTailCall).toBe(true); + }); + + it("leaves isTailCall falsy 
for a plain invoke", () => { + const i = instr(0, { invoke: { jump: true, identifier: "sum" } }); + const info = extractCallInfoFromInstruction(i); + expect(info?.isTailCall).toBeFalsy(); + }); +}); + +describe("buildCallStack TCO frame replacement", () => { + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMPDEST" }, // entry invoke → push sum + { pc: 10, opcode: "JUMP" }, // TCO back-edge → replace frame + ]; + + const program = { + instructions: [ + instr(0, { invoke: { jump: true, identifier: "sum" } }), + instr(10, { + gather: [ + { return: { identifier: "sum" } }, + { invoke: { jump: true, identifier: "sum" } }, + { transform: ["tailcall"] }, + ], + }), + ], + } as unknown as Program; + + const pcToInstruction = buildPcToInstructionMap(program); + + it("keeps the stack depth stable across a tail call", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + // Without the fix, the return-first gather pops to empty. + expect(stack).toHaveLength(1); + }); + + it("replaces the top frame and marks it as a tail call", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack[0].identifier).toBe("sum"); + expect(stack[0].isTailCall).toBe(true); + expect(stack[0].stepIndex).toBe(1); + }); + + it("does not mark a normal (pre-tailcall) frame", () => { + const stack = buildCallStack(trace, pcToInstruction, 0); + expect(stack).toHaveLength(1); + expect(stack[0].isTailCall).toBeFalsy(); + }); +}); diff --git a/packages/programs-react/src/utils/mockTrace.ts b/packages/programs-react/src/utils/mockTrace.ts index 26a912fc7..9279c85b4 100644 --- a/packages/programs-react/src/utils/mockTrace.ts +++ b/packages/programs-react/src/utils/mockTrace.ts @@ -2,7 +2,7 @@ * Utilities for creating mock execution traces. */ -import type { Program } from "@ethdebug/format"; +import { Program } from "@ethdebug/format"; /** * A single step in an execution trace. 
@@ -119,6 +119,49 @@ export interface CallInfo { label: string; pointer: unknown; }>; + /** + * True when a `tailcall` transform is present on the same + * instruction — the call was realized as a tail-call + * (TCO), reusing the current frame rather than nesting. + */ + isTailCall?: boolean; +} + +/** + * Extract compiler `transform` annotation identifiers + * (e.g. "tailcall", "inline") from an instruction's context + * tree, walking gather/pick composites. + */ +export function extractTransformFromInstruction( + instruction: Program.Instruction, +): string[] { + if (!instruction.context) { + return []; + } + return extractTransformFromContext(instruction.context); +} + +function extractTransformFromContext(context: Program.Context): string[] { + if (Program.Context.isTransform(context)) { + return context.transform; + } + + // gather/pick are still key-probed here, matching the + // sibling extractors in this file (a broader guard + // migration is tracked separately). + const ctx = context as unknown as Record; + + if ("gather" in ctx && Array.isArray(ctx.gather)) { + return (ctx.gather as Program.Context[]).flatMap( + extractTransformFromContext, + ); + } + + if ("pick" in ctx && Array.isArray(ctx.pick)) { + return (ctx.pick as Program.Context[]).flatMap(extractTransformFromContext); + } + + return []; } /** @@ -131,7 +174,14 @@ export function extractCallInfoFromInstruction( if (!instruction.context) { return undefined; } - return extractCallInfoFromContext(instruction.context); + const info = extractCallInfoFromContext(instruction.context); + if (!info) { + return undefined; + } + const isTailCall = extractTransformFromContext(instruction.context).includes( + "tailcall", + ); + return isTailCall ? 
{ ...info, isTailCall: true } : info; } function extractCallInfoFromContext( @@ -274,6 +324,22 @@ export interface CallFrame { argumentNames?: string[]; /** Individual argument pointers for value resolution */ argumentPointers?: unknown[]; + /** + * True when this frame was (re)entered via a tail call + * (TCO). The frame was reused in place rather than nested. + */ + isTailCall?: boolean; +} + +/** + * Determine the call type of a raw invoke record from its + * discriminant key. + */ +function invokeCallType(inv: Record): CallFrame["callType"] { + if ("jump" in inv) return "internal"; + if ("message" in inv) return "external"; + if ("create" in inv) return "create"; + return undefined; } /** @@ -299,6 +365,34 @@ export function buildCallStack( continue; } + if (callInfo.isTailCall) { + // A TCO back-edge carries both return and invoke on a + // single instruction: the previous iteration returns + // and the next iteration is invoked, reusing the same + // activation. Replace the top frame in place (depth is + // unchanged) rather than popping then pushing. Pull the + // new iteration's identity from the invoke leaf, since + // the return leaf may be surfaced first. + const ctx = instruction.context as Record; + const inv = findInvokeField(ctx); + const argResult = extractArgInfo(instruction); + const invId = inv?.identifier as string | undefined; + const frame: CallFrame = { + identifier: invId ?? callInfo.identifier, + stepIndex: i, + callType: inv ? invokeCallType(inv) : callInfo.callType, + argumentNames: argResult?.names, + argumentPointers: argResult?.pointers, + isTailCall: true, + }; + if (stack.length > 0) { + stack[stack.length - 1] = frame; + } else { + stack.push(frame); + } + continue; + } + if (callInfo.kind === "invoke") { // The compiler emits invoke on both the caller JUMP // and callee entry JUMPDEST for the same call. 
These diff --git a/packages/web/src/theme/ProgramExample/CallInfoPanel.css b/packages/web/src/theme/ProgramExample/CallInfoPanel.css index 75cd06511..b2835861c 100644 --- a/packages/web/src/theme/ProgramExample/CallInfoPanel.css +++ b/packages/web/src/theme/ProgramExample/CallInfoPanel.css @@ -27,6 +27,12 @@ border-left: 3px solid var(--programs-revert-accent, #cf222e); } +.call-info-banner-tailcall { + background: var(--programs-transform-bg, #f3ecff); + color: var(--programs-transform-text, #8250df); + border-left: 3px solid var(--programs-transform-accent, #a475f9); +} + .call-info-refs { display: flex; flex-direction: column; diff --git a/packages/web/src/theme/ProgramExample/CallStackDisplay.css b/packages/web/src/theme/ProgramExample/CallStackDisplay.css index 9143b8d76..90afee044 100644 --- a/packages/web/src/theme/ProgramExample/CallStackDisplay.css +++ b/packages/web/src/theme/ProgramExample/CallStackDisplay.css @@ -48,3 +48,15 @@ .call-stack-parens { color: var(--programs-text-muted, #888); } + +.call-stack-tailcall { + margin-left: 4px; + padding: 0 5px; + border-radius: 8px; + font-size: 0.8em; + font-weight: 500; + white-space: nowrap; + background: var(--programs-transform-bg, #f3ecff); + color: var(--programs-transform-text, #8250df); + border: 1px solid var(--programs-transform-accent, #a475f9); +} From 185e831842618840d5fb628b93acf02b575a0a36 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Wed, 1 Jul 2026 23:36:09 -0400 Subject: [PATCH 06/21] docs: add tail-call optimization tracing example (#219) Add two self-tail-recursive BUG programs (accumulator sum and factorial) and a Tail-call optimization section to the tracing page. Both programs fold under bugc's level-2 optimizer (verified: the recursive call terminator is eliminated and replaced with a loop trampoline), so they exercise the new tailcall transform context in the tracer widget. 
The section explains how a TCO back-edge JUMP composes return, invoke, and transform: ["tailcall"] as sibling keys on one context (the flat form), and how a debugger can reconcile that with the source-level call stack. Also register program/context/transform in the web schemaIndex; it was missing (gather was present), which broke the docs build for #212's transform spec page. --- .../core-schemas/programs/tracing-examples.ts | 42 +++++++++++ .../docs/core-schemas/programs/tracing.mdx | 71 +++++++++++++++++++ packages/web/src/schemas.ts | 11 ++- 3 files changed, 123 insertions(+), 1 deletion(-) diff --git a/packages/web/docs/core-schemas/programs/tracing-examples.ts b/packages/web/docs/core-schemas/programs/tracing-examples.ts index 08cb5f8df..a6c67fc5d 100644 --- a/packages/web/docs/core-schemas/programs/tracing-examples.ts +++ b/packages/web/docs/core-schemas/programs/tracing-examples.ts @@ -96,3 +96,45 @@ create { code { result = isEven(4); }`; + +export const tailRecursiveSum = `name TailSum; + +define { + function sum(n: uint256, acc: uint256) -> uint256 { + if (n == 0) { return acc; } + else { return sum(n - 1, acc + n); } + }; +} + +storage { + [0] result: uint256; +} + +create { + result = 0; +} + +code { + result = sum(5, 0); +}`; + +export const tailRecursiveFactorial = `name TailFactorial; + +define { + function fact(n: uint256, acc: uint256) -> uint256 { + if (n == 0) { return acc; } + else { return fact(n - 1, acc * n); } + }; +} + +storage { + [0] result: uint256; +} + +create { + result = 0; +} + +code { + result = fact(5, 1); +}`; diff --git a/packages/web/docs/core-schemas/programs/tracing.mdx b/packages/web/docs/core-schemas/programs/tracing.mdx index a22c6551d..b493e8990 100644 --- a/packages/web/docs/core-schemas/programs/tracing.mdx +++ b/packages/web/docs/core-schemas/programs/tracing.mdx @@ -11,6 +11,8 @@ import { multipleStorageSlots, functionCallAndReturn, mutualRecursion, + tailRecursiveSum, + tailRecursiveFactorial, } from 
"./tracing-examples"; # Tracing execution @@ -245,6 +247,75 @@ code instead of (or alongside) a reason pointer: }`} +## Tail-call optimization + +The recursion examples above push a new frame for every call — step +through them and watch the call stack grow. A compiler can often avoid +that. When a recursive call sits in **tail position** — its result is +returned directly, with no further work after it — the compiler can +reuse the current frame instead of pushing a new one. This is +**tail-call optimization** (TCO), and it turns recursion into a loop. + +The two programs below are written so bugc's optimizer folds them. Each +accumulates its result in an `acc` parameter and hands it to the next +call in tail position, so no work is left pending on the stack. + +Use the optimizer control on each example to switch between level 0 +(no optimization) and level 2 (TCO on), then step through and compare +the call stack. + + + + + +At level 0, each `sum`/`fact` call is a real invoke/return pair and the +call stack grows one frame per iteration. At level 2, the recursive +call becomes a **back-edge**: a single JUMP that ends one iteration and +begins the next without pushing a frame. The call stack stays flat. + +That one JUMP carries three facts at once, composed as sibling keys on a +single context — the flat form described on the +[transform context](/spec/program/context/transform) page: + + + {`{ + "return": { + "identifier": "sum" + }, + "invoke": { + "jump": true, + "identifier": "sum", + "target": { + "pointer": { "location": "code", "offset": "0x33", "length": 1 } + } + }, + "transform": ["tailcall"] +}`} + + +The `return` and `invoke` state the source-level facts — the previous +iteration returned, the next was invoked — and `transform: ["tailcall"]` +explains how the compiler realized that pair as one JUMP. 
Because no +value crosses a frame boundary here, the `return` carries no `data` and +the `invoke` no `arguments`: the accumulator is threaded through the +loop directly, and the invoke `target` points at the loop header the +JUMP re-enters. A debugger that ignores the transform still reads a +coherent invoke/return sequence; one that understands it can show that +the call stack isn't really growing, and present the recursion as the +loop it compiled to. + ## Trace data structure A trace step captures the EVM state at a single point: diff --git a/packages/web/src/schemas.ts b/packages/web/src/schemas.ts index 84ba8c701..31496043a 100644 --- a/packages/web/src/schemas.ts +++ b/packages/web/src/schemas.ts @@ -228,7 +228,16 @@ const programSchemaIndex: SchemaIndex = { href: "/spec/program/context", }, - ...["name", "code", "variables", "remark", "pick", "gather", "frame"] + ...[ + "name", + "code", + "variables", + "remark", + "pick", + "gather", + "frame", + "transform", + ] .map((name) => ({ [`schema:ethdebug/format/program/context/${name}`]: { href: `/spec/program/context/${name}`, From 25dd2ca61c0e140332f260f7533650fb86ad6c77 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Wed, 1 Jul 2026 23:47:35 -0400 Subject: [PATCH 07/21] docs: link first frame mention to its spec page (#220) In transform.mdx and gather.mdx, the first reference to frame contexts now links to /spec/program/context/frame, matching the existing [`gather`](...) link precedent. Frame is the one composition concept a reader reaching these pages may not have met yet. --- packages/web/spec/program/context/gather.mdx | 3 ++- packages/web/spec/program/context/transform.mdx | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/web/spec/program/context/gather.mdx b/packages/web/spec/program/context/gather.mdx index 0501ed70d..3e3a90b59 100644 --- a/packages/web/spec/program/context/gather.mdx +++ b/packages/web/spec/program/context/gather.mdx @@ -26,7 +26,8 @@ works. 
Reach for `gather` only when two or more facts would collide on the same key. The canonical cases are: -- **Multiple `frame`s** — an instruction that maps +- **Multiple [`frame`](/spec/program/context/frame)s** — an instruction + that maps simultaneously to an IR step and a source step needs one entry per frame, each with its own `code` range. - **Multiple `variables` blocks** — when separate pipeline diff --git a/packages/web/spec/program/context/transform.mdx b/packages/web/spec/program/context/transform.mdx index 427e42eb6..e2b4b3cd7 100644 --- a/packages/web/spec/program/context/transform.mdx +++ b/packages/web/spec/program/context/transform.mdx @@ -114,6 +114,7 @@ single JUMP. Reach for [`gather`](/spec/program/context/gather) only when two contexts would collide on the same key — e.g., two -independent `variables` blocks or two `frame`s from different +independent `variables` blocks or two +[`frame`](/spec/program/context/frame)s from different pipeline stages. When keys don't collide, the flat form is preferred. From 13a5843fc9aaa106769ad832e1bd150c336b29a5 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 00:16:07 -0400 Subject: [PATCH 08/21] bugc: preserve function loc/sourceId through optimizer (#223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cloneFunction dropped the optional Ir.Function.loc and sourceId fields, returning only { name, parameters, entry, blocks }. Since the first optimization pass clones the module, every function lost its declaration source info from optimization level 1 upward. evmgen gates declaration emission on func.loc && func.sourceId, so all invoke/return contexts lost their declaration source ranges at optimized levels — measurably 3/3 declared at level 0, 0/3 at levels 1-3 on the widget's runtimeInstructions path. Copy loc/sourceId in cloneFunction's return so declarations survive optimization. 
Adds a regression test asserting invoke/return contexts still carry declaration at levels 1, 2, and 3 (with a level-0 baseline). --- .../src/evmgen/optimizer-contexts.test.ts | 62 +++++++++++++++++++ packages/bugc/src/optimizer/optimizer.ts | 6 ++ 2 files changed, 68 insertions(+) diff --git a/packages/bugc/src/evmgen/optimizer-contexts.test.ts b/packages/bugc/src/evmgen/optimizer-contexts.test.ts index f471408e9..7891bf488 100644 --- a/packages/bugc/src/evmgen/optimizer-contexts.test.ts +++ b/packages/bugc/src/evmgen/optimizer-contexts.test.ts @@ -532,3 +532,65 @@ code { r = count(0, 5); }`; } }); }); + +/** + * Count invoke/return contexts that carry a `declaration` + * (source id + range for the called/returning function's + * declaration). Walks gather wrappers and flat + * multi-discriminator contexts. + */ +function countDeclarations(program: Format.Program): { + invoke: number; + return: number; +} { + let invoke = 0; + let ret = 0; + for (const instr of program.instructions) { + if (!instr.context) continue; + for (const leaf of unwrapLeaves(instr.context)) { + if (Context.isInvoke(leaf) && leaf.invoke.declaration) invoke += 1; + if (Context.isReturn(leaf) && leaf.return.declaration) ret += 1; + } + } + return { invoke, return: ret }; +} + +/** + * Regression: the optimizer must preserve each function's + * `loc`/`sourceId` so evmgen can emit `declaration` source + * ranges on invoke/return contexts. cloneFunction used to + * drop these fields, so every declaration vanished from + * optimization level 1 upward. 
+ */ +describe("optimizer preserves function declaration info", () => { + const source = `name Recur; + +define { + function count(n: uint256, target: uint256) -> uint256 { + if (n < target) { return count(n + 1, target); } + else { return n; } + }; +} + +storage { [0] r: uint256; } +create { r = 0; } +code { r = count(0, 5); }`; + + it("carries declarations at level 0 (baseline)", async () => { + const program = await compileAt(source, 0); + const decls = countDeclarations(program); + expect(decls.invoke).toBeGreaterThan(0); + expect(decls.return).toBeGreaterThan(0); + }); + + for (const level of [1, 2, 3] as const) { + it(`preserves declarations through optimization at level ${level}`, async () => { + const program = await compileAt(source, level); + const decls = countDeclarations(program); + // The optimizer must not strip function loc/sourceId: + // invoke/return contexts still carry declaration ranges. + expect(decls.invoke).toBeGreaterThan(0); + expect(decls.return).toBeGreaterThan(0); + }); + } +}); diff --git a/packages/bugc/src/optimizer/optimizer.ts b/packages/bugc/src/optimizer/optimizer.ts index 5b32a83cc..2165d479a 100644 --- a/packages/bugc/src/optimizer/optimizer.ts +++ b/packages/bugc/src/optimizer/optimizer.ts @@ -230,6 +230,12 @@ export abstract class BaseOptimizationStep implements OptimizationStep { parameters: [...func.parameters], entry: func.entry, blocks: clonedBlocks, + // Preserve declaration source info so evmgen can emit + // `declaration` ranges on invoke/return contexts. + // Dropping these here erased all declarations from + // optimization level 1 upward. + ...(func.loc ? { loc: func.loc } : {}), + ...(func.sourceId !== undefined ? { sourceId: func.sourceId } : {}), }; } From 9bb47f8d61fdee5e504337fa7163874457141961 Mon Sep 17 00:00:00 2001 From: "g. 
nicholas d'andrea" Date: Thu, 2 Jul 2026 00:43:10 -0400 Subject: [PATCH 09/21] =?UTF-8?q?web:=20tracer=20drawer=20=E2=80=94=20opt-?= =?UTF-8?q?level=20selector,=20tailcall=20render,=20right-column=20panels?= =?UTF-8?q?=20(#222)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * web: fix tracer-drawer opcodes/state panels not filling height The trace panels live in a flex:1 grid whose implicit row was auto- sized to content, so dragging the drawer taller left dead space below the panels instead of growing them. Give the grid an explicit 1fr row and min-height:0 (on the grid and its items) so both panels absorb the added vertical space and scroll internally. * web: add optimizer-level selector (O0/O2) to tracer drawer The drawer hardcoded optimizer level 0. Add an O0/O2 toggle in the drawer header that recompiles + retraces at the chosen level, so readers can flip to level 2 and watch optimizer transforms (e.g. the tailcall annotation on TCO back-edges) appear. compileAndTrace now takes the level explicitly; a ref mirrors the state so the example- load effect reads the current level without re-running on toggle. * programs-react: cover the flat (production) TCO back-edge shape bugc #217 emits the TCO back-edge as a single flat context object (return + invoke + transform keys together), but the existing tests only exercised the gather shape. Add flat-shape variants for transform extraction, the isTailCall flag, and frame replacement, plus a guard that stripping the marker (the #10 failure mode) drops tail-call handling. * web: dedupe call stack + tailcall render + right-column panels - Reuse the shared, tailcall-aware buildCallStack / extractCallInfo / extractTransform from @ethdebug/programs-react via a thin adapter (bugc .debug.context -> ethdebug format shape); drop the drawer's inline call-stack builder and local extractCallInfo. 
- Render the tail-call chip on the reused call-stack frame and a tail-call variant on the call-info banner. - Right column (gnidan's picks 1/2/4): resolved variable values (name: value via pointer resolution), gas remaining + per-step delta, and a transform annotations panel with per-tag glosses. Sections are now collapsible. * web: widen optimizer selector to O0/O1/O2/O3 gnidan's call: expose all four bugc optimizer levels (each distinct — L1 fold/prop/DCE, L2 +CSE/TCO/jump-opt, L3 +merging) rather than a two-state O0/O2 toggle, future-proofing for other transforms. The recompile+retrace already took the level; just widen the control from two buttons to four (mapped over OPT_LEVELS, per-level tooltips). The tailcall demo still lands on O2. --- .../src/utils/mockTrace.test.ts | 73 +++ .../src/theme/ProgramExample/TraceDrawer.css | 150 +++++ .../src/theme/ProgramExample/TraceDrawer.tsx | 596 ++++++++++-------- 3 files changed, 554 insertions(+), 265 deletions(-) diff --git a/packages/programs-react/src/utils/mockTrace.test.ts b/packages/programs-react/src/utils/mockTrace.test.ts index 49ea5e568..1d0bef30b 100644 --- a/packages/programs-react/src/utils/mockTrace.test.ts +++ b/packages/programs-react/src/utils/mockTrace.test.ts @@ -115,3 +115,76 @@ describe("buildCallStack TCO frame replacement", () => { expect(stack[0].isTailCall).toBeFalsy(); }); }); + +// The compiler (bugc #217) emits the TCO back-edge as a +// single FLAT context object carrying return + invoke + +// transform keys together (not a gather). This is the +// actual production shape, so it needs direct coverage. 
+describe("flat (production) TCO back-edge shape", () => { + const flatBackEdge = { + return: { identifier: "sum" }, + invoke: { + jump: true, + identifier: "sum", + target: { pointer: { location: "code", offset: 0, length: 1 } }, + }, + transform: ["tailcall"], + }; + + it("extracts the tailcall transform from the flat object", () => { + expect(extractTransformFromInstruction(instr(0, flatBackEdge))).toEqual([ + "tailcall", + ]); + }); + + it("marks isTailCall on the flat back-edge", () => { + const info = extractCallInfoFromInstruction(instr(0, flatBackEdge)); + expect(info?.isTailCall).toBe(true); + }); + + it("replaces the frame in place for a flat back-edge", () => { + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMPDEST" }, + { pc: 10, opcode: "JUMP" }, + ]; + const program = { + instructions: [ + instr(0, { invoke: { jump: true, identifier: "sum" } }), + instr(10, flatBackEdge), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack).toHaveLength(1); + expect(stack[0].identifier).toBe("sum"); + expect(stack[0].isTailCall).toBe(true); + expect(stack[0].callType).toBe("internal"); + }); + + it("loses tail-call handling when the marker is stripped", () => { + // Guards the task #10 failure mode: with the transform + // marker gone, the flat {return, invoke} back-edge is + // treated as a plain invoke — the frame-replacement path + // never runs, so no frame is flagged as a tail call and + // the widget can no longer render it as a frame reuse. 
+ const stripped = { + return: { identifier: "sum" }, + invoke: { jump: true, identifier: "sum" }, + }; + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMPDEST" }, + { pc: 10, opcode: "JUMP" }, + ]; + const program = { + instructions: [ + instr(0, { invoke: { jump: true, identifier: "sum" } }), + instr(10, stripped), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack.some((f) => f.isTailCall)).toBe(false); + }); +}); diff --git a/packages/web/src/theme/ProgramExample/TraceDrawer.css b/packages/web/src/theme/ProgramExample/TraceDrawer.css index 9f36d24c0..d320e269e 100644 --- a/packages/web/src/theme/ProgramExample/TraceDrawer.css +++ b/packages/web/src/theme/ProgramExample/TraceDrawer.css @@ -25,6 +25,61 @@ overflow: hidden; } +/* Header actions */ +.trace-drawer-actions { + display: flex; + align-items: center; + gap: 10px; +} + +.opt-level-toggle { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 2px 6px 2px 8px; + border-radius: 6px; + background: var(--ifm-color-emphasis-100); +} + +.opt-level-label { + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--ifm-color-content-secondary); + margin-right: 2px; +} + +.opt-level-btn { + padding: 2px 8px; + border: 1px solid var(--ifm-color-emphasis-300); + border-radius: 4px; + background: var(--ifm-background-color); + color: var(--ifm-color-content-secondary); + font-size: 12px; + font-weight: 600; + font-family: var(--ifm-font-family-monospace); + cursor: pointer; + transition: + background 0.15s, + color 0.15s; +} + +.opt-level-btn:hover:not(:disabled) { + background: var(--ifm-color-emphasis-200); +} + +.opt-level-btn.active { + background: var(--ifm-color-primary); + border-color: var(--ifm-color-primary); + color: white; +} + +.opt-level-btn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + /* Header buttons */ 
.trace-drawer-btn { padding: 6px 14px; @@ -204,11 +259,37 @@ border-left: 3px solid var(--ifm-color-danger); } +/* Transform / tail-call accent (no ifm purple semantic, so + a theme-tolerant purple tint with content-colored text). */ +.call-info-tailcall { + background: rgba(130, 80, 223, 0.12); + color: var(--ifm-color-content); + border-left: 3px solid #8250df; +} + +.call-stack-tailcall { + margin-left: 4px; + padding: 0 5px; + border-radius: 8px; + font-size: 10px; + font-weight: 600; + white-space: nowrap; + background: rgba(130, 80, 223, 0.15); + color: var(--ifm-color-content); + border: 1px solid rgba(130, 80, 223, 0.45); +} + /* Trace panels */ .trace-panels { display: grid; grid-template-columns: 1fr 1fr; + /* Single row that fills the flex height, so the panels + absorb vertical space when the drawer is drag-expanded + (an `auto` row would size to content and leave dead + space below). */ + grid-template-rows: 1fr; flex: 1; + min-height: 0; overflow: hidden; gap: 1px; background: var(--ifm-color-emphasis-200); @@ -216,6 +297,9 @@ .trace-panel { background: var(--ifm-background-color); + /* min-height:0 lets the grid item shrink so its own + overflow scrolls instead of expanding the grid. 
*/ + min-height: 0; overflow: auto; } @@ -248,6 +332,72 @@ color: var(--ifm-color-primary-darkest); } +.opcode-gas { + margin-left: auto; + font-size: 11px; + font-variant-numeric: tabular-nums; + color: var(--ifm-color-content-secondary); +} + +/* Collapsible right-column sections */ +.trace-section { + border-bottom: 1px solid var(--ifm-color-emphasis-200); +} + +.trace-section-summary { + padding: 6px 12px; + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--ifm-color-content-secondary); + background: var(--ifm-background-surface-color); + cursor: pointer; + user-select: none; + list-style-position: inside; +} + +.trace-section-summary:hover { + color: var(--ifm-color-content); +} + +/* Transform annotations */ +.transform-list { + padding: 6px 12px; + display: flex; + flex-direction: column; + gap: 6px; +} + +.transform-item { + display: flex; + align-items: baseline; + gap: 8px; + font-size: 12px; +} + +.transform-tag { + flex-shrink: 0; + padding: 0 6px; + border-radius: 8px; + font-family: var(--ifm-font-family-monospace); + font-weight: 600; + font-size: 11px; + color: var(--ifm-color-content); + background: rgba(130, 80, 223, 0.15); + border: 1px solid rgba(130, 80, 223, 0.45); +} + +.transform-gloss { + color: var(--ifm-color-content-secondary); +} + +.variable-value { + font-family: var(--ifm-font-family-monospace); + color: var(--ifm-color-content); + word-break: break-all; +} + .current-opcode .opcode-pc { font-size: 12px; color: var(--ifm-color-content-secondary); diff --git a/packages/web/src/theme/ProgramExample/TraceDrawer.tsx b/packages/web/src/theme/ProgramExample/TraceDrawer.tsx index cd9b37416..cf68e4d71 100644 --- a/packages/web/src/theme/ProgramExample/TraceDrawer.tsx +++ b/packages/web/src/theme/ProgramExample/TraceDrawer.tsx @@ -25,6 +25,15 @@ import { } from "@ethdebug/bugc-react"; import { Executor, createTraceCollector, type TraceStep } from "@ethdebug/evm"; import { dereference, 
Data, type Machine } from "@ethdebug/pointers"; +import { + buildCallStack, + extractCallInfoFromInstruction, + extractTransformFromInstruction, + type CallFrame, + type CallInfo, + type TraceStep as ProgramsTraceStep, +} from "@ethdebug/programs-react"; +import type { Program } from "@ethdebug/format"; import { Drawer } from "@theme/Drawer"; import { useTracePlayground } from "./TracePlaygroundContext"; @@ -42,6 +51,16 @@ interface CompileResult { bytecode?: BytecodeOutput; } +/** bugc optimizer levels the tracer can compile at. */ +type OptLevel = 0 | 1 | 2 | 3; +const OPT_LEVELS: readonly OptLevel[] = [0, 1, 2, 3]; +const OPT_LEVEL_TITLES: Record = { + 0: "No optimization", + 1: "Level 1 — constant folding, propagation, dead-code elimination", + 2: "Level 2 — adds CSE, tail-call optimization, jump optimization", + 3: "Level 3 — adds block/return/read-write merging", +}; + function TraceDrawerContent(): JSX.Element { const { example, isOpen, toggleDrawer, closeDrawer, setSource } = useTracePlayground(); @@ -56,6 +75,14 @@ function TraceDrawerContent(): JSX.Element { const [isTracing, setIsTracing] = useState(false); const [traceError, setTraceError] = useState(null); const [storage, setStorage] = useState>({}); + // Optimizer level the tracer compiles at. Readers flip + // 0 ↔ 2 to watch optimizer transforms (e.g. the tailcall + // annotation on TCO back-edges) appear. A ref mirrors it + // so the example-load effect can read the current value + // without re-running when only the level changes. 
+ const [optimizerLevel, setOptimizerLevel] = useState(0); + const optimizerLevelRef = useRef(optimizerLevel); + optimizerLevelRef.current = optimizerLevel; // Build PC -> instruction map for source highlighting const pcToInstruction = useMemo(() => { @@ -92,77 +119,52 @@ function TraceDrawerContent(): JSX.Element { return extractVariables(instruction.debug.context); }, [trace, currentStep, pcToInstruction]); - // Extract call info from current instruction context - const currentCallInfo = useMemo(() => { - if (trace.length === 0 || currentStep >= trace.length) { - return undefined; + // Adapt the bugc instruction map + evm trace to the shared + // programs-react call-stack helpers, which read the + // ethdebug format shape (instruction.context) and a {pc} + // trace. This lets the drawer reuse the same tailcall-aware + // buildCallStack as the standalone TraceViewer instead of + // duplicating the logic. + const formatPcToInstruction = useMemo(() => { + const m = new Map(); + for (const [pc, inst] of pcToInstruction) { + m.set(pc, { + offset: pc, + context: inst.debug?.context, + } as unknown as Program.Instruction); } + return m; + }, [pcToInstruction]); - const step = trace[currentStep]; - const instruction = pcToInstruction.get(step.pc); - if (!instruction?.debug?.context) return undefined; - - return extractCallInfo(instruction.debug.context); - }, [trace, currentStep, pcToInstruction]); + const programsTrace = useMemo( + () => trace.map((s) => ({ pc: s.pc, opcode: s.opcode })), + [trace], + ); - // Build call stack by scanning invoke/return/revert up to - // current step - const callStack = useMemo(() => { - const frames: Array<{ - identifier?: string; - stepIndex: number; - callType?: string; - argumentNames?: string[]; - argumentPointers?: unknown[]; - }> = []; - - for (let i = 0; i <= currentStep && i < trace.length; i++) { - const step = trace[i]; - const instruction = pcToInstruction.get(step.pc); - if (!instruction?.debug?.context) continue; - - const 
info = extractCallInfo(instruction.debug.context); - if (!info) continue; - - if (info.kind === "invoke") { - // The compiler emits invoke on both the caller - // JUMP and callee entry JUMPDEST for the same - // call. These occur on consecutive trace steps. - // Only skip if the top frame matches AND was - // pushed on the immediately preceding step — - // otherwise this is a new call (e.g. recursion). - const top = frames[frames.length - 1]; - const isDuplicate = - top && - top.identifier === info.identifier && - top.callType === info.callType && - top.stepIndex === i - 1; - if (isDuplicate) { - // Use the callee entry step for resolution — - // argument pointers reference stack slots - // valid at the JUMPDEST, not the JUMP. - // Argument names also live on the callee entry. - top.stepIndex = i; - top.argumentNames = info.argumentNames ?? top.argumentNames; - top.argumentPointers = info.argumentPointers; - } else { - frames.push({ - identifier: info.identifier, - stepIndex: i, - callType: info.callType, - argumentNames: info.argumentNames, - argumentPointers: info.argumentPointers, - }); - } - } else if (info.kind === "return" || info.kind === "revert") { - if (frames.length > 0) { - frames.pop(); - } - } - } + const currentInstruction = useMemo(() => { + const step = trace[currentStep]; + if (!step) return undefined; + return formatPcToInstruction.get(step.pc); + }, [trace, currentStep, formatPcToInstruction]); - return frames; - }, [trace, currentStep, pcToInstruction]); + // Extract call info from current instruction context + const currentCallInfo = useMemo(() => { + if (!currentInstruction) return undefined; + return extractCallInfoFromInstruction(currentInstruction); + }, [currentInstruction]); + + // Compiler transform tags on the current instruction + // (e.g. "tailcall"), for the transform annotations panel. 
+ const currentTransforms = useMemo(() => { + if (!currentInstruction) return []; + return extractTransformFromInstruction(currentInstruction); + }, [currentInstruction]); + + // Build call stack via the shared, tailcall-aware helper. + const callStack = useMemo( + () => buildCallStack(programsTrace, formatPcToInstruction, currentStep), + [programsTrace, formatPcToInstruction, currentStep], + ); // Resolve argument values for call stack frames const argCacheRef = useRef>(new Map()); @@ -237,94 +239,147 @@ function TraceDrawerContent(): JSX.Element { }; }, [callStack, trace, storage]); + // Resolve the current instruction's variable values by + // dereferencing each variable's pointer against the step + // state (reuses the same machinery as argument resolution). + const [resolvedVarValues, setResolvedVarValues] = useState< + Map + >(new Map()); + + useEffect(() => { + const step = trace[currentStep]; + if (!step || currentVariables.length === 0) { + setResolvedVarValues(new Map()); + return; + } + + let cancelled = false; + const state = traceStepToState(step, storage); + const next = new Map(); + + Promise.all( + currentVariables.map(async (v) => { + if (!v.pointer) return; + try { + next.set(v.identifier, await resolvePointer(v.pointer, state)); + } catch { + // leave unresolved + } + }), + ).then(() => { + if (!cancelled) setResolvedVarValues(next); + }); + + return () => { + cancelled = true; + }; + }, [currentVariables, currentStep, trace, storage]); + + // Gas remaining at the current step plus the delta consumed + // reaching it (when the executor reports gas). 
+ const gasText = useMemo(() => { + const step = trace[currentStep]; + if (!step || step.gasRemaining === undefined) return ""; + const rem = step.gasRemaining.toLocaleString(); + const prev = trace[currentStep - 1]; + if (prev?.gasRemaining !== undefined) { + const delta = prev.gasRemaining - step.gasRemaining; + if (delta > 0n) return `gas ${rem} (−${delta.toLocaleString()})`; + } + return `gas ${rem}`; + }, [trace, currentStep]); + // Compile source and run trace in one shot. // Takes source directly to avoid stale-state issues. - const compileAndTrace = useCallback(async (sourceCode: string) => { - setIsCompiling(true); - setCompileResult(null); - setTrace([]); - setCurrentStep(0); - setTraceError(null); - setStorage({}); - - let bytecode: BytecodeOutput | undefined; - - try { - const result = await bugCompile({ - to: "bytecode", - source: sourceCode, - optimizer: { level: 0 }, - }); + const compileAndTrace = useCallback( + async (sourceCode: string, level: OptLevel) => { + setIsCompiling(true); + setCompileResult(null); + setTrace([]); + setCurrentStep(0); + setTraceError(null); + setStorage({}); + + let bytecode: BytecodeOutput | undefined; - if (!result.success) { - const errors = result.messages[Severity.Error] || []; + try { + const result = await bugCompile({ + to: "bytecode", + source: sourceCode, + optimizer: { level }, + }); + + if (!result.success) { + const errors = result.messages[Severity.Error] || []; + setCompileResult({ + success: false, + error: errors[0]?.message || "Compilation failed", + }); + return; + } + + bytecode = { + runtime: result.value.bytecode.runtime, + create: result.value.bytecode.create, + runtimeInstructions: result.value.bytecode.runtimeInstructions, + createInstructions: result.value.bytecode.createInstructions, + }; + + setCompileResult({ success: true, bytecode }); + } catch (e) { setCompileResult({ success: false, - error: errors[0]?.message || "Compilation failed", + error: e instanceof Error ? 
e.message : String(e), }); return; + } finally { + setIsCompiling(false); } - bytecode = { - runtime: result.value.bytecode.runtime, - create: result.value.bytecode.create, - runtimeInstructions: result.value.bytecode.runtimeInstructions, - createInstructions: result.value.bytecode.createInstructions, - }; - - setCompileResult({ success: true, bytecode }); - } catch (e) { - setCompileResult({ - success: false, - error: e instanceof Error ? e.message : String(e), - }); - return; - } finally { - setIsCompiling(false); - } - - if (!bytecode) return; + if (!bytecode) return; - setIsTracing(true); + setIsTracing(true); - try { - const executor = new Executor(); + try { + const executor = new Executor(); - if (bytecode.create) { - const createHex = Array.from(bytecode.create) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - await executor.deploy(createHex); - } + if (bytecode.create) { + const createHex = Array.from(bytecode.create) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + await executor.deploy(createHex); + } - const [handler, getTrace] = createTraceCollector(); - await executor.execute({}, handler); + const [handler, getTrace] = createTraceCollector(); + await executor.execute({}, handler); - const collectedTrace = getTrace(); - setTrace(collectedTrace.steps); - setCurrentStep(0); + const collectedTrace = getTrace(); + setTrace(collectedTrace.steps); + setCurrentStep(0); - const storageEntries: Record = {}; - for (let i = 0n; i < 16n; i++) { - const value = await executor.getStorage(i); - if (value !== 0n) { - const slot = `0x${i.toString(16).padStart(2, "0")}`; - storageEntries[slot] = `0x${value.toString(16).padStart(64, "0")}`; + const storageEntries: Record = {}; + for (let i = 0n; i < 16n; i++) { + const value = await executor.getStorage(i); + if (value !== 0n) { + const slot = `0x${i.toString(16).padStart(2, "0")}`; + storageEntries[slot] = `0x${value.toString(16).padStart(64, "0")}`; + } } + setStorage(storageEntries); + } catch 
(e) { + setTraceError(e instanceof Error ? e.message : String(e)); + } finally { + setIsTracing(false); } - setStorage(storageEntries); - } catch (e) { - setTraceError(e instanceof Error ? e.message : String(e)); - } finally { - setIsTracing(false); - } - }, []); + }, + [], + ); // Auto compile+trace when a new example is loaded useEffect(() => { if (example?.source) { setLocalSource(example.source); - compileAndTrace(example.source); + compileAndTrace(example.source, optimizerLevelRef.current); } }, [example, compileAndTrace]); @@ -337,8 +392,17 @@ function TraceDrawerContent(): JSX.Element { ); const handleCompileAndTrace = useCallback(() => { - compileAndTrace(source); - }, [source, compileAndTrace]); + compileAndTrace(source, optimizerLevel); + }, [source, compileAndTrace, optimizerLevel]); + + const handleLevelChange = useCallback( + (level: OptLevel) => { + if (level === optimizerLevel) return; + setOptimizerLevel(level); + compileAndTrace(source, level); + }, + [source, compileAndTrace, optimizerLevel], + ); const stepForward = () => { setCurrentStep((prev) => Math.min(prev + 1, trace.length - 1)); @@ -395,18 +459,42 @@ function TraceDrawerContent(): JSX.Element { const isBusy = isCompiling || isTracing; const headerActions = ( - +
+
+ Opt + {OPT_LEVELS.map((level) => ( + + ))} +
+ +
); return ( @@ -522,6 +610,14 @@ function TraceDrawerContent(): JSX.Element { > {frame.identifier || "(anonymous)"}( {formatFrameArgs(frame, resolvedArgs)}) + {frame.isTailCall && ( + + ⮌ tail call + + )} )) @@ -530,7 +626,11 @@ function TraceDrawerContent(): JSX.Element { {currentCallInfo && (
{formatCallBanner(currentCallInfo)}
@@ -554,23 +654,34 @@ function TraceDrawerContent(): JSX.Element { @ 0x{currentTraceStep.pc.toString(16)} + {gasText && ( + {gasText} + )}
-
Stack
- + {currentTransforms.length > 0 && ( +
+ +
+ )} + +
+ +
{currentVariables.length > 0 && ( - <> -
Variables
- - +
+ +
)} {Object.keys(storage).length > 0 && ( - <> -
Storage
+
- +
)} )} @@ -685,107 +796,93 @@ function formatBigInt(value: bigint): string { interface Variable { identifier: string; type?: string; + pointer?: unknown; } interface VariablesDisplayProps { variables: Variable[]; + resolved: Map; } -function VariablesDisplay({ variables }: VariablesDisplayProps): JSX.Element { +function VariablesDisplay({ + variables, + resolved, +}: VariablesDisplayProps): JSX.Element { return (
- {variables.map((variable, i) => ( -
- {variable.identifier} - {variable.type && ( - {variable.type} - )} + {variables.map((variable, i) => { + const value = resolved.get(variable.identifier); + return ( +
+ {variable.identifier} + {value !== undefined && ( + {formatAsDecimal(value)} + )} + {variable.type && ( + {variable.type} + )} +
+ ); + })} +
+ ); +} + +/** One-line glosses for known transform identifiers. */ +const TRANSFORM_GLOSS: Record = { + tailcall: "tail call — frame reused, no new activation (TCO)", + inline: "inlined function body", + fold: "constant-folded at compile time", + coalesce: "merged read/write sequence", +}; + +function TransformList({ transforms }: { transforms: string[] }): JSX.Element { + return ( +
+ {transforms.map((t, i) => ( +
+ {t} + + {TRANSFORM_GLOSS[t] ?? "compiler transform"} +
))}
); } -/** - * Info about a call context (invoke/return/revert). - */ -interface CallInfoResult { - kind: "invoke" | "return" | "revert"; - identifier?: string; - callType?: string; - argumentNames?: string[]; - argumentPointers?: unknown[]; +/** A collapsible right-column section. */ +function Section({ + title, + defaultOpen = true, + children, +}: { + title: string; + defaultOpen?: boolean; + children: React.ReactNode; +}): JSX.Element { + return ( +
+ {title} + {children} +
+ ); } /** - * Extract call info from an ethdebug format context object. + * Info about a call context (invoke/return/revert). */ -function extractCallInfo(context: unknown): CallInfoResult | undefined { - if (!context || typeof context !== "object") { - return undefined; - } - - const ctx = context as Record; - - if ("invoke" in ctx && ctx.invoke) { - const inv = ctx.invoke as Record; - let callType: string | undefined; - if ("jump" in inv) callType = "internal"; - else if ("message" in inv) callType = "external"; - else if ("create" in inv) callType = "create"; - - const argInfo = extractArgInfoFromInvoke(inv); - return { - kind: "invoke", - identifier: inv.identifier as string | undefined, - callType, - argumentNames: argInfo?.names, - argumentPointers: argInfo?.pointers, - }; - } - - if ("return" in ctx && ctx.return) { - const ret = ctx.return as Record; - return { - kind: "return", - identifier: ret.identifier as string | undefined, - }; - } - - if ("revert" in ctx && ctx.revert) { - const rev = ctx.revert as Record; - return { - kind: "revert", - identifier: rev.identifier as string | undefined, - }; - } - - // Walk gather/pick - if ("gather" in ctx && Array.isArray(ctx.gather)) { - for (const sub of ctx.gather) { - const info = extractCallInfo(sub); - if (info) return info; - } - } - - if ("pick" in ctx && Array.isArray(ctx.pick)) { - for (const sub of ctx.pick) { - const info = extractCallInfo(sub); - if (info) return info; - } - } - - return undefined; -} - /** * Format a call info banner string. */ -function formatCallBanner(info: CallInfoResult): string { +function formatCallBanner(info: CallInfo): string { const name = info.identifier || "(anonymous)"; const params = info.argumentNames ? `(${info.argumentNames.join(", ")})` : "()"; + if (info.isTailCall) { + return `Tail call: ${name} (frame reused)`; + } switch (info.kind) { case "invoke": { const prefix = info.callType === "create" ? 
"Creating" : "Calling"; @@ -798,38 +895,6 @@ function formatCallBanner(info: CallInfoResult): string { } } -function extractArgInfoFromInvoke( - inv: Record, -): { names?: string[]; pointers?: unknown[] } | undefined { - const args = inv.arguments as Record | undefined; - if (!args) return undefined; - - const pointer = args.pointer as Record | undefined; - if (!pointer) return undefined; - - const group = pointer.group as Array> | undefined; - if (!Array.isArray(group)) return undefined; - - const names: string[] = []; - const pointers: unknown[] = []; - let hasAnyName = false; - for (const entry of group) { - const name = entry.name as string | undefined; - if (name) { - names.push(name); - hasAnyName = true; - } else { - names.push("_"); - } - pointers.push(entry); - } - - return { - names: hasAnyName ? names : undefined, - pointers, - }; -} - /** * Extract variables from an ethdebug format context object. */ @@ -849,6 +914,7 @@ function extractVariables(context: unknown): Variable[] { variables.push({ identifier: String(variable.identifier), type: variable.type ? formatType(variable.type) : undefined, + pointer: variable.pointer, }); } } From 3f2fa5eb9c78f9ddf3956b331a2794021cd3ee64 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 00:46:50 -0400 Subject: [PATCH 10/21] docs: match TCO example prose to the Opt selector (O0-O3) (#221) Update the tail-call-optimization walkthrough to reference the tracer drawer's Opt optimization-level selector (O0-O3), telling readers to compare O0 with O2 (TCO kicks in at level 2), instead of the generic "optimizer control" wording. 
--- packages/web/docs/core-schemas/programs/tracing.mdx | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/web/docs/core-schemas/programs/tracing.mdx b/packages/web/docs/core-schemas/programs/tracing.mdx index b493e8990..d901224fb 100644 --- a/packages/web/docs/core-schemas/programs/tracing.mdx +++ b/packages/web/docs/core-schemas/programs/tracing.mdx @@ -260,9 +260,10 @@ The two programs below are written so bugc's optimizer folds them. Each accumulates its result in an `acc` parameter and hands it to the next call in tail position, so no work is left pending on the stack. -Use the optimizer control on each example to switch between level 0 -(no optimization) and level 2 (TCO on), then step through and compare -the call stack. +Use the **Opt** selector in the trace drawer to set the optimization +level. Compile at **O0** (no optimization) and again at **O2** +(optimizations on, including TCO), then step through and compare the +call stack. TCO kicks in at **O2**. -At level 0, each `sum`/`fact` call is a real invoke/return pair and the -call stack grows one frame per iteration. At level 2, the recursive +At **O0**, each `sum`/`fact` call is a real invoke/return pair and the +call stack grows one frame per iteration. At **O2**, the recursive call becomes a **back-edge**: a single JUMP that ends one iteration and begins the next without pushing a frame. The call stack stays flat. From e6fb3f154c4e36f14aa19c0e5b1d68b4ce7e4737 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 16:45:17 -0400 Subject: [PATCH 11/21] bugc: emit fold transform on constant folding (#225) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bugc: emit fold transform on constant folding Marks constant-folded values with transform:["fold"] so debuggers can show that a value is a compile-time-evaluated constant rather than source the user wrote. 
Adds Ir.Utils.addTransform(debug, ...ids), which composes transform markers as a flat sibling key on a debug context (per the flat- composition convention) and appends to any existing transform array — so an instruction touched by multiple passes accumulates the multiset (e.g. ["fold","coalesce"]). ConstantFoldingStep applies it to folded binary and hash results. Although a folded const instruction is typically dissolved by constant propagation + DCE, ConstantPropagationStep already carries the folded const's operationDebug into the consuming instruction, so the marker survives to the emitted bytecode's runtimeInstructions (the tracer widget's path) at levels 1-3. Adds an end-to-end test asserting the fold marker reaches the bytecode at levels 1/2/3 and is absent at level 0. * bugc: route tailcall transform through addTransform helper Refactor buildTailCallJumpOptions to emit its transform:["tailcall"] marker via Ir.Utils.addTransform instead of a hand-written key, so all transform emission (fold/tailcall/coalesce/...) routes through the one helper. Behavior is identical — the back-edge JUMP still carries return + invoke + transform:["tailcall"] — but composition stays consistent (e.g. a folded-then-tailcall instruction would accumulate the multiset) and there's no divergent hand-rolled site. Existing #217 tailcall tests pass unchanged. 
--- .../generation/control-flow/terminator.ts | 18 ++-- .../src/evmgen/transform-contexts.test.ts | 88 +++++++++++++++++++ packages/bugc/src/ir/utils/debug.ts | 33 +++++++ .../src/optimizer/steps/constant-folding.ts | 10 ++- 4 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 packages/bugc/src/evmgen/transform-contexts.test.ts diff --git a/packages/bugc/src/evmgen/generation/control-flow/terminator.ts b/packages/bugc/src/evmgen/generation/control-flow/terminator.ts index 94cbf9caa..a87f5349f 100644 --- a/packages/bugc/src/evmgen/generation/control-flow/terminator.ts +++ b/packages/bugc/src/evmgen/generation/control-flow/terminator.ts @@ -1,5 +1,6 @@ import type * as Format from "@ethdebug/format"; import type * as Ir from "#ir"; +import { Utils as IrUtils } from "#ir"; import type * as Evm from "#evm"; import type { Stack } from "#evm"; import type { State } from "#evmgen/state"; @@ -440,7 +441,7 @@ function generateReturnEpilogue( * resolved later by patchInvokeTarget. */ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { - debug: { context: Format.Program.Context }; + debug: Ir.Instruction.Debug; } { const declaration = tailCall.declarationLoc && tailCall.declarationSourceId @@ -451,8 +452,7 @@ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { : undefined; const combined: Format.Program.Context.Return & - Format.Program.Context.Invoke & - Format.Program.Context.Transform = { + Format.Program.Context.Invoke = { return: { identifier: tailCall.function, ...(declaration ? { declaration } : {}), @@ -469,10 +469,18 @@ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { }, }, }, - transform: ["tailcall"], }; - return { debug: { context: combined as Format.Program.Context } }; + // Route through the shared helper so all transform emission + // (fold/tailcall/coalesce/...) composes consistently: the + // `transform` marker becomes a flat sibling key appended to + // any existing transform array. 
+ return { + debug: IrUtils.addTransform( + { context: combined as Format.Program.Context }, + "tailcall", + ), + }; } /** PUSH an integer as the smallest PUSHn. */ diff --git a/packages/bugc/src/evmgen/transform-contexts.test.ts b/packages/bugc/src/evmgen/transform-contexts.test.ts new file mode 100644 index 000000000..3ff1f481a --- /dev/null +++ b/packages/bugc/src/evmgen/transform-contexts.test.ts @@ -0,0 +1,88 @@ +/** + * Verifies that optimizer `transform` markers are emitted onto + * the resulting bytecode's debug contexts, on the same + * `runtimeInstructions` path the docs tracer widget consumes. + * + * Level 1 constant folding attaches `transform: ["fold"]` to the + * folded value's instruction; a debugger can then show that a + * PUSH is a compile-time-evaluated constant rather than source + * the user wrote. + */ +import { describe, it, expect } from "vitest"; + +import { compile } from "#compiler"; +import type * as Format from "@ethdebug/format"; +import { Program } from "@ethdebug/format"; + +const { Context } = Program; + +type OptLevel = 0 | 1 | 2 | 3; + +async function compileBytecode(source: string, level: OptLevel) { + const result = await compile({ + to: "bytecode", + source, + optimizer: { level }, + }); + if (!result.success) { + const errors = result.messages.error ?? []; + throw new Error( + `Compilation failed at level ${level}:\n` + + errors + .map((e: { message?: string }) => e.message ?? String(e)) + .join("\n"), + ); + } + return result.value.bytecode; +} + +/** Flatten a context into leaves, unwrapping gather/pick. 
*/ +function leaves(ctx: Format.Program.Context): Format.Program.Context[] { + if (Context.isGather(ctx)) return ctx.gather.flatMap(leaves); + if ("pick" in ctx && Array.isArray((ctx as { pick: unknown[] }).pick)) { + return (ctx as { pick: Format.Program.Context[] }).pick.flatMap(leaves); + } + return [ctx]; +} + +/** + * Count instructions in the widget-path array whose context + * (at top level or in any leaf) carries a transform containing + * the given identifier. + */ +function countTransform( + instructions: { debug?: { context?: Format.Program.Context } }[], + id: string, +): number { + let count = 0; + for (const instr of instructions) { + const ctx = instr.debug?.context; + if (!ctx) continue; + const hit = [ctx, ...leaves(ctx)].some( + (c) => Context.isTransform(c) && c.transform.includes(id), + ); + if (hit) count += 1; + } + return count; +} + +describe("optimizer emits fold transform contexts", () => { + // `2 + 3` and `4 * 5` fold to constants at level 1. + const source = `name Fold; + +storage { [0] r: uint256; } +create { r = 0; } +code { r = (2 + 3) * (4 * 5); }`; + + it("emits no fold transform at level 0", async () => { + const bc = await compileBytecode(source, 0); + expect(countTransform(bc.runtimeInstructions, "fold")).toBe(0); + }); + + for (const level of [1, 2, 3] as const) { + it(`emits fold transform at level ${level}`, async () => { + const bc = await compileBytecode(source, level); + expect(countTransform(bc.runtimeInstructions, "fold")).toBeGreaterThan(0); + }); + } +}); diff --git a/packages/bugc/src/ir/utils/debug.ts b/packages/bugc/src/ir/utils/debug.ts index 5feb48b9c..13516fe71 100644 --- a/packages/bugc/src/ir/utils/debug.ts +++ b/packages/bugc/src/ir/utils/debug.ts @@ -350,3 +350,36 @@ export function preserveSubInstructionDebug( ...additionalContexts.map((c) => ({ context: c })), ); } + +/** + * Add one or more `transform` optimization markers to a debug + * context, composing them as a flat sibling key alongside any + * 
existing context discriminators (per the flat-composition + * convention: gather is only for same-key collisions). + * + * Markers are appended to any existing `transform` array on the + * context, so an instruction touched by multiple passes + * accumulates the multiset — e.g. a folded value later merged + * yields `transform: ["fold", "coalesce"]`. + */ +export function addTransform( + debug: Ir.Instruction.Debug | undefined, + ...ids: Format.Program.Context.Transform.Identifier[] +): Ir.Instruction.Debug { + const existing = debug?.context; + + const prior: Format.Program.Context.Transform.Identifier[] = + existing && + "transform" in existing && + Array.isArray((existing as Format.Program.Context.Transform).transform) + ? (existing as Format.Program.Context.Transform).transform + : []; + + const transform = [...prior, ...ids]; + + if (!existing) { + return { context: { transform } }; + } + + return { context: { ...existing, transform } }; +} diff --git a/packages/bugc/src/optimizer/steps/constant-folding.ts b/packages/bugc/src/optimizer/steps/constant-folding.ts index 20f99d585..725c7f530 100644 --- a/packages/bugc/src/optimizer/steps/constant-folding.ts +++ b/packages/bugc/src/optimizer/steps/constant-folding.ts @@ -134,7 +134,10 @@ export class ConstantFoldingStep extends BaseOptimizationStep { value: result, type: this.getResultType(inst.op, typeof result), dest: inst.dest, - operationDebug: Ir.Utils.preserveDebug(inst), + operationDebug: Ir.Utils.addTransform( + Ir.Utils.preserveDebug(inst), + "fold", + ), }; } @@ -266,7 +269,10 @@ export class ConstantFoldingStep extends BaseOptimizationStep { value: hashValue, type: Ir.Type.Scalar.bytes32, dest: inst.dest, - operationDebug: Ir.Utils.preserveDebug(inst), + operationDebug: Ir.Utils.addTransform( + Ir.Utils.preserveDebug(inst), + "fold", + ), }; } From 4a4633011cfa646fabf7e13e4572d69e33d3a395 Mon Sep 17 00:00:00 2001 From: "g. 
nicholas d'andrea" Date: Thu, 2 Jul 2026 17:51:50 -0400 Subject: [PATCH 12/21] bugc: emit coalesce transform on read-write merging (#228) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marks the SHL/OR field-packing sequence produced by ReadWriteMerging (level 3) with transform:["coalesce"], so debuggers can show that a packed-storage write is compiler-synthesized rather than source the user wrote. Every instruction the merge produces (shifts, ORs, and the merged write) routes its debug through Ir.Utils.addTransform, which appends coalesce to any existing transform array — so a folded value packed into a word composes as ["fold","coalesce"]. Per #212, coalesce = read-write (SHL/OR) packing specifically; the CFG-merging passes (block-merging, return-merging) are left unmarked for v1. Adds an end-to-end test: coalesce marker present at level 3, absent at levels 0-2. --- .../src/evmgen/transform-contexts.test.ts | 26 +++++++++++++++++++ .../src/optimizer/steps/read-write-merging.ts | 12 +++++++++ 2 files changed, 38 insertions(+) diff --git a/packages/bugc/src/evmgen/transform-contexts.test.ts b/packages/bugc/src/evmgen/transform-contexts.test.ts index 3ff1f481a..5fdeee574 100644 --- a/packages/bugc/src/evmgen/transform-contexts.test.ts +++ b/packages/bugc/src/evmgen/transform-contexts.test.ts @@ -86,3 +86,29 @@ code { r = (2 + 3) * (4 * 5); }`; }); } }); + +describe("optimizer emits coalesce transform contexts", () => { + // Two adjacent packed writes of a runtime value to one storage + // slot; read/write merging (level 3) packs them with SHL/OR into + // a single word write. 
+ const source = `name Coalesce; + +define { struct S { a: uint128; b: uint128; }; } +storage { [0] s: S; [1] src: uint256; } +create {} +code { let v = src; s.a = v; s.b = v; }`; + + for (const level of [0, 1, 2] as const) { + it(`emits no coalesce transform at level ${level}`, async () => { + const bc = await compileBytecode(source, level); + expect(countTransform(bc.runtimeInstructions, "coalesce")).toBe(0); + }); + } + + it("emits coalesce transform at level 3", async () => { + const bc = await compileBytecode(source, 3); + expect(countTransform(bc.runtimeInstructions, "coalesce")).toBeGreaterThan( + 0, + ); + }); +}); diff --git a/packages/bugc/src/optimizer/steps/read-write-merging.ts b/packages/bugc/src/optimizer/steps/read-write-merging.ts index 1ac2ecde5..cb24ba1c7 100644 --- a/packages/bugc/src/optimizer/steps/read-write-merging.ts +++ b/packages/bugc/src/optimizer/steps/read-write-merging.ts @@ -368,6 +368,18 @@ export class ReadWriteMergingStep extends BaseOptimizationStep { reason: `Merged ${writes.length} writes to same location`, }); + // Mark every instruction produced by the merge with + // transform:["coalesce"] so debuggers can show the SHL/OR + // field-packing sequence as compiler-synthesized rather than + // source the user wrote. Appends to any existing transform + // array (e.g. a folded packed value → ["fold","coalesce"]). + for (const inst of instructions) { + inst.operationDebug = Ir.Utils.addTransform( + inst.operationDebug, + "coalesce", + ); + } + return instructions; } From f5f66a9453bd7f49bc6b89a2f6358ffa5fad73f6 Mon Sep 17 00:00:00 2001 From: "g. 
nicholas d'andrea" Date: Thu, 2 Jul 2026 18:44:51 -0400 Subject: [PATCH 13/21] format: spec the inlined-call virtual activation contract (#229) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * format: spec the inlined-call virtual activation contract Document how an inlined internal call is represented and how a debugger reconstructs its virtual activation, per the inlining design (gnidan rulings: reuse jump:true, omit arg/return value pointers in v1, adopt DWARF "activation" naming). - invoke.mdx: add "Inlined internal calls" (jump:true = call kind not a literal JUMP; target omitted; transform:["inline"]) and a "Reconstructing activations" section defining the real-vs-virtual activation distinction, annotation-driven per-instruction membership (robust to non-contiguous inlined bodies), graceful identity degradation, and v1 value resolution (structure + locals; no arguments/data pointers). - transform.mdx: enrich the inline id (virtual activation, cross- link) and add an inlined-call-site worked example noting that a helper inlined at N sites yields N virtual activations. No schema change. Scope-agnostic re: the single- vs multi-site heuristic (bugc pass scope, decision #3, still open). * format: describe inlining eligibility qualitatively (decision #3) Reflect gnidan's #3 heuristic (leaf or small, non-recursive, internal, at all call sites) as compiler-typical, keeping the threshold qualitative — the format is invariant to the eligibility rule. * format: sharpen activation discriminator + membership wording Apply debugger review precision notes on #229: - Real-vs-virtual: the reliable discriminator is the inline transform marker, not target-absence — a real internal call may also omit target (#213), so no-target does NOT imply virtual. Reword the real/virtual clauses accordingly. 
- Membership: reword "iff it carries transform:["inline"]" (read as exact array equality) to "carries an inline identifier in its transform list" so composed markers like ["inline","fold"] still confer membership. * format: frame virtual-activation structure as compiler-typical Apply debugger note (a): "reliably carries ... identity" contradicted the graceful-degradation paragraph (identity is optional). Reword to "an inlining compiler typically preserves declaration and per-instruction source ranges; identity fields remain optional and degrade gracefully" — a compiler-behavior expectation, not a format guarantee. --- .../spec/program/context/function/invoke.mdx | 94 +++++++++++++++++++ .../web/spec/program/context/transform.mdx | 27 +++++- 2 files changed, 118 insertions(+), 3 deletions(-) diff --git a/packages/web/spec/program/context/function/invoke.mdx b/packages/web/spec/program/context/function/invoke.mdx index af1c1da7a..4e6d88bd3 100644 --- a/packages/web/spec/program/context/function/invoke.mdx +++ b/packages/web/spec/program/context/function/invoke.mdx @@ -65,6 +65,35 @@ than physically invoked. pointer="#/$defs/InternalCall" /> +### Inlined internal calls + +When the compiler inlines a callee, there is no JUMP and no +runtime activation record: the callee's instructions are spliced +directly into the caller. The call still happened at the source +level, so it is still marked with an invoke context — one that +describes the _kind_ of call without a physical target: + +- `jump: true` marks the invocation as an **internal call kind** + (as opposed to a message call or contract creation). It does + **not** assert that a JUMP instruction executes here — an + inlined call has none. +- `target` is omitted: there is no code location to point at, + because the JUMP that would carry it was elided. +- a sibling `transform: ["inline"]` key marks the instruction as + belonging to an inlined body. 
+ +The callee identity (`identifier`, `declaration`, `type`, all +optional) is preserved, so the inlined function still appears on +the debugger's call stack: the debugger reconstructs a **virtual +activation** for it (see +[Reconstructing activations](#reconstructing-activations)). + +Compilers typically inline small or leaf non-recursive callees at +every call site, so the _same_ callee can produce several +independent virtual activations across a trace — one per inlined +site. (The precise eligibility rule is a compiler choice; the +format is the same however inlining decisions are made.) + ## External call An external call represents a call to another contract via CALL, @@ -86,3 +115,68 @@ presence of `salt` implies CREATE2. schema={{ id: "schema:ethdebug/format/program/context/function/invoke" }} pointer="#/$defs/ContractCreation" /> + +## Reconstructing activations + +A debugger reconstructs the logical call stack from `invoke` and +`return` contexts. Each entry on that stack is an **activation** +(the DWARF term for a call-stack entry). Activation handling is +uniform whether or not the call was inlined: + +- **Push** an activation when an `invoke` context is encountered + and **pop** it when the matching `return` context is + encountered, in trace order. + +An inlined callee therefore appears on the call stack exactly as a +non-inlined one does. Two kinds of activation differ only in how +they are backed, distinguished by the presence of an `inline` +transform marker — **not** by whether `target` is present: + +- A **real activation** comes from an `invoke` **without** an + `inline` transform marker. It corresponds to an actual call at + runtime, corroborated by machine state — a return address on the + EVM stack — and occupies a real stack region. +- A **virtual activation** comes from an `invoke` whose context + carries `transform: ["inline"]` (an `inline` identifier in its + transform list). 
It has **no runtime corroboration** and occupies + no EVM stack region; it exists only in the debug annotations. Its + `target` is typically omitted (the JUMP was elided), but + `target`-absence is not itself the signal — a real internal call + may also omit `target` (see [Internal call](#internal-call)). The + reliable discriminator is the `inline` marker. + +### Activation membership + +An instruction belongs to the innermost open virtual activation if +and only if its context carries an `inline` identifier in its +transform list — so composed markers such as `["inline", "fold"]` +still confer membership. The nesting depth is the number of +`"inline"` occurrences in the list (doubly-inlined code carries +`["inline", "inline"]`). Membership is determined +per-instruction from this marker, **not** from instruction ranges: +optimization passes may relocate or interleave an inlined body, so +a positional "everything between the invoke and the return" rule +would be unsound. + +### Identity and values + +Every function-identity field (`identifier`, `declaration`, +`type`) is optional, so a virtual activation degrades gracefully — +from full identity down to an anonymous inlined frame — with no +fabricated data. A debugger renders whatever is present. + +An inlining compiler typically preserves the callee's declaration +and per-instruction source ranges for a virtual activation, and can +resolve inlined locals that it homed in addressable memory, via +[`variables`](/spec/program/context/variables) contexts. Identity +fields remain optional and degrade gracefully as described above. +Such a compiler does **not** emit `invoke.arguments` or +`return.data` pointers in this first version; individual parameter +values may still be inspectable as locals inside the body where +they are memory-homed. A virtual activation with no resolvable +values is still a valid, displayable frame. 
+ +A debugger that ignores `transform` contexts still sees a coherent +`invoke`/`return` pair and a sound source-level call stack. One +that understands them can present virtual activations distinctly — +for example, collapsible and tied to the callee's source location. diff --git a/packages/web/spec/program/context/transform.mdx b/packages/web/spec/program/context/transform.mdx index e2b4b3cd7..1a784a3a7 100644 --- a/packages/web/spec/program/context/transform.mdx +++ b/packages/web/spec/program/context/transform.mdx @@ -42,9 +42,12 @@ optimization-aware presentations: Four identifiers are recognized in v1: - **`"inline"`** — the marked instruction is part of an inlined - function body. Surrounding invoke/return contexts name the - inlined callee; this marker tells the debugger the physical - code does not correspond to a separate activation record. + function body. A surrounding `invoke`/`return` pair names the + inlined callee, and a debugger reconstructs a _virtual + activation_ for it (see + [inlined internal calls](/spec/program/context/function/invoke#inlined-internal-calls)). + This marker tells the debugger the physical code has no separate + runtime activation record. - **`"tailcall"`** — the marked instruction is a tail-call-optimized back-edge JUMP or continuation, where the call was realized without pushing/popping a full activation. @@ -112,6 +115,24 @@ The `return` and `invoke` state the source-level facts `transform` explains how the compiler realized that pair as a single JUMP. +An inlined call site combines an invoke with an inline transform. +The invoke marks the call kind with `jump: true` but omits +`target`, because the JUMP was elided: + +```yaml +invoke: + jump: true + identifier: "square" + declaration: { ... } +transform: ["inline"] +``` + +Each instruction of the inlined body also carries +`transform: ["inline"]`, and a matching `return` closes the +[virtual activation](/spec/program/context/function/invoke#reconstructing-activations). 
+A small helper inlined at several call sites produces one such +`invoke`/`return` pair — and one virtual activation — per site. + Reach for [`gather`](/spec/program/context/gather) only when two contexts would collide on the same key — e.g., two independent `variables` blocks or two From 22bf0c8bd7464dcca48c15f54c7fcc75fd845ce9 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 19:54:28 -0400 Subject: [PATCH 14/21] bugc: add function inlining pass with inline transform (L2) (#230) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds InliningStep (level 2, first — after L1 fold, before CSE/TCO/ JumpOpt) that replaces calls to eligible internal functions with a copy of the callee body spliced into the caller. Every inlined instruction is annotated transform:["inline"] via addTransform, and the body is bracketed by a virtual invoke (jump:true, identity, no target — the #213 optional-target signal) / virtual return, so a debugger can reconstruct a virtual activation. This lights up `inline` in the tracer, completing the transform set. v1 eligibility (correctness over coverage; follow-ups noted): - internal, non-recursive, single-return, LEAF callee; - applied at all call sites; callee deleted once fully inlined; - NOT inlined into self-recursive / TCO'd callers: inlining a helper into a tail-recursive call's arguments rewrites count(succ(n)) -> count(n+1), which the tail-call optimizer mishandles (pre-existing bug, tracked separately). Guarding this keeps the tailcall demos pristine. Return values use dest-substitution (not a continuation phi), which is robust to L3 block-merging; deep clones use structuredClone to preserve bigint const values. Updates optimizer-contexts tests: leaf helpers (add/dbl/addThree) now inline at L2+ (no real caller JUMP; inline activation instead), while recursive/multi-return functions (fact, isEven/isOdd, count) are untouched. 
Deferred to follow-ups: non-leaf/nested callees, multi-return, size-threshold (non-leaf) inlining. --- .../src/evmgen/optimizer-contexts.test.ts | 106 +++- .../bugc/src/optimizer/simple-optimizer.ts | 7 +- packages/bugc/src/optimizer/steps/index.ts | 1 + .../bugc/src/optimizer/steps/inlining.test.ts | 144 +++++ packages/bugc/src/optimizer/steps/inlining.ts | 563 ++++++++++++++++++ 5 files changed, 797 insertions(+), 24 deletions(-) create mode 100644 packages/bugc/src/optimizer/steps/inlining.test.ts create mode 100644 packages/bugc/src/optimizer/steps/inlining.ts diff --git a/packages/bugc/src/evmgen/optimizer-contexts.test.ts b/packages/bugc/src/evmgen/optimizer-contexts.test.ts index 7891bf488..f0d9539aa 100644 --- a/packages/bugc/src/evmgen/optimizer-contexts.test.ts +++ b/packages/bugc/src/evmgen/optimizer-contexts.test.ts @@ -127,6 +127,22 @@ function countCallSites(program: Format.Program): CallSiteCounts { return counts; } +/** Count instructions carrying a `transform: ["inline"]` marker. */ +function countInline(program: Format.Program): number { + let n = 0; + for (const instr of program.instructions) { + if (!instr.context) continue; + if ( + unwrapLeaves(instr.context).some( + (c) => Context.isTransform(c) && c.transform.includes("inline"), + ) + ) { + n += 1; + } + } + return n; +} + describe("optimizer preserves invoke/return contexts", () => { const allLevels: OptLevel[] = [0, 1, 2, 3]; @@ -148,11 +164,23 @@ code { r = add(10, 20); }`; const program = await compileAt(source, level); const counts = countCallSites(program); - // One caller JUMP, one callee JUMPDEST, one - // continuation JUMPDEST — all naming "add". - expect(counts.invokeJump).toEqual({ add: 1 }); - expect(counts.invokeJumpdest).toEqual({ add: 1 }); - expect(counts.returnJumpdest).toEqual({ add: 1 }); + if (level >= 2) { + // `add` is a leaf single-return helper: inlining (L2+) + // replaces the real call with a virtual inline + // activation, so there's no caller JUMP for `add`. 
+ expect(counts.invokeJump).toEqual({}); + // Inline markers appear on the inlined body; at L3 a + // fully-foldable helper body can be constant-folded to a + // PUSH, dissolving the marker, so only require presence + // at L2. + if (level === 2) expect(countInline(program)).toBeGreaterThan(0); + } else { + // One caller JUMP, one callee JUMPDEST, one + // continuation JUMPDEST — all naming "add". + expect(counts.invokeJump).toEqual({ add: 1 }); + expect(counts.invokeJumpdest).toEqual({ add: 1 }); + expect(counts.returnJumpdest).toEqual({ add: 1 }); + } // Behavior is still correct. const result = await executeProgram(source, { @@ -185,9 +213,20 @@ code { r = add(2 + 3, 4 * 5); }`; const program = await compileAt(source, level); const counts = countCallSites(program); - expect(counts.invokeJump).toEqual({ add: 1 }); - expect(counts.invokeJumpdest).toEqual({ add: 1 }); - expect(counts.returnJumpdest).toEqual({ add: 1 }); + if (level >= 2) { + // `add` inlined at L2+ — virtual inline activation, no + // real caller JUMP. + expect(counts.invokeJump).toEqual({}); + // Inline markers appear on the inlined body; at L3 a + // fully-foldable helper body can be constant-folded to a + // PUSH, dissolving the marker, so only require presence + // at L2. + if (level === 2) expect(countInline(program)).toBeGreaterThan(0); + } else { + expect(counts.invokeJump).toEqual({ add: 1 }); + expect(counts.invokeJumpdest).toEqual({ add: 1 }); + expect(counts.returnJumpdest).toEqual({ add: 1 }); + } const result = await executeProgram(source, { calldata: "", @@ -224,9 +263,20 @@ code { const program = await compileAt(source, level); const counts = countCallSites(program); - expect(counts.invokeJump).toEqual({ dbl: 2 }); - expect(counts.invokeJumpdest).toEqual({ dbl: 1 }); - expect(counts.returnJumpdest).toEqual({ dbl: 2 }); + if (level >= 2) { + // Both `dbl` sites are inlined (leaf single-return) into + // separate virtual activations; no real caller JUMPs. 
+ expect(counts.invokeJump).toEqual({}); + // Inline markers appear on the inlined body; at L3 a + // fully-foldable helper body can be constant-folded to a + // PUSH, dissolving the marker, so only require presence + // at L2. + if (level === 2) expect(countInline(program)).toBeGreaterThan(0); + } else { + expect(counts.invokeJump).toEqual({ dbl: 2 }); + expect(counts.invokeJumpdest).toEqual({ dbl: 1 }); + expect(counts.returnJumpdest).toEqual({ dbl: 2 }); + } const result = await executeProgram(source, { calldata: "", @@ -349,18 +399,28 @@ code { r = addThree(1, 2, 3); }`; const program = await compileAt(source, level); const counts = countCallSites(program); - expect(counts.invokeJump).toEqual({ - addThree: 1, - add: 2, - }); - expect(counts.invokeJumpdest).toEqual({ - addThree: 1, - add: 1, - }); - expect(counts.returnJumpdest).toEqual({ - addThree: 1, - add: 2, - }); + if (level >= 2) { + // `add` (leaf) inlines into `addThree` at both sites; + // that makes `addThree` itself a leaf, so on a later + // fixpoint iteration it inlines into `main` too. End + // state: no real caller JUMPs — everything is inline + // activations. 
+ expect(counts.invokeJump).toEqual({}); + if (level === 2) expect(countInline(program)).toBeGreaterThan(0); + } else { + expect(counts.invokeJump).toEqual({ + addThree: 1, + add: 2, + }); + expect(counts.invokeJumpdest).toEqual({ + addThree: 1, + add: 1, + }); + expect(counts.returnJumpdest).toEqual({ + addThree: 1, + add: 2, + }); + } const result = await executeProgram(source, { calldata: "", diff --git a/packages/bugc/src/optimizer/simple-optimizer.ts b/packages/bugc/src/optimizer/simple-optimizer.ts index b078c9638..7c64642d7 100644 --- a/packages/bugc/src/optimizer/simple-optimizer.ts +++ b/packages/bugc/src/optimizer/simple-optimizer.ts @@ -14,6 +14,7 @@ import { ReturnMergingStep, ReadWriteMergingStep, TailCallOptimizationStep, + InliningStep, } from "./steps/index.js"; /** @@ -58,9 +59,13 @@ function createOptimizationPipeline(level: number): OptimizationStep[] { ); } - // Level 2: Add CSE, tail call optimization, and jump optimization + // Level 2: Add inlining, CSE, tail call optimization, and + // jump optimization. Inlining runs first (after L1 fold) so + // TCO/CSE still apply to inlined code and `["fold","inline"]` + // composes. 
if (level >= 2) { steps.push( + new InliningStep(), new CommonSubexpressionEliminationStep(), new TailCallOptimizationStep(), new JumpOptimizationStep(), diff --git a/packages/bugc/src/optimizer/steps/index.ts b/packages/bugc/src/optimizer/steps/index.ts index 75a02833f..640bac63c 100644 --- a/packages/bugc/src/optimizer/steps/index.ts +++ b/packages/bugc/src/optimizer/steps/index.ts @@ -7,3 +7,4 @@ export { BlockMergingStep } from "./block-merging.js"; export { ReturnMergingStep } from "./return-merging.js"; export { ReadWriteMergingStep } from "./read-write-merging.js"; export { TailCallOptimizationStep } from "./tail-call-optimization.js"; +export { InliningStep } from "./inlining.js"; diff --git a/packages/bugc/src/optimizer/steps/inlining.test.ts b/packages/bugc/src/optimizer/steps/inlining.test.ts new file mode 100644 index 000000000..ecd49bcc4 --- /dev/null +++ b/packages/bugc/src/optimizer/steps/inlining.test.ts @@ -0,0 +1,144 @@ +/** + * Behavioral tests for the function-inlining pass (level 2). + * + * Inlining must (a) preserve runtime behavior exactly, and + * (b) emit `transform: ["inline"]` on the inlined body so the + * debugger can reconstruct a virtual activation for the call. 
+ */ +import { describe, it, expect } from "vitest"; + +import { compile } from "#compiler"; +import { executeProgram } from "#test/evm/behavioral"; +import type * as Format from "@ethdebug/format"; +import { Program } from "@ethdebug/format"; + +const { Context } = Program; + +function leaves(ctx: Format.Program.Context): Format.Program.Context[] { + if (Context.isGather(ctx)) return ctx.gather.flatMap(leaves); + if ("pick" in ctx && Array.isArray((ctx as { pick: unknown[] }).pick)) { + return (ctx as { pick: Format.Program.Context[] }).pick.flatMap(leaves); + } + return [ctx]; +} + +async function inlineMarks(source: string, level: 0 | 1 | 2 | 3) { + const result = await compile({ + to: "bytecode", + source, + optimizer: { level }, + }); + if (!result.success) { + const errors = result.messages.error ?? []; + throw new Error( + "compile failed:\n" + + errors + .map((e: { message?: string }) => e.message ?? String(e)) + .join("\n"), + ); + } + let count = 0; + for (const instr of result.value.bytecode.runtimeInstructions) { + const ctx = instr.debug?.context; + if (!ctx) continue; + if ( + [ctx, ...leaves(ctx)].some( + (c) => Context.isTransform(c) && c.transform.includes("inline"), + ) + ) { + count += 1; + } + } + return count; +} + +describe("function inlining (level 2)", () => { + describe("leaf helper, single return", () => { + const source = `name Demo; +define { + function add(a: uint256, b: uint256) -> uint256 { return a + b; }; +} +storage { [0] r: uint256; } +create {} +code { r = add(3, 4); }`; + + it("produces the same result at every level", async () => { + for (const level of [0, 1, 2, 3] as const) { + const res = await executeProgram(source, { + calldata: "", + optimizationLevel: level, + }); + expect(res.callSuccess).toBe(true); + expect(await res.getStorage(0n)).toBe(7n); + } + }); + + it("emits no inline marks at level 0", async () => { + expect(await inlineMarks(source, 0)).toBe(0); + }); + + it("emits inline marks at level 2", async () => { + 
expect(await inlineMarks(source, 2)).toBeGreaterThan(0); + }); + }); + + describe("multiple call sites", () => { + const source = `name Multi; +define { + function dbl(x: uint256) -> uint256 { return x + x; }; +} +storage { [0] r: uint256; } +create { r = 0; } +code { + let a = dbl(5); + let b = dbl(10); + r = a + b; +}`; + + it("inlines every site and stays correct", async () => { + for (const level of [0, 1, 2, 3] as const) { + const res = await executeProgram(source, { + calldata: "", + optimizationLevel: level, + }); + expect(res.callSuccess).toBe(true); + expect(await res.getStorage(0n)).toBe(30n); + } + expect(await inlineMarks(source, 2)).toBeGreaterThan(0); + }); + }); + + describe("does not inline into a tail-recursive function (protects TCO)", () => { + // `succ` is a leaf, but inlining it into `count`'s recursive + // call arguments would rewrite `count(succ(n))` into + // `count(n + 1)`, which the tail-call optimizer mishandles. + // The pass must leave recursive/TCO'd callers untouched. + const source = `name TailCall; +define { + function succ(n: uint256) -> uint256 { return n + 1; }; + function count(n: uint256, target: uint256) -> uint256 { + if (n < target) { return count(succ(n), target); } + else { return n; } + }; +} +storage { [0] r: uint256; } +create { r = 0; } +code { r = count(0, 5); }`; + + it("stays correct at every level", async () => { + for (const level of [0, 1, 2, 3] as const) { + const res = await executeProgram(source, { + calldata: "", + optimizationLevel: level, + }); + expect(res.callSuccess).toBe(true); + expect(await res.getStorage(0n)).toBe(5n); + } + }); + + it("does not inline succ into the recursive count", async () => { + // No inline markers: succ stays a real call so TCO can fire. 
+ expect(await inlineMarks(source, 2)).toBe(0); + }); + }); +}); diff --git a/packages/bugc/src/optimizer/steps/inlining.ts b/packages/bugc/src/optimizer/steps/inlining.ts new file mode 100644 index 000000000..1da050396 --- /dev/null +++ b/packages/bugc/src/optimizer/steps/inlining.ts @@ -0,0 +1,563 @@ +/** + * Function inlining (level 2). + * + * Replaces calls to eligible internal functions with a copy of + * the callee's body spliced into the caller, so no runtime + * JUMP/frame is used. Each inlined instruction is annotated with + * `transform: ["inline"]` and the body is bracketed by a virtual + * invoke/return (identity + declaration, no code target — the + * #213 optional-target signal) so a debugger can reconstruct a + * virtual activation. + * + * v1 eligibility: internal (user-defined), non-recursive callee + * that is either a leaf (calls nothing) or below a small size + * threshold. Applied at all call sites; a callee whose every + * site is inlined is deleted. + */ +import * as Ir from "#ir"; +import type * as Format from "@ethdebug/format"; + +import { + BaseOptimizationStep, + type OptimizationContext, +} from "../optimizer.js"; + +/** Max IR-node count for a non-leaf callee to still inline. Tunable. */ +const INLINE_MAX_IR_NODES = 16; + +export class InliningStep extends BaseOptimizationStep { + name = "inlining"; + private siteCounter = 0; + + run(module: Ir.Module, _context: OptimizationContext): Ir.Module { + const optimized = this.cloneModule(module); + if (!optimized.functions || optimized.functions.size === 0) { + return optimized; + } + + const callGraph = buildCallGraph(optimized); + const eligible = new Set(); + for (const [name, fn] of optimized.functions) { + if (isEligible(name, fn, callGraph)) eligible.add(name); + } + if (eligible.size === 0) return optimized; + + // Track, per callee, how many sites remain un-inlined so we + // can delete fully-inlined callees afterward. 
+ const remainingSites = new Map<string, number>(); + for (const name of eligible) remainingSites.set(name, 0); + + // Named callers. Self-recursive callers are skipped: they are + // TailCall-optimized later, and inlining a helper into a + // self-recursive call's arguments (e.g. `count(succ(n))` -> + // `count(n + 1)`) rewrites the tail call into a computed-arg + // form that TCO mishandles, silently breaking the recursion. + // Correctness over coverage — inlining into recursive bodies is + // deferred. + const named: [string, Ir.Function][] = [ + ["
", optimized.main], + ...(optimized.create + ? ([["", optimized.create]] as [string, Ir.Function][]) + : []), + ...[...optimized.functions].map( + ([n, f]) => [n, f] as [string, Ir.Function], + ), + ]; + + for (const [callerName, caller] of named) { + // Self-recursive (pre-TCO) or already TCO'd (post-TCO, its + // self-call is now a jump-with-tailCall so the call graph no + // longer shows the recursion). Either way, don't inline into + // it. + if (reachableCallees(callerName, callGraph).has(callerName)) continue; + if (hasTailCallBackedge(caller)) continue; + this.inlineIntoFunction(caller, optimized, eligible, remainingSites); + } + + // Delete callees that no longer have any call site anywhere. + for (const name of eligible) { + if (!isCalledAnywhere(name, optimized)) { + optimized.functions.delete(name); + } + } + + return optimized; + } + + private inlineIntoFunction( + caller: Ir.Function, + module: Ir.Module, + eligible: Set<string>, + _remainingSites: Map<string, number>, + ): void { + // Splicing mutates caller.blocks, so rescan after each inline. + let changed = true; + // Guard against pathological loops. + let guard = 0; + while (changed && guard++ < 1000) { + changed = false; + for (const [blockId, block] of caller.blocks) { + const term = block.terminator; + if (term.kind !== "call") continue; + if (!eligible.has(term.function)) continue; + const callee = module.functions?.get(term.function); + if (!callee) continue; + // Don't inline a function into itself. 
+ if (callee === caller) continue; + + this.spliceCall(caller, blockId, block, term, callee); + changed = true; + break; // block map mutated — restart the scan + } + } + } + + private spliceCall( + caller: Ir.Function, + callBlockId: string, + callBlock: Ir.Block, + call: Extract, + callee: Ir.Function, + ): void { + const site = this.siteCounter++; + const prefix = `inl${site}$`; + + // --- build rename maps --- + const blockRename = new Map(); + for (const id of callee.blocks.keys()) { + blockRename.set(id, prefix + id); + } + + // param temp id -> bound argument Value + const paramSubst = new Map(); + callee.parameters.forEach((p, i) => { + const arg = call.arguments[i]; + if (arg) paramSubst.set(p.tempId, arg); + }); + + // every non-param temp defined in the callee -> fresh id + const idRename = new Map(); + for (const b of callee.blocks.values()) { + for (const phi of b.phis ?? []) rename(phi.dest); + for (const inst of b.instructions) { + if ("dest" in inst && typeof inst.dest === "string") { + rename(inst.dest); + } + } + } + function rename(id: string): void { + if (paramSubst.has(id)) return; + if (!idRename.has(id)) idRename.set(id, prefix + id); + } + + const remapValue = (v: Ir.Value): Ir.Value => { + if (v.kind !== "temp") return v; + const sub = paramSubst.get(v.id); + if (sub) return sub; + const nid = idRename.get(v.id); + return nid ? { ...v, id: nid } : v; + }; + + // Declaration for the callee (for the virtual invoke/return). + const declaration = + callee.loc && callee.sourceId + ? { source: { id: callee.sourceId }, range: callee.loc } + : undefined; + + const inlineInvoke: Format.Program.Context.Invoke["invoke"] = { + jump: true, + identifier: callee.name, + ...(declaration ? { declaration } : {}), + // no `target` — JUMP is elided (virtual activation) + }; + const inlineReturn: Format.Program.Context.Return["return"] = { + identifier: callee.name, + ...(declaration ? 
{ declaration } : {}), + }; + + const entryBlockId = blockRename.get(callee.entry)!; + const returnBlockIds: string[] = []; + + // --- clone + remap callee blocks --- + for (const [origId, origBlock] of callee.blocks) { + const newId = blockRename.get(origId)!; + const isEntry = origId === callee.entry; + + const instructions: Ir.Instruction[] = origBlock.instructions.map( + (inst, idx) => { + const cloned = remapInstruction(inst, remapValue, idRename); + // Mark every inlined instruction for membership. + cloned.operationDebug = addInlineMarker(cloned.operationDebug); + // Virtual invoke on the first instruction of the entry. + if (isEntry && idx === 0) { + cloned.operationDebug = mergeDiscriminator( + cloned.operationDebug, + "invoke", + inlineInvoke, + ); + } + return cloned; + }, + ); + + const phis: Ir.Block.Phi[] = (origBlock.phis ?? []).map((phi) => + remapPhi(phi, remapValue, idRename, blockRename), + ); + + let terminator: Ir.Block.Terminator; + const t = origBlock.terminator; + if (t.kind === "return") { + returnBlockIds.push(newId); + // Virtual return marker on the last body instruction of + // this block (or a synthetic carrier if the block is empty + // is not needed — return blocks always have ≥1 emitted + // instruction in practice; if empty, the marker rides the + // jump's debug below). 
+ if (instructions.length > 0) { + const last = instructions[instructions.length - 1]; + last.operationDebug = mergeDiscriminator( + last.operationDebug, + "return", + inlineReturn, + ); + } + // return -> jump to the caller's continuation + terminator = { + kind: "jump", + target: call.continuation, + operationDebug: addInlineMarker( + mergeDiscriminator({}, "return", inlineReturn), + ), + }; + } else { + terminator = remapTerminator(t, remapValue, blockRename); + } + + caller.blocks.set(newId, { + id: newId, + phis, + instructions, + terminator, + predecessors: new Set(), + debug: origBlock.debug, + }); + } + + // --- wire the single return value into the caller --- + // v1 eligibility guarantees exactly one return. Substitute the + // call's dest temp with the (remapped) returned value across the + // whole caller — no phi, so it's robust to L3 block-merging. + if (call.dest) { + const returns = collectReturns(callee, blockRename, remapValue); + if (returns.length === 1) { + substituteTemp(caller, call.dest, returns[0].value); + } + } + + // --- rewire the calling block: call -> jump into inlined entry --- + callBlock.terminator = { + kind: "jump", + target: entryBlockId, + operationDebug: call.operationDebug, + }; + + void callBlockId; + recomputePredecessors(caller); + } +} + +// ---- helpers ---- + +function collectReturns( + callee: Ir.Function, + blockRename: Map, + remapValue: (v: Ir.Value) => Ir.Value, +): { block: string; value: Ir.Value }[] { + const out: { block: string; value: Ir.Value }[] = []; + for (const [origId, b] of callee.blocks) { + if (b.terminator.kind === "return" && b.terminator.value) { + out.push({ + block: blockRename.get(origId)!, + value: remapValue(b.terminator.value), + }); + } + } + return out; +} + +function buildCallGraph(module: Ir.Module): Map> { + const graph = new Map>(); + const fns: [string, Ir.Function][] = [ + ["
", module.main], + ...(module.create + ? ([["", module.create]] as [string, Ir.Function][]) + : []), + ...[...(module.functions ?? new Map())].map( + ([n, f]) => [n, f] as [string, Ir.Function], + ), + ]; + for (const [name, fn] of fns) { + const callees = new Set(); + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "call") callees.add(b.terminator.function); + } + graph.set(name, callees); + } + return graph; +} + +function reachableCallees( + start: string, + graph: Map>, +): Set { + const seen = new Set(); + const stack = [...(graph.get(start) ?? [])]; + while (stack.length) { + const n = stack.pop()!; + if (seen.has(n)) continue; + seen.add(n); + for (const c of graph.get(n) ?? []) stack.push(c); + } + return seen; +} + +function functionSize(fn: Ir.Function): number { + let n = 0; + for (const b of fn.blocks.values()) { + n += b.instructions.length + 1; // + terminator + n += (b.phis ?? []).length; + } + return n; +} + +function hasTailCallBackedge(fn: Ir.Function): boolean { + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "jump" && b.terminator.tailCall) return true; + } + return false; +} + +function returnCount(fn: Ir.Function): number { + let n = 0; + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "return") n += 1; + } + return n; +} + +function isEligible( + name: string, + fn: Ir.Function, + graph: Map>, +): boolean { + const callees = graph.get(name) ?? new Set(); + // Non-recursive: name not reachable from itself. + if (reachableCallees(name, graph).has(name)) return false; + // v1: single return point only. Multi-return needs a phi at the + // continuation, which block-merging (L3) can turn into an + // invalid self-referential phi; deferred until that's handled. 
+ if (returnCount(fn) !== 1) return false; + // Never inline a TCO-transformed function: after TailCall + // optimization a self-recursive function's back-edge becomes a + // `jump` with `tailCall`, which makes it look like a leaf + // single-return function on the next fixpoint iteration. Inlining + // it would clobber the tailcall showcase. A `tailCall` back-edge + // marks it as recursion, not a real leaf. + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "jump" && b.terminator.tailCall) return false; + } + // v1: leaf callees only. Inlining a non-leaf callee whose own + // (eligible) calls also inline exposes a dest-substitution + // ordering bug in the nested chain; deferred. The size-threshold + // branch is kept for when that lands. + const isLeaf = callees.size === 0; + const smallEnough = functionSize(fn) <= INLINE_MAX_IR_NODES; + void smallEnough; + return isLeaf; +} + +function isCalledAnywhere(name: string, module: Ir.Module): boolean { + const fns = [ + module.main, + ...(module.create ? [module.create] : []), + ...(module.functions?.values() ?? []), + ]; + for (const fn of fns) { + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "call" && b.terminator.function === name) { + return true; + } + } + } + return false; +} + +/** Deep-clone an instruction, remapping temp values and its dest. */ +function remapInstruction( + inst: Ir.Instruction, + remapValue: (v: Ir.Value) => Ir.Value, + idRename: Map, +): Ir.Instruction { + const cloned = structuredCloneValues(inst) as Ir.Instruction & { + dest?: string; + }; + remapValuesInPlace(cloned, remapValue); + if (typeof cloned.dest === "string") { + cloned.dest = idRename.get(cloned.dest) ?? cloned.dest; + } + return cloned; +} + +function remapPhi( + phi: Ir.Block.Phi, + remapValue: (v: Ir.Value) => Ir.Value, + idRename: Map, + blockRename: Map, +): Ir.Block.Phi { + const sources = new Map(); + for (const [pred, val] of phi.sources) { + sources.set(blockRename.get(pred) ?? 
pred, remapValue(val)); + } + return { + ...phi, + dest: idRename.get(phi.dest) ?? phi.dest, + sources, + }; +} + +function remapTerminator( + t: Ir.Block.Terminator, + remapValue: (v: Ir.Value) => Ir.Value, + blockRename: Map, +): Ir.Block.Terminator { + switch (t.kind) { + case "jump": + return { ...t, target: blockRename.get(t.target) ?? t.target }; + case "branch": + return { + ...t, + condition: remapValue(t.condition), + trueTarget: blockRename.get(t.trueTarget) ?? t.trueTarget, + falseTarget: blockRename.get(t.falseTarget) ?? t.falseTarget, + }; + case "return": + return { ...t, value: t.value ? remapValue(t.value) : undefined }; + case "call": + return { + ...t, + arguments: t.arguments.map(remapValue), + dest: t.dest, + continuation: blockRename.get(t.continuation) ?? t.continuation, + }; + } +} + +/** Deep clone that preserves nested plain objects and bigints. */ +function structuredCloneValues(obj: T): T { + return structuredClone(obj); +} + +/** Recursively rewrite any temp Value in place. */ +function remapValuesInPlace( + node: unknown, + remapValue: (v: Ir.Value) => Ir.Value, +): void { + if (!node || typeof node !== "object") return; + const obj = node as Record; + for (const key of Object.keys(obj)) { + const child = obj[key]; + if ( + child && + typeof child === "object" && + (child as { kind?: string }).kind === "temp" && + typeof (child as { id?: unknown }).id === "string" + ) { + obj[key] = remapValue(child as Ir.Value); + } else if (Array.isArray(child)) { + child.forEach((el, i) => { + if ( + el && + typeof el === "object" && + (el as { kind?: string }).kind === "temp" + ) { + child[i] = remapValue(el as Ir.Value); + } else { + remapValuesInPlace(el, remapValue); + } + }); + } else if (child && typeof child === "object") { + remapValuesInPlace(child, remapValue); + } + } +} + +/** Replace every use of temp `id` with `value` across the function. 
*/ +function substituteTemp(fn: Ir.Function, id: string, value: Ir.Value): void { + const sub = (v: Ir.Value): Ir.Value => + v.kind === "temp" && v.id === id ? value : v; + for (const b of fn.blocks.values()) { + for (const inst of b.instructions) { + remapValuesInPlace(inst, sub); + } + for (const phi of b.phis ?? []) { + for (const [pred, val] of phi.sources) { + phi.sources.set(pred, sub(val)); + } + } + b.terminator = remapTerminator(b.terminator, sub, new Map()); + } +} + +function recomputePredecessors(fn: Ir.Function): void { + for (const b of fn.blocks.values()) b.predecessors = new Set(); + for (const [id, b] of fn.blocks) { + const t = b.terminator; + const targets: string[] = + t.kind === "jump" + ? [t.target] + : t.kind === "branch" + ? [t.trueTarget, t.falseTarget] + : t.kind === "call" + ? [t.continuation] + : []; + for (const tgt of targets) { + fn.blocks.get(tgt)?.predecessors.add(id); + } + } +} + +// ---- debug-context composition ---- + +function addInlineMarker( + debug: Ir.Instruction.Debug | undefined, +): Ir.Instruction.Debug { + return Ir.Utils.addTransform(debug, "inline"); +} + +/** + * Attach a discriminator (invoke/return) as a flat sibling key on + * a debug context, threading into a gather leaf if present so the + * marker never sits as a sibling of `gather`. + */ +function mergeDiscriminator( + debug: Ir.Instruction.Debug, + key: "invoke" | "return", + value: unknown, +): Ir.Instruction.Debug { + const existing = debug.context as Record | undefined; + if (existing && "gather" in existing && Array.isArray(existing.gather)) { + // Add as a new gather child rather than a sibling of gather. + return { + context: { + ...existing, + gather: [...(existing.gather as unknown[]), { [key]: value }], + } as Format.Program.Context, + }; + } + return { + context: { + ...(existing ?? {}), + [key]: value, + } as Format.Program.Context, + }; +} From c03679ccdfab0f9da2e5af4a13b6228cf515cb2f Mon Sep 17 00:00:00 2001 From: "g. 
nicholas d'andrea" Date: Thu, 2 Jul 2026 20:34:36 -0400 Subject: [PATCH 15/21] format: clarify inline activation reconstruction (push/pop vs membership) (#233) Amend the invoke spec's "Reconstructing activations" section to resolve an ambiguity #230's inline pass exposed: an inlined body's invoke/return markers must bracket the body (invoke on the first instruction, return on the last), never duplicate across interior instructions. Duplicated boundary markers push/pop spurious activations. - State the push/pop display semantics: invoke opens inclusive of its instruction, return closes after its instruction. - Require bracketed emission; permit the single-instruction body (entry==exit) to carry both invoke and return, processed push-then-pop. - Separate the two concerns explicitly: push/pop determines an activation's lifetime; membership determines which open activation an instruction belongs to. An activation stays open across non-member (interleaved caller) instructions. No schema change. --- .../spec/program/context/function/invoke.mdx | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/packages/web/spec/program/context/function/invoke.mdx b/packages/web/spec/program/context/function/invoke.mdx index 4e6d88bd3..7a6b71701 100644 --- a/packages/web/spec/program/context/function/invoke.mdx +++ b/packages/web/spec/program/context/function/invoke.mdx @@ -125,7 +125,23 @@ uniform whether or not the call was inlined: - **Push** an activation when an `invoke` context is encountered and **pop** it when the matching `return` context is - encountered, in trace order. + encountered, in trace order. The `invoke` opens the activation + inclusive of its instruction; the `return` closes it after its + instruction, so the instruction bearing `return` is still inside + the activation. 
+ +Because push/pop is driven by where the `invoke` and `return` +contexts sit, a compiler must emit them as a **bracket**: the +`invoke` on the first instruction of the body and the `return` on +its last. A compiler must **not** duplicate `invoke` or `return` +across a body's interior instructions — repeating them would push +or pop spurious activations. The sole exception is a body that +compiles to a **single instruction**, whose entry and exit +coincide: that one instruction legitimately carries both `invoke` +and `return`, and a debugger processes them in order (push, then +pop). This bracket rule is what keeps the guarantee below — +that a debugger ignoring `transform` still sees a coherent +`invoke`/`return` pair — true for inlined calls. An inlined callee therefore appears on the call stack exactly as a non-inlined one does. Two kinds of activation differ only in how @@ -147,6 +163,12 @@ transform marker — **not** by whether `target` is present: ### Activation membership +Push/pop and membership answer two different questions. Push/pop +(above) determines **when** a virtual activation is open — its +lifetime on the call stack. Membership determines **which** open +activation a given instruction belongs to. The two are +independent, and a debugger uses both. + An instruction belongs to the innermost open virtual activation if and only if its context carries an `inline` identifier in its transform list — so composed markers such as `["inline", "fold"]` @@ -158,6 +180,14 @@ optimization passes may relocate or interleave an inlined body, so a positional "everything between the invoke and the return" rule would be unsound. +This is why membership is separate from lifetime. An activation +opened by an `invoke` stays open until its `return`, even across +instructions that are **not** its members — for example, caller +code an optimizer interleaved into the body's trace span. 
Such a +non-member instruction (no `inline` marker for that depth) is +attributed to the enclosing activation, not the inlined one, even +while the virtual activation remains on the stack. + ### Identity and values Every function-identity field (`identifier`, `declaration`, From 410362d2cde9db6c717fd2b4556add70f1a38147 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 20:49:22 -0400 Subject: [PATCH 16/21] web/programs-react: reconstruct inline virtual activations in tracer (#234) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inlined internal calls (level-2 `inline` transform) produced VIRTUAL activations that the call stack mis-rendered: because the extractor checks invoke before return, an inlined instruction's context (which carries both) always read as invoke, so buildCallStack pushed a frame per inline site and never popped — phantom `dbl > dbl` frames leaked to the end of the trace, indistinguishable from real calls. - CallInfo/CallFrame: add `isInline` (virtual activation tag). - extractCallInfoFromInstruction: set `isInline` from the `inline` transform marker (alongside the existing `isTailCall`). - buildCallStack: tag virtual frames; add a defensive per-instruction membership guard that tears down any trailing virtual frame once execution reaches an instruction without the `inline` marker, so a virtual activation can never leak into caller code. - Render an `⧉ inline` chip (dashed, italic) on the call-stack frame in both the docs TraceDrawer and the standalone CallStackDisplay, mirroring the tailcall chip — distinct from a real call. Verified end-to-end against real O2 bytecode: each inline site now shows one virtual `dbl` activation across its body and the stack returns to top level between/after sites (no phantom frames). 
--- .../src/components/CallStackDisplay.css | 10 +- .../src/components/CallStackDisplay.tsx | 8 + .../src/utils/mockTrace.test.ts | 163 ++++++++++++++++++ .../programs-react/src/utils/mockTrace.ts | 56 ++++-- .../src/theme/ProgramExample/TraceDrawer.css | 10 +- .../src/theme/ProgramExample/TraceDrawer.tsx | 8 + 6 files changed, 243 insertions(+), 12 deletions(-) diff --git a/packages/programs-react/src/components/CallStackDisplay.css b/packages/programs-react/src/components/CallStackDisplay.css index 90afee044..33790e6a2 100644 --- a/packages/programs-react/src/components/CallStackDisplay.css +++ b/packages/programs-react/src/components/CallStackDisplay.css @@ -49,7 +49,8 @@ color: var(--programs-text-muted, #888); } -.call-stack-tailcall { +.call-stack-tailcall, +.call-stack-inline { margin-left: 4px; padding: 0 5px; border-radius: 8px; @@ -60,3 +61,10 @@ color: var(--programs-transform-text, #8250df); border: 1px solid var(--programs-transform-accent, #a475f9); } + +/* Virtual (inline) activations use a dashed border to read as + "not a real frame" while sharing the transform palette. 
*/ +.call-stack-inline { + border-style: dashed; + font-style: italic; +} diff --git a/packages/programs-react/src/components/CallStackDisplay.tsx b/packages/programs-react/src/components/CallStackDisplay.tsx index 983f75be1..f8dfc4b05 100644 --- a/packages/programs-react/src/components/CallStackDisplay.tsx +++ b/packages/programs-react/src/components/CallStackDisplay.tsx @@ -102,6 +102,14 @@ export function CallStackDisplay({ ⮌ tail call )} + {frame.isInline && ( + + ⧉ inline + + )} ))} diff --git a/packages/programs-react/src/utils/mockTrace.test.ts b/packages/programs-react/src/utils/mockTrace.test.ts index 1d0bef30b..f9901e6a6 100644 --- a/packages/programs-react/src/utils/mockTrace.test.ts +++ b/packages/programs-react/src/utils/mockTrace.test.ts @@ -188,3 +188,166 @@ describe("flat (production) TCO back-edge shape", () => { expect(stack.some((f) => f.isTailCall)).toBe(false); }); }); + +// Inlined internal calls (level-2 `inline` transform) produce +// VIRTUAL activations, not real ones. The compiler brackets an +// inlined body with a virtual invoke on the entry-first +// instruction and a virtual return on the exit-last instruction; +// every inlined instruction carries transform:["inline"]. The +// call stack must reconstruct the virtual frame, tag it, and +// tear it down when execution leaves the inlined body — so it +// reads distinctly from a real call and never leaks a phantom +// frame into caller code. 
+describe("inline virtual activations", () => { + const entryInvoke = { + code: { source: { id: "0" }, range: { offset: 0, length: 1 } }, + transform: ["inline"], + invoke: { jump: true, identifier: "dbl" }, + }; + const bodyMark = { + code: { source: { id: "0" }, range: { offset: 1, length: 1 } }, + transform: ["inline"], + }; + const exitReturn = { + code: { source: { id: "0" }, range: { offset: 2, length: 1 } }, + transform: ["inline"], + return: { identifier: "dbl" }, + }; + const callerMark = { + code: { source: { id: "0" }, range: { offset: 3, length: 1 } }, + }; + + describe("extractCallInfoFromInstruction inline flag", () => { + it("marks isInline on a virtual (inline) invoke", () => { + const info = extractCallInfoFromInstruction(instr(0, entryInvoke)); + expect(info?.kind).toBe("invoke"); + expect(info?.isInline).toBe(true); + }); + + it("marks isInline on a virtual (inline) return", () => { + const info = extractCallInfoFromInstruction(instr(0, exitReturn)); + expect(info?.kind).toBe("return"); + expect(info?.isInline).toBe(true); + }); + + it("leaves isInline falsy for a plain (real) invoke", () => { + const info = extractCallInfoFromInstruction( + instr(0, { invoke: { jump: true, identifier: "dbl" } }), + ); + expect(info?.isInline).toBeFalsy(); + }); + }); + + describe("buildCallStack virtual frame lifetime", () => { + // A single inlined body: entry / body / exit / caller. 
+ const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // entry invoke → push virtual dbl + { pc: 1, opcode: "ADD" }, // inlined body instruction + { pc: 2, opcode: "MSTORE" }, // exit return → pop + { pc: 3, opcode: "JUMPDEST" }, // caller code (no inline marker) + ]; + const program = { + instructions: [ + instr(0, entryInvoke), + instr(1, bodyMark), + instr(2, exitReturn), + instr(3, callerMark), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("pushes a virtual frame tagged isInline at the entry", () => { + const stack = buildCallStack(trace, pcToInstruction, 0); + expect(stack).toHaveLength(1); + expect(stack[0].identifier).toBe("dbl"); + expect(stack[0].isInline).toBe(true); + }); + + it("keeps the virtual frame open across the inlined body", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + }); + + it("pops the virtual frame at the exit return", () => { + const stack = buildCallStack(trace, pcToInstruction, 2); + expect(stack).toHaveLength(0); + }); + + it("does not leak a phantom frame into caller code", () => { + const stack = buildCallStack(trace, pcToInstruction, 3); + expect(stack).toHaveLength(0); + }); + }); + + describe("two gap-separated inline sites of the same helper", () => { + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // site 1 entry + { pc: 2, opcode: "MSTORE" }, // site 1 exit + { pc: 3, opcode: "JUMPDEST" }, // caller gap (no inline) + { pc: 10, opcode: "PUSH1" }, // site 2 entry + { pc: 12, opcode: "MSTORE" }, // site 2 exit + { pc: 13, opcode: "JUMPDEST" }, // caller + ]; + const program = { + instructions: [ + instr(0, entryInvoke), + instr(2, exitReturn), + instr(3, callerMark), + instr(10, entryInvoke), + instr(12, exitReturn), + instr(13, callerMark), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("shows depth 1 while 
inside the second body", () => { + const stack = buildCallStack(trace, pcToInstruction, 3); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + }); + + it("is empty after both sites — no accumulation", () => { + const stack = buildCallStack(trace, pcToInstruction, 5); + expect(stack).toHaveLength(0); + }); + }); + + describe("defensive membership guard", () => { + // A virtual invoke whose exit return never arrives (residual + // smear / dropped marker): the frame must still be torn down + // when execution reaches a non-inline caller instruction, + // rather than leaking to the end of the trace. + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // virtual invoke → push + { pc: 1, opcode: "ADD" }, // still inside the body + { pc: 3, opcode: "JUMPDEST" }, // caller code, no inline marker + ]; + const program = { + instructions: [ + instr(0, entryInvoke), + instr(1, bodyMark), + instr(3, callerMark), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("keeps the frame while inline membership holds", () => { + expect(buildCallStack(trace, pcToInstruction, 1)).toHaveLength(1); + }); + + it("force-pops a stale virtual frame at a non-inline instr", () => { + expect(buildCallStack(trace, pcToInstruction, 2)).toHaveLength(0); + }); + }); + + it("leaves a real call frame's isInline falsy", () => { + const trace: TraceStep[] = [{ pc: 0, opcode: "JUMPDEST" }]; + const program = { + instructions: [instr(0, { invoke: { jump: true, identifier: "f" } })], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + const stack = buildCallStack(trace, pcToInstruction, 0); + expect(stack[0].isInline).toBeFalsy(); + }); +}); diff --git a/packages/programs-react/src/utils/mockTrace.ts b/packages/programs-react/src/utils/mockTrace.ts index 9279c85b4..eb9c55581 100644 --- a/packages/programs-react/src/utils/mockTrace.ts +++ b/packages/programs-react/src/utils/mockTrace.ts @@ 
-125,6 +125,12 @@ export interface CallInfo { * (TCO), reusing the current frame rather than nesting. */ isTailCall?: boolean; + /** + * True when an `inline` transform is present on the same + * instruction — this invoke/return belongs to an inlined + * (virtual) activation, not a real call. + */ + isInline?: boolean; } /** @@ -178,10 +184,15 @@ export function extractCallInfoFromInstruction( if (!info) { return undefined; } - const isTailCall = extractTransformFromContext(instruction.context).includes( - "tailcall", - ); - return isTailCall ? { ...info, isTailCall: true } : info; + const transforms = extractTransformFromContext(instruction.context); + const decorated: CallInfo = { ...info }; + if (transforms.includes("tailcall")) { + decorated.isTailCall = true; + } + if (transforms.includes("inline")) { + decorated.isInline = true; + } + return decorated; } function extractCallInfoFromContext( @@ -329,6 +340,13 @@ export interface CallFrame { * (TCO). The frame was reused in place rather than nested. */ isTailCall?: boolean; + /** + * True when this frame is a VIRTUAL activation reconstructed + * from an inlined body (transform:["inline"]) rather than a + * real call. Its instructions were spliced into the caller; + * no JUMP occurred. + */ + isInline?: boolean; } /** @@ -360,12 +378,15 @@ export function buildCallStack( continue; } + // Per-instruction inline membership drives the defensive + // guard below: an inlined body's instructions all carry + // transform:["inline"], so a virtual frame is only valid + // while that marker holds. 
+ const isInlineInstr = + extractTransformFromInstruction(instruction).includes("inline"); const callInfo = extractCallInfoFromInstruction(instruction); - if (!callInfo) { - continue; - } - if (callInfo.isTailCall) { + if (callInfo?.isTailCall) { // A TCO back-edge carries both return and invoke on a // single instruction: the previous iteration returns // and the next iteration is invoked, reusing the same @@ -393,7 +414,7 @@ export function buildCallStack( continue; } - if (callInfo.kind === "invoke") { + if (callInfo?.kind === "invoke") { // The compiler emits invoke on both the caller JUMP // and callee entry JUMPDEST for the same call. These // occur on consecutive trace steps. Only skip if the @@ -423,14 +444,29 @@ export function buildCallStack( callType: callInfo.callType, argumentNames: argResult?.names, argumentPointers: argResult?.pointers, + // Tag virtual activations so the widget can render + // them distinctly from real calls. + ...(callInfo.isInline ? { isInline: true } : {}), }); } - } else if (callInfo.kind === "return" || callInfo.kind === "revert") { + } else if (callInfo?.kind === "return" || callInfo?.kind === "revert") { // Pop the matching frame if (stack.length > 0) { stack.pop(); } } + + // Defensive membership guard: a virtual (inline) frame must + // not stay open once execution leaves the inlined body. If + // the current instruction carries no inline marker, tear down + // any trailing virtual frames — belt-and-suspenders against a + // dropped or incomplete virtual return so a phantom activation + // can never leak into caller code. 
+ if (!isInlineInstr) { + while (stack.length > 0 && stack[stack.length - 1].isInline) { + stack.pop(); + } + } } return stack; diff --git a/packages/web/src/theme/ProgramExample/TraceDrawer.css b/packages/web/src/theme/ProgramExample/TraceDrawer.css index d320e269e..401a318f1 100644 --- a/packages/web/src/theme/ProgramExample/TraceDrawer.css +++ b/packages/web/src/theme/ProgramExample/TraceDrawer.css @@ -267,7 +267,8 @@ border-left: 3px solid #8250df; } -.call-stack-tailcall { +.call-stack-tailcall, +.call-stack-inline { margin-left: 4px; padding: 0 5px; border-radius: 8px; @@ -279,6 +280,13 @@ border: 1px solid rgba(130, 80, 223, 0.45); } +/* Virtual (inline) activations read as "not a real frame": dashed + border + italic, sharing the transform purple tint. */ +.call-stack-inline { + border-style: dashed; + font-style: italic; +} + /* Trace panels */ .trace-panels { display: grid; diff --git a/packages/web/src/theme/ProgramExample/TraceDrawer.tsx b/packages/web/src/theme/ProgramExample/TraceDrawer.tsx index cf68e4d71..78023423b 100644 --- a/packages/web/src/theme/ProgramExample/TraceDrawer.tsx +++ b/packages/web/src/theme/ProgramExample/TraceDrawer.tsx @@ -618,6 +618,14 @@ function TraceDrawerContent(): JSX.Element { ⮌ tail call )} + {frame.isInline && ( + + ⧉ inline + + )} )) From ea80a411aa1b44cb06db2c3335087566a5404a03 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 21:08:41 -0400 Subject: [PATCH 17/21] bugc: bracket inlined invoke/return to boundary ops in evmgen (#235) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bugc: bracket inlined invoke/return to boundary ops in evmgen Inlined virtual activations mis-rendered in the tracer: the call stack never popped (phantom frames). Root cause was in evmgen lowering, not the optimizer. An IR instruction lowers to N EVM micro-ops, and the generic lowering attaches that instruction's whole operationDebug to every op. 
That is right for source/variables/transform context (all N ops map to the instruction) but wrong for invoke/return, which are positional activation boundaries: invoke marks one push point, return one pop point. Broadcasting them across the whole op-run makes push/pop reconstruction see every op as both a push and a pop. The optimizer IR is already correctly bracketed (verified per-pass: invoke/return counts are stable through inlining + all L2/L3 passes); the smear appears only after lowering. Real calls are unaffected because their invoke/return ride single-op JUMP/JUMPDEST terminators — only inlined activations put them on multi-op compute instructions. Fix (bracket-activation.ts, wired into block generation): for the ops emitted by one instruction, keep invoke on only the first op and return on only the last op, stripped from the interior; transform markers and source/variables stay on all ops. General evmgen invariant, not inline-specific — a no-op for single-op terminators. Reaches invoke/return nested in pick/gather composites, not just flat leaves. Tests: new inline-bracket.test.ts asserts one push + one pop per site on dbl@2-sites (both=0), invoke-before-return ordering, entry vs exit placement on a multi-instruction body, and unchanged runtime behavior at O0-O3. Full suite green (428 passed). * bugc: add tailcall + real-call no-op guards to bracket test Architect's requested safety net for the general invoke/return bracketing invariant: assert it stays a no-op where invoke/return ride single-op carriers. - tailcall back-edge (tailRecursiveSum, O2): the combined invoke+return on the one back-edge JUMP survives (both>=1). - mutually recursive real calls (never inlined): no op carries both, and invoke/return are still present (real call tracing intact). 
--- packages/bugc/src/evmgen/generation/block.ts | 27 ++- .../evmgen/generation/bracket-activation.ts | 164 +++++++++++++++ .../bugc/src/evmgen/inline-bracket.test.ts | 192 ++++++++++++++++++ 3 files changed, 381 insertions(+), 2 deletions(-) create mode 100644 packages/bugc/src/evmgen/generation/bracket-activation.ts create mode 100644 packages/bugc/src/evmgen/inline-bracket.test.ts diff --git a/packages/bugc/src/evmgen/generation/block.ts b/packages/bugc/src/evmgen/generation/block.ts index e23460dfb..632ee132a 100644 --- a/packages/bugc/src/evmgen/generation/block.ts +++ b/packages/bugc/src/evmgen/generation/block.ts @@ -14,6 +14,7 @@ import { Memory } from "#evmgen/analysis"; import { calculateSize } from "#evmgen/serialize"; import * as Instruction from "./instruction.js"; +import { bracketActivation, carriesActivation } from "./bracket-activation.js"; import { loadValue } from "./values/index.js"; import { generateTerminator, @@ -161,9 +162,31 @@ export function generate( // the runtime predecessor differs from the layout-order // predecessor. - // Process regular instructions + // Process regular instructions. Invoke/return activation + // discriminators must be bracketed to the first/last emitted op + // of the instruction (see bracket-activation.ts); everything else + // (source mapping, variables, transform markers) rides all ops. 
for (const inst of block.instructions) { - result = result.then(Instruction.generate(inst)); + const gen = Instruction.generate(inst); + const operationCtx = inst.operationDebug?.context; + if ( + !carriesActivation(operationCtx, "invoke") && + !carriesActivation(operationCtx, "return") + ) { + result = result.then(gen); + continue; + } + result = result.peek((state, builder) => { + const start = state.instructions.length; + return builder.then(gen).then((s) => ({ + ...s, + instructions: bracketActivation( + s.instructions, + start, + operationCtx, + ), + })); + }); } // Emit phi copies for successor blocks before the diff --git a/packages/bugc/src/evmgen/generation/bracket-activation.ts b/packages/bugc/src/evmgen/generation/bracket-activation.ts new file mode 100644 index 000000000..653c2ffc8 --- /dev/null +++ b/packages/bugc/src/evmgen/generation/bracket-activation.ts @@ -0,0 +1,164 @@ +/** + * Bracket invoke/return activation discriminators onto the boundary + * ops of an IR instruction's emitted op-run. + * + * A single IR instruction lowers to N EVM micro-ops, and the generic + * lowering attaches that instruction's whole `operationDebug` (source + * mapping, variables, transform markers, AND any invoke/return + * discriminators) to every one of those ops. That is correct for + * source/variable/transform context — a debugger wants all N ops + * mapped to the instruction — but WRONG for invoke/return: those are + * positional activation boundaries. An `invoke` marks a single push + * point; a `return` a single pop point. Broadcasting them across the + * whole op-run makes a push/pop reconstruction see every op as both a + * push and a pop. + * + * This module de-smears: for the ops emitted by one instruction, the + * `invoke` discriminator is kept on only the FIRST op, `return` on only + * the LAST op, and stripped from the interior. The `transform` + * membership markers (and source/variables) stay on every op. 
+ * + * It is a general evmgen invariant, not inline-specific: it is a no-op + * for real calls (whose invoke/return already ride single-op JUMP / + * JUMPDEST terminators) and fires only when invoke/return happen to + * ride a multi-op instruction — which today is inlined virtual + * activations. + */ +import type * as Format from "@ethdebug/format"; +import type * as Evm from "#evm"; + +type Ctx = Format.Program.Context; +type Activation = "invoke" | "return"; + +function isPick(ctx: Ctx): ctx is Ctx & { pick: Ctx[] } { + return ( + typeof ctx === "object" && + ctx !== null && + "pick" in ctx && + Array.isArray((ctx as { pick: unknown }).pick) + ); +} + +function isGather(ctx: Ctx): ctx is Ctx & { gather: Ctx[] } { + return ( + typeof ctx === "object" && + ctx !== null && + "gather" in ctx && + Array.isArray((ctx as { gather: unknown }).gather) + ); +} + +/** Whether ctx carries the given activation key anywhere, reaching + * into pick/gather composites. */ +export function carriesActivation( + ctx: Ctx | undefined, + key: Activation, +): boolean { + if (!ctx || typeof ctx !== "object") return false; + if (isPick(ctx)) return ctx.pick.some((c) => carriesActivation(c, key)); + if (isGather(ctx)) return ctx.gather.some((c) => carriesActivation(c, key)); + return key in ctx; +} + +/** The first activation value found for the given key, reaching into + * pick/gather composites. */ +function findActivation(ctx: Ctx | undefined, key: Activation): unknown { + if (!ctx || typeof ctx !== "object") return undefined; + if (isPick(ctx)) { + for (const c of ctx.pick) { + const v = findActivation(c, key); + if (v !== undefined) return v; + } + return undefined; + } + if (isGather(ctx)) { + for (const c of ctx.gather) { + const v = findActivation(c, key); + if (v !== undefined) return v; + } + return undefined; + } + return (ctx as Record)[key]; +} + +/** Remove invoke and return discriminators anywhere in ctx, reaching + * into pick/gather composites. 
Returns undefined if nothing remains. */ +export function stripActivation(ctx: Ctx | undefined): Ctx | undefined { + if (!ctx || typeof ctx !== "object") return ctx; + if (isPick(ctx)) { + const kids = ctx.pick + .map(stripActivation) + .filter((c): c is Ctx => c !== undefined); + if (kids.length === 0) return undefined; + if (kids.length === 1) return kids[0]; + return { pick: kids } as Ctx; + } + if (isGather(ctx)) { + const kids = ctx.gather + .map(stripActivation) + .filter((c): c is Ctx => c !== undefined); + if (kids.length === 0) return undefined; + if (kids.length === 1) return kids[0]; + return { gather: kids } as Ctx; + } + const rest = { ...(ctx as Record) }; + delete rest.invoke; + delete rest.return; + return Object.keys(rest).length > 0 ? (rest as Ctx) : undefined; +} + +/** Attach an activation discriminator, composing it as a flat sibling + * key on a leaf context (per the flat-composition convention), or + * appending it to a pick/gather composite. */ +function attachActivation( + ctx: Ctx | undefined, + key: Activation, + value: unknown, +): Ctx { + const marker = { [key]: value } as Ctx; + if (!ctx || typeof ctx !== "object") return marker; + if (isPick(ctx)) return { pick: [...ctx.pick, marker] } as Ctx; + if (isGather(ctx)) return { gather: [...ctx.gather, marker] } as Ctx; + return { ...(ctx as Record), [key]: value } as Ctx; +} + +/** + * Rewrite the ops emitted by one IR instruction (the tail slice + * `instructions[start..]`) so invoke rides only the first op and + * return only the last op, using the discriminators found on the + * instruction's `operationDebug` context. No-op unless that context + * carries invoke and/or return, so it never touches ordinary code. 
+ */ +export function bracketActivation( + instructions: Evm.Instruction[], + start: number, + operationCtx: Ctx | undefined, +): Evm.Instruction[] { + const end = instructions.length; // exclusive + if (end <= start) return instructions; + + const hasInvoke = carriesActivation(operationCtx, "invoke"); + const hasReturn = carriesActivation(operationCtx, "return"); + if (!hasInvoke && !hasReturn) return instructions; + + const invokeValue = hasInvoke + ? findActivation(operationCtx, "invoke") + : undefined; + const returnValue = hasReturn + ? findActivation(operationCtx, "return") + : undefined; + + const out = instructions.slice(); + for (let i = start; i < end; i++) { + const op = out[i]; + let ctx = stripActivation(op.debug?.context); + if (hasInvoke && i === start) { + ctx = attachActivation(ctx, "invoke", invokeValue); + } + if (hasReturn && i === end - 1) { + ctx = attachActivation(ctx, "return", returnValue); + } + out[i] = { ...op, debug: { ...op.debug, context: ctx } }; + } + return out; +} diff --git a/packages/bugc/src/evmgen/inline-bracket.test.ts b/packages/bugc/src/evmgen/inline-bracket.test.ts new file mode 100644 index 000000000..6583bcfed --- /dev/null +++ b/packages/bugc/src/evmgen/inline-bracket.test.ts @@ -0,0 +1,192 @@ +/** + * Verifies that inlined virtual-activation invoke/return contexts are + * BRACKETED on the emitted bytecode, not smeared across every op. + * + * An IR instruction lowers to N EVM micro-ops. evmgen must attach the + * `invoke` discriminator to only the FIRST emitted op of the + * invoke-bearing instruction and the `return` discriminator to only the + * LAST emitted op of the return-bearing instruction, while keeping the + * `transform: ["inline"]` membership marker on ALL body ops. + * + * Without bracketing, the tracer's push/pop reconstruction sees every + * body op as both a push and a pop -> phantom frames. 
+ */ +import { describe, it, expect } from "vitest"; + +import { compile } from "#compiler"; +import { executeProgram } from "#test/evm/behavioral"; +import type * as Format from "@ethdebug/format"; +import { Program } from "@ethdebug/format"; + +const { Context } = Program; + +type OptLevel = 0 | 1 | 2 | 3; + +async function runtimeInstructions(source: string, level: OptLevel) { + const result = await compile({ + to: "bytecode", + source, + optimizer: { level }, + }); + if (!result.success) { + const errors = result.messages.error ?? []; + throw new Error( + `Compilation failed at level ${level}:\n` + + errors + .map((e: { message?: string }) => e.message ?? String(e)) + .join("\n"), + ); + } + return result.value.bytecode.runtimeInstructions; +} + +/** Flatten a context into leaves, unwrapping gather/pick. */ +function leaves(ctx: Format.Program.Context): Format.Program.Context[] { + if (Context.isGather(ctx)) return ctx.gather.flatMap(leaves); + if ("pick" in ctx && Array.isArray((ctx as { pick: unknown[] }).pick)) { + return (ctx as { pick: Format.Program.Context[] }).pick.flatMap(leaves); + } + return [ctx]; +} + +/** Per-op discriminator/marker presence, reaching nested pick/gather. 
*/ +function flags(instr: { debug?: { context?: Format.Program.Context } }) { + const ctx = instr.debug?.context; + if (!ctx) return { invoke: false, return: false, inline: false }; + const all = [ctx, ...leaves(ctx)]; + return { + invoke: all.some((c) => Context.isInvoke(c)), + return: all.some((c) => Context.isReturn(c)), + inline: all.some( + (c) => Context.isTransform(c) && c.transform.includes("inline"), + ), + }; +} + +function tally(instrs: ReturnType[]) { + let invoke = 0, + ret = 0, + both = 0, + inline = 0; + for (const f of instrs) { + if (f.invoke) invoke += 1; + if (f.return) ret += 1; + if (f.invoke && f.return) both += 1; + if (f.inline) inline += 1; + } + return { invoke, ret, both, inline }; +} + +// The exact fixture the UI reported mis-rendering: a leaf helper +// inlined at two sites. +const dblTwoSites = `name Multi; +define { function dbl(x: uint256) -> uint256 { return x + x; }; } +storage { [0] r: uint256; } +create { r = 0; } +code { + let a = dbl(5); + let b = dbl(10); + r = a + b; +}`; + +// A multi-instruction body: entry (t = x + x) differs from exit +// (t * x), so invoke and return live on distinct IR instructions. +const multiInstrBody = `name Poly; +define { function poly(x: uint256) -> uint256 { let t = x + x; return t * x; }; } +storage { [0] a: uint256; [1] r: uint256; } +create { a = 3; r = 0; } +code { r = poly(a); }`; + +describe("inlined invoke/return are bracketed on emitted bytecode", () => { + it("dbl@2-sites: one push and one pop per site, never both on an op", async () => { + const instrs = await runtimeInstructions(dblTwoSites, 2); + const t = tally(instrs.map(flags)); + // Two inlined sites => exactly one invoke op and one return op each. + expect(t.invoke).toBe(2); + expect(t.ret).toBe(2); + // No op may be both a push and a pop (that breaks push/pop). + expect(t.both).toBe(0); + // Membership marker stays on every body op (more than the 4 + // boundary ops). 
+ expect(t.inline).toBeGreaterThan(4); + }); + + it("dbl@2-sites: each site's invoke op precedes its return op", async () => { + const instrs = await runtimeInstructions(dblTwoSites, 2); + const seq = instrs + .map((instr, i) => ({ i, f: flags(instr) })) + .filter(({ f }) => f.invoke || f.return) + .map(({ f }) => (f.invoke ? "invoke" : "return")); + // Bracketed order across two sites: push,pop,push,pop. + expect(seq).toEqual(["invoke", "return", "invoke", "return"]); + }); + + it("multi-instruction body: invoke on entry, return on exit, both=0", async () => { + const instrs = await runtimeInstructions(multiInstrBody, 2); + const t = tally(instrs.map(flags)); + expect(t.invoke).toBe(1); + expect(t.ret).toBe(1); + expect(t.both).toBe(0); + }); + + it("preserves runtime behavior at every level", async () => { + for (const level of [0, 1, 2, 3] as const) { + const res = await executeProgram(dblTwoSites, { + calldata: "", + optimizationLevel: level, + }); + expect(res.callSuccess).toBe(true); + // dbl(5)=10, dbl(10)=20, r=30 + expect(await res.getStorage(0n)).toBe(30n); + } + }); +}); + +// A self-tail-recursive accumulator: TCO turns the recursive call +// into a single back-edge JUMP that legitimately carries BOTH invoke +// and return on its one op (end one iteration + begin the next). +const tailRecursive = `name TailSum; +define { + function sum(n: uint256, acc: uint256) -> uint256 { + if (n == 0) { return acc; } + else { return sum(n - 1, acc + n); } + }; +} +storage { [0] result: uint256; } +create { result = 0; } +code { result = sum(5, 0); }`; + +// Mutually recursive functions never inline, so their calls stay real +// (invoke on a 1-op JUMP, return on a 1-op JUMPDEST). 
+const mutualRecursion = `name EvenOdd; +define { + function isEven(n: uint256) -> uint256 { + if (n == 0) { return 1; } else { return isOdd(n - 1); } + }; + function isOdd(n: uint256) -> uint256 { + if (n == 0) { return 0; } else { return isEven(n - 1); } + }; +} +storage { [0] result: uint256; } +create { result = 0; } +code { result = isEven(4); }`; + +describe("bracketing is a no-op for single-op invoke/return carriers", () => { + it("tailcall back-edge keeps its combined invoke+return on one op", async () => { + // The back-edge JUMP is a single op carrying both markers; bracketing + // to first-op/last-op is first==last, so both must survive. + const instrs = await runtimeInstructions(tailRecursive, 2); + const t = tally(instrs.map(flags)); + expect(t.both).toBeGreaterThanOrEqual(1); + }); + + it("real (non-inlined) calls never carry both on an op", async () => { + const instrs = await runtimeInstructions(mutualRecursion, 2); + const t = tally(instrs.map(flags)); + // Real calls put invoke on a 1-op JUMP and return on a 1-op JUMPDEST, + // distinct ops — the fix must not fabricate a both. + expect(t.both).toBe(0); + expect(t.invoke).toBeGreaterThan(0); + expect(t.ret).toBeGreaterThan(0); + }); +}); From 1e909351b2aedba4e8b8042c50b30a046eb80e74 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 21:15:39 -0400 Subject: [PATCH 18/21] format: define context `name` as a referenceable identifier (activation correlation) (#236) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * format: define context `name` as a referenceable identifier Reframe the `name` context from a "disambiguation label" to its intended purpose: a machine-generated identifier for a context that other contexts reference by name, unique within a program. State the per-program uniqueness requirement. 
Apply it to activation reconstruction: an `invoke` context declares an activation's name; the matching `return` and the body's member instructions reference it. This pairs a call with its return order-independently — resolving the two cases push/pop + the inline marker alone cannot: adjacent same-function activations and reordered/interleaved bodies. When present, names are authoritative for activation structure; push/pop + marker-count are the name-less fallback. - name.schema.yaml: rewrite description (purpose + uniqueness); examples. - name.mdx: frame `name` as the referenceable-context-name primitive, then pick-selection and invoke/return correlation as applications; note gather carries distinct co-located names (tailcall). - invoke.mdx: add "Correlating with `name`" to Reconstructing activations (Layer 2 authoritative when present). No schema structure change. * format: sharpen name uniqueness wording (declared once, referenced freely) Per writer review of #236: 'a name must be unique' misread as 'appears once', but a name deliberately recurs — the invoke declares it, the return and every body instruction reference it. State the requirement on the declaration: each name is declared by exactly one context; other contexts reference it freely. Mirror the wording across name.schema.yaml, name.mdx, and invoke.mdx. Plus prose polish (marker-run phrasing, pick handle). 
--- .../spec/program/context/function/invoke.mdx | 37 ++++++++++++ packages/web/spec/program/context/name.mdx | 57 +++++++++++++++++-- schemas/program/context/name.schema.yaml | 39 +++++++++---- 3 files changed, 119 insertions(+), 14 deletions(-) diff --git a/packages/web/spec/program/context/function/invoke.mdx b/packages/web/spec/program/context/function/invoke.mdx index 7a6b71701..76da58aff 100644 --- a/packages/web/spec/program/context/function/invoke.mdx +++ b/packages/web/spec/program/context/function/invoke.mdx @@ -188,6 +188,43 @@ non-member instruction (no `inline` marker for that depth) is attributed to the enclosing activation, not the inlined one, even while the virtual activation remains on the stack. +### Correlating with `name` + +The push/pop and membership rules above reconstruct activations +without any correlation identifier: they rely on `invoke`/`return` +appearing in a well-nested order and on the `inline` marker to +attribute instructions. That is sufficient for typical compiler +output. It has two blind spots, both arising because the marker +alone can't tell one activation from another of the same function: + +- **Adjacent activations of the same function.** Two inlined + copies of the same callee placed back-to-back, with no + intervening caller instruction, read as one activation to a + debugger that groups a consecutive run of `inline`-marked + instructions. +- **Reordered or interleaved bodies.** An optimizer that moves a + `return` marker ahead of its `invoke`, or interleaves two + activations non-nested, defeats strict push/pop pairing. + +A [`name`](/spec/program/context/name) closes both. When an +`invoke` context carries a `name`, it **declares** that activation; +the matching `return`, and the body instructions that belong to it, +carry the same `name` to **reference** it. 
Because each name is +declared by exactly one `invoke`, the pairing is explicit and +order-independent: adjacent same-function activations have distinct +names, and a reference resolves to its declaration regardless of +trace order. + +When names are present they are **authoritative** for activation +structure — which `invoke` pairs with which `return`, and which +instructions belong to which activation. Push/pop, the `inline` +marker, and the marker-count depth remain the fallback a name-less +debugger uses; in well-nested output the two agree. Where they +cannot — the two blind spots above — the names are correct. A +compiler that emits names should therefore keep them consistent +with the push/pop structure wherever both are determinate, so the +two views never silently disagree. + ### Identity and values Every function-identity field (`identifier`, `declaration`, diff --git a/packages/web/spec/program/context/name.mdx b/packages/web/spec/program/context/name.mdx index 31fd7fb17..7a247d5d6 100644 --- a/packages/web/spec/program/context/name.mdx +++ b/packages/web/spec/program/context/name.mdx @@ -6,9 +6,58 @@ import SchemaViewer from "@site/src/components/SchemaViewer"; # Named contexts -Contexts may include a `name` property for distinguishing them from -other contexts. This is particularly useful inside `pick` alternatives, -where several possible contexts may apply at a given point in execution -and runtime information is needed to select which one is active. +A context may carry a `name`: a machine-generated identifier that gives +the context a stable identity other contexts can reference. A name is +what makes a **cross-context reference** possible — one context declares +a name, and another points back to it by the same name. + +Names are opaque strings; the format imposes no structure on them. 
+Within a single program — one [`instructions`](/spec/program) +sequence — each name **must** be declared by exactly one context; no +two contexts may declare the same name. Other contexts may reference +that name freely — that repetition is how they point back — and every +reference resolves to the single declaring context. Compilers +**should** also choose names that are meaningful to debugger users. + +## Uses + +### Selecting `pick` alternatives + +Inside a [`pick`](/spec/program/context/pick), several contexts may apply +at a given point in execution and runtime information is needed to select +which one is active. A `name` on each alternative gives the selection +a stable handle for the chosen alternative. + +### Correlating an invocation with its return + +A `name` lets a function invocation and its return be paired directly. +An [`invoke`](/spec/program/context/function/invoke) context **declares** +an activation's name; the matching +[`return`](/spec/program/context/function/return) context — and the +instructions belonging to that activation's body — **reference** it by +the same name. + +This declaration/reference split follows the format's general +reference-by-name idiom (as a +[pointer template](/spec/pointer/collection/templates) is declared +once and referenced elsewhere). It pairs a call with its return +without relying on the trace +being strictly nested: even when optimization reorders or interleaves +code so that a naive "innermost open activation" rule would mispair +them, the shared name resolves the pairing unambiguously. When two +inlined copies of the same function appear back-to-back, their distinct +names keep them distinct activations. 
+ +Because a single context object can hold at most one `name`, two +activation facts that must carry **different** names at the same +instruction — for example a tail call, where one instruction both +returns from the current activation and invokes the next — are expressed +with a [`gather`](/spec/program/context/gather) whose members each carry +their own name. The naming granularity therefore tracks the structure of +the contexts themselves. + +See the invoke context's +[Reconstructing activations](/spec/program/context/function/invoke#reconstructing-activations) +for how a debugger uses these names to rebuild the call stack. diff --git a/schemas/program/context/name.schema.yaml b/schemas/program/context/name.schema.yaml index 759161059..c9830fc1f 100644 --- a/schemas/program/context/name.schema.yaml +++ b/schemas/program/context/name.schema.yaml @@ -3,15 +3,31 @@ $id: "schema:ethdebug/format/program/context/name" title: ethdebug/format/program/context/name description: | - A label for distinguishing this context from other contexts. - This is particularly useful inside `pick` alternatives, - where several possible contexts may apply at a given point in - execution and runtime information is needed to select which one - is active. + A machine-generated identifier for this context that other + contexts may reference by name. - Context names are opaque strings with no format-imposed semantics. - Compilers **should** choose names that are meaningful to debugger - users. + A `name` gives a context a stable identity that the rest of a + program's debug information can point back to. This is what makes + cross-context references possible. Uses include: + + - **Selecting `pick` alternatives.** Several contexts may apply + at a point in execution; a `name` identifies which alternative + is active, so runtime information can select it. 
+ - **Correlating an invocation with its return.** An `invoke` + context *declares* an activation's name; the matching `return` + context — and the instructions belonging to that activation — + *reference* it by the same name. This pairs a call with its + return directly, without relying on the trace being strictly + nested (see the invoke context's activation-reconstruction + guidance). + + Names are opaque strings; the format imposes no structure on + them. Within a single program — one **instructions** sequence — + each name **must** be declared by exactly one context; no two + contexts may declare the same name. Other contexts may reference + that name freely, and every reference resolves to the single + declaring context. Compilers **should** also choose names that + are meaningful to debugger users. type: object properties: @@ -21,7 +37,10 @@ required: - name examples: - # example: naming an inlined call site - - name: "inlined-call" + # example: declaring an inlined activation, referenced by its + # matching return and by the instructions of the inlined body + - name: "inline-0" # example: naming a generic instantiation - name: "Array" + # example: distinguishing a `pick` alternative + - name: "storage-layout-v2" From 20a7654b4a0498ec6d5c3d2c8ee8ba2f5d14875a Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 21:16:38 -0400 Subject: [PATCH 19/21] programs-react: adopt locked inline-activation contract (#237) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per architect's rulings on #23, replace the pragmatic invoke-wins reconstruction with the locked contract: - extractCallEvents: expose ALL call events (ordered invoke-before- return) so a co-located invoke+return context is never swallowed as invoke-only. extractCallInfoFromInstruction now returns the first event (display banner unchanged). Parsing split into parseInvoke/parseReturn/parseRevert + collectCallInfos. 
- buildCallStack close-after semantics: an invoke opens a frame inclusive of its instruction; a return closes it AFTER (deferred until the viewed step advances past it), so a frame is visible while parked on its return-bearing instruction — uniformly for real and virtual frames, and correct for a single-instruction inlined body. - marker-keyed dedup: the caller-JUMP/callee-JUMPDEST collapse now also requires matching inline-ness, so a virtual invoke never merges with an adjacent real invoke of the same name. - generalized membership guard: force-pop trailing virtual frames whose depth exceeds the instruction's inline-marker count (handles nested inlining and residual smear). Tests: close-after lifetime, single-op body, adjacent-site split via the return, marker-keyed dedup, nested (double-marker) inlining, and a real-call close-after regression. 59 programs-react tests green. Verified end-to-end on real O2 (dbl@2 sites): one virtual frame per body, top level between/after, no phantom — holds even pre-de-smear. --- .../src/utils/mockTrace.test.ts | 212 +++++++++++- .../programs-react/src/utils/mockTrace.ts | 310 ++++++++++-------- 2 files changed, 368 insertions(+), 154 deletions(-) diff --git a/packages/programs-react/src/utils/mockTrace.test.ts b/packages/programs-react/src/utils/mockTrace.test.ts index f9901e6a6..e56a8ea1c 100644 --- a/packages/programs-react/src/utils/mockTrace.test.ts +++ b/packages/programs-react/src/utils/mockTrace.test.ts @@ -8,6 +8,7 @@ import type { Program } from "@ethdebug/format"; import { extractTransformFromInstruction, extractCallInfoFromInstruction, + extractCallEvents, buildCallStack, buildPcToInstructionMap, type TraceStep, @@ -194,10 +195,13 @@ describe("flat (production) TCO back-edge shape", () => { // inlined body with a virtual invoke on the entry-first // instruction and a virtual return on the exit-last instruction; // every inlined instruction carries transform:["inline"]. 
The -// call stack must reconstruct the virtual frame, tag it, and -// tear it down when execution leaves the inlined body — so it -// reads distinctly from a real call and never leaks a phantom -// frame into caller code. +// call stack reconstructs the virtual frame via close-after +// push/pop (a frame is visible AT its return-bearing instruction +// and popped on advance), tags it, and — belt-and-suspenders — +// tears down any trailing virtual frame the moment execution +// reaches an instruction whose inline-marker count is below the +// open virtual depth. So it reads distinctly from a real call and +// never leaks a phantom frame into caller code. describe("inline virtual activations", () => { const entryInvoke = { code: { source: { id: "0" }, range: { offset: 0, length: 1 } }, @@ -216,6 +220,14 @@ describe("inline virtual activations", () => { const callerMark = { code: { source: { id: "0" }, range: { offset: 3, length: 1 } }, }; + // A body that emits to a single EVM op: invoke and return + // co-locate on one instruction (the degenerate bracketed case). 
+ const singleOpBody = { + code: { source: { id: "0" }, range: { offset: 0, length: 1 } }, + transform: ["inline"], + invoke: { jump: true, identifier: "dbl" }, + return: { identifier: "dbl" }, + }; describe("extractCallInfoFromInstruction inline flag", () => { it("marks isInline on a virtual (inline) invoke", () => { @@ -238,13 +250,31 @@ describe("inline virtual activations", () => { }); }); - describe("buildCallStack virtual frame lifetime", () => { + describe("extractCallEvents exposes both discriminators", () => { + it("returns invoke then return, in order, for a co-located context", () => { + const events = extractCallEvents(instr(0, singleOpBody)); + expect(events.map((e) => e.kind)).toEqual(["invoke", "return"]); + expect(events.every((e) => e.isInline)).toBe(true); + }); + + it("returns a single invoke event for a pure invoke", () => { + const events = extractCallEvents(instr(0, entryInvoke)); + expect(events.map((e) => e.kind)).toEqual(["invoke"]); + }); + + it("returns a single return event for a pure return", () => { + const events = extractCallEvents(instr(0, exitReturn)); + expect(events.map((e) => e.kind)).toEqual(["return"]); + }); + }); + + describe("buildCallStack virtual frame lifetime (close-after)", () => { // A single inlined body: entry / body / exit / caller. 
const trace: TraceStep[] = [ { pc: 0, opcode: "PUSH1" }, // entry invoke → push virtual dbl { pc: 1, opcode: "ADD" }, // inlined body instruction - { pc: 2, opcode: "MSTORE" }, // exit return → pop - { pc: 3, opcode: "JUMPDEST" }, // caller code (no inline marker) + { pc: 2, opcode: "MSTORE" }, // exit return (still inside frame) + { pc: 3, opcode: "JUMPDEST" }, // caller code (frame gone) ]; const program = { instructions: [ @@ -269,17 +299,39 @@ describe("inline virtual activations", () => { expect(stack[0].isInline).toBe(true); }); - it("pops the virtual frame at the exit return", () => { + it("still shows the frame AT the exit return (close-after)", () => { const stack = buildCallStack(trace, pcToInstruction, 2); - expect(stack).toHaveLength(0); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); }); - it("does not leak a phantom frame into caller code", () => { + it("pops the frame once execution advances past the return", () => { const stack = buildCallStack(trace, pcToInstruction, 3); expect(stack).toHaveLength(0); }); }); + describe("single-op inlined body (co-located invoke+return)", () => { + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // the whole body: invoke+return + { pc: 1, opcode: "JUMPDEST" }, // caller code + ]; + const program = { + instructions: [instr(0, singleOpBody), instr(1, callerMark)], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("shows the virtual frame AT the single body op", () => { + const stack = buildCallStack(trace, pcToInstruction, 0); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + }); + + it("pops after advancing off the body op", () => { + expect(buildCallStack(trace, pcToInstruction, 1)).toHaveLength(0); + }); + }); + describe("two gap-separated inline sites of the same helper", () => { const trace: TraceStep[] = [ { pc: 0, opcode: "PUSH1" }, // site 1 entry @@ -305,6 +357,7 @@ describe("inline virtual activations", 
() => { const stack = buildCallStack(trace, pcToInstruction, 3); expect(stack).toHaveLength(1); expect(stack[0].isInline).toBe(true); + expect(stack[0].stepIndex).toBe(3); }); it("is empty after both sites — no accumulation", () => { @@ -313,6 +366,110 @@ describe("inline virtual activations", () => { }); }); + describe("two ADJACENT inline sites split by the return", () => { + // No caller gap between sites: the return marker (not the + // membership guard, which can't see a boundary between two + // inline-marked instructions) is what closes site 1 before + // site 2 opens. + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // site 1 entry + { pc: 1, opcode: "MSTORE" }, // site 1 exit + { pc: 2, opcode: "PUSH1" }, // site 2 entry (immediately) + { pc: 3, opcode: "MSTORE" }, // site 2 exit + { pc: 5, opcode: "JUMPDEST" }, // caller + ]; + const program = { + instructions: [ + instr(0, entryInvoke), + instr(1, exitReturn), + instr(2, entryInvoke), + instr(3, exitReturn), + instr(5, callerMark), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("does not merge or accumulate — one frame, rooted at site 2", () => { + const stack = buildCallStack(trace, pcToInstruction, 2); + expect(stack).toHaveLength(1); + expect(stack[0].stepIndex).toBe(2); + }); + + it("is empty after both sites", () => { + expect(buildCallStack(trace, pcToInstruction, 4)).toHaveLength(0); + }); + }); + + describe("marker-keyed dedup", () => { + // A real call and an inlined body of the SAME name on + // consecutive steps must NOT be merged by the caller-JUMP / + // callee-JUMPDEST dedup — they are distinct activations. 
+ const realInvoke = { invoke: { jump: true, identifier: "dbl" } }; + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMP" }, // real invoke of dbl + { pc: 1, opcode: "PUSH1" }, // virtual (inline) invoke of dbl + ]; + const program = { + instructions: [instr(0, realInvoke), instr(1, entryInvoke)], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("keeps a real and a virtual dbl as two separate frames", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack).toHaveLength(2); + expect(stack[0].isInline).toBeFalsy(); + expect(stack[1].isInline).toBe(true); + }); + }); + + describe("nested inlining (double inline marker)", () => { + // Helper A inlined into helper B which is itself inlined: + // A's body instructions are members of both bodies and carry + // transform:["inline","inline"]. Two virtual frames stack; the + // inner returns first, leaving the outer. + const entryB = { + transform: ["inline"], + invoke: { jump: true, identifier: "B" }, + }; + const entryA = { + transform: ["inline", "inline"], + invoke: { jump: true, identifier: "A" }, + }; + const exitA = { + transform: ["inline", "inline"], + return: { identifier: "A" }, + }; + const bodyB = { transform: ["inline"] }; // back to just B's body + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // enter B + { pc: 1, opcode: "PUSH1" }, // enter A (inside B) + { pc: 2, opcode: "MSTORE" }, // exit A + { pc: 3, opcode: "ADD" }, // back in B only + ]; + const program = { + instructions: [ + instr(0, entryB), + instr(1, entryA), + instr(2, exitA), + instr(3, bodyB), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("stacks two virtual frames inside the inner body", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack).toHaveLength(2); + expect(stack[0].identifier).toBe("B"); + expect(stack[1].identifier).toBe("A"); + }); + + it("drops to the outer frame 
after the inner returns", () => { + const stack = buildCallStack(trace, pcToInstruction, 3); + expect(stack).toHaveLength(1); + expect(stack[0].identifier).toBe("B"); + }); + }); + describe("defensive membership guard", () => { // A virtual invoke whose exit return never arrives (residual // smear / dropped marker): the frame must still be torn down @@ -341,13 +498,38 @@ describe("inline virtual activations", () => { }); }); - it("leaves a real call frame's isInline falsy", () => { - const trace: TraceStep[] = [{ pc: 0, opcode: "JUMPDEST" }]; + describe("real calls (regression: close-after applies uniformly)", () => { + // A real call: caller JUMP + callee JUMPDEST (deduped), then a + // return. The frame is visible at its return step and popped on + // advance — same close-after rule as virtual frames. + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMP" }, // caller invoke + { pc: 1, opcode: "JUMPDEST" }, // callee entry invoke (dedup) + { pc: 2, opcode: "JUMP" }, // callee return + { pc: 3, opcode: "JUMPDEST" }, // back in caller + ]; const program = { - instructions: [instr(0, { invoke: { jump: true, identifier: "f" } })], + instructions: [ + instr(0, { invoke: { jump: true, identifier: "f" } }), + instr(1, { invoke: { jump: true, identifier: "f" } }), + instr(2, { return: { identifier: "f" } }), + instr(3, { code: { source: { id: "0" }, range: {} } }), + ], } as unknown as Program; const pcToInstruction = buildPcToInstructionMap(program); - const stack = buildCallStack(trace, pcToInstruction, 0); - expect(stack[0].isInline).toBeFalsy(); + + it("collapses the caller/callee invoke double into one frame", () => { + expect(buildCallStack(trace, pcToInstruction, 1)).toHaveLength(1); + }); + + it("still shows the frame AT its return instruction", () => { + const stack = buildCallStack(trace, pcToInstruction, 2); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBeFalsy(); + }); + + it("pops the real frame on advancing past the return", () => { + 
expect(buildCallStack(trace, pcToInstruction, 3)).toHaveLength(0); + }); }); }); diff --git a/packages/programs-react/src/utils/mockTrace.ts b/packages/programs-react/src/utils/mockTrace.ts index eb9c55581..fe5ac7fb0 100644 --- a/packages/programs-react/src/utils/mockTrace.ts +++ b/packages/programs-react/src/utils/mockTrace.ts @@ -171,117 +171,136 @@ function extractTransformFromContext(context: Program.Context): string[] { } /** - * Extract call info (invoke/return/revert) from an - * instruction's context tree. + * Extract the primary call event (invoke/return/revert) from an + * instruction's context tree, decorated with transform flags. + * + * A context can legitimately carry BOTH an invoke and a return + * (e.g. a tail-call back-edge, or an inlined body that emits to a + * single instruction). This accessor returns just the first event + * for display banners; call-stack reconstruction uses + * {@link extractCallEvents}, which surfaces every event so a + * co-located return is never swallowed by the invoke. */ export function extractCallInfoFromInstruction( instruction: Program.Instruction, ): CallInfo | undefined { + return extractCallEvents(instruction)[0]; +} + +/** + * Extract ALL call events (invoke/return/revert) from an + * instruction's context tree, in document order (invoke before + * return within one context), decorated with the instruction's + * transform flags. Returns [] when there is no call context. 
+ */ +export function extractCallEvents( + instruction: Program.Instruction, +): CallInfo[] { if (!instruction.context) { - return undefined; + return []; } - const info = extractCallInfoFromContext(instruction.context); - if (!info) { - return undefined; + const events = collectCallInfos(instruction.context); + if (events.length === 0) { + return []; } const transforms = extractTransformFromContext(instruction.context); - const decorated: CallInfo = { ...info }; - if (transforms.includes("tailcall")) { - decorated.isTailCall = true; - } - if (transforms.includes("inline")) { - decorated.isInline = true; + const isTailCall = transforms.includes("tailcall"); + const isInline = transforms.includes("inline"); + if (!isTailCall && !isInline) { + return events; } - return decorated; + return events.map((e) => ({ + ...e, + ...(isTailCall ? { isTailCall: true } : {}), + ...(isInline ? { isInline: true } : {}), + })); } -function extractCallInfoFromContext( - context: Program.Context, -): CallInfo | undefined { +/** + * Collect the invoke/return/revert events carried by a context + * tree, in order. Invoke precedes return within a single context; + * gather/pick children are visited in sequence. 
+ */ +function collectCallInfos(context: Program.Context): CallInfo[] { // Use unknown intermediate to avoid strict type checks // on the context union — we discriminate by key presence const ctx = context as unknown as Record; + const out: CallInfo[] = []; if ("invoke" in ctx) { - const inv = ctx.invoke as Record; - const pointerRefs: CallInfo["pointerRefs"] = []; - - let callType: CallInfo["callType"]; - if ("jump" in inv) { - callType = "internal"; - collectPointerRef(pointerRefs, "target", inv.target); - collectPointerRef(pointerRefs, "arguments", inv.arguments); - } else if ("message" in inv) { - callType = "external"; - collectPointerRef(pointerRefs, "target", inv.target); - collectPointerRef(pointerRefs, "gas", inv.gas); - collectPointerRef(pointerRefs, "value", inv.value); - collectPointerRef(pointerRefs, "input", inv.input); - } else if ("create" in inv) { - callType = "create"; - collectPointerRef(pointerRefs, "value", inv.value); - collectPointerRef(pointerRefs, "salt", inv.salt); - collectPointerRef(pointerRefs, "input", inv.input); - } - - // Extract argument names from group entries - const argNames = extractArgNamesFromInvoke(inv); - - return { - kind: "invoke", - identifier: inv.identifier as string | undefined, - callType, - argumentNames: argNames, - pointerRefs, - }; + out.push(parseInvoke(ctx.invoke as Record)); } - if ("return" in ctx) { - const ret = ctx.return as Record; - const pointerRefs: CallInfo["pointerRefs"] = []; - collectPointerRef(pointerRefs, "data", ret.data); - collectPointerRef(pointerRefs, "success", ret.success); - - return { - kind: "return", - identifier: ret.identifier as string | undefined, - pointerRefs, - }; + out.push(parseReturn(ctx.return as Record)); } - if ("revert" in ctx) { - const rev = ctx.revert as Record; - const pointerRefs: CallInfo["pointerRefs"] = []; - collectPointerRef(pointerRefs, "reason", rev.reason); - - return { - kind: "revert", - identifier: rev.identifier as string | undefined, - panic: rev.panic 
as number | undefined, - pointerRefs, - }; + out.push(parseRevert(ctx.revert as Record)); } - // Walk gather/pick to find call info - if ("gather" in ctx && Array.isArray(ctx.gather)) { + if (Array.isArray(ctx.gather)) { for (const sub of ctx.gather as Program.Context[]) { - const info = extractCallInfoFromContext(sub); - if (info) { - return info; - } + out.push(...collectCallInfos(sub)); } } - - if ("pick" in ctx && Array.isArray(ctx.pick)) { + if (Array.isArray(ctx.pick)) { for (const sub of ctx.pick as Program.Context[]) { - const info = extractCallInfoFromContext(sub); - if (info) { - return info; - } + out.push(...collectCallInfos(sub)); } } - return undefined; + return out; +} + +function parseInvoke(inv: Record): CallInfo { + const pointerRefs: CallInfo["pointerRefs"] = []; + + let callType: CallInfo["callType"]; + if ("jump" in inv) { + callType = "internal"; + collectPointerRef(pointerRefs, "target", inv.target); + collectPointerRef(pointerRefs, "arguments", inv.arguments); + } else if ("message" in inv) { + callType = "external"; + collectPointerRef(pointerRefs, "target", inv.target); + collectPointerRef(pointerRefs, "gas", inv.gas); + collectPointerRef(pointerRefs, "value", inv.value); + collectPointerRef(pointerRefs, "input", inv.input); + } else if ("create" in inv) { + callType = "create"; + collectPointerRef(pointerRefs, "value", inv.value); + collectPointerRef(pointerRefs, "salt", inv.salt); + collectPointerRef(pointerRefs, "input", inv.input); + } + + return { + kind: "invoke", + identifier: inv.identifier as string | undefined, + callType, + argumentNames: extractArgNamesFromInvoke(inv), + pointerRefs, + }; +} + +function parseReturn(ret: Record): CallInfo { + const pointerRefs: CallInfo["pointerRefs"] = []; + collectPointerRef(pointerRefs, "data", ret.data); + collectPointerRef(pointerRefs, "success", ret.success); + return { + kind: "return", + identifier: ret.identifier as string | undefined, + pointerRefs, + }; +} + +function parseRevert(rev: 
Record): CallInfo { + const pointerRefs: CallInfo["pointerRefs"] = []; + collectPointerRef(pointerRefs, "reason", rev.reason); + return { + kind: "revert", + identifier: rev.identifier as string | undefined, + panic: rev.panic as number | undefined, + pointerRefs, + }; } function extractArgNamesFromInvoke( @@ -380,13 +399,13 @@ export function buildCallStack( // Per-instruction inline membership drives the defensive // guard below: an inlined body's instructions all carry - // transform:["inline"], so a virtual frame is only valid - // while that marker holds. - const isInlineInstr = - extractTransformFromInstruction(instruction).includes("inline"); - const callInfo = extractCallInfoFromInstruction(instruction); + // transform:["inline"] (nested inlining stacks the marker), so + // the count bounds how many virtual frames may legitimately be + // open on this instruction. + const transforms = extractTransformFromInstruction(instruction); + const inlineCount = transforms.filter((t) => t === "inline").length; - if (callInfo?.isTailCall) { + if (transforms.includes("tailcall")) { // A TCO back-edge carries both return and invoke on a // single instruction: the previous iteration returns // and the next iteration is invoked, reusing the same @@ -399,9 +418,9 @@ export function buildCallStack( const argResult = extractArgInfo(instruction); const invId = inv?.identifier as string | undefined; const frame: CallFrame = { - identifier: invId ?? callInfo.identifier, + identifier: invId, stepIndex: i, - callType: inv ? invokeCallType(inv) : callInfo.callType, + callType: inv ? invokeCallType(inv) : undefined, argumentNames: argResult?.names, argumentPointers: argResult?.pointers, isTailCall: true, @@ -414,58 +433,71 @@ export function buildCallStack( continue; } - if (callInfo?.kind === "invoke") { - // The compiler emits invoke on both the caller JUMP - // and callee entry JUMPDEST for the same call. These - // occur on consecutive trace steps. 
Only skip if the - // top frame matches AND was pushed on the immediately - // preceding step — otherwise this is a new call (e.g. - // recursion with the same function name). - const top = stack[stack.length - 1]; - const isDuplicate = - top && - top.identifier === callInfo.identifier && - top.callType === callInfo.callType && - top.stepIndex === i - 1; - if (isDuplicate) { - // Use the callee entry step for resolution — - // the argument pointers reference stack slots - // that are valid at the JUMPDEST, not the JUMP. - // Argument names also live on the callee entry. - const argResult = extractArgInfo(instruction); - top.stepIndex = i; - top.argumentNames = argResult?.names ?? top.argumentNames; - top.argumentPointers = argResult?.pointers; - } else { - const argResult = extractArgInfo(instruction); - stack.push({ - identifier: callInfo.identifier, - stepIndex: i, - callType: callInfo.callType, - argumentNames: argResult?.names, - argumentPointers: argResult?.pointers, - // Tag virtual activations so the widget can render - // them distinctly from real calls. - ...(callInfo.isInline ? { isInline: true } : {}), - }); - } - } else if (callInfo?.kind === "return" || callInfo?.kind === "revert") { - // Pop the matching frame - if (stack.length > 0) { - stack.pop(); + // A context may carry more than one event (invoke + return), + // e.g. an inlined body that emits to a single instruction. + // Process them in order: an invoke opens a frame INCLUSIVE of + // its instruction; a return closes it AFTER its instruction + // (close-after) — so the frame is still shown while parked on + // the return-bearing instruction and popped only on advance. + for (const event of extractCallEvents(instruction)) { + if (event.kind === "invoke") { + // The compiler emits invoke on both the caller JUMP and + // callee entry JUMPDEST for a REAL call, on consecutive + // steps — collapse that double. 
Key the dedup on the + // inline marker so a virtual invoke never merges with an + // adjacent real invoke of the same name (and vice versa). + const top = stack[stack.length - 1]; + const isDuplicate = + top && + top.identifier === event.identifier && + top.callType === event.callType && + top.stepIndex === i - 1 && + !!top.isInline === !!event.isInline; + if (isDuplicate) { + // Use the callee entry step for resolution — argument + // pointers/names live on the JUMPDEST, not the JUMP. + const argResult = extractArgInfo(instruction); + top.stepIndex = i; + top.argumentNames = argResult?.names ?? top.argumentNames; + top.argumentPointers = argResult?.pointers; + } else { + const argResult = extractArgInfo(instruction); + stack.push({ + identifier: event.identifier, + stepIndex: i, + callType: event.callType, + argumentNames: argResult?.names, + argumentPointers: argResult?.pointers, + // Tag virtual activations so the widget can render + // them distinctly from real calls. + ...(event.isInline ? { isInline: true } : {}), + }); + } + } else if (event.kind === "return" || event.kind === "revert") { + // close-after: defer the pop until we advance past this + // step, so the frame is visible AT its return instruction. + if (i < upToStep && stack.length > 0) { + stack.pop(); + } } } - // Defensive membership guard: a virtual (inline) frame must - // not stay open once execution leaves the inlined body. If - // the current instruction carries no inline marker, tear down - // any trailing virtual frames — belt-and-suspenders against a - // dropped or incomplete virtual return so a phantom activation - // can never leak into caller code. 
- if (!isInlineInstr) { - while (stack.length > 0 && stack[stack.length - 1].isInline) { - stack.pop(); - } + // Defensive membership guard: virtual frames beyond the + // instruction's inline-marker count are stale — belt-and- + // suspenders against a dropped or incomplete virtual return so + // a phantom activation can never leak into caller code (or + // linger after an inner inlined body has ended). + let trailingVirtual = 0; + for (let k = stack.length - 1; k >= 0 && stack[k].isInline; k--) { + trailingVirtual++; + } + while ( + trailingVirtual > inlineCount && + stack.length > 0 && + stack[stack.length - 1].isInline + ) { + stack.pop(); + trailingVirtual--; } } From 0ca6be7e6c75530e7128e809cb802be2b9cc5de3 Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 21:30:25 -0400 Subject: [PATCH 20/21] programs-react: lock inline reconstruction against bracketed + smeared shapes (#239) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add fixtures for both emission shapes the reconstruction must handle: - bracketed (post #235): invoke on the body's first op, return on the last, transform:["inline"] on all — frame visible across every body op INCLUDING the exit op (close-after), gone at the gap. - legacy smeared: every op carries invoke+return+inline — close-after still yields exactly one frame per body, no accumulation across gap-separated bodies. Verified end-to-end on real O2 (dbl@2 sites, #235 bracketed emission): one virtual frame per body incl. the exit op, top level between/after. 
--- .../src/utils/mockTrace.test.ts | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/packages/programs-react/src/utils/mockTrace.test.ts b/packages/programs-react/src/utils/mockTrace.test.ts index e56a8ea1c..ee223ecb4 100644 --- a/packages/programs-react/src/utils/mockTrace.test.ts +++ b/packages/programs-react/src/utils/mockTrace.test.ts @@ -532,4 +532,97 @@ describe("inline virtual activations", () => { expect(buildCallStack(trace, pcToInstruction, 3)).toHaveLength(0); }); }); + + describe("bracketed emission (post de-smear, #235 shape)", () => { + // The real bracketed shape: invoke on the body's FIRST op, + // return on its LAST op, transform:["inline"] on every op. The + // frame must be visible across the whole body — including the + // return-bearing exit op (close-after) — and gone at the gap. + const entryOp = { + transform: ["inline"], + invoke: { jump: true, identifier: "dbl" }, + }; + const interiorOp = { transform: ["inline"] }; + const exitOp = { transform: ["inline"], return: { identifier: "dbl" } }; + const gapOp = { code: { source: { id: "0" }, range: {} } }; + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // entry op (invoke) + { pc: 1, opcode: "DUP2" }, // interior op + { pc: 2, opcode: "ADD" }, // interior op + { pc: 3, opcode: "MSTORE" }, // exit op (return) + { pc: 4, opcode: "JUMPDEST" }, // gap / caller + ]; + const program = { + instructions: [ + instr(0, entryOp), + instr(1, interiorOp), + instr(2, interiorOp), + instr(3, exitOp), + instr(4, gapOp), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("shows the virtual frame across every body op incl. 
the exit", () => { + for (const s of [0, 1, 2, 3]) { + const stack = buildCallStack(trace, pcToInstruction, s); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + } + }); + + it("is gone at the gap after the return op", () => { + expect(buildCallStack(trace, pcToInstruction, 4)).toHaveLength(0); + }); + }); + + describe("robustness: legacy SMEARED emission (pre de-smear)", () => { + // Belt-and-suspenders: an older/residual emission where EVERY + // body op carries invoke+return+inline. Close-after must still + // yield exactly one frame per body across all ops (the viewed + // op's co-located return is deferred; prior ops net empty) and + // no accumulation across two gap-separated bodies. + const smearedOp = { + transform: ["inline"], + invoke: { jump: true, identifier: "dbl" }, + return: { identifier: "dbl" }, + }; + const gapOp = { code: { source: { id: "0" }, range: {} } }; + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // body 1: 3 smeared ops + { pc: 1, opcode: "DUP2" }, + { pc: 2, opcode: "MSTORE" }, + { pc: 3, opcode: "JUMPDEST" }, // gap + { pc: 4, opcode: "PUSH1" }, // body 2: 3 smeared ops + { pc: 5, opcode: "DUP2" }, + { pc: 6, opcode: "MSTORE" }, + { pc: 7, opcode: "JUMPDEST" }, // gap + ]; + const program = { + instructions: [ + instr(0, smearedOp), + instr(1, smearedOp), + instr(2, smearedOp), + instr(3, gapOp), + instr(4, smearedOp), + instr(5, smearedOp), + instr(6, smearedOp), + instr(7, gapOp), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("shows exactly one frame across each smeared body", () => { + for (const s of [0, 1, 2, 4, 5, 6]) { + const stack = buildCallStack(trace, pcToInstruction, s); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + } + }); + + it("returns to top level at each gap — no accumulation", () => { + expect(buildCallStack(trace, pcToInstruction, 3)).toHaveLength(0); + expect(buildCallStack(trace, 
pcToInstruction, 7)).toHaveLength(0); + }); + }); }); From b2d9e383b4d80e85cf4d538bf69bbbe45b577c0d Mon Sep 17 00:00:00 2001 From: "g. nicholas d'andrea" Date: Thu, 2 Jul 2026 21:38:17 -0400 Subject: [PATCH 21/21] format: add Name context type and guard to TS types (#238) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `name` context has a schema (program/context/name) but was never mirrored into the TypeScript types — no Context.Name interface, no isName guard, and it was absent from the Context union and isContext. Add Name (`{ name: string }`) and isName, mirroring isFrame, and wire them into the union and isContext (schema-canonical order: name first). Adds schema-guard test coverage for the name context. Unblocks upcoming consumers (name as invoke/return correlation id, #26) before compiler/UI start reading names. Behavior-preserving addition. --- packages/format/src/types/program/context.test.ts | 4 ++++ packages/format/src/types/program/context.ts | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/packages/format/src/types/program/context.test.ts b/packages/format/src/types/program/context.test.ts index a36e1e4c7..55a716181 100644 --- a/packages/format/src/types/program/context.test.ts +++ b/packages/format/src/types/program/context.test.ts @@ -6,6 +6,10 @@ testSchemaGuards("ethdebug/format/program/context", [ schema: "schema:ethdebug/format/program/context", guard: isContext, }, + { + schema: "schema:ethdebug/format/program/context/name", + guard: Context.isName, + }, { schema: "schema:ethdebug/format/program/context/code", guard: Context.isCode, diff --git a/packages/format/src/types/program/context.ts b/packages/format/src/types/program/context.ts index e943f1ec2..7c4e4ea4c 100644 --- a/packages/format/src/types/program/context.ts +++ b/packages/format/src/types/program/context.ts @@ -3,6 +3,7 @@ import { Type } from "#types/type"; import { Pointer, isPointer } from "#types/pointer"; export type 
Context = + | Context.Name | Context.Code | Context.Variables | Context.Remark @@ -16,6 +17,7 @@ export type Context = export const isContext = (value: unknown): value is Context => [ + Context.isName, Context.isCode, Context.isVariables, Context.isRemark, @@ -29,6 +31,16 @@ export const isContext = (value: unknown): value is Context => ].some((guard) => guard(value)); export namespace Context { + export interface Name { + name: string; + } + + export const isName = (value: unknown): value is Name => + typeof value === "object" && + !!value && + "name" in value && + typeof value.name === "string"; + export interface Code { code: Materials.SourceRange; }