diff --git a/packages/bugc/src/evmgen/call-contexts.test.ts b/packages/bugc/src/evmgen/call-contexts.test.ts index c05f58708..c670d1660 100644 --- a/packages/bugc/src/evmgen/call-contexts.test.ts +++ b/packages/bugc/src/evmgen/call-contexts.test.ts @@ -97,7 +97,8 @@ code { expect(typeof invoke.declaration!.range!.length).toBe("number"); // Target should be a code pointer (not stack) - expect(Pointer.Region.isCode(call.target.pointer)).toBe(true); + expect(call.target).toBeDefined(); + expect(Pointer.Region.isCode(call.target!.pointer)).toBe(true); // Caller JUMP should NOT have argument pointers // (args live on the callee JUMPDEST invoke context) @@ -156,7 +157,8 @@ code { expect(call.identifier).toBe("add"); // Target should be a code pointer - expect(Pointer.Region.isCode(call.target.pointer)).toBe(true); + expect(call.target).toBeDefined(); + expect(Pointer.Region.isCode(call.target!.pointer)).toBe(true); // Should have argument pointers matching // function parameters diff --git a/packages/bugc/src/evmgen/generation/block.ts b/packages/bugc/src/evmgen/generation/block.ts index e23460dfb..632ee132a 100644 --- a/packages/bugc/src/evmgen/generation/block.ts +++ b/packages/bugc/src/evmgen/generation/block.ts @@ -14,6 +14,7 @@ import { Memory } from "#evmgen/analysis"; import { calculateSize } from "#evmgen/serialize"; import * as Instruction from "./instruction.js"; +import { bracketActivation, carriesActivation } from "./bracket-activation.js"; import { loadValue } from "./values/index.js"; import { generateTerminator, @@ -161,9 +162,31 @@ export function generate( // the runtime predecessor differs from the layout-order // predecessor. - // Process regular instructions + // Process regular instructions. Invoke/return activation + // discriminators must be bracketed to the first/last emitted op + // of the instruction (see bracket-activation.ts); everything else + // (source mapping, variables, transform markers) rides all ops. 
for (const inst of block.instructions) { - result = result.then(Instruction.generate(inst)); + const gen = Instruction.generate(inst); + const operationCtx = inst.operationDebug?.context; + if ( + !carriesActivation(operationCtx, "invoke") && + !carriesActivation(operationCtx, "return") + ) { + result = result.then(gen); + continue; + } + result = result.peek((state, builder) => { + const start = state.instructions.length; + return builder.then(gen).then((s) => ({ + ...s, + instructions: bracketActivation( + s.instructions, + start, + operationCtx, + ), + })); + }); } // Emit phi copies for successor blocks before the diff --git a/packages/bugc/src/evmgen/generation/bracket-activation.ts b/packages/bugc/src/evmgen/generation/bracket-activation.ts new file mode 100644 index 000000000..653c2ffc8 --- /dev/null +++ b/packages/bugc/src/evmgen/generation/bracket-activation.ts @@ -0,0 +1,164 @@ +/** + * Bracket invoke/return activation discriminators onto the boundary + * ops of an IR instruction's emitted op-run. + * + * A single IR instruction lowers to N EVM micro-ops, and the generic + * lowering attaches that instruction's whole `operationDebug` (source + * mapping, variables, transform markers, AND any invoke/return + * discriminators) to every one of those ops. That is correct for + * source/variable/transform context — a debugger wants all N ops + * mapped to the instruction — but WRONG for invoke/return: those are + * positional activation boundaries. An `invoke` marks a single push + * point; a `return` a single pop point. Broadcasting them across the + * whole op-run makes a push/pop reconstruction see every op as both a + * push and a pop. + * + * This module de-smears: for the ops emitted by one instruction, the + * `invoke` discriminator is kept on only the FIRST op, `return` on only + * the LAST op, and stripped from the interior. The `transform` + * membership markers (and source/variables) stay on every op. 
+ * + * It is a general evmgen invariant, not inline-specific: it is a no-op + * for real calls (whose invoke/return already ride single-op JUMP / + * JUMPDEST terminators) and fires only when invoke/return happen to + * ride a multi-op instruction — which today is inlined virtual + * activations. + */ +import type * as Format from "@ethdebug/format"; +import type * as Evm from "#evm"; + +type Ctx = Format.Program.Context; +type Activation = "invoke" | "return"; + +function isPick(ctx: Ctx): ctx is Ctx & { pick: Ctx[] } { + return ( + typeof ctx === "object" && + ctx !== null && + "pick" in ctx && + Array.isArray((ctx as { pick: unknown }).pick) + ); +} + +function isGather(ctx: Ctx): ctx is Ctx & { gather: Ctx[] } { + return ( + typeof ctx === "object" && + ctx !== null && + "gather" in ctx && + Array.isArray((ctx as { gather: unknown }).gather) + ); +} + +/** Whether ctx carries the given activation key anywhere, reaching + * into pick/gather composites. */ +export function carriesActivation( + ctx: Ctx | undefined, + key: Activation, +): boolean { + if (!ctx || typeof ctx !== "object") return false; + if (isPick(ctx)) return ctx.pick.some((c) => carriesActivation(c, key)); + if (isGather(ctx)) return ctx.gather.some((c) => carriesActivation(c, key)); + return key in ctx; +} + +/** The first activation value found for the given key, reaching into + * pick/gather composites. */ +function findActivation(ctx: Ctx | undefined, key: Activation): unknown { + if (!ctx || typeof ctx !== "object") return undefined; + if (isPick(ctx)) { + for (const c of ctx.pick) { + const v = findActivation(c, key); + if (v !== undefined) return v; + } + return undefined; + } + if (isGather(ctx)) { + for (const c of ctx.gather) { + const v = findActivation(c, key); + if (v !== undefined) return v; + } + return undefined; + } + return (ctx as Record)[key]; +} + +/** Remove invoke and return discriminators anywhere in ctx, reaching + * into pick/gather composites. 
Returns undefined if nothing remains. */ +export function stripActivation(ctx: Ctx | undefined): Ctx | undefined { + if (!ctx || typeof ctx !== "object") return ctx; + if (isPick(ctx)) { + const kids = ctx.pick + .map(stripActivation) + .filter((c): c is Ctx => c !== undefined); + if (kids.length === 0) return undefined; + if (kids.length === 1) return kids[0]; + return { pick: kids } as Ctx; + } + if (isGather(ctx)) { + const kids = ctx.gather + .map(stripActivation) + .filter((c): c is Ctx => c !== undefined); + if (kids.length === 0) return undefined; + if (kids.length === 1) return kids[0]; + return { gather: kids } as Ctx; + } + const rest = { ...(ctx as Record) }; + delete rest.invoke; + delete rest.return; + return Object.keys(rest).length > 0 ? (rest as Ctx) : undefined; +} + +/** Attach an activation discriminator, composing it as a flat sibling + * key on a leaf context (per the flat-composition convention), or + * appending it to a pick/gather composite. */ +function attachActivation( + ctx: Ctx | undefined, + key: Activation, + value: unknown, +): Ctx { + const marker = { [key]: value } as Ctx; + if (!ctx || typeof ctx !== "object") return marker; + if (isPick(ctx)) return { pick: [...ctx.pick, marker] } as Ctx; + if (isGather(ctx)) return { gather: [...ctx.gather, marker] } as Ctx; + return { ...(ctx as Record), [key]: value } as Ctx; +} + +/** + * Rewrite the ops emitted by one IR instruction (the tail slice + * `instructions[start..]`) so invoke rides only the first op and + * return only the last op, using the discriminators found on the + * instruction's `operationDebug` context. No-op unless that context + * carries invoke and/or return, so it never touches ordinary code. 
+ */ +export function bracketActivation( + instructions: Evm.Instruction[], + start: number, + operationCtx: Ctx | undefined, +): Evm.Instruction[] { + const end = instructions.length; // exclusive + if (end <= start) return instructions; + + const hasInvoke = carriesActivation(operationCtx, "invoke"); + const hasReturn = carriesActivation(operationCtx, "return"); + if (!hasInvoke && !hasReturn) return instructions; + + const invokeValue = hasInvoke + ? findActivation(operationCtx, "invoke") + : undefined; + const returnValue = hasReturn + ? findActivation(operationCtx, "return") + : undefined; + + const out = instructions.slice(); + for (let i = start; i < end; i++) { + const op = out[i]; + let ctx = stripActivation(op.debug?.context); + if (hasInvoke && i === start) { + ctx = attachActivation(ctx, "invoke", invokeValue); + } + if (hasReturn && i === end - 1) { + ctx = attachActivation(ctx, "return", returnValue); + } + out[i] = { ...op, debug: { ...op.debug, context: ctx } }; + } + return out; +} diff --git a/packages/bugc/src/evmgen/generation/control-flow/terminator.ts b/packages/bugc/src/evmgen/generation/control-flow/terminator.ts index 0bb5b5939..a87f5349f 100644 --- a/packages/bugc/src/evmgen/generation/control-flow/terminator.ts +++ b/packages/bugc/src/evmgen/generation/control-flow/terminator.ts @@ -1,5 +1,6 @@ import type * as Format from "@ethdebug/format"; import type * as Ir from "#ir"; +import { Utils as IrUtils } from "#ir"; import type * as Evm from "#evm"; import type { Stack } from "#evm"; import type { State } from "#evmgen/state"; @@ -411,9 +412,11 @@ function generateReturnEpilogue( /** * Build JUMP instruction options for a TCO-replaced tail call. 
* - * The JUMP carries BOTH contexts in a gather: + * The JUMP carries three keys on a single flat context + * object: * - return: the previous iteration's return * - invoke: the new iteration's call + * - transform: ["tailcall"] * * Semantically the debugger sees frame depth stay constant * across the back-edge JUMP: the previous frame pops, the @@ -421,6 +424,13 @@ function generateReturnEpilogue( * terminal RETURN (elsewhere) emits a return context * normally, popping the final iteration's frame. * + * The `transform: ["tailcall"]` key is an additive + * annotation: it does not replace the invoke/return pair + * (which state the source-level facts) but tells debuggers + * the pair was realized as a TCO back-edge rather than a + * real frame push/pop, so they can avoid inventing a + * spurious frame. + * * The invoke mirrors the normal caller-JUMP invoke * (identity + declaration + code target, no argument * pointers). The return omits `data` because TCO does not @@ -431,7 +441,7 @@ function generateReturnEpilogue( * resolved later by patchInvokeTarget. */ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { - debug: { context: Format.Program.Context }; + debug: Ir.Instruction.Debug; } { const declaration = tailCall.declarationLoc && tailCall.declarationSourceId @@ -441,14 +451,12 @@ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { } : undefined; - const returnCtx: Format.Program.Context.Return = { + const combined: Format.Program.Context.Return & + Format.Program.Context.Invoke = { return: { identifier: tailCall.function, ...(declaration ? 
{ declaration } : {}), }, - }; - - const invoke: Format.Program.Context.Invoke = { invoke: { jump: true as const, identifier: tailCall.function, @@ -463,11 +471,16 @@ function buildTailCallJumpOptions(tailCall: Ir.Block.TailCall): { }, }; - const gather: Format.Program.Context.Gather = { - gather: [returnCtx, invoke], + // Route through the shared helper so all transform emission + // (fold/tailcall/coalesce/...) composes consistently: the + // `transform` marker becomes a flat sibling key appended to + // any existing transform array. + return { + debug: IrUtils.addTransform( + { context: combined as Format.Program.Context }, + "tailcall", + ), }; - - return { debug: { context: gather as Format.Program.Context } }; } /** PUSH an integer as the smallest PUSHn. */ diff --git a/packages/bugc/src/evmgen/generation/function.ts b/packages/bugc/src/evmgen/generation/function.ts index 5b1944b1e..8e7230155 100644 --- a/packages/bugc/src/evmgen/generation/function.ts +++ b/packages/bugc/src/evmgen/generation/function.ts @@ -537,6 +537,8 @@ function patchInvokeInContext( const offset = functionRegistry[invoke.identifier]; if (offset === undefined) return; + if (!invoke.target) return; + const ptr = invoke.target.pointer; if (Format.Pointer.Region.isCode(ptr)) { ptr.offset = `0x${offset.toString(16)}`; diff --git a/packages/bugc/src/evmgen/inline-bracket.test.ts b/packages/bugc/src/evmgen/inline-bracket.test.ts new file mode 100644 index 000000000..6583bcfed --- /dev/null +++ b/packages/bugc/src/evmgen/inline-bracket.test.ts @@ -0,0 +1,192 @@ +/** + * Verifies that inlined virtual-activation invoke/return contexts are + * BRACKETED on the emitted bytecode, not smeared across every op. + * + * An IR instruction lowers to N EVM micro-ops. 
evmgen must attach the + * `invoke` discriminator to only the FIRST emitted op of the + * invoke-bearing instruction and the `return` discriminator to only the + * LAST emitted op of the return-bearing instruction, while keeping the + * `transform: ["inline"]` membership marker on ALL body ops. + * + * Without bracketing, the tracer's push/pop reconstruction sees every + * body op as both a push and a pop -> phantom frames. + */ +import { describe, it, expect } from "vitest"; + +import { compile } from "#compiler"; +import { executeProgram } from "#test/evm/behavioral"; +import type * as Format from "@ethdebug/format"; +import { Program } from "@ethdebug/format"; + +const { Context } = Program; + +type OptLevel = 0 | 1 | 2 | 3; + +async function runtimeInstructions(source: string, level: OptLevel) { + const result = await compile({ + to: "bytecode", + source, + optimizer: { level }, + }); + if (!result.success) { + const errors = result.messages.error ?? []; + throw new Error( + `Compilation failed at level ${level}:\n` + + errors + .map((e: { message?: string }) => e.message ?? String(e)) + .join("\n"), + ); + } + return result.value.bytecode.runtimeInstructions; +} + +/** Flatten a context into leaves, unwrapping gather/pick. */ +function leaves(ctx: Format.Program.Context): Format.Program.Context[] { + if (Context.isGather(ctx)) return ctx.gather.flatMap(leaves); + if ("pick" in ctx && Array.isArray((ctx as { pick: unknown[] }).pick)) { + return (ctx as { pick: Format.Program.Context[] }).pick.flatMap(leaves); + } + return [ctx]; +} + +/** Per-op discriminator/marker presence, reaching nested pick/gather. 
*/ +function flags(instr: { debug?: { context?: Format.Program.Context } }) { + const ctx = instr.debug?.context; + if (!ctx) return { invoke: false, return: false, inline: false }; + const all = [ctx, ...leaves(ctx)]; + return { + invoke: all.some((c) => Context.isInvoke(c)), + return: all.some((c) => Context.isReturn(c)), + inline: all.some( + (c) => Context.isTransform(c) && c.transform.includes("inline"), + ), + }; +} + +function tally(instrs: ReturnType[]) { + let invoke = 0, + ret = 0, + both = 0, + inline = 0; + for (const f of instrs) { + if (f.invoke) invoke += 1; + if (f.return) ret += 1; + if (f.invoke && f.return) both += 1; + if (f.inline) inline += 1; + } + return { invoke, ret, both, inline }; +} + +// The exact fixture the UI reported mis-rendering: a leaf helper +// inlined at two sites. +const dblTwoSites = `name Multi; +define { function dbl(x: uint256) -> uint256 { return x + x; }; } +storage { [0] r: uint256; } +create { r = 0; } +code { + let a = dbl(5); + let b = dbl(10); + r = a + b; +}`; + +// A multi-instruction body: entry (t = x + x) differs from exit +// (t * x), so invoke and return live on distinct IR instructions. +const multiInstrBody = `name Poly; +define { function poly(x: uint256) -> uint256 { let t = x + x; return t * x; }; } +storage { [0] a: uint256; [1] r: uint256; } +create { a = 3; r = 0; } +code { r = poly(a); }`; + +describe("inlined invoke/return are bracketed on emitted bytecode", () => { + it("dbl@2-sites: one push and one pop per site, never both on an op", async () => { + const instrs = await runtimeInstructions(dblTwoSites, 2); + const t = tally(instrs.map(flags)); + // Two inlined sites => exactly one invoke op and one return op each. + expect(t.invoke).toBe(2); + expect(t.ret).toBe(2); + // No op may be both a push and a pop (that breaks push/pop). + expect(t.both).toBe(0); + // Membership marker stays on every body op (more than the 4 + // boundary ops). 
+ expect(t.inline).toBeGreaterThan(4); + }); + + it("dbl@2-sites: each site's invoke op precedes its return op", async () => { + const instrs = await runtimeInstructions(dblTwoSites, 2); + const seq = instrs + .map((instr, i) => ({ i, f: flags(instr) })) + .filter(({ f }) => f.invoke || f.return) + .map(({ f }) => (f.invoke ? "invoke" : "return")); + // Bracketed order across two sites: push,pop,push,pop. + expect(seq).toEqual(["invoke", "return", "invoke", "return"]); + }); + + it("multi-instruction body: invoke on entry, return on exit, both=0", async () => { + const instrs = await runtimeInstructions(multiInstrBody, 2); + const t = tally(instrs.map(flags)); + expect(t.invoke).toBe(1); + expect(t.ret).toBe(1); + expect(t.both).toBe(0); + }); + + it("preserves runtime behavior at every level", async () => { + for (const level of [0, 1, 2, 3] as const) { + const res = await executeProgram(dblTwoSites, { + calldata: "", + optimizationLevel: level, + }); + expect(res.callSuccess).toBe(true); + // dbl(5)=10, dbl(10)=20, r=30 + expect(await res.getStorage(0n)).toBe(30n); + } + }); +}); + +// A self-tail-recursive accumulator: TCO turns the recursive call +// into a single back-edge JUMP that legitimately carries BOTH invoke +// and return on its one op (end one iteration + begin the next). +const tailRecursive = `name TailSum; +define { + function sum(n: uint256, acc: uint256) -> uint256 { + if (n == 0) { return acc; } + else { return sum(n - 1, acc + n); } + }; +} +storage { [0] result: uint256; } +create { result = 0; } +code { result = sum(5, 0); }`; + +// Mutually recursive functions never inline, so their calls stay real +// (invoke on a 1-op JUMP, return on a 1-op JUMPDEST). 
+const mutualRecursion = `name EvenOdd; +define { + function isEven(n: uint256) -> uint256 { + if (n == 0) { return 1; } else { return isOdd(n - 1); } + }; + function isOdd(n: uint256) -> uint256 { + if (n == 0) { return 0; } else { return isEven(n - 1); } + }; +} +storage { [0] result: uint256; } +create { result = 0; } +code { result = isEven(4); }`; + +describe("bracketing is a no-op for single-op invoke/return carriers", () => { + it("tailcall back-edge keeps its combined invoke+return on one op", async () => { + // The back-edge JUMP is a single op carrying both markers; bracketing + // to first-op/last-op is first==last, so both must survive. + const instrs = await runtimeInstructions(tailRecursive, 2); + const t = tally(instrs.map(flags)); + expect(t.both).toBeGreaterThanOrEqual(1); + }); + + it("real (non-inlined) calls never carry both on an op", async () => { + const instrs = await runtimeInstructions(mutualRecursion, 2); + const t = tally(instrs.map(flags)); + // Real calls put invoke on a 1-op JUMP and return on a 1-op JUMPDEST, + // distinct ops — the fix must not fabricate a both. + expect(t.both).toBe(0); + expect(t.invoke).toBeGreaterThan(0); + expect(t.ret).toBeGreaterThan(0); + }); +}); diff --git a/packages/bugc/src/evmgen/optimizer-contexts.test.ts b/packages/bugc/src/evmgen/optimizer-contexts.test.ts index 4fe92325f..f0d9539aa 100644 --- a/packages/bugc/src/evmgen/optimizer-contexts.test.ts +++ b/packages/bugc/src/evmgen/optimizer-contexts.test.ts @@ -13,9 +13,9 @@ * verifies the expected invoke/return contexts are present * with the right identifiers. TCO is a special case: the * back-edge JUMP that replaces the recursive call carries a - * gather context with BOTH the previous iteration's return - * and the new iteration's invoke, so frame depth stays - * constant across the optimization. 
+ * single flat context with BOTH the previous iteration's + * return and the new iteration's invoke discriminators, so + * frame depth stays constant across the optimization. */ import { describe, it, expect } from "vitest"; @@ -64,7 +64,7 @@ interface CallSiteCounts { /** * JUMP carrying a return context (TCO back-edge, where * the previous iteration's return is paired with the new - * iteration's invoke in a gather). + * iteration's invoke on a single flat context). */ returnJump: Record; } @@ -82,9 +82,13 @@ function unwrapLeaves(ctx: Format.Program.Context): Format.Program.Context[] { /** * Scan a program and count invoke/return contexts by - * instruction type and function identifier. Handles gather - * contexts so TCO's (return + invoke) JUMPs get counted in - * both the invokeJump and returnJump buckets. + * instruction type and function identifier. Each leaf is + * checked for invoke and return independently (not as an + * either/or) so a flat multi-discriminator context — like + * the TCO back-edge JUMP carrying both `invoke` and + * `return` — gets counted in both buckets. Enclosing + * gather wrappers are still unwrapped for defensive + * coverage. */ function countCallSites(program: Format.Program): CallSiteCounts { const counts: CallSiteCounts = { @@ -108,7 +112,8 @@ function countCallSites(program: Format.Program): CallSiteCounts { } else if (mn === "JUMPDEST") { counts.invokeJumpdest[id] = (counts.invokeJumpdest[id] ?? 0) + 1; } - } else if (Context.isReturn(leaf)) { + } + if (Context.isReturn(leaf)) { const id = leaf.return.identifier ?? "?"; if (mn === "JUMPDEST") { counts.returnJumpdest[id] = (counts.returnJumpdest[id] ?? 0) + 1; @@ -122,6 +127,22 @@ function countCallSites(program: Format.Program): CallSiteCounts { return counts; } +/** Count instructions carrying a `transform: ["inline"]` marker. 
*/ +function countInline(program: Format.Program): number { + let n = 0; + for (const instr of program.instructions) { + if (!instr.context) continue; + if ( + unwrapLeaves(instr.context).some( + (c) => Context.isTransform(c) && c.transform.includes("inline"), + ) + ) { + n += 1; + } + } + return n; +} + describe("optimizer preserves invoke/return contexts", () => { const allLevels: OptLevel[] = [0, 1, 2, 3]; @@ -143,11 +164,23 @@ code { r = add(10, 20); }`; const program = await compileAt(source, level); const counts = countCallSites(program); - // One caller JUMP, one callee JUMPDEST, one - // continuation JUMPDEST — all naming "add". - expect(counts.invokeJump).toEqual({ add: 1 }); - expect(counts.invokeJumpdest).toEqual({ add: 1 }); - expect(counts.returnJumpdest).toEqual({ add: 1 }); + if (level >= 2) { + // `add` is a leaf single-return helper: inlining (L2+) + // replaces the real call with a virtual inline + // activation, so there's no caller JUMP for `add`. + expect(counts.invokeJump).toEqual({}); + // Inline markers appear on the inlined body; at L3 a + // fully-foldable helper body can be constant-folded to a + // PUSH, dissolving the marker, so only require presence + // at L2. + if (level === 2) expect(countInline(program)).toBeGreaterThan(0); + } else { + // One caller JUMP, one callee JUMPDEST, one + // continuation JUMPDEST — all naming "add". + expect(counts.invokeJump).toEqual({ add: 1 }); + expect(counts.invokeJumpdest).toEqual({ add: 1 }); + expect(counts.returnJumpdest).toEqual({ add: 1 }); + } // Behavior is still correct. 
const result = await executeProgram(source, { @@ -180,9 +213,20 @@ code { r = add(2 + 3, 4 * 5); }`; const program = await compileAt(source, level); const counts = countCallSites(program); - expect(counts.invokeJump).toEqual({ add: 1 }); - expect(counts.invokeJumpdest).toEqual({ add: 1 }); - expect(counts.returnJumpdest).toEqual({ add: 1 }); + if (level >= 2) { + // `add` inlined at L2+ — virtual inline activation, no + // real caller JUMP. + expect(counts.invokeJump).toEqual({}); + // Inline markers appear on the inlined body; at L3 a + // fully-foldable helper body can be constant-folded to a + // PUSH, dissolving the marker, so only require presence + // at L2. + if (level === 2) expect(countInline(program)).toBeGreaterThan(0); + } else { + expect(counts.invokeJump).toEqual({ add: 1 }); + expect(counts.invokeJumpdest).toEqual({ add: 1 }); + expect(counts.returnJumpdest).toEqual({ add: 1 }); + } const result = await executeProgram(source, { calldata: "", @@ -219,9 +263,20 @@ code { const program = await compileAt(source, level); const counts = countCallSites(program); - expect(counts.invokeJump).toEqual({ dbl: 2 }); - expect(counts.invokeJumpdest).toEqual({ dbl: 1 }); - expect(counts.returnJumpdest).toEqual({ dbl: 2 }); + if (level >= 2) { + // Both `dbl` sites are inlined (leaf single-return) into + // separate virtual activations; no real caller JUMPs. + expect(counts.invokeJump).toEqual({}); + // Inline markers appear on the inlined body; at L3 a + // fully-foldable helper body can be constant-folded to a + // PUSH, dissolving the marker, so only require presence + // at L2. 
+ if (level === 2) expect(countInline(program)).toBeGreaterThan(0); + } else { + expect(counts.invokeJump).toEqual({ dbl: 2 }); + expect(counts.invokeJumpdest).toEqual({ dbl: 1 }); + expect(counts.returnJumpdest).toEqual({ dbl: 2 }); + } const result = await executeProgram(source, { calldata: "", @@ -344,18 +399,28 @@ code { r = addThree(1, 2, 3); }`; const program = await compileAt(source, level); const counts = countCallSites(program); - expect(counts.invokeJump).toEqual({ - addThree: 1, - add: 2, - }); - expect(counts.invokeJumpdest).toEqual({ - addThree: 1, - add: 1, - }); - expect(counts.returnJumpdest).toEqual({ - addThree: 1, - add: 2, - }); + if (level >= 2) { + // `add` (leaf) inlines into `addThree` at both sites; + // that makes `addThree` itself a leaf, so on a later + // fixpoint iteration it inlines into `main` too. End + // state: no real caller JUMPs — everything is inline + // activations. + expect(counts.invokeJump).toEqual({}); + if (level === 2) expect(countInline(program)).toBeGreaterThan(0); + } else { + expect(counts.invokeJump).toEqual({ + addThree: 1, + add: 2, + }); + expect(counts.invokeJumpdest).toEqual({ + addThree: 1, + add: 1, + }); + expect(counts.returnJumpdest).toEqual({ + addThree: 1, + add: 2, + }); + } const result = await executeProgram(source, { calldata: "", @@ -409,7 +474,7 @@ code { r = check(3, 4); }`; // `count` is tail-recursive: the recursive call is in // return position. At levels 2 and 3, TCO rewrites the // recursive call into a back-edge JUMP. That JUMP - // carries a gather context with BOTH: + // carries a single flat context with BOTH discriminators: // - return: previous iteration's return // - invoke: new iteration's call // @@ -472,30 +537,44 @@ code { r = count(0, 5); }`; // The TCO back-edge JUMP additionally carries a // return context for `count` (the previous - // iteration's return), paired with its invoke in - // a gather. 
This keeps the debugger's logical - // frame depth constant across the back-edge. + // iteration's return), paired with its invoke on + // a single flat context. This keeps the debugger's + // logical frame depth constant across the + // back-edge. expect(counts.returnJump).toEqual({ count: 1 }); - // The invoke target inside the gather must be - // patched to the actual count entry, not left as - // the placeholder offset 0. This guards against - // patchInvokeTarget failing to walk into gather. + // The TCO back-edge JUMP is the one carrying both + // invoke and return discriminators on the same + // context object. Its invoke target must be patched + // to the actual count entry, not left as the + // placeholder offset 0 — this guards against + // patchInvokeTarget missing flat combined contexts. const tcoJump = program.instructions.find( (instr) => instr.operation?.mnemonic === "JUMP" && instr.context !== undefined && - Context.isGather(instr.context), + Context.isInvoke(instr.context) && + Context.isReturn(instr.context), ); expect(tcoJump).toBeDefined(); - const gather = tcoJump!.context as Format.Program.Context.Gather; - const invokeLeaf = gather.gather.find(Context.isInvoke); - expect(invokeLeaf).toBeDefined(); - const invocation = invokeLeaf!.invoke; + + // The same back-edge JUMP additionally carries a + // `transform: ["tailcall"]` context. This is an + // additive annotation telling debuggers the + // invoke/return pair was realized as a TCO + // back-edge rather than a real frame push/pop. 
+ expect(Context.isTransform(tcoJump!.context)).toBe(true); + expect( + (tcoJump!.context as Format.Program.Context.Transform).transform, + ).toContain("tailcall"); + + const ctx = tcoJump!.context as Format.Program.Context.Invoke; + const invocation = ctx.invoke; expect(Invocation.isInternalCall(invocation)).toBe(true); const internalCall = invocation as Format.Program.Context.Invoke.Invocation.InternalCall; - const invokeTarget = internalCall.target.pointer; + expect(internalCall.target).toBeDefined(); + const invokeTarget = internalCall.target!.pointer; expect(invokeTarget).toBeDefined(); expect( "offset" in invokeTarget ? invokeTarget.offset : undefined, @@ -513,3 +592,65 @@ code { r = count(0, 5); }`; } }); }); + +/** + * Count invoke/return contexts that carry a `declaration` + * (source id + range for the called/returning function's + * declaration). Walks gather wrappers and flat + * multi-discriminator contexts. + */ +function countDeclarations(program: Format.Program): { + invoke: number; + return: number; +} { + let invoke = 0; + let ret = 0; + for (const instr of program.instructions) { + if (!instr.context) continue; + for (const leaf of unwrapLeaves(instr.context)) { + if (Context.isInvoke(leaf) && leaf.invoke.declaration) invoke += 1; + if (Context.isReturn(leaf) && leaf.return.declaration) ret += 1; + } + } + return { invoke, return: ret }; +} + +/** + * Regression: the optimizer must preserve each function's + * `loc`/`sourceId` so evmgen can emit `declaration` source + * ranges on invoke/return contexts. cloneFunction used to + * drop these fields, so every declaration vanished from + * optimization level 1 upward. 
+ */ +describe("optimizer preserves function declaration info", () => { + const source = `name Recur; + +define { + function count(n: uint256, target: uint256) -> uint256 { + if (n < target) { return count(n + 1, target); } + else { return n; } + }; +} + +storage { [0] r: uint256; } +create { r = 0; } +code { r = count(0, 5); }`; + + it("carries declarations at level 0 (baseline)", async () => { + const program = await compileAt(source, 0); + const decls = countDeclarations(program); + expect(decls.invoke).toBeGreaterThan(0); + expect(decls.return).toBeGreaterThan(0); + }); + + for (const level of [1, 2, 3] as const) { + it(`preserves declarations through optimization at level ${level}`, async () => { + const program = await compileAt(source, level); + const decls = countDeclarations(program); + // The optimizer must not strip function loc/sourceId: + // invoke/return contexts still carry declaration ranges. + expect(decls.invoke).toBeGreaterThan(0); + expect(decls.return).toBeGreaterThan(0); + }); + } +}); diff --git a/packages/bugc/src/evmgen/transform-contexts.test.ts b/packages/bugc/src/evmgen/transform-contexts.test.ts new file mode 100644 index 000000000..5fdeee574 --- /dev/null +++ b/packages/bugc/src/evmgen/transform-contexts.test.ts @@ -0,0 +1,114 @@ +/** + * Verifies that optimizer `transform` markers are emitted onto + * the resulting bytecode's debug contexts, on the same + * `runtimeInstructions` path the docs tracer widget consumes. + * + * Level 1 constant folding attaches `transform: ["fold"]` to the + * folded value's instruction; a debugger can then show that a + * PUSH is a compile-time-evaluated constant rather than source + * the user wrote. 
+ */ +import { describe, it, expect } from "vitest"; + +import { compile } from "#compiler"; +import type * as Format from "@ethdebug/format"; +import { Program } from "@ethdebug/format"; + +const { Context } = Program; + +type OptLevel = 0 | 1 | 2 | 3; + +async function compileBytecode(source: string, level: OptLevel) { + const result = await compile({ + to: "bytecode", + source, + optimizer: { level }, + }); + if (!result.success) { + const errors = result.messages.error ?? []; + throw new Error( + `Compilation failed at level ${level}:\n` + + errors + .map((e: { message?: string }) => e.message ?? String(e)) + .join("\n"), + ); + } + return result.value.bytecode; +} + +/** Flatten a context into leaves, unwrapping gather/pick. */ +function leaves(ctx: Format.Program.Context): Format.Program.Context[] { + if (Context.isGather(ctx)) return ctx.gather.flatMap(leaves); + if ("pick" in ctx && Array.isArray((ctx as { pick: unknown[] }).pick)) { + return (ctx as { pick: Format.Program.Context[] }).pick.flatMap(leaves); + } + return [ctx]; +} + +/** + * Count instructions in the widget-path array whose context + * (at top level or in any leaf) carries a transform containing + * the given identifier. + */ +function countTransform( + instructions: { debug?: { context?: Format.Program.Context } }[], + id: string, +): number { + let count = 0; + for (const instr of instructions) { + const ctx = instr.debug?.context; + if (!ctx) continue; + const hit = [ctx, ...leaves(ctx)].some( + (c) => Context.isTransform(c) && c.transform.includes(id), + ); + if (hit) count += 1; + } + return count; +} + +describe("optimizer emits fold transform contexts", () => { + // `2 + 3` and `4 * 5` fold to constants at level 1. 
+ const source = `name Fold; + +storage { [0] r: uint256; } +create { r = 0; } +code { r = (2 + 3) * (4 * 5); }`; + + it("emits no fold transform at level 0", async () => { + const bc = await compileBytecode(source, 0); + expect(countTransform(bc.runtimeInstructions, "fold")).toBe(0); + }); + + for (const level of [1, 2, 3] as const) { + it(`emits fold transform at level ${level}`, async () => { + const bc = await compileBytecode(source, level); + expect(countTransform(bc.runtimeInstructions, "fold")).toBeGreaterThan(0); + }); + } +}); + +describe("optimizer emits coalesce transform contexts", () => { + // Two adjacent packed writes of a runtime value to one storage + // slot; read/write merging (level 3) packs them with SHL/OR into + // a single word write. + const source = `name Coalesce; + +define { struct S { a: uint128; b: uint128; }; } +storage { [0] s: S; [1] src: uint256; } +create {} +code { let v = src; s.a = v; s.b = v; }`; + + for (const level of [0, 1, 2] as const) { + it(`emits no coalesce transform at level ${level}`, async () => { + const bc = await compileBytecode(source, level); + expect(countTransform(bc.runtimeInstructions, "coalesce")).toBe(0); + }); + } + + it("emits coalesce transform at level 3", async () => { + const bc = await compileBytecode(source, 3); + expect(countTransform(bc.runtimeInstructions, "coalesce")).toBeGreaterThan( + 0, + ); + }); +}); diff --git a/packages/bugc/src/ir/utils/debug.ts b/packages/bugc/src/ir/utils/debug.ts index 5feb48b9c..13516fe71 100644 --- a/packages/bugc/src/ir/utils/debug.ts +++ b/packages/bugc/src/ir/utils/debug.ts @@ -350,3 +350,36 @@ export function preserveSubInstructionDebug( ...additionalContexts.map((c) => ({ context: c })), ); } + +/** + * Add one or more `transform` optimization markers to a debug + * context, composing them as a flat sibling key alongside any + * existing context discriminators (per the flat-composition + * convention: gather is only for same-key collisions). 
+ * + * Markers are appended to any existing `transform` array on the + * context, so an instruction touched by multiple passes + * accumulates the multiset — e.g. a folded value later merged + * yields `transform: ["fold", "coalesce"]`. + */ +export function addTransform( + debug: Ir.Instruction.Debug | undefined, + ...ids: Format.Program.Context.Transform.Identifier[] +): Ir.Instruction.Debug { + const existing = debug?.context; + + const prior: Format.Program.Context.Transform.Identifier[] = + existing && + "transform" in existing && + Array.isArray((existing as Format.Program.Context.Transform).transform) + ? (existing as Format.Program.Context.Transform).transform + : []; + + const transform = [...prior, ...ids]; + + if (!existing) { + return { context: { transform } }; + } + + return { context: { ...existing, transform } }; +} diff --git a/packages/bugc/src/optimizer/optimizer.ts b/packages/bugc/src/optimizer/optimizer.ts index 5b32a83cc..2165d479a 100644 --- a/packages/bugc/src/optimizer/optimizer.ts +++ b/packages/bugc/src/optimizer/optimizer.ts @@ -230,6 +230,12 @@ export abstract class BaseOptimizationStep implements OptimizationStep { parameters: [...func.parameters], entry: func.entry, blocks: clonedBlocks, + // Preserve declaration source info so evmgen can emit + // `declaration` ranges on invoke/return contexts. + // Dropping these here erased all declarations from + // optimization level 1 upward. + ...(func.loc ? { loc: func.loc } : {}), + ...(func.sourceId !== undefined ? 
{ sourceId: func.sourceId } : {}), }; } diff --git a/packages/bugc/src/optimizer/simple-optimizer.ts b/packages/bugc/src/optimizer/simple-optimizer.ts index b078c9638..7c64642d7 100644 --- a/packages/bugc/src/optimizer/simple-optimizer.ts +++ b/packages/bugc/src/optimizer/simple-optimizer.ts @@ -14,6 +14,7 @@ import { ReturnMergingStep, ReadWriteMergingStep, TailCallOptimizationStep, + InliningStep, } from "./steps/index.js"; /** @@ -58,9 +59,13 @@ function createOptimizationPipeline(level: number): OptimizationStep[] { ); } - // Level 2: Add CSE, tail call optimization, and jump optimization + // Level 2: Add inlining, CSE, tail call optimization, and + // jump optimization. Inlining runs first (after L1 fold) so + // TCO/CSE still apply to inlined code and `["fold","inline"]` + // composes. if (level >= 2) { steps.push( + new InliningStep(), new CommonSubexpressionEliminationStep(), new TailCallOptimizationStep(), new JumpOptimizationStep(), diff --git a/packages/bugc/src/optimizer/steps/constant-folding.ts b/packages/bugc/src/optimizer/steps/constant-folding.ts index 20f99d585..725c7f530 100644 --- a/packages/bugc/src/optimizer/steps/constant-folding.ts +++ b/packages/bugc/src/optimizer/steps/constant-folding.ts @@ -134,7 +134,10 @@ export class ConstantFoldingStep extends BaseOptimizationStep { value: result, type: this.getResultType(inst.op, typeof result), dest: inst.dest, - operationDebug: Ir.Utils.preserveDebug(inst), + operationDebug: Ir.Utils.addTransform( + Ir.Utils.preserveDebug(inst), + "fold", + ), }; } @@ -266,7 +269,10 @@ export class ConstantFoldingStep extends BaseOptimizationStep { value: hashValue, type: Ir.Type.Scalar.bytes32, dest: inst.dest, - operationDebug: Ir.Utils.preserveDebug(inst), + operationDebug: Ir.Utils.addTransform( + Ir.Utils.preserveDebug(inst), + "fold", + ), }; } diff --git a/packages/bugc/src/optimizer/steps/index.ts b/packages/bugc/src/optimizer/steps/index.ts index 75a02833f..640bac63c 100644 --- 
a/packages/bugc/src/optimizer/steps/index.ts +++ b/packages/bugc/src/optimizer/steps/index.ts @@ -7,3 +7,4 @@ export { BlockMergingStep } from "./block-merging.js"; export { ReturnMergingStep } from "./return-merging.js"; export { ReadWriteMergingStep } from "./read-write-merging.js"; export { TailCallOptimizationStep } from "./tail-call-optimization.js"; +export { InliningStep } from "./inlining.js"; diff --git a/packages/bugc/src/optimizer/steps/inlining.test.ts b/packages/bugc/src/optimizer/steps/inlining.test.ts new file mode 100644 index 000000000..ecd49bcc4 --- /dev/null +++ b/packages/bugc/src/optimizer/steps/inlining.test.ts @@ -0,0 +1,144 @@ +/** + * Behavioral tests for the function-inlining pass (level 2). + * + * Inlining must (a) preserve runtime behavior exactly, and + * (b) emit `transform: ["inline"]` on the inlined body so the + * debugger can reconstruct a virtual activation for the call. + */ +import { describe, it, expect } from "vitest"; + +import { compile } from "#compiler"; +import { executeProgram } from "#test/evm/behavioral"; +import type * as Format from "@ethdebug/format"; +import { Program } from "@ethdebug/format"; + +const { Context } = Program; + +function leaves(ctx: Format.Program.Context): Format.Program.Context[] { + if (Context.isGather(ctx)) return ctx.gather.flatMap(leaves); + if ("pick" in ctx && Array.isArray((ctx as { pick: unknown[] }).pick)) { + return (ctx as { pick: Format.Program.Context[] }).pick.flatMap(leaves); + } + return [ctx]; +} + +async function inlineMarks(source: string, level: 0 | 1 | 2 | 3) { + const result = await compile({ + to: "bytecode", + source, + optimizer: { level }, + }); + if (!result.success) { + const errors = result.messages.error ?? []; + throw new Error( + "compile failed:\n" + + errors + .map((e: { message?: string }) => e.message ?? 
String(e)) + .join("\n"), + ); + } + let count = 0; + for (const instr of result.value.bytecode.runtimeInstructions) { + const ctx = instr.debug?.context; + if (!ctx) continue; + if ( + [ctx, ...leaves(ctx)].some( + (c) => Context.isTransform(c) && c.transform.includes("inline"), + ) + ) { + count += 1; + } + } + return count; +} + +describe("function inlining (level 2)", () => { + describe("leaf helper, single return", () => { + const source = `name Demo; +define { + function add(a: uint256, b: uint256) -> uint256 { return a + b; }; +} +storage { [0] r: uint256; } +create {} +code { r = add(3, 4); }`; + + it("produces the same result at every level", async () => { + for (const level of [0, 1, 2, 3] as const) { + const res = await executeProgram(source, { + calldata: "", + optimizationLevel: level, + }); + expect(res.callSuccess).toBe(true); + expect(await res.getStorage(0n)).toBe(7n); + } + }); + + it("emits no inline marks at level 0", async () => { + expect(await inlineMarks(source, 0)).toBe(0); + }); + + it("emits inline marks at level 2", async () => { + expect(await inlineMarks(source, 2)).toBeGreaterThan(0); + }); + }); + + describe("multiple call sites", () => { + const source = `name Multi; +define { + function dbl(x: uint256) -> uint256 { return x + x; }; +} +storage { [0] r: uint256; } +create { r = 0; } +code { + let a = dbl(5); + let b = dbl(10); + r = a + b; +}`; + + it("inlines every site and stays correct", async () => { + for (const level of [0, 1, 2, 3] as const) { + const res = await executeProgram(source, { + calldata: "", + optimizationLevel: level, + }); + expect(res.callSuccess).toBe(true); + expect(await res.getStorage(0n)).toBe(30n); + } + expect(await inlineMarks(source, 2)).toBeGreaterThan(0); + }); + }); + + describe("does not inline into a tail-recursive function (protects TCO)", () => { + // `succ` is a leaf, but inlining it into `count`'s recursive + // call arguments would rewrite `count(succ(n))` into + // `count(n + 1)`, which the 
tail-call optimizer mishandles. + // The pass must leave recursive/TCO'd callers untouched. + const source = `name TailCall; +define { + function succ(n: uint256) -> uint256 { return n + 1; }; + function count(n: uint256, target: uint256) -> uint256 { + if (n < target) { return count(succ(n), target); } + else { return n; } + }; +} +storage { [0] r: uint256; } +create { r = 0; } +code { r = count(0, 5); }`; + + it("stays correct at every level", async () => { + for (const level of [0, 1, 2, 3] as const) { + const res = await executeProgram(source, { + calldata: "", + optimizationLevel: level, + }); + expect(res.callSuccess).toBe(true); + expect(await res.getStorage(0n)).toBe(5n); + } + }); + + it("does not inline succ into the recursive count", async () => { + // No inline markers: succ stays a real call so TCO can fire. + expect(await inlineMarks(source, 2)).toBe(0); + }); + }); +}); diff --git a/packages/bugc/src/optimizer/steps/inlining.ts b/packages/bugc/src/optimizer/steps/inlining.ts new file mode 100644 index 000000000..1da050396 --- /dev/null +++ b/packages/bugc/src/optimizer/steps/inlining.ts @@ -0,0 +1,563 @@ +/** + * Function inlining (level 2). + * + * Replaces calls to eligible internal functions with a copy of + * the callee's body spliced into the caller, so no runtime + * JUMP/frame is used. Each inlined instruction is annotated with + * `transform: ["inline"]` and the body is bracketed by a virtual + * invoke/return (identity + declaration, no code target — the + * #213 optional-target signal) so a debugger can reconstruct a + * virtual activation. + * + * v1 eligibility: internal (user-defined), non-recursive callee + * that is either a leaf (calls nothing) or below a small size + * threshold. Applied at all call sites; a callee whose every + * site is inlined is deleted. 
+ */ +import * as Ir from "#ir"; +import type * as Format from "@ethdebug/format"; + +import { + BaseOptimizationStep, + type OptimizationContext, +} from "../optimizer.js"; + +/** Max IR-node count for a non-leaf callee to still inline. Tunable. */ +const INLINE_MAX_IR_NODES = 16; + +export class InliningStep extends BaseOptimizationStep { + name = "inlining"; + private siteCounter = 0; + + run(module: Ir.Module, _context: OptimizationContext): Ir.Module { + const optimized = this.cloneModule(module); + if (!optimized.functions || optimized.functions.size === 0) { + return optimized; + } + + const callGraph = buildCallGraph(optimized); + const eligible = new Set(); + for (const [name, fn] of optimized.functions) { + if (isEligible(name, fn, callGraph)) eligible.add(name); + } + if (eligible.size === 0) return optimized; + + // Track, per callee, how many sites remain un-inlined so we + // can delete fully-inlined callees afterward. + const remainingSites = new Map(); + for (const name of eligible) remainingSites.set(name, 0); + + // Named callers. Self-recursive callers are skipped: they are + // TailCall-optimized later, and inlining a helper into a + // self-recursive call's arguments (e.g. `count(succ(n))` -> + // `count(n + 1)`) rewrites the tail call into a computed-arg + // form that TCO mishandles, silently breaking the recursion. + // Correctness over coverage — inlining into recursive bodies is + // deferred. + const named: [string, Ir.Function][] = [ + ["
", optimized.main], + ...(optimized.create + ? ([["", optimized.create]] as [string, Ir.Function][]) + : []), + ...[...optimized.functions].map( + ([n, f]) => [n, f] as [string, Ir.Function], + ), + ]; + + for (const [callerName, caller] of named) { + // Self-recursive (pre-TCO) or already TCO'd (post-TCO, its + // self-call is now a jump-with-tailCall so the call graph no + // longer shows the recursion). Either way, don't inline into + // it. + if (reachableCallees(callerName, callGraph).has(callerName)) continue; + if (hasTailCallBackedge(caller)) continue; + this.inlineIntoFunction(caller, optimized, eligible, remainingSites); + } + + // Delete callees that no longer have any call site anywhere. + for (const name of eligible) { + if (!isCalledAnywhere(name, optimized)) { + optimized.functions.delete(name); + } + } + + return optimized; + } + + private inlineIntoFunction( + caller: Ir.Function, + module: Ir.Module, + eligible: Set, + _remainingSites: Map, + ): void { + // Snapshot block ids up front; we mutate the map as we splice. + let changed = true; + // Guard against pathological loops. + let guard = 0; + while (changed && guard++ < 1000) { + changed = false; + for (const [blockId, block] of caller.blocks) { + const term = block.terminator; + if (term.kind !== "call") continue; + if (!eligible.has(term.function)) continue; + const callee = module.functions?.get(term.function); + if (!callee) continue; + // Don't inline a function into itself. 
+ if (callee === caller) continue; + + this.spliceCall(caller, blockId, block, term, callee); + changed = true; + break; // block map mutated — restart the scan + } + } + } + + private spliceCall( + caller: Ir.Function, + callBlockId: string, + callBlock: Ir.Block, + call: Extract, + callee: Ir.Function, + ): void { + const site = this.siteCounter++; + const prefix = `inl${site}$`; + + // --- build rename maps --- + const blockRename = new Map(); + for (const id of callee.blocks.keys()) { + blockRename.set(id, prefix + id); + } + + // param temp id -> bound argument Value + const paramSubst = new Map(); + callee.parameters.forEach((p, i) => { + const arg = call.arguments[i]; + if (arg) paramSubst.set(p.tempId, arg); + }); + + // every non-param temp defined in the callee -> fresh id + const idRename = new Map(); + for (const b of callee.blocks.values()) { + for (const phi of b.phis ?? []) rename(phi.dest); + for (const inst of b.instructions) { + if ("dest" in inst && typeof inst.dest === "string") { + rename(inst.dest); + } + } + } + function rename(id: string): void { + if (paramSubst.has(id)) return; + if (!idRename.has(id)) idRename.set(id, prefix + id); + } + + const remapValue = (v: Ir.Value): Ir.Value => { + if (v.kind !== "temp") return v; + const sub = paramSubst.get(v.id); + if (sub) return sub; + const nid = idRename.get(v.id); + return nid ? { ...v, id: nid } : v; + }; + + // Declaration for the callee (for the virtual invoke/return). + const declaration = + callee.loc && callee.sourceId + ? { source: { id: callee.sourceId }, range: callee.loc } + : undefined; + + const inlineInvoke: Format.Program.Context.Invoke["invoke"] = { + jump: true, + identifier: callee.name, + ...(declaration ? { declaration } : {}), + // no `target` — JUMP is elided (virtual activation) + }; + const inlineReturn: Format.Program.Context.Return["return"] = { + identifier: callee.name, + ...(declaration ? 
{ declaration } : {}), + }; + + const entryBlockId = blockRename.get(callee.entry)!; + const returnBlockIds: string[] = []; + + // --- clone + remap callee blocks --- + for (const [origId, origBlock] of callee.blocks) { + const newId = blockRename.get(origId)!; + const isEntry = origId === callee.entry; + + const instructions: Ir.Instruction[] = origBlock.instructions.map( + (inst, idx) => { + const cloned = remapInstruction(inst, remapValue, idRename); + // Mark every inlined instruction for membership. + cloned.operationDebug = addInlineMarker(cloned.operationDebug); + // Virtual invoke on the first instruction of the entry. + if (isEntry && idx === 0) { + cloned.operationDebug = mergeDiscriminator( + cloned.operationDebug, + "invoke", + inlineInvoke, + ); + } + return cloned; + }, + ); + + const phis: Ir.Block.Phi[] = (origBlock.phis ?? []).map((phi) => + remapPhi(phi, remapValue, idRename, blockRename), + ); + + let terminator: Ir.Block.Terminator; + const t = origBlock.terminator; + if (t.kind === "return") { + returnBlockIds.push(newId); + // Virtual return marker on the last body instruction of + // this block (or a synthetic carrier if the block is empty + // is not needed — return blocks always have ≥1 emitted + // instruction in practice; if empty, the marker rides the + // jump's debug below). 
+ if (instructions.length > 0) { + const last = instructions[instructions.length - 1]; + last.operationDebug = mergeDiscriminator( + last.operationDebug, + "return", + inlineReturn, + ); + } + // return -> jump to the caller's continuation + terminator = { + kind: "jump", + target: call.continuation, + operationDebug: addInlineMarker( + mergeDiscriminator({}, "return", inlineReturn), + ), + }; + } else { + terminator = remapTerminator(t, remapValue, blockRename); + } + + caller.blocks.set(newId, { + id: newId, + phis, + instructions, + terminator, + predecessors: new Set(), + debug: origBlock.debug, + }); + } + + // --- wire the single return value into the caller --- + // v1 eligibility guarantees exactly one return. Substitute the + // call's dest temp with the (remapped) returned value across the + // whole caller — no phi, so it's robust to L3 block-merging. + if (call.dest) { + const returns = collectReturns(callee, blockRename, remapValue); + if (returns.length === 1) { + substituteTemp(caller, call.dest, returns[0].value); + } + } + + // --- rewire the calling block: call -> jump into inlined entry --- + callBlock.terminator = { + kind: "jump", + target: entryBlockId, + operationDebug: call.operationDebug, + }; + + void callBlockId; + recomputePredecessors(caller); + } +} + +// ---- helpers ---- + +function collectReturns( + callee: Ir.Function, + blockRename: Map, + remapValue: (v: Ir.Value) => Ir.Value, +): { block: string; value: Ir.Value }[] { + const out: { block: string; value: Ir.Value }[] = []; + for (const [origId, b] of callee.blocks) { + if (b.terminator.kind === "return" && b.terminator.value) { + out.push({ + block: blockRename.get(origId)!, + value: remapValue(b.terminator.value), + }); + } + } + return out; +} + +function buildCallGraph(module: Ir.Module): Map> { + const graph = new Map>(); + const fns: [string, Ir.Function][] = [ + ["
", module.main], + ...(module.create + ? ([["", module.create]] as [string, Ir.Function][]) + : []), + ...[...(module.functions ?? new Map())].map( + ([n, f]) => [n, f] as [string, Ir.Function], + ), + ]; + for (const [name, fn] of fns) { + const callees = new Set(); + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "call") callees.add(b.terminator.function); + } + graph.set(name, callees); + } + return graph; +} + +function reachableCallees( + start: string, + graph: Map>, +): Set { + const seen = new Set(); + const stack = [...(graph.get(start) ?? [])]; + while (stack.length) { + const n = stack.pop()!; + if (seen.has(n)) continue; + seen.add(n); + for (const c of graph.get(n) ?? []) stack.push(c); + } + return seen; +} + +function functionSize(fn: Ir.Function): number { + let n = 0; + for (const b of fn.blocks.values()) { + n += b.instructions.length + 1; // + terminator + n += (b.phis ?? []).length; + } + return n; +} + +function hasTailCallBackedge(fn: Ir.Function): boolean { + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "jump" && b.terminator.tailCall) return true; + } + return false; +} + +function returnCount(fn: Ir.Function): number { + let n = 0; + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "return") n += 1; + } + return n; +} + +function isEligible( + name: string, + fn: Ir.Function, + graph: Map>, +): boolean { + const callees = graph.get(name) ?? new Set(); + // Non-recursive: name not reachable from itself. + if (reachableCallees(name, graph).has(name)) return false; + // v1: single return point only. Multi-return needs a phi at the + // continuation, which block-merging (L3) can turn into an + // invalid self-referential phi; deferred until that's handled. 
+ if (returnCount(fn) !== 1) return false; + // Never inline a TCO-transformed function: after TailCall + // optimization a self-recursive function's back-edge becomes a + // `jump` with `tailCall`, which makes it look like a leaf + // single-return function on the next fixpoint iteration. Inlining + // it would clobber the tailcall showcase. A `tailCall` back-edge + // marks it as recursion, not a real leaf. + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "jump" && b.terminator.tailCall) return false; + } + // v1: leaf callees only. Inlining a non-leaf callee whose own + // (eligible) calls also inline exposes a dest-substitution + // ordering bug in the nested chain; deferred. The size-threshold + // branch is kept for when that lands. + const isLeaf = callees.size === 0; + const smallEnough = functionSize(fn) <= INLINE_MAX_IR_NODES; + void smallEnough; + return isLeaf; +} + +function isCalledAnywhere(name: string, module: Ir.Module): boolean { + const fns = [ + module.main, + ...(module.create ? [module.create] : []), + ...(module.functions?.values() ?? []), + ]; + for (const fn of fns) { + for (const b of fn.blocks.values()) { + if (b.terminator.kind === "call" && b.terminator.function === name) { + return true; + } + } + } + return false; +} + +/** Deep-clone an instruction, remapping temp values and its dest. */ +function remapInstruction( + inst: Ir.Instruction, + remapValue: (v: Ir.Value) => Ir.Value, + idRename: Map, +): Ir.Instruction { + const cloned = structuredCloneValues(inst) as Ir.Instruction & { + dest?: string; + }; + remapValuesInPlace(cloned, remapValue); + if (typeof cloned.dest === "string") { + cloned.dest = idRename.get(cloned.dest) ?? cloned.dest; + } + return cloned; +} + +function remapPhi( + phi: Ir.Block.Phi, + remapValue: (v: Ir.Value) => Ir.Value, + idRename: Map, + blockRename: Map, +): Ir.Block.Phi { + const sources = new Map(); + for (const [pred, val] of phi.sources) { + sources.set(blockRename.get(pred) ?? 
pred, remapValue(val)); + } + return { + ...phi, + dest: idRename.get(phi.dest) ?? phi.dest, + sources, + }; +} + +function remapTerminator( + t: Ir.Block.Terminator, + remapValue: (v: Ir.Value) => Ir.Value, + blockRename: Map, +): Ir.Block.Terminator { + switch (t.kind) { + case "jump": + return { ...t, target: blockRename.get(t.target) ?? t.target }; + case "branch": + return { + ...t, + condition: remapValue(t.condition), + trueTarget: blockRename.get(t.trueTarget) ?? t.trueTarget, + falseTarget: blockRename.get(t.falseTarget) ?? t.falseTarget, + }; + case "return": + return { ...t, value: t.value ? remapValue(t.value) : undefined }; + case "call": + return { + ...t, + arguments: t.arguments.map(remapValue), + dest: t.dest, + continuation: blockRename.get(t.continuation) ?? t.continuation, + }; + } +} + +/** Deep clone that preserves nested plain objects and bigints. */ +function structuredCloneValues(obj: T): T { + return structuredClone(obj); +} + +/** Recursively rewrite any temp Value in place. */ +function remapValuesInPlace( + node: unknown, + remapValue: (v: Ir.Value) => Ir.Value, +): void { + if (!node || typeof node !== "object") return; + const obj = node as Record; + for (const key of Object.keys(obj)) { + const child = obj[key]; + if ( + child && + typeof child === "object" && + (child as { kind?: string }).kind === "temp" && + typeof (child as { id?: unknown }).id === "string" + ) { + obj[key] = remapValue(child as Ir.Value); + } else if (Array.isArray(child)) { + child.forEach((el, i) => { + if ( + el && + typeof el === "object" && + (el as { kind?: string }).kind === "temp" + ) { + child[i] = remapValue(el as Ir.Value); + } else { + remapValuesInPlace(el, remapValue); + } + }); + } else if (child && typeof child === "object") { + remapValuesInPlace(child, remapValue); + } + } +} + +/** Replace every use of temp `id` with `value` across the function. 
*/ +function substituteTemp(fn: Ir.Function, id: string, value: Ir.Value): void { + const sub = (v: Ir.Value): Ir.Value => + v.kind === "temp" && v.id === id ? value : v; + for (const b of fn.blocks.values()) { + for (const inst of b.instructions) { + remapValuesInPlace(inst, sub); + } + for (const phi of b.phis ?? []) { + for (const [pred, val] of phi.sources) { + phi.sources.set(pred, sub(val)); + } + } + b.terminator = remapTerminator(b.terminator, sub, new Map()); + } +} + +function recomputePredecessors(fn: Ir.Function): void { + for (const b of fn.blocks.values()) b.predecessors = new Set(); + for (const [id, b] of fn.blocks) { + const t = b.terminator; + const targets: string[] = + t.kind === "jump" + ? [t.target] + : t.kind === "branch" + ? [t.trueTarget, t.falseTarget] + : t.kind === "call" + ? [t.continuation] + : []; + for (const tgt of targets) { + fn.blocks.get(tgt)?.predecessors.add(id); + } + } +} + +// ---- debug-context composition ---- + +function addInlineMarker( + debug: Ir.Instruction.Debug | undefined, +): Ir.Instruction.Debug { + return Ir.Utils.addTransform(debug, "inline"); +} + +/** + * Attach a discriminator (invoke/return) as a flat sibling key on + * a debug context, threading into a gather leaf if present so the + * marker never sits as a sibling of `gather`. + */ +function mergeDiscriminator( + debug: Ir.Instruction.Debug, + key: "invoke" | "return", + value: unknown, +): Ir.Instruction.Debug { + const existing = debug.context as Record | undefined; + if (existing && "gather" in existing && Array.isArray(existing.gather)) { + // Add as a new gather child rather than a sibling of gather. + return { + context: { + ...existing, + gather: [...(existing.gather as unknown[]), { [key]: value }], + } as Format.Program.Context, + }; + } + return { + context: { + ...(existing ?? 
{}), + [key]: value, + } as Format.Program.Context, + }; +} diff --git a/packages/bugc/src/optimizer/steps/read-write-merging.ts b/packages/bugc/src/optimizer/steps/read-write-merging.ts index 1ac2ecde5..cb24ba1c7 100644 --- a/packages/bugc/src/optimizer/steps/read-write-merging.ts +++ b/packages/bugc/src/optimizer/steps/read-write-merging.ts @@ -368,6 +368,18 @@ export class ReadWriteMergingStep extends BaseOptimizationStep { reason: `Merged ${writes.length} writes to same location`, }); + // Mark every instruction produced by the merge with + // transform:["coalesce"] so debuggers can show the SHL/OR + // field-packing sequence as compiler-synthesized rather than + // source the user wrote. Appends to any existing transform + // array (e.g. a folded packed value → ["fold","coalesce"]). + for (const inst of instructions) { + inst.operationDebug = Ir.Utils.addTransform( + inst.operationDebug, + "coalesce", + ); + } + return instructions; } diff --git a/packages/format/src/types/program/context.test.ts b/packages/format/src/types/program/context.test.ts index 4470a322d..55a716181 100644 --- a/packages/format/src/types/program/context.test.ts +++ b/packages/format/src/types/program/context.test.ts @@ -6,6 +6,10 @@ testSchemaGuards("ethdebug/format/program/context", [ schema: "schema:ethdebug/format/program/context", guard: isContext, }, + { + schema: "schema:ethdebug/format/program/context/name", + guard: Context.isName, + }, { schema: "schema:ethdebug/format/program/context/code", guard: Context.isCode, @@ -46,4 +50,8 @@ testSchemaGuards("ethdebug/format/program/context", [ schema: "schema:ethdebug/format/program/context/function/revert", guard: Context.isRevert, }, + { + schema: "schema:ethdebug/format/program/context/transform", + guard: Context.isTransform, + }, ] as const); diff --git a/packages/format/src/types/program/context.ts b/packages/format/src/types/program/context.ts index 104f27196..7c4e4ea4c 100644 --- a/packages/format/src/types/program/context.ts 
+++ b/packages/format/src/types/program/context.ts @@ -3,6 +3,7 @@ import { Type } from "#types/type"; import { Pointer, isPointer } from "#types/pointer"; export type Context = + | Context.Name | Context.Code | Context.Variables | Context.Remark @@ -11,10 +12,12 @@ export type Context = | Context.Frame | Context.Invoke | Context.Return - | Context.Revert; + | Context.Revert + | Context.Transform; export const isContext = (value: unknown): value is Context => [ + Context.isName, Context.isCode, Context.isVariables, Context.isRemark, @@ -24,9 +27,20 @@ export const isContext = (value: unknown): value is Context => Context.isInvoke, Context.isReturn, Context.isRevert, + Context.isTransform, ].some((guard) => guard(value)); export namespace Context { + export interface Name { + name: string; + } + + export const isName = (value: unknown): value is Name => + typeof value === "object" && + !!value && + "name" in value && + typeof value.name === "string"; + export interface Code { code: Materials.SourceRange; } @@ -171,7 +185,7 @@ export namespace Context { export namespace Invocation { export interface InternalCall extends Function.Identity { jump: true; - target: Function.PointerRef; + target?: Function.PointerRef; arguments?: Function.PointerRef; } @@ -180,8 +194,7 @@ export namespace Context { !!value && "jump" in value && value.jump === true && - "target" in value && - Function.isPointerRef(value.target) && + (!("target" in value) || Function.isPointerRef(value.target)) && (!("arguments" in value) || Function.isPointerRef(value.arguments)); export interface ExternalCall extends Function.Identity { @@ -274,4 +287,30 @@ export namespace Context { (!("reason" in value) || Function.isPointerRef(value.reason)) && (!("panic" in value) || typeof value.panic === "number"); } + + export interface Transform { + transform: Transform.Identifier[]; + } + + export const isTransform = (value: unknown): value is Transform => + typeof value === "object" && + !!value && + "transform" 
in value && + Array.isArray(value.transform) && + value.transform.length > 0 && + value.transform.every( + (item) => typeof item === "string" && item.length > 0, + ); + + export namespace Transform { + // Recognized v1 identifiers. Unknown strings are permitted + // (the identifier set is extensible); the union preserves + // autocomplete for known values. + export type Identifier = + | "inline" + | "tailcall" + | "fold" + | "coalesce" + | (string & {}); + } } diff --git a/packages/programs-react/src/components/CallInfoPanel.css b/packages/programs-react/src/components/CallInfoPanel.css index 75cd06511..b2835861c 100644 --- a/packages/programs-react/src/components/CallInfoPanel.css +++ b/packages/programs-react/src/components/CallInfoPanel.css @@ -27,6 +27,12 @@ border-left: 3px solid var(--programs-revert-accent, #cf222e); } +.call-info-banner-tailcall { + background: var(--programs-transform-bg, #f3ecff); + color: var(--programs-transform-text, #8250df); + border-left: 3px solid var(--programs-transform-accent, #a475f9); +} + .call-info-refs { display: flex; flex-direction: column; diff --git a/packages/programs-react/src/components/CallInfoPanel.tsx b/packages/programs-react/src/components/CallInfoPanel.tsx index 09da2b3d7..e0e10ed2b 100644 --- a/packages/programs-react/src/components/CallInfoPanel.tsx +++ b/packages/programs-react/src/components/CallInfoPanel.tsx @@ -29,6 +29,10 @@ function formatBanner(info: ResolvedCallInfo): string { ? 
`(${info.argumentNames.join(", ")})` : "()"; + if (info.isTailCall) { + return `Tail call: ${name} (frame reused)`; + } + if (info.kind === "invoke") { const prefix = info.callType === "external" @@ -50,11 +54,14 @@ function formatBanner(info: ResolvedCallInfo): string { return `Reverted in ${name}()`; } -function bannerClassName(kind: ResolvedCallInfo["kind"]): string { - if (kind === "invoke") { +function bannerClassName(info: ResolvedCallInfo): string { + if (info.isTailCall) { + return "call-info-banner-tailcall"; + } + if (info.kind === "invoke") { return "call-info-banner-invoke"; } - if (kind === "return") { + if (info.kind === "return") { return "call-info-banner-return"; } return "call-info-banner-revert"; @@ -76,9 +83,7 @@ export function CallInfoPanel({ return (
-
+
{formatBanner(currentCallInfo)}
diff --git a/packages/programs-react/src/components/CallStackDisplay.css b/packages/programs-react/src/components/CallStackDisplay.css index 9143b8d76..33790e6a2 100644 --- a/packages/programs-react/src/components/CallStackDisplay.css +++ b/packages/programs-react/src/components/CallStackDisplay.css @@ -48,3 +48,23 @@ .call-stack-parens { color: var(--programs-text-muted, #888); } + +.call-stack-tailcall, +.call-stack-inline { + margin-left: 4px; + padding: 0 5px; + border-radius: 8px; + font-size: 0.8em; + font-weight: 500; + white-space: nowrap; + background: var(--programs-transform-bg, #f3ecff); + color: var(--programs-transform-text, #8250df); + border: 1px solid var(--programs-transform-accent, #a475f9); +} + +/* Virtual (inline) activations use a dashed border to read as + "not a real frame" while sharing the transform palette. */ +.call-stack-inline { + border-style: dashed; + font-style: italic; +} diff --git a/packages/programs-react/src/components/CallStackDisplay.tsx b/packages/programs-react/src/components/CallStackDisplay.tsx index 09e2bf7aa..f8dfc4b05 100644 --- a/packages/programs-react/src/components/CallStackDisplay.tsx +++ b/packages/programs-react/src/components/CallStackDisplay.tsx @@ -94,6 +94,22 @@ export function CallStackDisplay({ ({formatArgs(frame, resolvedCallStack)}) + {frame.isTailCall && ( + + ⮌ tail call + + )} + {frame.isInline && ( + + ⧉ inline + + )} ))} diff --git a/packages/programs-react/src/components/TraceContext.tsx b/packages/programs-react/src/components/TraceContext.tsx index 581b72143..795f3f2f4 100644 --- a/packages/programs-react/src/components/TraceContext.tsx +++ b/packages/programs-react/src/components/TraceContext.tsx @@ -118,6 +118,8 @@ export interface ResolvedCallInfo { panic?: number; /** Resolved pointer refs */ pointerRefs: ResolvedPointerRef[]; + /** True when a tailcall transform is present (TCO). 
*/ + isTailCall?: boolean; } /** @@ -136,6 +138,8 @@ export interface ResolvedCallFrame { value?: string; error?: string; }>; + /** True when this frame was (re)entered via a tail call. */ + isTailCall?: boolean; } /** @@ -382,6 +386,7 @@ export function TraceProvider({ identifier: frame.identifier, stepIndex: frame.stepIndex, callType: frame.callType, + isTailCall: frame.isTailCall, resolvedArgs: argCacheRef.current.get(frame.stepIndex), })); setResolvedCallStack(initial); @@ -477,6 +482,7 @@ export function TraceProvider({ callType: extractedCallInfo.callType, argumentNames: extractedCallInfo.argumentNames, panic: extractedCallInfo.panic, + isTailCall: extractedCallInfo.isTailCall, pointerRefs: extractedCallInfo.pointerRefs.map((ref) => ({ label: ref.label, pointer: ref.pointer, diff --git a/packages/programs-react/src/index.ts b/packages/programs-react/src/index.ts index 6253933c2..8d8610771 100644 --- a/packages/programs-react/src/index.ts +++ b/packages/programs-react/src/index.ts @@ -59,6 +59,7 @@ export { findInstructionAtPc, extractVariablesFromInstruction, extractCallInfoFromInstruction, + extractTransformFromInstruction, buildPcToInstructionMap, buildCallStack, type CallInfo, diff --git a/packages/programs-react/src/utils/index.ts b/packages/programs-react/src/utils/index.ts index a79f07b08..e6dfdbefb 100644 --- a/packages/programs-react/src/utils/index.ts +++ b/packages/programs-react/src/utils/index.ts @@ -18,6 +18,7 @@ export { findInstructionAtPc, extractVariablesFromInstruction, extractCallInfoFromInstruction, + extractTransformFromInstruction, buildPcToInstructionMap, buildCallStack, type TraceStep, diff --git a/packages/programs-react/src/utils/mockTrace.test.ts b/packages/programs-react/src/utils/mockTrace.test.ts new file mode 100644 index 000000000..ee223ecb4 --- /dev/null +++ b/packages/programs-react/src/utils/mockTrace.test.ts @@ -0,0 +1,628 @@ +/** + * Tests for trace context extraction, transform (tailcall) + * detection, and call-stack 
construction. + */ + +import { describe, it, expect } from "vitest"; +import type { Program } from "@ethdebug/format"; +import { + extractTransformFromInstruction, + extractCallInfoFromInstruction, + extractCallEvents, + buildCallStack, + buildPcToInstructionMap, + type TraceStep, +} from "./mockTrace.js"; + +/** Build a minimal instruction with a context at an offset. */ +function instr(offset: number, context: unknown): Program.Instruction { + return { + offset, + operation: { mnemonic: "JUMPDEST", arguments: [] }, + context, + } as unknown as Program.Instruction; +} + +describe("extractTransformFromInstruction", () => { + it("returns identifiers from a direct transform context", () => { + const i = instr(0, { transform: ["tailcall"] }); + expect(extractTransformFromInstruction(i)).toEqual(["tailcall"]); + }); + + it("finds transform identifiers nested inside a gather", () => { + const i = instr(0, { + gather: [ + { return: { identifier: "sum" } }, + { invoke: { jump: true, identifier: "sum" } }, + { transform: ["tailcall"] }, + ], + }); + expect(extractTransformFromInstruction(i)).toEqual(["tailcall"]); + }); + + it("collects multiple identifiers across nested contexts", () => { + const i = instr(0, { + gather: [{ transform: ["inline"] }, { transform: ["tailcall"] }], + }); + expect(extractTransformFromInstruction(i).sort()).toEqual([ + "inline", + "tailcall", + ]); + }); + + it("returns an empty array when no transform is present", () => { + const i = instr(0, { invoke: { jump: true, identifier: "sum" } }); + expect(extractTransformFromInstruction(i)).toEqual([]); + }); +}); + +describe("extractCallInfoFromInstruction tailcall flag", () => { + it("marks isTailCall when a tailcall transform is present", () => { + const i = instr(0, { + gather: [ + { return: { identifier: "sum" } }, + { invoke: { jump: true, identifier: "sum" } }, + { transform: ["tailcall"] }, + ], + }); + const info = extractCallInfoFromInstruction(i); + expect(info?.isTailCall).toBe(true); + 
}); + + it("leaves isTailCall falsy for a plain invoke", () => { + const i = instr(0, { invoke: { jump: true, identifier: "sum" } }); + const info = extractCallInfoFromInstruction(i); + expect(info?.isTailCall).toBeFalsy(); + }); +}); + +describe("buildCallStack TCO frame replacement", () => { + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMPDEST" }, // entry invoke → push sum + { pc: 10, opcode: "JUMP" }, // TCO back-edge → replace frame + ]; + + const program = { + instructions: [ + instr(0, { invoke: { jump: true, identifier: "sum" } }), + instr(10, { + gather: [ + { return: { identifier: "sum" } }, + { invoke: { jump: true, identifier: "sum" } }, + { transform: ["tailcall"] }, + ], + }), + ], + } as unknown as Program; + + const pcToInstruction = buildPcToInstructionMap(program); + + it("keeps the stack depth stable across a tail call", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + // Without the fix, the return-first gather pops to empty. + expect(stack).toHaveLength(1); + }); + + it("replaces the top frame and marks it as a tail call", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack[0].identifier).toBe("sum"); + expect(stack[0].isTailCall).toBe(true); + expect(stack[0].stepIndex).toBe(1); + }); + + it("does not mark a normal (pre-tailcall) frame", () => { + const stack = buildCallStack(trace, pcToInstruction, 0); + expect(stack).toHaveLength(1); + expect(stack[0].isTailCall).toBeFalsy(); + }); +}); + +// The compiler (bugc #217) emits the TCO back-edge as a +// single FLAT context object carrying return + invoke + +// transform keys together (not a gather). This is the +// actual production shape, so it needs direct coverage. 
+describe("flat (production) TCO back-edge shape", () => { + const flatBackEdge = { + return: { identifier: "sum" }, + invoke: { + jump: true, + identifier: "sum", + target: { pointer: { location: "code", offset: 0, length: 1 } }, + }, + transform: ["tailcall"], + }; + + it("extracts the tailcall transform from the flat object", () => { + expect(extractTransformFromInstruction(instr(0, flatBackEdge))).toEqual([ + "tailcall", + ]); + }); + + it("marks isTailCall on the flat back-edge", () => { + const info = extractCallInfoFromInstruction(instr(0, flatBackEdge)); + expect(info?.isTailCall).toBe(true); + }); + + it("replaces the frame in place for a flat back-edge", () => { + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMPDEST" }, + { pc: 10, opcode: "JUMP" }, + ]; + const program = { + instructions: [ + instr(0, { invoke: { jump: true, identifier: "sum" } }), + instr(10, flatBackEdge), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack).toHaveLength(1); + expect(stack[0].identifier).toBe("sum"); + expect(stack[0].isTailCall).toBe(true); + expect(stack[0].callType).toBe("internal"); + }); + + it("loses tail-call handling when the marker is stripped", () => { + // Guards the task #10 failure mode: with the transform + // marker gone, the flat {return, invoke} back-edge is + // treated as a plain invoke — the frame-replacement path + // never runs, so no frame is flagged as a tail call and + // the widget can no longer render it as a frame reuse. 
+ const stripped = { + return: { identifier: "sum" }, + invoke: { jump: true, identifier: "sum" }, + }; + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMPDEST" }, + { pc: 10, opcode: "JUMP" }, + ]; + const program = { + instructions: [ + instr(0, { invoke: { jump: true, identifier: "sum" } }), + instr(10, stripped), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack.some((f) => f.isTailCall)).toBe(false); + }); +}); + +// Inlined internal calls (level-2 `inline` transform) produce +// VIRTUAL activations, not real ones. The compiler brackets an +// inlined body with a virtual invoke on the entry-first +// instruction and a virtual return on the exit-last instruction; +// every inlined instruction carries transform:["inline"]. The +// call stack reconstructs the virtual frame via close-after +// push/pop (a frame is visible AT its return-bearing instruction +// and popped on advance), tags it, and — belt-and-suspenders — +// tears down any trailing virtual frame the moment execution +// reaches an instruction whose inline-marker count is below the +// open virtual depth. So it reads distinctly from a real call and +// never leaks a phantom frame into caller code. 
+describe("inline virtual activations", () => { + const entryInvoke = { + code: { source: { id: "0" }, range: { offset: 0, length: 1 } }, + transform: ["inline"], + invoke: { jump: true, identifier: "dbl" }, + }; + const bodyMark = { + code: { source: { id: "0" }, range: { offset: 1, length: 1 } }, + transform: ["inline"], + }; + const exitReturn = { + code: { source: { id: "0" }, range: { offset: 2, length: 1 } }, + transform: ["inline"], + return: { identifier: "dbl" }, + }; + const callerMark = { + code: { source: { id: "0" }, range: { offset: 3, length: 1 } }, + }; + // A body that emits to a single EVM op: invoke and return + // co-locate on one instruction (the degenerate bracketed case). + const singleOpBody = { + code: { source: { id: "0" }, range: { offset: 0, length: 1 } }, + transform: ["inline"], + invoke: { jump: true, identifier: "dbl" }, + return: { identifier: "dbl" }, + }; + + describe("extractCallInfoFromInstruction inline flag", () => { + it("marks isInline on a virtual (inline) invoke", () => { + const info = extractCallInfoFromInstruction(instr(0, entryInvoke)); + expect(info?.kind).toBe("invoke"); + expect(info?.isInline).toBe(true); + }); + + it("marks isInline on a virtual (inline) return", () => { + const info = extractCallInfoFromInstruction(instr(0, exitReturn)); + expect(info?.kind).toBe("return"); + expect(info?.isInline).toBe(true); + }); + + it("leaves isInline falsy for a plain (real) invoke", () => { + const info = extractCallInfoFromInstruction( + instr(0, { invoke: { jump: true, identifier: "dbl" } }), + ); + expect(info?.isInline).toBeFalsy(); + }); + }); + + describe("extractCallEvents exposes both discriminators", () => { + it("returns invoke then return, in order, for a co-located context", () => { + const events = extractCallEvents(instr(0, singleOpBody)); + expect(events.map((e) => e.kind)).toEqual(["invoke", "return"]); + expect(events.every((e) => e.isInline)).toBe(true); + }); + + it("returns a single invoke event for a 
pure invoke", () => { + const events = extractCallEvents(instr(0, entryInvoke)); + expect(events.map((e) => e.kind)).toEqual(["invoke"]); + }); + + it("returns a single return event for a pure return", () => { + const events = extractCallEvents(instr(0, exitReturn)); + expect(events.map((e) => e.kind)).toEqual(["return"]); + }); + }); + + describe("buildCallStack virtual frame lifetime (close-after)", () => { + // A single inlined body: entry / body / exit / caller. + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // entry invoke → push virtual dbl + { pc: 1, opcode: "ADD" }, // inlined body instruction + { pc: 2, opcode: "MSTORE" }, // exit return (still inside frame) + { pc: 3, opcode: "JUMPDEST" }, // caller code (frame gone) + ]; + const program = { + instructions: [ + instr(0, entryInvoke), + instr(1, bodyMark), + instr(2, exitReturn), + instr(3, callerMark), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("pushes a virtual frame tagged isInline at the entry", () => { + const stack = buildCallStack(trace, pcToInstruction, 0); + expect(stack).toHaveLength(1); + expect(stack[0].identifier).toBe("dbl"); + expect(stack[0].isInline).toBe(true); + }); + + it("keeps the virtual frame open across the inlined body", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + }); + + it("still shows the frame AT the exit return (close-after)", () => { + const stack = buildCallStack(trace, pcToInstruction, 2); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + }); + + it("pops the frame once execution advances past the return", () => { + const stack = buildCallStack(trace, pcToInstruction, 3); + expect(stack).toHaveLength(0); + }); + }); + + describe("single-op inlined body (co-located invoke+return)", () => { + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // the whole body: invoke+return + 
{ pc: 1, opcode: "JUMPDEST" }, // caller code + ]; + const program = { + instructions: [instr(0, singleOpBody), instr(1, callerMark)], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("shows the virtual frame AT the single body op", () => { + const stack = buildCallStack(trace, pcToInstruction, 0); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + }); + + it("pops after advancing off the body op", () => { + expect(buildCallStack(trace, pcToInstruction, 1)).toHaveLength(0); + }); + }); + + describe("two gap-separated inline sites of the same helper", () => { + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // site 1 entry + { pc: 2, opcode: "MSTORE" }, // site 1 exit + { pc: 3, opcode: "JUMPDEST" }, // caller gap (no inline) + { pc: 10, opcode: "PUSH1" }, // site 2 entry + { pc: 12, opcode: "MSTORE" }, // site 2 exit + { pc: 13, opcode: "JUMPDEST" }, // caller + ]; + const program = { + instructions: [ + instr(0, entryInvoke), + instr(2, exitReturn), + instr(3, callerMark), + instr(10, entryInvoke), + instr(12, exitReturn), + instr(13, callerMark), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("shows depth 1 while inside the second body", () => { + const stack = buildCallStack(trace, pcToInstruction, 3); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + expect(stack[0].stepIndex).toBe(3); + }); + + it("is empty after both sites — no accumulation", () => { + const stack = buildCallStack(trace, pcToInstruction, 5); + expect(stack).toHaveLength(0); + }); + }); + + describe("two ADJACENT inline sites split by the return", () => { + // No caller gap between sites: the return marker (not the + // membership guard, which can't see a boundary between two + // inline-marked instructions) is what closes site 1 before + // site 2 opens. 
+ const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // site 1 entry + { pc: 1, opcode: "MSTORE" }, // site 1 exit + { pc: 2, opcode: "PUSH1" }, // site 2 entry (immediately) + { pc: 3, opcode: "MSTORE" }, // site 2 exit + { pc: 5, opcode: "JUMPDEST" }, // caller + ]; + const program = { + instructions: [ + instr(0, entryInvoke), + instr(1, exitReturn), + instr(2, entryInvoke), + instr(3, exitReturn), + instr(5, callerMark), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("does not merge or accumulate — one frame, rooted at site 2", () => { + const stack = buildCallStack(trace, pcToInstruction, 2); + expect(stack).toHaveLength(1); + expect(stack[0].stepIndex).toBe(2); + }); + + it("is empty after both sites", () => { + expect(buildCallStack(trace, pcToInstruction, 4)).toHaveLength(0); + }); + }); + + describe("marker-keyed dedup", () => { + // A real call and an inlined body of the SAME name on + // consecutive steps must NOT be merged by the caller-JUMP / + // callee-JUMPDEST dedup — they are distinct activations. + const realInvoke = { invoke: { jump: true, identifier: "dbl" } }; + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMP" }, // real invoke of dbl + { pc: 1, opcode: "PUSH1" }, // virtual (inline) invoke of dbl + ]; + const program = { + instructions: [instr(0, realInvoke), instr(1, entryInvoke)], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("keeps a real and a virtual dbl as two separate frames", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack).toHaveLength(2); + expect(stack[0].isInline).toBeFalsy(); + expect(stack[1].isInline).toBe(true); + }); + }); + + describe("nested inlining (double inline marker)", () => { + // Helper A inlined into helper B which is itself inlined: + // A's body instructions are members of both bodies and carry + // transform:["inline","inline"]. 
Two virtual frames stack; the + // inner returns first, leaving the outer. + const entryB = { + transform: ["inline"], + invoke: { jump: true, identifier: "B" }, + }; + const entryA = { + transform: ["inline", "inline"], + invoke: { jump: true, identifier: "A" }, + }; + const exitA = { + transform: ["inline", "inline"], + return: { identifier: "A" }, + }; + const bodyB = { transform: ["inline"] }; // back to just B's body + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // enter B + { pc: 1, opcode: "PUSH1" }, // enter A (inside B) + { pc: 2, opcode: "MSTORE" }, // exit A + { pc: 3, opcode: "ADD" }, // back in B only + ]; + const program = { + instructions: [ + instr(0, entryB), + instr(1, entryA), + instr(2, exitA), + instr(3, bodyB), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("stacks two virtual frames inside the inner body", () => { + const stack = buildCallStack(trace, pcToInstruction, 1); + expect(stack).toHaveLength(2); + expect(stack[0].identifier).toBe("B"); + expect(stack[1].identifier).toBe("A"); + }); + + it("drops to the outer frame after the inner returns", () => { + const stack = buildCallStack(trace, pcToInstruction, 3); + expect(stack).toHaveLength(1); + expect(stack[0].identifier).toBe("B"); + }); + }); + + describe("defensive membership guard", () => { + // A virtual invoke whose exit return never arrives (residual + // smear / dropped marker): the frame must still be torn down + // when execution reaches a non-inline caller instruction, + // rather than leaking to the end of the trace. 
+ const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // virtual invoke → push + { pc: 1, opcode: "ADD" }, // still inside the body + { pc: 3, opcode: "JUMPDEST" }, // caller code, no inline marker + ]; + const program = { + instructions: [ + instr(0, entryInvoke), + instr(1, bodyMark), + instr(3, callerMark), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("keeps the frame while inline membership holds", () => { + expect(buildCallStack(trace, pcToInstruction, 1)).toHaveLength(1); + }); + + it("force-pops a stale virtual frame at a non-inline instr", () => { + expect(buildCallStack(trace, pcToInstruction, 2)).toHaveLength(0); + }); + }); + + describe("real calls (regression: close-after applies uniformly)", () => { + // A real call: caller JUMP + callee JUMPDEST (deduped), then a + // return. The frame is visible at its return step and popped on + // advance — same close-after rule as virtual frames. + const trace: TraceStep[] = [ + { pc: 0, opcode: "JUMP" }, // caller invoke + { pc: 1, opcode: "JUMPDEST" }, // callee entry invoke (dedup) + { pc: 2, opcode: "JUMP" }, // callee return + { pc: 3, opcode: "JUMPDEST" }, // back in caller + ]; + const program = { + instructions: [ + instr(0, { invoke: { jump: true, identifier: "f" } }), + instr(1, { invoke: { jump: true, identifier: "f" } }), + instr(2, { return: { identifier: "f" } }), + instr(3, { code: { source: { id: "0" }, range: {} } }), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("collapses the caller/callee invoke double into one frame", () => { + expect(buildCallStack(trace, pcToInstruction, 1)).toHaveLength(1); + }); + + it("still shows the frame AT its return instruction", () => { + const stack = buildCallStack(trace, pcToInstruction, 2); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBeFalsy(); + }); + + it("pops the real frame on advancing past the return", () => { + 
expect(buildCallStack(trace, pcToInstruction, 3)).toHaveLength(0); + }); + }); + + describe("bracketed emission (post de-smear, #235 shape)", () => { + // The real bracketed shape: invoke on the body's FIRST op, + // return on its LAST op, transform:["inline"] on every op. The + // frame must be visible across the whole body — including the + // return-bearing exit op (close-after) — and gone at the gap. + const entryOp = { + transform: ["inline"], + invoke: { jump: true, identifier: "dbl" }, + }; + const interiorOp = { transform: ["inline"] }; + const exitOp = { transform: ["inline"], return: { identifier: "dbl" } }; + const gapOp = { code: { source: { id: "0" }, range: {} } }; + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // entry op (invoke) + { pc: 1, opcode: "DUP2" }, // interior op + { pc: 2, opcode: "ADD" }, // interior op + { pc: 3, opcode: "MSTORE" }, // exit op (return) + { pc: 4, opcode: "JUMPDEST" }, // gap / caller + ]; + const program = { + instructions: [ + instr(0, entryOp), + instr(1, interiorOp), + instr(2, interiorOp), + instr(3, exitOp), + instr(4, gapOp), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("shows the virtual frame across every body op incl. the exit", () => { + for (const s of [0, 1, 2, 3]) { + const stack = buildCallStack(trace, pcToInstruction, s); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + } + }); + + it("is gone at the gap after the return op", () => { + expect(buildCallStack(trace, pcToInstruction, 4)).toHaveLength(0); + }); + }); + + describe("robustness: legacy SMEARED emission (pre de-smear)", () => { + // Belt-and-suspenders: an older/residual emission where EVERY + // body op carries invoke+return+inline. Close-after must still + // yield exactly one frame per body across all ops (the viewed + // op's co-located return is deferred; prior ops net empty) and + // no accumulation across two gap-separated bodies. 
+ const smearedOp = { + transform: ["inline"], + invoke: { jump: true, identifier: "dbl" }, + return: { identifier: "dbl" }, + }; + const gapOp = { code: { source: { id: "0" }, range: {} } }; + const trace: TraceStep[] = [ + { pc: 0, opcode: "PUSH1" }, // body 1: 3 smeared ops + { pc: 1, opcode: "DUP2" }, + { pc: 2, opcode: "MSTORE" }, + { pc: 3, opcode: "JUMPDEST" }, // gap + { pc: 4, opcode: "PUSH1" }, // body 2: 3 smeared ops + { pc: 5, opcode: "DUP2" }, + { pc: 6, opcode: "MSTORE" }, + { pc: 7, opcode: "JUMPDEST" }, // gap + ]; + const program = { + instructions: [ + instr(0, smearedOp), + instr(1, smearedOp), + instr(2, smearedOp), + instr(3, gapOp), + instr(4, smearedOp), + instr(5, smearedOp), + instr(6, smearedOp), + instr(7, gapOp), + ], + } as unknown as Program; + const pcToInstruction = buildPcToInstructionMap(program); + + it("shows exactly one frame across each smeared body", () => { + for (const s of [0, 1, 2, 4, 5, 6]) { + const stack = buildCallStack(trace, pcToInstruction, s); + expect(stack).toHaveLength(1); + expect(stack[0].isInline).toBe(true); + } + }); + + it("returns to top level at each gap — no accumulation", () => { + expect(buildCallStack(trace, pcToInstruction, 3)).toHaveLength(0); + expect(buildCallStack(trace, pcToInstruction, 7)).toHaveLength(0); + }); + }); +}); diff --git a/packages/programs-react/src/utils/mockTrace.ts b/packages/programs-react/src/utils/mockTrace.ts index 26a912fc7..fe5ac7fb0 100644 --- a/packages/programs-react/src/utils/mockTrace.ts +++ b/packages/programs-react/src/utils/mockTrace.ts @@ -2,7 +2,7 @@ * Utilities for creating mock execution traces. */ -import type { Program } from "@ethdebug/format"; +import { Program } from "@ethdebug/format"; /** * A single step in an execution trace. 
@@ -119,108 +119,188 @@ export interface CallInfo { label: string; pointer: unknown; }>; + /** + * True when a `tailcall` transform is present on the same + * instruction — the call was realized as a tail-call + * (TCO), reusing the current frame rather than nesting. + */ + isTailCall?: boolean; + /** + * True when an `inline` transform is present on the same + * instruction — this invoke/return belongs to an inlined + * (virtual) activation, not a real call. + */ + isInline?: boolean; } /** - * Extract call info (invoke/return/revert) from an - * instruction's context tree. + * Extract compiler `transform` annotation identifiers + * (e.g. "tailcall", "inline") from an instruction's context + * tree, walking gather/pick composites. + */ +export function extractTransformFromInstruction( + instruction: Program.Instruction, +): string[] { + if (!instruction.context) { + return []; + } + return extractTransformFromContext(instruction.context); +} + +function extractTransformFromContext(context: Program.Context): string[] { + if (Program.Context.isTransform(context)) { + return context.transform; + } + + // gather/pick are still key-probed here, matching the + // sibling extractors in this file (a broader guard + // migration is tracked separately). + const ctx = context as unknown as Record; + + if ("gather" in ctx && Array.isArray(ctx.gather)) { + return (ctx.gather as Program.Context[]).flatMap( + extractTransformFromContext, + ); + } + + if ("pick" in ctx && Array.isArray(ctx.pick)) { + return (ctx.pick as Program.Context[]).flatMap(extractTransformFromContext); + } + + return []; +} + +/** + * Extract the primary call event (invoke/return/revert) from an + * instruction's context tree, decorated with transform flags. + * + * A context can legitimately carry BOTH an invoke and a return + * (e.g. a tail-call back-edge, or an inlined body that emits to a + * single instruction). 
This accessor returns just the first event + * for display banners; call-stack reconstruction uses + * {@link extractCallEvents}, which surfaces every event so a + * co-located return is never swallowed by the invoke. */ export function extractCallInfoFromInstruction( instruction: Program.Instruction, ): CallInfo | undefined { + return extractCallEvents(instruction)[0]; +} + +/** + * Extract ALL call events (invoke/return/revert) from an + * instruction's context tree, in document order (invoke before + * return within one context), decorated with the instruction's + * transform flags. Returns [] when there is no call context. + */ +export function extractCallEvents( + instruction: Program.Instruction, +): CallInfo[] { if (!instruction.context) { - return undefined; + return []; } - return extractCallInfoFromContext(instruction.context); + const events = collectCallInfos(instruction.context); + if (events.length === 0) { + return []; + } + const transforms = extractTransformFromContext(instruction.context); + const isTailCall = transforms.includes("tailcall"); + const isInline = transforms.includes("inline"); + if (!isTailCall && !isInline) { + return events; + } + return events.map((e) => ({ + ...e, + ...(isTailCall ? { isTailCall: true } : {}), + ...(isInline ? { isInline: true } : {}), + })); } -function extractCallInfoFromContext( - context: Program.Context, -): CallInfo | undefined { +/** + * Collect the invoke/return/revert events carried by a context + * tree, in order. Invoke precedes return within a single context; + * gather/pick children are visited in sequence. 
+ */ +function collectCallInfos(context: Program.Context): CallInfo[] { // Use unknown intermediate to avoid strict type checks // on the context union — we discriminate by key presence const ctx = context as unknown as Record; + const out: CallInfo[] = []; if ("invoke" in ctx) { - const inv = ctx.invoke as Record; - const pointerRefs: CallInfo["pointerRefs"] = []; - - let callType: CallInfo["callType"]; - if ("jump" in inv) { - callType = "internal"; - collectPointerRef(pointerRefs, "target", inv.target); - collectPointerRef(pointerRefs, "arguments", inv.arguments); - } else if ("message" in inv) { - callType = "external"; - collectPointerRef(pointerRefs, "target", inv.target); - collectPointerRef(pointerRefs, "gas", inv.gas); - collectPointerRef(pointerRefs, "value", inv.value); - collectPointerRef(pointerRefs, "input", inv.input); - } else if ("create" in inv) { - callType = "create"; - collectPointerRef(pointerRefs, "value", inv.value); - collectPointerRef(pointerRefs, "salt", inv.salt); - collectPointerRef(pointerRefs, "input", inv.input); - } - - // Extract argument names from group entries - const argNames = extractArgNamesFromInvoke(inv); - - return { - kind: "invoke", - identifier: inv.identifier as string | undefined, - callType, - argumentNames: argNames, - pointerRefs, - }; + out.push(parseInvoke(ctx.invoke as Record)); } - if ("return" in ctx) { - const ret = ctx.return as Record; - const pointerRefs: CallInfo["pointerRefs"] = []; - collectPointerRef(pointerRefs, "data", ret.data); - collectPointerRef(pointerRefs, "success", ret.success); - - return { - kind: "return", - identifier: ret.identifier as string | undefined, - pointerRefs, - }; + out.push(parseReturn(ctx.return as Record)); } - if ("revert" in ctx) { - const rev = ctx.revert as Record; - const pointerRefs: CallInfo["pointerRefs"] = []; - collectPointerRef(pointerRefs, "reason", rev.reason); - - return { - kind: "revert", - identifier: rev.identifier as string | undefined, - panic: rev.panic 
as number | undefined, - pointerRefs, - }; + out.push(parseRevert(ctx.revert as Record)); } - // Walk gather/pick to find call info - if ("gather" in ctx && Array.isArray(ctx.gather)) { + if (Array.isArray(ctx.gather)) { for (const sub of ctx.gather as Program.Context[]) { - const info = extractCallInfoFromContext(sub); - if (info) { - return info; - } + out.push(...collectCallInfos(sub)); } } - - if ("pick" in ctx && Array.isArray(ctx.pick)) { + if (Array.isArray(ctx.pick)) { for (const sub of ctx.pick as Program.Context[]) { - const info = extractCallInfoFromContext(sub); - if (info) { - return info; - } + out.push(...collectCallInfos(sub)); } } - return undefined; + return out; +} + +function parseInvoke(inv: Record): CallInfo { + const pointerRefs: CallInfo["pointerRefs"] = []; + + let callType: CallInfo["callType"]; + if ("jump" in inv) { + callType = "internal"; + collectPointerRef(pointerRefs, "target", inv.target); + collectPointerRef(pointerRefs, "arguments", inv.arguments); + } else if ("message" in inv) { + callType = "external"; + collectPointerRef(pointerRefs, "target", inv.target); + collectPointerRef(pointerRefs, "gas", inv.gas); + collectPointerRef(pointerRefs, "value", inv.value); + collectPointerRef(pointerRefs, "input", inv.input); + } else if ("create" in inv) { + callType = "create"; + collectPointerRef(pointerRefs, "value", inv.value); + collectPointerRef(pointerRefs, "salt", inv.salt); + collectPointerRef(pointerRefs, "input", inv.input); + } + + return { + kind: "invoke", + identifier: inv.identifier as string | undefined, + callType, + argumentNames: extractArgNamesFromInvoke(inv), + pointerRefs, + }; +} + +function parseReturn(ret: Record): CallInfo { + const pointerRefs: CallInfo["pointerRefs"] = []; + collectPointerRef(pointerRefs, "data", ret.data); + collectPointerRef(pointerRefs, "success", ret.success); + return { + kind: "return", + identifier: ret.identifier as string | undefined, + pointerRefs, + }; +} + +function parseRevert(rev: 
Record): CallInfo { + const pointerRefs: CallInfo["pointerRefs"] = []; + collectPointerRef(pointerRefs, "reason", rev.reason); + return { + kind: "revert", + identifier: rev.identifier as string | undefined, + panic: rev.panic as number | undefined, + pointerRefs, + }; } function extractArgNamesFromInvoke( @@ -274,6 +354,29 @@ export interface CallFrame { argumentNames?: string[]; /** Individual argument pointers for value resolution */ argumentPointers?: unknown[]; + /** + * True when this frame was (re)entered via a tail call + * (TCO). The frame was reused in place rather than nested. + */ + isTailCall?: boolean; + /** + * True when this frame is a VIRTUAL activation reconstructed + * from an inlined body (transform:["inline"]) rather than a + * real call. Its instructions were spliced into the caller; + * no JUMP occurred. + */ + isInline?: boolean; +} + +/** + * Determine the call type of a raw invoke record from its + * discriminant key. + */ +function invokeCallType(inv: Record): CallFrame["callType"] { + if ("jump" in inv) return "internal"; + if ("message" in inv) return "external"; + if ("create" in inv) return "create"; + return undefined; } /** @@ -294,49 +397,108 @@ export function buildCallStack( continue; } - const callInfo = extractCallInfoFromInstruction(instruction); - if (!callInfo) { + // Per-instruction inline membership drives the defensive + // guard below: an inlined body's instructions all carry + // transform:["inline"] (nested inlining stacks the marker), so + // the count bounds how many virtual frames may legitimately be + // open on this instruction. + const transforms = extractTransformFromInstruction(instruction); + const inlineCount = transforms.filter((t) => t === "inline").length; + + if (transforms.includes("tailcall")) { + // A TCO back-edge carries both return and invoke on a + // single instruction: the previous iteration returns + // and the next iteration is invoked, reusing the same + // activation. 
Replace the top frame in place (depth is + // unchanged) rather than popping then pushing. Pull the + // new iteration's identity from the invoke leaf, since + // the return leaf may be surfaced first. + const ctx = instruction.context as Record; + const inv = findInvokeField(ctx); + const argResult = extractArgInfo(instruction); + const invId = inv?.identifier as string | undefined; + const frame: CallFrame = { + identifier: invId, + stepIndex: i, + callType: inv ? invokeCallType(inv) : undefined, + argumentNames: argResult?.names, + argumentPointers: argResult?.pointers, + isTailCall: true, + }; + if (stack.length > 0) { + stack[stack.length - 1] = frame; + } else { + stack.push(frame); + } continue; } - if (callInfo.kind === "invoke") { - // The compiler emits invoke on both the caller JUMP - // and callee entry JUMPDEST for the same call. These - // occur on consecutive trace steps. Only skip if the - // top frame matches AND was pushed on the immediately - // preceding step — otherwise this is a new call (e.g. - // recursion with the same function name). - const top = stack[stack.length - 1]; - const isDuplicate = - top && - top.identifier === callInfo.identifier && - top.callType === callInfo.callType && - top.stepIndex === i - 1; - if (isDuplicate) { - // Use the callee entry step for resolution — - // the argument pointers reference stack slots - // that are valid at the JUMPDEST, not the JUMP. - // Argument names also live on the callee entry. - const argResult = extractArgInfo(instruction); - top.stepIndex = i; - top.argumentNames = argResult?.names ?? 
top.argumentNames; - top.argumentPointers = argResult?.pointers; - } else { - const argResult = extractArgInfo(instruction); - stack.push({ - identifier: callInfo.identifier, - stepIndex: i, - callType: callInfo.callType, - argumentNames: argResult?.names, - argumentPointers: argResult?.pointers, - }); - } - } else if (callInfo.kind === "return" || callInfo.kind === "revert") { - // Pop the matching frame - if (stack.length > 0) { - stack.pop(); + // A context may carry more than one event (invoke + return), + // e.g. an inlined body that emits to a single instruction. + // Process them in order: an invoke opens a frame INCLUSIVE of + // its instruction; a return closes it AFTER its instruction + // (close-after) — so the frame is still shown while parked on + // the return-bearing instruction and popped only on advance. + for (const event of extractCallEvents(instruction)) { + if (event.kind === "invoke") { + // The compiler emits invoke on both the caller JUMP and + // callee entry JUMPDEST for a REAL call, on consecutive + // steps — collapse that double. Key the dedup on the + // inline marker so a virtual invoke never merges with an + // adjacent real invoke of the same name (and vice versa). + const top = stack[stack.length - 1]; + const isDuplicate = + top && + top.identifier === event.identifier && + top.callType === event.callType && + top.stepIndex === i - 1 && + !!top.isInline === !!event.isInline; + if (isDuplicate) { + // Use the callee entry step for resolution — argument + // pointers/names live on the JUMPDEST, not the JUMP. + const argResult = extractArgInfo(instruction); + top.stepIndex = i; + top.argumentNames = argResult?.names ?? 
top.argumentNames; + top.argumentPointers = argResult?.pointers; + } else { + const argResult = extractArgInfo(instruction); + stack.push({ + identifier: event.identifier, + stepIndex: i, + callType: event.callType, + argumentNames: argResult?.names, + argumentPointers: argResult?.pointers, + // Tag virtual activations so the widget can render + // them distinctly from real calls. + ...(event.isInline ? { isInline: true } : {}), + }); + } + } else if (event.kind === "return" || event.kind === "revert") { + // close-after: defer the pop until we advance past this + // step, so the frame is visible AT its return instruction. + if (i < upToStep && stack.length > 0) { + stack.pop(); + } } } + + // Defensive membership guard: virtual frames beyond the + // instruction's inline-marker count are stale — belt-and- + // suspenders against a dropped or incomplete virtual return so + // a phantom activation can never leak into caller code (or + // linger after an inner inlined body has ended). + let trailingVirtual = 0; + for (let k = stack.length - 1; k >= 0 && stack[k].isInline; k--) { + trailingVirtual++; + } + while ( + trailingVirtual > inlineCount && + stack.length > 0 && + stack[stack.length - 1].isInline + ) { + stack.pop(); + trailingVirtual--; + } } return stack; diff --git a/packages/web/docs/core-schemas/programs/tracing-examples.ts b/packages/web/docs/core-schemas/programs/tracing-examples.ts index 08cb5f8df..a6c67fc5d 100644 --- a/packages/web/docs/core-schemas/programs/tracing-examples.ts +++ b/packages/web/docs/core-schemas/programs/tracing-examples.ts @@ -96,3 +96,45 @@ create { code { result = isEven(4); }`; + +export const tailRecursiveSum = `name TailSum; + +define { + function sum(n: uint256, acc: uint256) -> uint256 { + if (n == 0) { return acc; } + else { return sum(n - 1, acc + n); } + }; +} + +storage { + [0] result: uint256; +} + +create { + result = 0; +} + +code { + result = sum(5, 0); +}`; + +export const tailRecursiveFactorial = `name 
TailFactorial; + +define { + function fact(n: uint256, acc: uint256) -> uint256 { + if (n == 0) { return acc; } + else { return fact(n - 1, acc * n); } + }; +} + +storage { + [0] result: uint256; +} + +create { + result = 0; +} + +code { + result = fact(5, 1); +}`; diff --git a/packages/web/docs/core-schemas/programs/tracing.mdx b/packages/web/docs/core-schemas/programs/tracing.mdx index a22c6551d..d901224fb 100644 --- a/packages/web/docs/core-schemas/programs/tracing.mdx +++ b/packages/web/docs/core-schemas/programs/tracing.mdx @@ -11,6 +11,8 @@ import { multipleStorageSlots, functionCallAndReturn, mutualRecursion, + tailRecursiveSum, + tailRecursiveFactorial, } from "./tracing-examples"; # Tracing execution @@ -245,6 +247,76 @@ code instead of (or alongside) a reason pointer: }`} +## Tail-call optimization + +The recursion examples above push a new frame for every call — step +through them and watch the call stack grow. A compiler can often avoid +that. When a recursive call sits in **tail position** — its result is +returned directly, with no further work after it — the compiler can +reuse the current frame instead of pushing a new one. This is +**tail-call optimization** (TCO), and it turns recursion into a loop. + +The two programs below are written so bugc's optimizer can apply TCO. Each +accumulates its result in an `acc` parameter and hands it to the next +call in tail position, so no work is left pending on the stack. + +Use the **Opt** selector in the trace drawer to set the optimization +level. Compile at **O0** (no optimization) and again at **O2** +(optimizations on, including TCO), then step through and compare the +call stack. TCO kicks in at **O2**. + + + + + +At **O0**, each `sum`/`fact` call is a real invoke/return pair and the +call stack grows one frame per iteration. At **O2**, the recursive +call becomes a **back-edge**: a single JUMP that ends one iteration and +begins the next without pushing a frame. The call stack stays flat.
+ +That one JUMP carries three facts at once, composed as sibling keys on a +single context — the flat form described on the +[transform context](/spec/program/context/transform) page: + + + {`{ + "return": { + "identifier": "sum" + }, + "invoke": { + "jump": true, + "identifier": "sum", + "target": { + "pointer": { "location": "code", "offset": "0x33", "length": 1 } + } + }, + "transform": ["tailcall"] +}`} + + +The `return` and `invoke` state the source-level facts — the previous +iteration returned, the next was invoked — and `transform: ["tailcall"]` +explains how the compiler realized that pair as one JUMP. Because no +value crosses a frame boundary here, the `return` carries no `data` and +the `invoke` no `arguments`: the accumulator is threaded through the +loop directly, and the invoke `target` points at the loop header the +JUMP re-enters. A debugger that ignores the transform still reads a +coherent invoke/return sequence; one that understands it can show that +the call stack isn't really growing, and present the recursion as the +loop it compiled to. + ## Trace data structure A trace step captures the EVM state at a single point: diff --git a/packages/web/spec/program/context/function/invoke.mdx b/packages/web/spec/program/context/function/invoke.mdx index b249a1a0a..76da58aff 100644 --- a/packages/web/spec/program/context/function/invoke.mdx +++ b/packages/web/spec/program/context/function/invoke.mdx @@ -51,11 +51,49 @@ caller's JUMP has already consumed the destination from the stack, so pointer slot values reflect the post-JUMP layout. The target points to a code location and arguments are passed on the stack. +The `target` field is optional. It may be omitted when there is no +meaningful code pointer to record — most notably at the first +instruction of an inlined function body, where the inlining pass +has elided the JUMP that would normally carry the target. 
The +callee identity (`identifier`, `declaration`, `type`) remains +meaningful in this case; a sibling `transform: ["inline"]` key +on the same context indicates that the call was inlined rather +than physically invoked. + +### Inlined internal calls + +When the compiler inlines a callee, there is no JUMP and no +runtime activation record: the callee's instructions are spliced +directly into the caller. The call still happened at the source +level, so it is still marked with an invoke context — one that +describes the _kind_ of call without a physical target: + +- `jump: true` marks the invocation as an **internal call kind** + (as opposed to a message call or contract creation). It does + **not** assert that a JUMP instruction executes here — an + inlined call has none. +- `target` is omitted: there is no code location to point at, + because the JUMP that would carry it was elided. +- a sibling `transform: ["inline"]` key marks the instruction as + belonging to an inlined body. + +The callee identity (`identifier`, `declaration`, `type`, all +optional) is preserved, so the inlined function still appears on +the debugger's call stack: the debugger reconstructs a **virtual +activation** for it (see +[Reconstructing activations](#reconstructing-activations)). + +Compilers typically inline small or leaf non-recursive callees at +every call site, so the _same_ callee can produce several +independent virtual activations across a trace — one per inlined +site. (The precise eligibility rule is a compiler choice; the +format is the same however inlining decisions are made.) + ## External call An external call represents a call to another contract via CALL, @@ -77,3 +115,135 @@ presence of `salt` implies CREATE2. schema={{ id: "schema:ethdebug/format/program/context/function/invoke" }} pointer="#/$defs/ContractCreation" /> + +## Reconstructing activations + +A debugger reconstructs the logical call stack from `invoke` and +`return` contexts. 
Each entry on that stack is an **activation** +(the DWARF term for a call-stack entry). Activation handling is +uniform whether or not the call was inlined: + +- **Push** an activation when an `invoke` context is encountered + and **pop** it when the matching `return` context is + encountered, in trace order. The `invoke` opens the activation + inclusive of its instruction; the `return` closes it after its + instruction, so the instruction bearing `return` is still inside + the activation. + +Because push/pop is driven by where the `invoke` and `return` +contexts sit, a compiler must emit them as a **bracket**: the +`invoke` on the first instruction of the body and the `return` on +its last. A compiler must **not** duplicate `invoke` or `return` +across a body's interior instructions — repeating them would push +or pop spurious activations. The sole exception is a body that +compiles to a **single instruction**, whose entry and exit +coincide: that one instruction legitimately carries both `invoke` +and `return`, and a debugger processes them in order (push, then +pop). This bracket rule is what keeps the guarantee below — +that a debugger ignoring `transform` still sees a coherent +`invoke`/`return` pair — true for inlined calls. + +An inlined callee therefore appears on the call stack exactly as a +non-inlined one does. Two kinds of activation differ only in how +they are backed, distinguished by the presence of an `inline` +transform marker — **not** by whether `target` is present: + +- A **real activation** comes from an `invoke` **without** an + `inline` transform marker. It corresponds to an actual call at + runtime, corroborated by machine state — a return address on the + EVM stack — and occupies a real stack region. +- A **virtual activation** comes from an `invoke` whose context + carries `transform: ["inline"]` (an `inline` identifier in its + transform list). 
It has **no runtime corroboration** and occupies + no EVM stack region; it exists only in the debug annotations. Its + `target` is typically omitted (the JUMP was elided), but + `target`-absence is not itself the signal — a real internal call + may also omit `target` (see [Internal call](#internal-call)). The + reliable discriminator is the `inline` marker. + +### Activation membership + +Push/pop and membership answer two different questions. Push/pop +(above) determines **when** a virtual activation is open — its +lifetime on the call stack. Membership determines **which** open +activation a given instruction belongs to. The two are +independent, and a debugger uses both. + +An instruction belongs to the innermost open virtual activation if +and only if its context carries an `inline` identifier in its +transform list — so composed markers such as `["inline", "fold"]` +still confer membership. The nesting depth is the number of +`"inline"` occurrences in the list (doubly-inlined code carries +`["inline", "inline"]`). Membership is determined +per-instruction from this marker, **not** from instruction ranges: +optimization passes may relocate or interleave an inlined body, so +a positional "everything between the invoke and the return" rule +would be unsound. + +This is why membership is separate from lifetime. An activation +opened by an `invoke` stays open until its `return`, even across +instructions that are **not** its members — for example, caller +code an optimizer interleaved into the body's trace span. Such a +non-member instruction (no `inline` marker for that depth) is +attributed to the enclosing activation, not the inlined one, even +while the virtual activation remains on the stack. + +### Correlating with `name` + +The push/pop and membership rules above reconstruct activations +without any correlation identifier: they rely on `invoke`/`return` +appearing in a well-nested order and on the `inline` marker to +attribute instructions. 
That is sufficient for typical compiler +output. It has two blind spots, both arising because the marker +alone can't tell one activation from another of the same function: + +- **Adjacent activations of the same function.** Two inlined + copies of the same callee placed back-to-back, with no + intervening caller instruction, read as one activation to a + debugger that groups a consecutive run of `inline`-marked + instructions. +- **Reordered or interleaved bodies.** An optimizer that moves a + `return` marker ahead of its `invoke`, or interleaves two + activations non-nested, defeats strict push/pop pairing. + +A [`name`](/spec/program/context/name) closes both. When an +`invoke` context carries a `name`, it **declares** that activation; +the matching `return`, and the body instructions that belong to it, +carry the same `name` to **reference** it. Because each name is +declared by exactly one `invoke`, the pairing is explicit and +order-independent: adjacent same-function activations have distinct +names, and a reference resolves to its declaration regardless of +trace order. + +When names are present they are **authoritative** for activation +structure — which `invoke` pairs with which `return`, and which +instructions belong to which activation. Push/pop, the `inline` +marker, and the marker-count depth remain the fallback a name-less +debugger uses; in well-nested output the two agree. Where they +cannot — the two blind spots above — the names are correct. A +compiler that emits names should therefore keep them consistent +with the push/pop structure wherever both are determinate, so the +two views never silently disagree. + +### Identity and values + +Every function-identity field (`identifier`, `declaration`, +`type`) is optional, so a virtual activation degrades gracefully — +from full identity down to an anonymous inlined frame — with no +fabricated data. A debugger renders whatever is present. 
+ +An inlining compiler typically preserves the callee's declaration +and per-instruction source ranges for a virtual activation, and can +resolve inlined locals that it homed in addressable memory, via +[`variables`](/spec/program/context/variables) contexts. Identity +fields remain optional and degrade gracefully as described above. +Such a compiler does **not** emit `invoke.arguments` or +`return.data` pointers in this first version; individual parameter +values may still be inspectable as locals inside the body where +they are memory-homed. A virtual activation with no resolvable +values is still a valid, displayable frame. + +A debugger that ignores `transform` contexts still sees a coherent +`invoke`/`return` pair and a sound source-level call stack. One +that understands them can present virtual activations distinctly — +for example, collapsible and tied to the callee's source location. diff --git a/packages/web/spec/program/context/gather.mdx b/packages/web/spec/program/context/gather.mdx index eb9cc3613..3e3a90b59 100644 --- a/packages/web/spec/program/context/gather.mdx +++ b/packages/web/spec/program/context/gather.mdx @@ -6,6 +6,35 @@ import SchemaViewer from "@site/src/components/SchemaViewer"; # Gather multiple contexts +A `gather` context asserts that every one of its child contexts +holds at the marked instruction. It is the tool for composing +multiple context facts that cannot coexist as sibling keys on a +single object. + + +## When to use `gather` + +The context schema is open: a single context object may carry +any number of discriminator keys together — `code`, `variables`, +`invoke`, `return`, `transform`, and so on all compose as +siblings on the same object. Prefer the flat form when it +works. + +Reach for `gather` only when two or more facts would collide on +the same key. 
The canonical cases are: + +- **Multiple [`frame`](/spec/program/context/frame)s** — an instruction + that maps + simultaneously to an IR step and a source step needs one + entry per frame, each with its own `code` range. +- **Multiple `variables` blocks** — when separate pipeline + passes each contribute variable information (e.g., one + names the variable, the other supplies its pointer), each + set lives in its own context. + +If every child context uses a different discriminator key, a +`gather` can be collapsed into a single flat object with the +same meaning — and that flat form is the preferred style. diff --git a/packages/web/spec/program/context/name.mdx b/packages/web/spec/program/context/name.mdx index 31fd7fb17..7a247d5d6 100644 --- a/packages/web/spec/program/context/name.mdx +++ b/packages/web/spec/program/context/name.mdx @@ -6,9 +6,58 @@ import SchemaViewer from "@site/src/components/SchemaViewer"; # Named contexts -Contexts may include a `name` property for distinguishing them from -other contexts. This is particularly useful inside `pick` alternatives, -where several possible contexts may apply at a given point in execution -and runtime information is needed to select which one is active. +A context may carry a `name`: a machine-generated identifier that gives +the context a stable identity other contexts can reference. A name is +what makes a **cross-context reference** possible — one context declares +a name, and another points back to it by the same name. + +Names are opaque strings; the format imposes no structure on them. +Within a single program — one [`instructions`](/spec/program) +sequence — each name **must** be declared by exactly one context; no +two contexts may declare the same name. Other contexts may reference +that name freely — that repetition is how they point back — and every +reference resolves to the single declaring context. Compilers +**should** also choose names that are meaningful to debugger users. 
+ +## Uses + +### Selecting `pick` alternatives + +Inside a [`pick`](/spec/program/context/pick), several contexts may apply +at a given point in execution and runtime information is needed to select +which one is active. A `name` on each alternative gives the selection +a stable handle for the chosen alternative. + +### Correlating an invocation with its return + +A `name` lets a function invocation and its return be paired directly. +An [`invoke`](/spec/program/context/function/invoke) context **declares** +an activation's name; the matching +[`return`](/spec/program/context/function/return) context — and the +instructions belonging to that activation's body — **reference** it by +the same name. + +This declaration/reference split follows the format's general +reference-by-name idiom (as a +[pointer template](/spec/pointer/collection/templates) is declared +once and referenced elsewhere). It pairs a call with its return +without relying on the trace +being strictly nested: even when optimization reorders or interleaves +code so that a naive "innermost open activation" rule would mispair +them, the shared name resolves the pairing unambiguously. When two +inlined copies of the same function appear back-to-back, their distinct +names keep them distinct activations. + +Because a single context object can hold at most one `name`, two +activation facts that must carry **different** names at the same +instruction — for example a tail call, where one instruction both +returns from the current activation and invokes the next — are expressed +with a [`gather`](/spec/program/context/gather) whose members each carry +their own name. The naming granularity therefore tracks the structure of +the contexts themselves. + +See the invoke context's +[Reconstructing activations](/spec/program/context/function/invoke#reconstructing-activations) +for how a debugger uses these names to rebuild the call stack. 
diff --git a/packages/web/spec/program/context/transform.mdx b/packages/web/spec/program/context/transform.mdx new file mode 100644 index 000000000..1a784a3a7 --- /dev/null +++ b/packages/web/spec/program/context/transform.mdx @@ -0,0 +1,141 @@ +--- +sidebar_position: 8 +--- + +import SchemaViewer from "@site/src/components/SchemaViewer"; + +# Transform contexts + +A transform context annotates an instruction with the compiler +transformations that produced it. The value is a list of short +identifiers; the list may repeat the same identifier when the +transformation has been applied multiple times—for example, +doubly-inlined code carries `transform: ["inline", "inline"]`. + + + +## Role: additional annotation + +A transform context does not replace semantic contexts. When the +compiler inlines a function, the caller's debug info should still +carry invoke/return contexts naming the inlined callee at the +call boundary—so the debugger's logical call stack reflects the +source-level structure. The transform context is _additional_ +information telling the debugger **how** the call was realized. + +Consumers are free to ignore transform contexts entirely; the +invoke/return contexts alone always give a sound source-level +view. Consumers that understand transform contexts can offer +optimization-aware presentations: + +- Render inlined code as a collapsible block tied to the + original callee's source location. +- Show which call sites were tail-call-optimized vs. realized as + full call/return sequences. +- Explain apparent anomalies in the trace (e.g., a JUMP that + carries an invoke context is a TCO back-edge). + +## v1 identifiers + +Four identifiers are recognized in v1: + +- **`"inline"`** — the marked instruction is part of an inlined + function body. 
A surrounding `invoke`/`return` pair names the + inlined callee, and a debugger reconstructs a _virtual + activation_ for it (see + [inlined internal calls](/spec/program/context/function/invoke#inlined-internal-calls)). + This marker tells the debugger the physical code has no separate + runtime activation record. +- **`"tailcall"`** — the marked instruction is a + tail-call-optimized back-edge JUMP or continuation, where the + call was realized without pushing/popping a full activation. + A JUMP carrying a `tailcall` transform typically sits on a + context that also carries both a `return` (from the previous + iteration) and an `invoke` (of the new iteration). +- **`"fold"`** — the marked instruction carries the result of + a compile-time constant fold. Typically a PUSH of the folded + value replacing a compute sequence (e.g., `ADD` over two + known constants) that appeared in source. The instruction's + surrounding `code` context, if present, points to the + original expression. +- **`"coalesce"`** — the marked instruction is part of a + read-write merging sequence the compiler introduced to + combine adjacent source-level reads or writes. Common + examples include SHL/OR sequences that pack narrower fields + into a single storage slot, or wider loads split into + narrower field extractions. The user did not write these + instructions directly; the `coalesce` marker lets a debugger + present the sequence as one source-level operation rather + than stepping through each byte-shuffling opcode. + +The identifier set is extensible. Compilers may emit additional +identifiers for optimizations not yet standardized; debuggers +should preserve unfamiliar identifiers as opaque labels rather +than rejecting them. + +## Repetition and composition + +Identifiers may repeat. A function inlined into another inlined +function produces `transform: ["inline", "inline"]`. A coalesce +sequence nested inside another coalesced region produces +`transform: ["coalesce", "coalesce"]`. 
+ +Different transformations compose: +`transform: ["inline", "tailcall"]` marks an instruction inside +an inlined body that was itself a TCO back-edge in the callee; +`transform: ["inline", "fold"]` marks a constant-folded PUSH +sitting inside an inlined body. + +Order in the array is not semantically significant—only the +multiset of identifiers matters. + +## Composing with other contexts + +A context object can carry several discriminator keys at once — +`code`, `variables`, `invoke`, `return`, `transform`, and so on +all live in the same object. A TCO back-edge JUMP, for example, +typically combines three facts as sibling keys on a single +context: + +```yaml +return: + identifier: "fact" + declaration: { ... } +invoke: + jump: true + identifier: "fact" + target: { pointer: { location: code, offset: ... } } +transform: ["tailcall"] +``` + +The `return` and `invoke` state the source-level facts +(iteration N returned, iteration N+1 was invoked); the +`transform` explains how the compiler realized that pair as a +single JUMP. + +An inlined call site combines an invoke with an inline transform. +The invoke marks the call kind with `jump: true` but omits +`target`, because the JUMP was elided: + +```yaml +invoke: + jump: true + identifier: "square" + declaration: { ... } +transform: ["inline"] +``` + +Each instruction of the inlined body also carries +`transform: ["inline"]`, and a matching `return` closes the +[virtual activation](/spec/program/context/function/invoke#reconstructing-activations). +A small helper inlined at several call sites produces one such +`invoke`/`return` pair — and one virtual activation — per site. + +Reach for [`gather`](/spec/program/context/gather) only when +two contexts would collide on the same key — e.g., two +independent `variables` blocks or two +[`frame`](/spec/program/context/frame)s from different +pipeline stages. When keys don't collide, the flat form is +preferred. 
diff --git a/packages/web/src/schemas.ts b/packages/web/src/schemas.ts index 84ba8c701..31496043a 100644 --- a/packages/web/src/schemas.ts +++ b/packages/web/src/schemas.ts @@ -228,7 +228,16 @@ const programSchemaIndex: SchemaIndex = { href: "/spec/program/context", }, - ...["name", "code", "variables", "remark", "pick", "gather", "frame"] + ...[ + "name", + "code", + "variables", + "remark", + "pick", + "gather", + "frame", + "transform", + ] .map((name) => ({ [`schema:ethdebug/format/program/context/${name}`]: { href: `/spec/program/context/${name}`, diff --git a/packages/web/src/theme/ProgramExample/CallInfoPanel.css b/packages/web/src/theme/ProgramExample/CallInfoPanel.css index 75cd06511..b2835861c 100644 --- a/packages/web/src/theme/ProgramExample/CallInfoPanel.css +++ b/packages/web/src/theme/ProgramExample/CallInfoPanel.css @@ -27,6 +27,12 @@ border-left: 3px solid var(--programs-revert-accent, #cf222e); } +.call-info-banner-tailcall { + background: var(--programs-transform-bg, #f3ecff); + color: var(--programs-transform-text, #8250df); + border-left: 3px solid var(--programs-transform-accent, #a475f9); +} + .call-info-refs { display: flex; flex-direction: column; diff --git a/packages/web/src/theme/ProgramExample/CallStackDisplay.css b/packages/web/src/theme/ProgramExample/CallStackDisplay.css index 9143b8d76..90afee044 100644 --- a/packages/web/src/theme/ProgramExample/CallStackDisplay.css +++ b/packages/web/src/theme/ProgramExample/CallStackDisplay.css @@ -48,3 +48,15 @@ .call-stack-parens { color: var(--programs-text-muted, #888); } + +.call-stack-tailcall { + margin-left: 4px; + padding: 0 5px; + border-radius: 8px; + font-size: 0.8em; + font-weight: 500; + white-space: nowrap; + background: var(--programs-transform-bg, #f3ecff); + color: var(--programs-transform-text, #8250df); + border: 1px solid var(--programs-transform-accent, #a475f9); +} diff --git a/packages/web/src/theme/ProgramExample/TraceDrawer.css 
b/packages/web/src/theme/ProgramExample/TraceDrawer.css index 9f36d24c0..401a318f1 100644 --- a/packages/web/src/theme/ProgramExample/TraceDrawer.css +++ b/packages/web/src/theme/ProgramExample/TraceDrawer.css @@ -25,6 +25,61 @@ overflow: hidden; } +/* Header actions */ +.trace-drawer-actions { + display: flex; + align-items: center; + gap: 10px; +} + +.opt-level-toggle { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 2px 6px 2px 8px; + border-radius: 6px; + background: var(--ifm-color-emphasis-100); +} + +.opt-level-label { + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--ifm-color-content-secondary); + margin-right: 2px; +} + +.opt-level-btn { + padding: 2px 8px; + border: 1px solid var(--ifm-color-emphasis-300); + border-radius: 4px; + background: var(--ifm-background-color); + color: var(--ifm-color-content-secondary); + font-size: 12px; + font-weight: 600; + font-family: var(--ifm-font-family-monospace); + cursor: pointer; + transition: + background 0.15s, + color 0.15s; +} + +.opt-level-btn:hover:not(:disabled) { + background: var(--ifm-color-emphasis-200); +} + +.opt-level-btn.active { + background: var(--ifm-color-primary); + border-color: var(--ifm-color-primary); + color: white; +} + +.opt-level-btn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + /* Header buttons */ .trace-drawer-btn { padding: 6px 14px; @@ -204,11 +259,45 @@ border-left: 3px solid var(--ifm-color-danger); } +/* Transform / tail-call accent (no ifm purple semantic, so + a theme-tolerant purple tint with content-colored text). 
*/ +.call-info-tailcall { + background: rgba(130, 80, 223, 0.12); + color: var(--ifm-color-content); + border-left: 3px solid #8250df; +} + +.call-stack-tailcall, +.call-stack-inline { + margin-left: 4px; + padding: 0 5px; + border-radius: 8px; + font-size: 10px; + font-weight: 600; + white-space: nowrap; + background: rgba(130, 80, 223, 0.15); + color: var(--ifm-color-content); + border: 1px solid rgba(130, 80, 223, 0.45); +} + +/* Virtual (inline) activations read as "not a real frame": dashed + border + italic, sharing the transform purple tint. */ +.call-stack-inline { + border-style: dashed; + font-style: italic; +} + /* Trace panels */ .trace-panels { display: grid; grid-template-columns: 1fr 1fr; + /* Single row that fills the flex height, so the panels + absorb vertical space when the drawer is drag-expanded + (an `auto` row would size to content and leave dead + space below). */ + grid-template-rows: 1fr; flex: 1; + min-height: 0; overflow: hidden; gap: 1px; background: var(--ifm-color-emphasis-200); @@ -216,6 +305,9 @@ .trace-panel { background: var(--ifm-background-color); + /* min-height:0 lets the grid item shrink so its own + overflow scrolls instead of expanding the grid. 
*/ + min-height: 0; overflow: auto; } @@ -248,6 +340,72 @@ color: var(--ifm-color-primary-darkest); } +.opcode-gas { + margin-left: auto; + font-size: 11px; + font-variant-numeric: tabular-nums; + color: var(--ifm-color-content-secondary); +} + +/* Collapsible right-column sections */ +.trace-section { + border-bottom: 1px solid var(--ifm-color-emphasis-200); +} + +.trace-section-summary { + padding: 6px 12px; + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--ifm-color-content-secondary); + background: var(--ifm-background-surface-color); + cursor: pointer; + user-select: none; + list-style-position: inside; +} + +.trace-section-summary:hover { + color: var(--ifm-color-content); +} + +/* Transform annotations */ +.transform-list { + padding: 6px 12px; + display: flex; + flex-direction: column; + gap: 6px; +} + +.transform-item { + display: flex; + align-items: baseline; + gap: 8px; + font-size: 12px; +} + +.transform-tag { + flex-shrink: 0; + padding: 0 6px; + border-radius: 8px; + font-family: var(--ifm-font-family-monospace); + font-weight: 600; + font-size: 11px; + color: var(--ifm-color-content); + background: rgba(130, 80, 223, 0.15); + border: 1px solid rgba(130, 80, 223, 0.45); +} + +.transform-gloss { + color: var(--ifm-color-content-secondary); +} + +.variable-value { + font-family: var(--ifm-font-family-monospace); + color: var(--ifm-color-content); + word-break: break-all; +} + .current-opcode .opcode-pc { font-size: 12px; color: var(--ifm-color-content-secondary); diff --git a/packages/web/src/theme/ProgramExample/TraceDrawer.tsx b/packages/web/src/theme/ProgramExample/TraceDrawer.tsx index cd9b37416..78023423b 100644 --- a/packages/web/src/theme/ProgramExample/TraceDrawer.tsx +++ b/packages/web/src/theme/ProgramExample/TraceDrawer.tsx @@ -25,6 +25,15 @@ import { } from "@ethdebug/bugc-react"; import { Executor, createTraceCollector, type TraceStep } from "@ethdebug/evm"; import { dereference, 
Data, type Machine } from "@ethdebug/pointers"; +import { + buildCallStack, + extractCallInfoFromInstruction, + extractTransformFromInstruction, + type CallFrame, + type CallInfo, + type TraceStep as ProgramsTraceStep, +} from "@ethdebug/programs-react"; +import type { Program } from "@ethdebug/format"; import { Drawer } from "@theme/Drawer"; import { useTracePlayground } from "./TracePlaygroundContext"; @@ -42,6 +51,16 @@ interface CompileResult { bytecode?: BytecodeOutput; } +/** bugc optimizer levels the tracer can compile at. */ +type OptLevel = 0 | 1 | 2 | 3; +const OPT_LEVELS: readonly OptLevel[] = [0, 1, 2, 3]; +const OPT_LEVEL_TITLES: Record = { + 0: "No optimization", + 1: "Level 1 — constant folding, propagation, dead-code elimination", + 2: "Level 2 — adds CSE, tail-call optimization, jump optimization", + 3: "Level 3 — adds block/return/read-write merging", +}; + function TraceDrawerContent(): JSX.Element { const { example, isOpen, toggleDrawer, closeDrawer, setSource } = useTracePlayground(); @@ -56,6 +75,14 @@ function TraceDrawerContent(): JSX.Element { const [isTracing, setIsTracing] = useState(false); const [traceError, setTraceError] = useState(null); const [storage, setStorage] = useState>({}); + // Optimizer level the tracer compiles at. Readers flip + // 0 ↔ 2 to watch optimizer transforms (e.g. the tailcall + // annotation on TCO back-edges) appear. A ref mirrors it + // so the example-load effect can read the current value + // without re-running when only the level changes. 
+ const [optimizerLevel, setOptimizerLevel] = useState(0); + const optimizerLevelRef = useRef(optimizerLevel); + optimizerLevelRef.current = optimizerLevel; // Build PC -> instruction map for source highlighting const pcToInstruction = useMemo(() => { @@ -92,77 +119,52 @@ function TraceDrawerContent(): JSX.Element { return extractVariables(instruction.debug.context); }, [trace, currentStep, pcToInstruction]); - // Extract call info from current instruction context - const currentCallInfo = useMemo(() => { - if (trace.length === 0 || currentStep >= trace.length) { - return undefined; + // Adapt the bugc instruction map + evm trace to the shared + // programs-react call-stack helpers, which read the + // ethdebug format shape (instruction.context) and a {pc} + // trace. This lets the drawer reuse the same tailcall-aware + // buildCallStack as the standalone TraceViewer instead of + // duplicating the logic. + const formatPcToInstruction = useMemo(() => { + const m = new Map(); + for (const [pc, inst] of pcToInstruction) { + m.set(pc, { + offset: pc, + context: inst.debug?.context, + } as unknown as Program.Instruction); } + return m; + }, [pcToInstruction]); - const step = trace[currentStep]; - const instruction = pcToInstruction.get(step.pc); - if (!instruction?.debug?.context) return undefined; - - return extractCallInfo(instruction.debug.context); - }, [trace, currentStep, pcToInstruction]); + const programsTrace = useMemo( + () => trace.map((s) => ({ pc: s.pc, opcode: s.opcode })), + [trace], + ); - // Build call stack by scanning invoke/return/revert up to - // current step - const callStack = useMemo(() => { - const frames: Array<{ - identifier?: string; - stepIndex: number; - callType?: string; - argumentNames?: string[]; - argumentPointers?: unknown[]; - }> = []; - - for (let i = 0; i <= currentStep && i < trace.length; i++) { - const step = trace[i]; - const instruction = pcToInstruction.get(step.pc); - if (!instruction?.debug?.context) continue; - - const 
info = extractCallInfo(instruction.debug.context); - if (!info) continue; - - if (info.kind === "invoke") { - // The compiler emits invoke on both the caller - // JUMP and callee entry JUMPDEST for the same - // call. These occur on consecutive trace steps. - // Only skip if the top frame matches AND was - // pushed on the immediately preceding step — - // otherwise this is a new call (e.g. recursion). - const top = frames[frames.length - 1]; - const isDuplicate = - top && - top.identifier === info.identifier && - top.callType === info.callType && - top.stepIndex === i - 1; - if (isDuplicate) { - // Use the callee entry step for resolution — - // argument pointers reference stack slots - // valid at the JUMPDEST, not the JUMP. - // Argument names also live on the callee entry. - top.stepIndex = i; - top.argumentNames = info.argumentNames ?? top.argumentNames; - top.argumentPointers = info.argumentPointers; - } else { - frames.push({ - identifier: info.identifier, - stepIndex: i, - callType: info.callType, - argumentNames: info.argumentNames, - argumentPointers: info.argumentPointers, - }); - } - } else if (info.kind === "return" || info.kind === "revert") { - if (frames.length > 0) { - frames.pop(); - } - } - } + const currentInstruction = useMemo(() => { + const step = trace[currentStep]; + if (!step) return undefined; + return formatPcToInstruction.get(step.pc); + }, [trace, currentStep, formatPcToInstruction]); - return frames; - }, [trace, currentStep, pcToInstruction]); + // Extract call info from current instruction context + const currentCallInfo = useMemo(() => { + if (!currentInstruction) return undefined; + return extractCallInfoFromInstruction(currentInstruction); + }, [currentInstruction]); + + // Compiler transform tags on the current instruction + // (e.g. "tailcall"), for the transform annotations panel. 
+ const currentTransforms = useMemo(() => { + if (!currentInstruction) return []; + return extractTransformFromInstruction(currentInstruction); + }, [currentInstruction]); + + // Build call stack via the shared, tailcall-aware helper. + const callStack = useMemo( + () => buildCallStack(programsTrace, formatPcToInstruction, currentStep), + [programsTrace, formatPcToInstruction, currentStep], + ); // Resolve argument values for call stack frames const argCacheRef = useRef>(new Map()); @@ -237,94 +239,147 @@ function TraceDrawerContent(): JSX.Element { }; }, [callStack, trace, storage]); + // Resolve the current instruction's variable values by + // dereferencing each variable's pointer against the step + // state (reuses the same machinery as argument resolution). + const [resolvedVarValues, setResolvedVarValues] = useState< + Map + >(new Map()); + + useEffect(() => { + const step = trace[currentStep]; + if (!step || currentVariables.length === 0) { + setResolvedVarValues(new Map()); + return; + } + + let cancelled = false; + const state = traceStepToState(step, storage); + const next = new Map(); + + Promise.all( + currentVariables.map(async (v) => { + if (!v.pointer) return; + try { + next.set(v.identifier, await resolvePointer(v.pointer, state)); + } catch { + // leave unresolved + } + }), + ).then(() => { + if (!cancelled) setResolvedVarValues(next); + }); + + return () => { + cancelled = true; + }; + }, [currentVariables, currentStep, trace, storage]); + + // Gas remaining at the current step plus the delta consumed + // reaching it (when the executor reports gas). 
+ const gasText = useMemo(() => { + const step = trace[currentStep]; + if (!step || step.gasRemaining === undefined) return ""; + const rem = step.gasRemaining.toLocaleString(); + const prev = trace[currentStep - 1]; + if (prev?.gasRemaining !== undefined) { + const delta = prev.gasRemaining - step.gasRemaining; + if (delta > 0n) return `gas ${rem} (−${delta.toLocaleString()})`; + } + return `gas ${rem}`; + }, [trace, currentStep]); + // Compile source and run trace in one shot. // Takes source directly to avoid stale-state issues. - const compileAndTrace = useCallback(async (sourceCode: string) => { - setIsCompiling(true); - setCompileResult(null); - setTrace([]); - setCurrentStep(0); - setTraceError(null); - setStorage({}); - - let bytecode: BytecodeOutput | undefined; - - try { - const result = await bugCompile({ - to: "bytecode", - source: sourceCode, - optimizer: { level: 0 }, - }); + const compileAndTrace = useCallback( + async (sourceCode: string, level: OptLevel) => { + setIsCompiling(true); + setCompileResult(null); + setTrace([]); + setCurrentStep(0); + setTraceError(null); + setStorage({}); + + let bytecode: BytecodeOutput | undefined; - if (!result.success) { - const errors = result.messages[Severity.Error] || []; + try { + const result = await bugCompile({ + to: "bytecode", + source: sourceCode, + optimizer: { level }, + }); + + if (!result.success) { + const errors = result.messages[Severity.Error] || []; + setCompileResult({ + success: false, + error: errors[0]?.message || "Compilation failed", + }); + return; + } + + bytecode = { + runtime: result.value.bytecode.runtime, + create: result.value.bytecode.create, + runtimeInstructions: result.value.bytecode.runtimeInstructions, + createInstructions: result.value.bytecode.createInstructions, + }; + + setCompileResult({ success: true, bytecode }); + } catch (e) { setCompileResult({ success: false, - error: errors[0]?.message || "Compilation failed", + error: e instanceof Error ? 
e.message : String(e), }); return; + } finally { + setIsCompiling(false); } - bytecode = { - runtime: result.value.bytecode.runtime, - create: result.value.bytecode.create, - runtimeInstructions: result.value.bytecode.runtimeInstructions, - createInstructions: result.value.bytecode.createInstructions, - }; - - setCompileResult({ success: true, bytecode }); - } catch (e) { - setCompileResult({ - success: false, - error: e instanceof Error ? e.message : String(e), - }); - return; - } finally { - setIsCompiling(false); - } - - if (!bytecode) return; + if (!bytecode) return; - setIsTracing(true); + setIsTracing(true); - try { - const executor = new Executor(); + try { + const executor = new Executor(); - if (bytecode.create) { - const createHex = Array.from(bytecode.create) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - await executor.deploy(createHex); - } + if (bytecode.create) { + const createHex = Array.from(bytecode.create) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + await executor.deploy(createHex); + } - const [handler, getTrace] = createTraceCollector(); - await executor.execute({}, handler); + const [handler, getTrace] = createTraceCollector(); + await executor.execute({}, handler); - const collectedTrace = getTrace(); - setTrace(collectedTrace.steps); - setCurrentStep(0); + const collectedTrace = getTrace(); + setTrace(collectedTrace.steps); + setCurrentStep(0); - const storageEntries: Record = {}; - for (let i = 0n; i < 16n; i++) { - const value = await executor.getStorage(i); - if (value !== 0n) { - const slot = `0x${i.toString(16).padStart(2, "0")}`; - storageEntries[slot] = `0x${value.toString(16).padStart(64, "0")}`; + const storageEntries: Record = {}; + for (let i = 0n; i < 16n; i++) { + const value = await executor.getStorage(i); + if (value !== 0n) { + const slot = `0x${i.toString(16).padStart(2, "0")}`; + storageEntries[slot] = `0x${value.toString(16).padStart(64, "0")}`; + } } + setStorage(storageEntries); + } catch 
(e) { + setTraceError(e instanceof Error ? e.message : String(e)); + } finally { + setIsTracing(false); } - setStorage(storageEntries); - } catch (e) { - setTraceError(e instanceof Error ? e.message : String(e)); - } finally { - setIsTracing(false); - } - }, []); + }, + [], + ); // Auto compile+trace when a new example is loaded useEffect(() => { if (example?.source) { setLocalSource(example.source); - compileAndTrace(example.source); + compileAndTrace(example.source, optimizerLevelRef.current); } }, [example, compileAndTrace]); @@ -337,8 +392,17 @@ function TraceDrawerContent(): JSX.Element { ); const handleCompileAndTrace = useCallback(() => { - compileAndTrace(source); - }, [source, compileAndTrace]); + compileAndTrace(source, optimizerLevel); + }, [source, compileAndTrace, optimizerLevel]); + + const handleLevelChange = useCallback( + (level: OptLevel) => { + if (level === optimizerLevel) return; + setOptimizerLevel(level); + compileAndTrace(source, level); + }, + [source, compileAndTrace, optimizerLevel], + ); const stepForward = () => { setCurrentStep((prev) => Math.min(prev + 1, trace.length - 1)); @@ -395,18 +459,42 @@ function TraceDrawerContent(): JSX.Element { const isBusy = isCompiling || isTracing; const headerActions = ( - +
+
+ Opt + {OPT_LEVELS.map((level) => ( + + ))} +
+ +
); return ( @@ -522,6 +610,22 @@ function TraceDrawerContent(): JSX.Element { > {frame.identifier || "(anonymous)"}( {formatFrameArgs(frame, resolvedArgs)}) + {frame.isTailCall && ( + + ⮌ tail call + + )} + {frame.isInline && ( + + ⧉ inline + + )} )) @@ -530,7 +634,11 @@ function TraceDrawerContent(): JSX.Element { {currentCallInfo && (
{formatCallBanner(currentCallInfo)}
@@ -554,23 +662,34 @@ function TraceDrawerContent(): JSX.Element { @ 0x{currentTraceStep.pc.toString(16)} + {gasText && ( + {gasText} + )}
-
Stack
- + {currentTransforms.length > 0 && ( +
+ +
+ )} + +
+ +
{currentVariables.length > 0 && ( - <> -
Variables
- - +
+ +
)} {Object.keys(storage).length > 0 && ( - <> -
Storage
+
- +
)} )} @@ -685,107 +804,93 @@ function formatBigInt(value: bigint): string { interface Variable { identifier: string; type?: string; + pointer?: unknown; } interface VariablesDisplayProps { variables: Variable[]; + resolved: Map; } -function VariablesDisplay({ variables }: VariablesDisplayProps): JSX.Element { +function VariablesDisplay({ + variables, + resolved, +}: VariablesDisplayProps): JSX.Element { return (
- {variables.map((variable, i) => ( -
- {variable.identifier} - {variable.type && ( - {variable.type} - )} + {variables.map((variable, i) => { + const value = resolved.get(variable.identifier); + return ( +
+ {variable.identifier} + {value !== undefined && ( + {formatAsDecimal(value)} + )} + {variable.type && ( + {variable.type} + )} +
+ ); + })} +
+ ); +} + +/** One-line glosses for known transform identifiers. */ +const TRANSFORM_GLOSS: Record = { + tailcall: "tail call — frame reused, no new activation (TCO)", + inline: "inlined function body", + fold: "constant-folded at compile time", + coalesce: "merged read/write sequence", +}; + +function TransformList({ transforms }: { transforms: string[] }): JSX.Element { + return ( +
+ {transforms.map((t, i) => ( +
+ {t} + + {TRANSFORM_GLOSS[t] ?? "compiler transform"} +
))}
); } -/** - * Info about a call context (invoke/return/revert). - */ -interface CallInfoResult { - kind: "invoke" | "return" | "revert"; - identifier?: string; - callType?: string; - argumentNames?: string[]; - argumentPointers?: unknown[]; +/** A collapsible right-column section. */ +function Section({ + title, + defaultOpen = true, + children, +}: { + title: string; + defaultOpen?: boolean; + children: React.ReactNode; +}): JSX.Element { + return ( +
+ {title} + {children} +
+ ); } /** - * Extract call info from an ethdebug format context object. + * Info about a call context (invoke/return/revert). */ -function extractCallInfo(context: unknown): CallInfoResult | undefined { - if (!context || typeof context !== "object") { - return undefined; - } - - const ctx = context as Record; - - if ("invoke" in ctx && ctx.invoke) { - const inv = ctx.invoke as Record; - let callType: string | undefined; - if ("jump" in inv) callType = "internal"; - else if ("message" in inv) callType = "external"; - else if ("create" in inv) callType = "create"; - - const argInfo = extractArgInfoFromInvoke(inv); - return { - kind: "invoke", - identifier: inv.identifier as string | undefined, - callType, - argumentNames: argInfo?.names, - argumentPointers: argInfo?.pointers, - }; - } - - if ("return" in ctx && ctx.return) { - const ret = ctx.return as Record; - return { - kind: "return", - identifier: ret.identifier as string | undefined, - }; - } - - if ("revert" in ctx && ctx.revert) { - const rev = ctx.revert as Record; - return { - kind: "revert", - identifier: rev.identifier as string | undefined, - }; - } - - // Walk gather/pick - if ("gather" in ctx && Array.isArray(ctx.gather)) { - for (const sub of ctx.gather) { - const info = extractCallInfo(sub); - if (info) return info; - } - } - - if ("pick" in ctx && Array.isArray(ctx.pick)) { - for (const sub of ctx.pick) { - const info = extractCallInfo(sub); - if (info) return info; - } - } - - return undefined; -} - /** * Format a call info banner string. */ -function formatCallBanner(info: CallInfoResult): string { +function formatCallBanner(info: CallInfo): string { const name = info.identifier || "(anonymous)"; const params = info.argumentNames ? `(${info.argumentNames.join(", ")})` : "()"; + if (info.isTailCall) { + return `Tail call: ${name} (frame reused)`; + } switch (info.kind) { case "invoke": { const prefix = info.callType === "create" ? 
"Creating" : "Calling"; @@ -798,38 +903,6 @@ function formatCallBanner(info: CallInfoResult): string { } } -function extractArgInfoFromInvoke( - inv: Record, -): { names?: string[]; pointers?: unknown[] } | undefined { - const args = inv.arguments as Record | undefined; - if (!args) return undefined; - - const pointer = args.pointer as Record | undefined; - if (!pointer) return undefined; - - const group = pointer.group as Array> | undefined; - if (!Array.isArray(group)) return undefined; - - const names: string[] = []; - const pointers: unknown[] = []; - let hasAnyName = false; - for (const entry of group) { - const name = entry.name as string | undefined; - if (name) { - names.push(name); - hasAnyName = true; - } else { - names.push("_"); - } - pointers.push(entry); - } - - return { - names: hasAnyName ? names : undefined, - pointers, - }; -} - /** * Extract variables from an ethdebug format context object. */ @@ -849,6 +922,7 @@ function extractVariables(context: unknown): Variable[] { variables.push({ identifier: String(variable.identifier), type: variable.type ? formatType(variable.type) : undefined, + pointer: variable.pointer, }); } } diff --git a/schemas/program/context.schema.yaml b/schemas/program/context.schema.yaml index a57fce654..1a82e76df 100644 --- a/schemas/program/context.schema.yaml +++ b/schemas/program/context.schema.yaml @@ -89,6 +89,14 @@ allOf: description: | Indicates association with a function revert. $ref: "schema:ethdebug/format/program/context/function/revert" + - if: + required: ["transform"] + then: + description: | + Compiler transformations applied to produce this instruction + (e.g., inlining, tail-call optimization). Additional + annotation — does not replace semantic contexts. 
+ $ref: "schema:ethdebug/format/program/context/transform" unevaluatedProperties: false diff --git a/schemas/program/context/function/invoke.schema.yaml b/schemas/program/context/function/invoke.schema.yaml index 41ef71d77..ebb824d07 100644 --- a/schemas/program/context/function/invoke.schema.yaml +++ b/schemas/program/context/function/invoke.schema.yaml @@ -88,6 +88,13 @@ $defs: description: | Pointer to the target of the invocation. For internal calls, this typically points to a code location. + Optional: may be omitted when there is no meaningful + target pointer to record, e.g., at the first + instruction of an inlined function body where the + inlining pass has elided the JUMP that would normally + carry this pointer. The callee identity + (`identifier`, `declaration`, `type`) is still + meaningful in this case. properties: pointer: $ref: "schema:ethdebug/format/pointer" @@ -107,7 +114,7 @@ $defs: - pointer additionalProperties: false - required: [jump, target] + required: [jump] ExternalCall: title: External call @@ -284,6 +291,26 @@ examples: location: stack slot: 2 + # ----------------------------------------------------------- + # Inlined internal call: no target pointer + # ----------------------------------------------------------- + # When the compiler inlines a function, the JUMP that would + # normally carry the invoke context has been elided — there + # is no physical call instruction and no code target to + # point at. The invoke context still records the callee's + # identity so the debugger can maintain a source-level call + # stack, and a sibling `transform: ["inline"]` key (typically + # on the same context object) annotates the inlining. 
+ - invoke: + identifier: "transfer" + declaration: + source: + id: 0 + range: + offset: 128 + length: 95 + jump: true + # ----------------------------------------------------------- # External CALL: token.balanceOf(account) # ----------------------------------------------------------- diff --git a/schemas/program/context/name.schema.yaml b/schemas/program/context/name.schema.yaml index 759161059..c9830fc1f 100644 --- a/schemas/program/context/name.schema.yaml +++ b/schemas/program/context/name.schema.yaml @@ -3,15 +3,31 @@ $id: "schema:ethdebug/format/program/context/name" title: ethdebug/format/program/context/name description: | - A label for distinguishing this context from other contexts. - This is particularly useful inside `pick` alternatives, - where several possible contexts may apply at a given point in - execution and runtime information is needed to select which one - is active. + A machine-generated identifier for this context that other + contexts may reference by name. - Context names are opaque strings with no format-imposed semantics. - Compilers **should** choose names that are meaningful to debugger - users. + A `name` gives a context a stable identity that the rest of a + program's debug information can point back to. This is what makes + cross-context references possible. Uses include: + + - **Selecting `pick` alternatives.** Several contexts may apply + at a point in execution; a `name` identifies which alternative + is active, so runtime information can select it. + - **Correlating an invocation with its return.** An `invoke` + context *declares* an activation's name; the matching `return` + context — and the instructions belonging to that activation — + *reference* it by the same name. This pairs a call with its + return directly, without relying on the trace being strictly + nested (see the invoke context's activation-reconstruction + guidance). + + Names are opaque strings; the format imposes no structure on + them. 
Within a single program — one **instructions** sequence — + each name **must** be declared by exactly one context; no two + contexts may declare the same name. Other contexts may reference + that name freely, and every reference resolves to the single + declaring context. Compilers **should** also choose names that + are meaningful to debugger users. type: object properties: @@ -21,7 +37,10 @@ required: - name examples: - # example: naming an inlined call site - - name: "inlined-call" + # example: declaring an inlined activation, referenced by its + # matching return and by the instructions of the inlined body + - name: "inline-0" # example: naming a generic instantiation - name: "Array" + # example: distinguishing a `pick` alternative + - name: "storage-layout-v2" diff --git a/schemas/program/context/transform.schema.yaml b/schemas/program/context/transform.schema.yaml new file mode 100644 index 000000000..8951a00ca --- /dev/null +++ b/schemas/program/context/transform.schema.yaml @@ -0,0 +1,79 @@ +$schema: "https://json-schema.org/draft/2020-12/schema" +$id: "schema:ethdebug/format/program/context/transform" + +title: ethdebug/format/program/context/transform +description: | + Annotates an instruction with compiler transformations that + produced it. The value is a list of short identifiers naming + each transformation; the list may repeat an identifier when + the same transformation has been applied more than once (e.g., + `["inline", "inline"]` for doubly-inlined code). + + A transform context is *additional* annotation — it does not + replace semantic contexts. When the compiler inlines a + function, the invoke/return contexts for the logical call + should still be emitted at the call boundary so the debugger's + source-level call stack remains coherent. The transform + context tells debuggers **how** the call was realized. + + Combine a transform with other discriminator keys (`invoke`, + `return`, `code`, etc.) 
by placing them side-by-side on the + same context object — `gather` is only needed when two + contexts would collide on the same key. + + Consumers that ignore transform contexts still get a sound + source-level view from the invoke/return contexts alone. + Consumers that understand transform contexts can offer + optimization-aware presentations — e.g., rendering inlined + code as a collapsible block, or reconciling tail-call-optimized + back-edges with the logical call stack. + + The identifier set is extensible. v1 defines: + + - `"inline"` — the marked instruction is part of an inlined + function body. Surrounding invoke/return contexts name the + inlined callee. + - `"tailcall"` — the marked instruction is a + tail-call-optimized back-edge JUMP or continuation, where + the call was realized as a direct jump (or reuse of the + caller's frame) rather than a standard call/return sequence. + - `"fold"` — the marked instruction carries the result of a + compile-time constant fold. Typically a PUSH of the folded + value, replacing a compute sequence that appeared in source. + - `"coalesce"` — the marked instruction is part of a + read-write merging sequence (e.g., SHL/OR sequences packing + narrower fields into a wider word) that the user did not + explicitly write; the compiler introduced it to combine + adjacent source-level reads or writes. + + Debuggers unfamiliar with a given identifier should preserve + it as an opaque label. + + Order in the array is not semantically significant — only the + multiset of identifiers matters. + +type: object +properties: + transform: + title: Applied transformations + description: | + List of transformation identifiers. Identifiers may + repeat; order is not semantically significant. 
+ type: array + items: + type: string + minLength: 1 + minItems: 1 + +required: + - transform + +examples: + - transform: ["inline"] + - transform: ["tailcall"] + - transform: ["fold"] + - transform: ["coalesce"] + - transform: ["inline", "inline"] + - transform: ["inline", "tailcall"] + - transform: ["inline", "fold"] + - transform: ["coalesce", "coalesce"]