diff --git a/packages/bugc/src/evmgen/generation/block.ts b/packages/bugc/src/evmgen/generation/block.ts index e23460dfb..632ee132a 100644 --- a/packages/bugc/src/evmgen/generation/block.ts +++ b/packages/bugc/src/evmgen/generation/block.ts @@ -14,6 +14,7 @@ import { Memory } from "#evmgen/analysis"; import { calculateSize } from "#evmgen/serialize"; import * as Instruction from "./instruction.js"; +import { bracketActivation, carriesActivation } from "./bracket-activation.js"; import { loadValue } from "./values/index.js"; import { generateTerminator, @@ -161,9 +162,31 @@ export function generate( // the runtime predecessor differs from the layout-order // predecessor. - // Process regular instructions + // Process regular instructions. Invoke/return activation + // discriminators must be bracketed to the first/last emitted op + // of the instruction (see bracket-activation.ts); everything else + // (source mapping, variables, transform markers) rides all ops. for (const inst of block.instructions) { - result = result.then(Instruction.generate(inst)); + const gen = Instruction.generate(inst); + const operationCtx = inst.operationDebug?.context; + if ( + !carriesActivation(operationCtx, "invoke") && + !carriesActivation(operationCtx, "return") + ) { + result = result.then(gen); + continue; + } + result = result.peek((state, builder) => { + const start = state.instructions.length; + return builder.then(gen).then((s) => ({ + ...s, + instructions: bracketActivation( + s.instructions, + start, + operationCtx, + ), + })); + }); } // Emit phi copies for successor blocks before the diff --git a/packages/bugc/src/evmgen/generation/bracket-activation.ts b/packages/bugc/src/evmgen/generation/bracket-activation.ts new file mode 100644 index 000000000..653c2ffc8 --- /dev/null +++ b/packages/bugc/src/evmgen/generation/bracket-activation.ts @@ -0,0 +1,164 @@ +/** + * Bracket invoke/return activation discriminators onto the boundary + * ops of an IR instruction's emitted op-run. 
+ *
+ * A single IR instruction lowers to N EVM micro-ops, and the generic
+ * lowering attaches that instruction's whole `operationDebug` (source
+ * mapping, variables, transform markers, AND any invoke/return
+ * discriminators) to every one of those ops. That is correct for
+ * source/variable/transform context — a debugger wants all N ops
+ * mapped to the instruction — but WRONG for invoke/return: those are
+ * positional activation boundaries. An `invoke` marks a single push
+ * point; a `return` a single pop point. Broadcasting them across the
+ * whole op-run makes a push/pop reconstruction see every op as both a
+ * push and a pop.
+ *
+ * This module de-smears: for the ops emitted by one instruction, the
+ * `invoke` discriminator is kept on only the FIRST op, `return` on only
+ * the LAST op, and stripped from the interior. The `transform`
+ * membership markers (and source/variables) stay on every op.
+ *
+ * It is a general evmgen invariant, not inline-specific: it is a no-op
+ * for real calls (whose invoke/return already ride single-op JUMP /
+ * JUMPDEST terminators) and fires only when invoke/return happen to
+ * ride a multi-op instruction — which today is inlined virtual
+ * activations.
+ */
+import type * as Format from "@ethdebug/format";
+import type * as Evm from "#evm";
+
+type Ctx = Format.Program.Context;
+type Activation = "invoke" | "return";
+
+/** Type guard: ctx is an object whose `pick` property is an array. */
+function isPick(ctx: Ctx): ctx is Ctx & { pick: Ctx[] } {
+  return (
+    typeof ctx === "object" &&
+    ctx !== null &&
+    "pick" in ctx &&
+    Array.isArray((ctx as { pick: unknown }).pick)
+  );
+}
+
+/** Type guard: ctx is an object whose `gather` property is an array. */
+function isGather(ctx: Ctx): ctx is Ctx & { gather: Ctx[] } {
+  return (
+    typeof ctx === "object" &&
+    ctx !== null &&
+    "gather" in ctx &&
+    Array.isArray((ctx as { gather: unknown }).gather)
+  );
+}
+
+/**
+ * Whether ctx carries the given activation key anywhere: directly on
+ * the object (including as a flat sibling of a pick/gather list) or on
+ * any context nested inside its pick/gather lists.
+ *
+ * @param ctx - Context to inspect; `undefined` and non-objects yield false.
+ * @param key - Activation discriminator to look for.
+ * @returns true when the key is present at any depth.
+ */
+export function carriesActivation(
+  ctx: Ctx | undefined,
+  key: Activation,
+): boolean {
+  if (!ctx || typeof ctx !== "object") return false;
+  // Check the object itself first: a composite may hold the
+  // discriminator as a flat sibling key beside its child list.
+  if (key in (ctx as Record<string, unknown>)) return true;
+  if (isPick(ctx) && ctx.pick.some((c) => carriesActivation(c, key))) {
+    return true;
+  }
+  return isGather(ctx) && ctx.gather.some((c) => carriesActivation(c, key));
+}
+
+/**
+ * The first defined activation value found for the given key. The
+ * object's own key wins; otherwise pick children, then gather children,
+ * are searched depth-first in list order.
+ *
+ * @returns the value, or `undefined` when no defined value exists.
+ */
+function findActivation(ctx: Ctx | undefined, key: Activation): unknown {
+  if (!ctx || typeof ctx !== "object") return undefined;
+  const own = (ctx as Record<string, unknown>)[key];
+  if (own !== undefined) return own;
+
+  const children: Ctx[] = [
+    ...(isPick(ctx) ? ctx.pick : []),
+    ...(isGather(ctx) ? ctx.gather : []),
+  ];
+  for (const child of children) {
+    const found = findActivation(child, key);
+    if (found !== undefined) return found;
+  }
+  return undefined;
+}
+
+/**
+ * Remove invoke and return discriminators anywhere in ctx, reaching
+ * into pick/gather composites. Every other key is preserved, including
+ * keys that sit beside a pick/gather list on the same object.
+ *
+ * A pick/gather list left with no children is dropped. A bare composite
+ * (no other keys) left with exactly one child collapses to that child.
+ *
+ * @returns the stripped context, or `undefined` if nothing remains.
+ *   Falsy or non-object input is returned unchanged.
+ */
+export function stripActivation(ctx: Ctx | undefined): Ctx | undefined {
+  if (!ctx || typeof ctx !== "object") return ctx;
+
+  // Work on a shallow copy so the caller's context is never mutated.
+  const rest: Record<string, unknown> = {
+    ...(ctx as Record<string, unknown>),
+  };
+  delete rest.invoke;
+  delete rest.return;
+
+  for (const listKey of ["pick", "gather"] as const) {
+    const children = rest[listKey];
+    if (!Array.isArray(children)) continue;
+
+    const kept = (children as Ctx[])
+      .map((child) => stripActivation(child))
+      .filter((child): child is Ctx => child !== undefined);
+
+    if (kept.length === 0) {
+      delete rest[listKey];
+    } else if (kept.length === 1 && Object.keys(rest).length === 1) {
+      // Bare composite with a single survivor: the wrapper adds nothing.
+      return kept[0];
+    } else {
+      rest[listKey] = kept;
+    }
+  }
+
+  return Object.keys(rest).length > 0 ? (rest as Ctx) : undefined;
+}
+
+/** Attach an activation discriminator, composing it as a flat sibling
+ * key on a leaf context (per the flat-composition convention), or
+ * appending it to a pick/gather composite.
*/
+function attachActivation(
+  ctx: Ctx | undefined,
+  key: Activation,
+  value: unknown,
+): Ctx {
+  const marker = { [key]: value } as Ctx;
+  if (!ctx || typeof ctx !== "object") return marker;
+  // Spread the composite first so keys sitting beside the pick/gather
+  // list survive; only the list itself is extended with the marker.
+  if (isPick(ctx)) {
+    return {
+      ...(ctx as Record<string, unknown>),
+      pick: [...ctx.pick, marker],
+    } as Ctx;
+  }
+  if (isGather(ctx)) {
+    return {
+      ...(ctx as Record<string, unknown>),
+      gather: [...ctx.gather, marker],
+    } as Ctx;
+  }
+  return { ...(ctx as Record<string, unknown>), [key]: value } as Ctx;
+}
+
+/**
+ * Rewrite the ops emitted by one IR instruction (the tail slice
+ * `instructions[start..]`) so invoke rides only the first op and
+ * return only the last op, using the discriminators found on the
+ * instruction's `operationDebug` context. No-op unless that context
+ * carries invoke and/or return, so it never touches ordinary code.
+ *
+ * @param instructions - All ops emitted so far; never mutated.
+ * @param start - Index of the first op emitted by the instruction.
+ * @param operationCtx - The instruction's `operationDebug` context.
+ * @returns the input array itself when nothing needs rewriting,
+ *   otherwise a copy whose ops from `start` onward are replaced.
+ */
+export function bracketActivation(
+  instructions: Evm.Instruction[],
+  start: number,
+  operationCtx: Ctx | undefined,
+): Evm.Instruction[] {
+  const end = instructions.length; // exclusive
+  if (end <= start) return instructions;
+
+  const hasInvoke = carriesActivation(operationCtx, "invoke");
+  const hasReturn = carriesActivation(operationCtx, "return");
+  if (!hasInvoke && !hasReturn) return instructions;
+
+  const invokeValue = hasInvoke
+    ? findActivation(operationCtx, "invoke")
+    : undefined;
+  const returnValue = hasReturn
+    ? findActivation(operationCtx, "return")
+    : undefined;
+
+  const out = instructions.slice();
+  for (let i = start; i < end; i++) {
+    const op = out[i];
+    // Strip both markers from every op in the run, then put each one
+    // back only on its boundary. For a one-op run first === last, so
+    // that single op keeps both.
+    let ctx = stripActivation(op.debug?.context);
+    if (hasInvoke && i === start) {
+      ctx = attachActivation(ctx, "invoke", invokeValue);
+    }
+    if (hasReturn && i === end - 1) {
+      ctx = attachActivation(ctx, "return", returnValue);
+    }
+    out[i] = { ...op, debug: { ...op.debug, context: ctx } };
+  }
+  return out;
+}
diff --git a/packages/bugc/src/evmgen/inline-bracket.test.ts b/packages/bugc/src/evmgen/inline-bracket.test.ts
new file mode 100644
index 000000000..6583bcfed
--- /dev/null
+++ b/packages/bugc/src/evmgen/inline-bracket.test.ts
@@ -0,0 +1,192 @@
+/**
+ * Verifies that inlined virtual-activation invoke/return contexts are
+ * BRACKETED on the emitted bytecode, not smeared across every op.
+ *
+ * An IR instruction lowers to N EVM micro-ops. evmgen must attach the
+ * `invoke` discriminator to only the FIRST emitted op of the
+ * invoke-bearing instruction and the `return` discriminator to only the
+ * LAST emitted op of the return-bearing instruction, while keeping the
+ * `transform: ["inline"]` membership marker on ALL body ops.
+ *
+ * Without bracketing, the tracer's push/pop reconstruction sees every
+ * body op as both a push and a pop -> phantom frames.
+ */
+import { describe, it, expect } from "vitest";
+
+import { compile } from "#compiler";
+import { executeProgram } from "#test/evm/behavioral";
+import type * as Format from "@ethdebug/format";
+import { Program } from "@ethdebug/format";
+
+const { Context } = Program;
+
+type OptLevel = 0 | 1 | 2 | 3;
+
+/**
+ * Compile `source` to bytecode at the given optimizer level and return
+ * the runtime instruction list; throws with the collected compiler
+ * error messages when compilation fails.
+ */
+async function runtimeInstructions(source: string, level: OptLevel) {
+  const result = await compile({
+    to: "bytecode",
+    source,
+    optimizer: { level },
+  });
+  if (!result.success) {
+    const errors = result.messages.error ?? [];
+    throw new Error(
+      `Compilation failed at level ${level}:\n` +
+        errors
+          .map((e: { message?: string }) => e.message ?? String(e))
+          .join("\n"),
+    );
+  }
+  return result.value.bytecode.runtimeInstructions;
+}
+
+/** Flatten a context into leaves, unwrapping gather/pick. */
+function leaves(ctx: Format.Program.Context): Format.Program.Context[] {
+  if (Context.isGather(ctx)) return ctx.gather.flatMap(leaves);
+  if ("pick" in ctx && Array.isArray((ctx as { pick: unknown[] }).pick)) {
+    return (ctx as { pick: Format.Program.Context[] }).pick.flatMap(leaves);
+  }
+  return [ctx];
+}
+
+/** Per-op discriminator/marker presence, reaching nested pick/gather. */
+function flags(instr: { debug?: { context?: Format.Program.Context } }) {
+  const ctx = instr.debug?.context;
+  if (!ctx) return { invoke: false, return: false, inline: false };
+  // Include ctx itself so markers composed beside a pick/gather list on
+  // the top-level object are seen too.
+  const all = [ctx, ...leaves(ctx)];
+  return {
+    invoke: all.some((c) => Context.isInvoke(c)),
+    return: all.some((c) => Context.isReturn(c)),
+    inline: all.some(
+      (c) => Context.isTransform(c) && c.transform.includes("inline"),
+    ),
+  };
+}
+
+/** Count how many ops carry invoke, return, both at once, and inline. */
+function tally(instrs: ReturnType<typeof flags>[]) {
+  let invoke = 0,
+    ret = 0,
+    both = 0,
+    inline = 0;
+  for (const f of instrs) {
+    if (f.invoke) invoke += 1;
+    if (f.return) ret += 1;
+    if (f.invoke && f.return) both += 1;
+    if (f.inline) inline += 1;
+  }
+  return { invoke, ret, both, inline };
+}
+
+// The exact fixture the UI reported mis-rendering: a leaf helper
+// inlined at two sites.
+const dblTwoSites = `name Multi;
+define { function dbl(x: uint256) -> uint256 { return x + x; }; }
+storage { [0] r: uint256; }
+create { r = 0; }
+code {
+  let a = dbl(5);
+  let b = dbl(10);
+  r = a + b;
+}`;
+
+// A multi-instruction body: entry (t = x + x) differs from exit
+// (t * x), so invoke and return live on distinct IR instructions.
+const multiInstrBody = `name Poly;
+define { function poly(x: uint256) -> uint256 { let t = x + x; return t * x; }; }
+storage { [0] a: uint256; [1] r: uint256; }
+create { a = 3; r = 0; }
+code { r = poly(a); }`;
+
+describe("inlined invoke/return are bracketed on emitted bytecode", () => {
+  it("dbl@2-sites: one push and one pop per site, never both on an op", async () => {
+    const ops = await runtimeInstructions(dblTwoSites, 2);
+    const { invoke, ret, both, inline } = tally(ops.map(flags));
+    // Each of the two inlined sites contributes exactly one invoke op
+    // and exactly one return op.
+    expect(invoke).toBe(2);
+    expect(ret).toBe(2);
+    // An op that is both a push and a pop breaks push/pop reconstruction.
+    expect(both).toBe(0);
+    // The membership marker rides every body op, so it must outnumber
+    // the 4 boundary ops.
+    expect(inline).toBeGreaterThan(4);
+  });
+
+  it("dbl@2-sites: each site's invoke op precedes its return op", async () => {
+    const ops = await runtimeInstructions(dblTwoSites, 2);
+    // Walk the ops in emission order, recording each boundary marker;
+    // an op carrying invoke is recorded as "invoke".
+    const boundaries: string[] = [];
+    for (const op of ops) {
+      const seen = flags(op);
+      if (seen.invoke) {
+        boundaries.push("invoke");
+      } else if (seen.return) {
+        boundaries.push("return");
+      }
+    }
+    // Two bracketed sites in sequence: push,pop,push,pop.
+    expect(boundaries).toEqual(["invoke", "return", "invoke", "return"]);
+  });
+
+  it("multi-instruction body: invoke on entry, return on exit, both=0", async () => {
+    const ops = await runtimeInstructions(multiInstrBody, 2);
+    const { invoke, ret, both } = tally(ops.map(flags));
+    expect(invoke).toBe(1);
+    expect(ret).toBe(1);
+    expect(both).toBe(0);
+  });
+
+  it("preserves runtime behavior at every level", async () => {
+    const levels = [0, 1, 2, 3] as const;
+    for (const optimizationLevel of levels) {
+      const run = await executeProgram(dblTwoSites, {
+        calldata: "",
+        optimizationLevel,
+      });
+      expect(run.callSuccess).toBe(true);
+      // dbl(5)=10 and dbl(10)=20, so r must hold 30.
+      expect(await run.getStorage(0n)).toBe(30n);
+    }
+  });
+});
+
+// A self-tail-recursive accumulator: TCO turns the recursive call
+// into a single back-edge JUMP that legitimately carries BOTH invoke
+// and return on its one op (end one iteration + begin the next).
+const tailRecursive = `name TailSum;
+define {
+  function sum(n: uint256, acc: uint256) -> uint256 {
+    if (n == 0) { return acc; }
+    else { return sum(n - 1, acc + n); }
+  };
+}
+storage { [0] result: uint256; }
+create { result = 0; }
+code { result = sum(5, 0); }`;
+
+// Mutually recursive functions never inline, so their calls stay real
+// (invoke on a 1-op JUMP, return on a 1-op JUMPDEST).
+const mutualRecursion = `name EvenOdd;
+define {
+  function isEven(n: uint256) -> uint256 {
+    if (n == 0) { return 1; } else { return isOdd(n - 1); }
+  };
+  function isOdd(n: uint256) -> uint256 {
+    if (n == 0) { return 0; } else { return isEven(n - 1); }
+  };
+}
+storage { [0] result: uint256; }
+create { result = 0; }
+code { result = isEven(4); }`;
+
+describe("bracketing is a no-op for single-op invoke/return carriers", () => {
+  it("tailcall back-edge keeps its combined invoke+return on one op", async () => {
+    // The back-edge JUMP is one op holding both markers. For a one-op
+    // run the first op is also the last, so both must survive.
+    const ops = await runtimeInstructions(tailRecursive, 2);
+    const { both } = tally(ops.map(flags));
+    expect(both).toBeGreaterThanOrEqual(1);
+  });
+
+  it("real (non-inlined) calls never carry both on an op", async () => {
+    const ops = await runtimeInstructions(mutualRecursion, 2);
+    const { invoke, ret, both } = tally(ops.map(flags));
+    // Real calls keep invoke on a 1-op JUMP and return on a 1-op
+    // JUMPDEST — distinct ops — so bracketing must not fabricate a both.
+    expect(both).toBe(0);
+    expect(invoke).toBeGreaterThan(0);
+    expect(ret).toBeGreaterThan(0);
+  });
+});