Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions packages/bugc/src/evmgen/generation/block.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { Memory } from "#evmgen/analysis";
import { calculateSize } from "#evmgen/serialize";

import * as Instruction from "./instruction.js";
import { bracketActivation, carriesActivation } from "./bracket-activation.js";
import { loadValue } from "./values/index.js";
import {
generateTerminator,
Expand Down Expand Up @@ -161,9 +162,31 @@ export function generate<S extends Stack>(
// the runtime predecessor differs from the layout-order
// predecessor.

// Process regular instructions
// Process regular instructions. Invoke/return activation
// discriminators must be bracketed to the first/last emitted op
// of the instruction (see bracket-activation.ts); everything else
// (source mapping, variables, transform markers) rides all ops.
for (const inst of block.instructions) {
result = result.then(Instruction.generate(inst));
const gen = Instruction.generate(inst);
const operationCtx = inst.operationDebug?.context;
if (
!carriesActivation(operationCtx, "invoke") &&
!carriesActivation(operationCtx, "return")
) {
result = result.then(gen);
continue;
}
result = result.peek((state, builder) => {
const start = state.instructions.length;
return builder.then(gen).then((s) => ({
...s,
instructions: bracketActivation(
s.instructions,
start,
operationCtx,
),
}));
});
}

// Emit phi copies for successor blocks before the
Expand Down
164 changes: 164 additions & 0 deletions packages/bugc/src/evmgen/generation/bracket-activation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/**
* Bracket invoke/return activation discriminators onto the boundary
* ops of an IR instruction's emitted op-run.
*
* A single IR instruction lowers to N EVM micro-ops, and the generic
* lowering attaches that instruction's whole `operationDebug` (source
* mapping, variables, transform markers, AND any invoke/return
* discriminators) to every one of those ops. That is correct for
* source/variable/transform context — a debugger wants all N ops
* mapped to the instruction — but WRONG for invoke/return: those are
* positional activation boundaries. An `invoke` marks a single push
* point; a `return` a single pop point. Broadcasting them across the
* whole op-run makes a push/pop reconstruction see every op as both a
* push and a pop.
*
* This module de-smears: for the ops emitted by one instruction, the
* `invoke` discriminator is kept on only the FIRST op, `return` on only
* the LAST op, and stripped from the interior. The `transform`
* membership markers (and source/variables) stay on every op.
*
* It is a general evmgen invariant, not inline-specific: it is a no-op
* for real calls (whose invoke/return already ride single-op JUMP /
* JUMPDEST terminators) and fires only when invoke/return happen to
* ride a multi-op instruction — which today is inlined virtual
* activations.
*/
import type * as Format from "@ethdebug/format";
import type * as Evm from "#evm";

type Ctx = Format.Program.Context;
type Activation = "invoke" | "return";

/**
 * Type guard: true when `ctx` is a non-null object that has a `pick`
 * property holding an array.
 */
function isPick(ctx: Ctx): ctx is Ctx & { pick: Ctx[] } {
  if (typeof ctx !== "object" || ctx === null) return false;
  if (!("pick" in ctx)) return false;
  const { pick } = ctx as { pick: unknown };
  return Array.isArray(pick);
}

/**
 * Type guard: true when `ctx` is a non-null object that has a `gather`
 * property holding an array.
 */
function isGather(ctx: Ctx): ctx is Ctx & { gather: Ctx[] } {
  if (typeof ctx !== "object" || ctx === null) return false;
  if (!("gather" in ctx)) return false;
  const { gather } = ctx as { gather: unknown };
  return Array.isArray(gather);
}

/**
 * Report whether `ctx` carries the activation `key`.
 *
 * For a pick/gather composite the answer is taken from its members
 * (recursively); for any other object it is whether `key` is present
 * on the object. Falsy or non-object input never carries anything.
 */
export function carriesActivation(
  ctx: Ctx | undefined,
  key: Activation,
): boolean {
  if (!ctx || typeof ctx !== "object") return false;

  let members: Ctx[] | undefined;
  if (isPick(ctx)) {
    members = ctx.pick;
  } else if (isGather(ctx)) {
    members = ctx.gather;
  }

  // Leaf context: the key is either a direct property or absent.
  if (members === undefined) return key in ctx;

  for (const member of members) {
    if (carriesActivation(member, key)) return true;
  }
  return false;
}

/**
 * Return the first defined value stored under `key`, searching the
 * members of pick/gather composites in order (depth-first). A leaf
 * context yields whatever it holds under `key`, possibly `undefined`.
 */
function findActivation(ctx: Ctx | undefined, key: Activation): unknown {
  if (!ctx || typeof ctx !== "object") return undefined;

  const members = isPick(ctx)
    ? ctx.pick
    : isGather(ctx)
      ? ctx.gather
      : undefined;

  if (members === undefined) {
    return (ctx as Record<string, unknown>)[key];
  }

  for (const member of members) {
    const found = findActivation(member, key);
    if (found !== undefined) return found;
  }
  return undefined;
}

/**
 * Remove `invoke` and `return` discriminators anywhere in `ctx`,
 * reaching into `pick`/`gather` composites.
 *
 * Every other key is preserved, including keys that sit next to a
 * `pick`/`gather` array on the same object (flat composition), so
 * source/variables/transform context is never lost while stripping.
 *
 * A composite whose members all strip away loses its `pick`/`gather`
 * key. A composite left with exactly one member and no other keys
 * collapses to that member.
 *
 * @param ctx - Context to strip. Falsy or non-object values are
 *   returned unchanged. The input object is not mutated.
 * @returns The stripped context, or `undefined` if nothing remains.
 */
export function stripActivation(ctx: Ctx | undefined): Ctx | undefined {
  if (!ctx || typeof ctx !== "object") return ctx;

  const composites = ["pick", "gather"] as const;

  // Work on a shallow copy so sibling keys survive and the caller's
  // object is left untouched.
  const rest: Record<string, unknown> = {
    ...(ctx as Record<string, unknown>),
  };
  delete rest.invoke;
  delete rest.return;

  for (const composite of composites) {
    const members = rest[composite];
    if (!Array.isArray(members)) continue;
    const kept = (members as Ctx[])
      .map((member) => stripActivation(member))
      .filter((member): member is Ctx => member !== undefined);
    if (kept.length > 0) {
      rest[composite] = kept;
    } else {
      delete rest[composite];
    }
  }

  const remaining = Object.keys(rest);
  if (remaining.length === 0) return undefined;

  // A lone single-member composite carries no information beyond its
  // member, so unwrap it.
  if (remaining.length === 1) {
    for (const composite of composites) {
      const members = rest[composite];
      if (Array.isArray(members) && members.length === 1) {
        return members[0] as Ctx;
      }
    }
  }
  return rest as Ctx;
}

/**
 * Attach an activation discriminator to `ctx`.
 *
 * - No context (or a non-object): the result is just `{ [key]: value }`.
 * - Pick/gather composite: the marker is appended as a new member of
 *   the `pick` (checked first) or `gather` array. Any other keys on
 *   the composite object are carried over unchanged.
 * - Leaf context: the marker is composed as a flat sibling key (per
 *   the flat-composition convention).
 *
 * The input context is never mutated.
 *
 * @param ctx - Context to extend, if any.
 * @param key - Which discriminator to attach.
 * @param value - The discriminator's payload.
 * @returns A new context carrying the discriminator.
 */
function attachActivation(
  ctx: Ctx | undefined,
  key: Activation,
  value: unknown,
): Ctx {
  const marker = { [key]: value } as Ctx;
  if (!ctx || typeof ctx !== "object") return marker;

  const base = ctx as Record<string, unknown>;
  for (const composite of ["pick", "gather"] as const) {
    const members = base[composite];
    if (Array.isArray(members)) {
      // Spread `base` first so keys sitting next to the composite
      // array (e.g. transform markers) are not dropped.
      return { ...base, [composite]: [...members, marker] } as Ctx;
    }
  }
  return { ...base, [key]: value } as Ctx;
}

/**
 * Bracket the activation discriminators of one IR instruction onto the
 * ops it emitted.
 *
 * The emitted ops are the tail slice `instructions[start..]`. Within
 * that slice every op first has invoke/return stripped from its debug
 * context; then `invoke` is re-attached to the first op only and
 * `return` to the last op only, using the values found on
 * `operationCtx`. With a single emitted op, first and last coincide.
 *
 * Returns the input array itself when the slice is empty or when
 * `operationCtx` carries neither discriminator; otherwise returns a
 * copy, leaving the input array and its ops unmodified.
 */
export function bracketActivation(
  instructions: Evm.Instruction[],
  start: number,
  operationCtx: Ctx | undefined,
): Evm.Instruction[] {
  const lastIndex = instructions.length - 1;
  if (lastIndex < start) return instructions;

  const wantsInvoke = carriesActivation(operationCtx, "invoke");
  const wantsReturn = carriesActivation(operationCtx, "return");
  if (!(wantsInvoke || wantsReturn)) return instructions;

  const invokeValue = wantsInvoke
    ? findActivation(operationCtx, "invoke")
    : undefined;
  const returnValue = wantsReturn
    ? findActivation(operationCtx, "return")
    : undefined;

  const rewritten = instructions.slice();
  for (let index = start; index <= lastIndex; index += 1) {
    const op = rewritten[index];
    const isFirst = index === start;
    const isLast = index === lastIndex;

    // Start from a context with no activation markers, then add back
    // only the boundary marker(s) this position is entitled to.
    let context = stripActivation(op.debug?.context);
    if (wantsInvoke && isFirst) {
      context = attachActivation(context, "invoke", invokeValue);
    }
    if (wantsReturn && isLast) {
      context = attachActivation(context, "return", returnValue);
    }

    rewritten[index] = { ...op, debug: { ...op.debug, context } };
  }
  return rewritten;
}
192 changes: 192 additions & 0 deletions packages/bugc/src/evmgen/inline-bracket.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/**
* Verifies that inlined virtual-activation invoke/return contexts are
* BRACKETED on the emitted bytecode, not smeared across every op.
*
* An IR instruction lowers to N EVM micro-ops. evmgen must attach the
* `invoke` discriminator to only the FIRST emitted op of the
* invoke-bearing instruction and the `return` discriminator to only the
* LAST emitted op of the return-bearing instruction, while keeping the
* `transform: ["inline"]` membership marker on ALL body ops.
*
* Without bracketing, the tracer's push/pop reconstruction sees every
* body op as both a push and a pop -> phantom frames.
*/
import { describe, it, expect } from "vitest";

import { compile } from "#compiler";
import { executeProgram } from "#test/evm/behavioral";
import type * as Format from "@ethdebug/format";
import { Program } from "@ethdebug/format";

const { Context } = Program;

type OptLevel = 0 | 1 | 2 | 3;

/**
 * Compile `source` to bytecode at the given optimizer level and return
 * the runtime instruction list. Throws an Error listing the compiler's
 * error messages when compilation does not succeed.
 */
async function runtimeInstructions(source: string, level: OptLevel) {
  const outcome = await compile({
    to: "bytecode",
    source,
    optimizer: { level },
  });

  if (outcome.success) {
    return outcome.value.bytecode.runtimeInstructions;
  }

  const details = (outcome.messages.error ?? [])
    .map((e: { message?: string }) => e.message ?? String(e))
    .join("\n");
  throw new Error(`Compilation failed at level ${level}:\n` + details);
}

/**
 * Flatten a context into its leaf contexts: gather and pick members
 * are expanded recursively, anything else is returned as a
 * single-element list.
 */
function leaves(ctx: Format.Program.Context): Format.Program.Context[] {
  let members: readonly Format.Program.Context[] | undefined;

  if (Context.isGather(ctx)) {
    members = ctx.gather;
  } else if (
    "pick" in ctx &&
    Array.isArray((ctx as { pick: unknown[] }).pick)
  ) {
    members = (ctx as { pick: Format.Program.Context[] }).pick;
  }

  if (members === undefined) return [ctx];
  return members.flatMap((member) => leaves(member));
}

/**
 * Per-op presence of the invoke discriminator, the return
 * discriminator and the "inline" transform marker. The op's own
 * context and every leaf nested under pick/gather are inspected; an
 * op without a context reports all three as false.
 */
function flags(instr: { debug?: { context?: Format.Program.Context } }) {
  const root = instr.debug?.context;
  const candidates: Format.Program.Context[] = root
    ? [root, ...leaves(root)]
    : [];

  const isInlineMarker = (c: Format.Program.Context) =>
    Context.isTransform(c) && c.transform.includes("inline");

  return {
    invoke: candidates.some((c) => Context.isInvoke(c)),
    return: candidates.some((c) => Context.isReturn(c)),
    inline: candidates.some(isInlineMarker),
  };
}

/**
 * Count how many ops carry each flag: `invoke`, `ret` (return),
 * `both` (invoke and return on the same op) and `inline`.
 */
function tally(instrs: ReturnType<typeof flags>[]) {
  const count = (matches: (f: (typeof instrs)[number]) => boolean) =>
    instrs.filter(matches).length;

  return {
    invoke: count((f) => f.invoke),
    ret: count((f) => f.return),
    both: count((f) => f.invoke && f.return),
    inline: count((f) => f.inline),
  };
}

// The exact fixture the UI reported mis-rendering: a leaf helper
// inlined at two sites. `dbl` is called as dbl(5) and dbl(10), and
// the sum of both results is stored in `r` (storage slot 0), so a
// correct run leaves 30 there.
const dblTwoSites = `name Multi;
define { function dbl(x: uint256) -> uint256 { return x + x; }; }
storage { [0] r: uint256; }
create { r = 0; }
code {
let a = dbl(5);
let b = dbl(10);
r = a + b;
}`;

// A multi-instruction body: entry (t = x + x) differs from exit
// (t * x), so invoke and return live on distinct IR instructions.
// `poly` is called once, with the storage value `a` as its argument.
const multiInstrBody = `name Poly;
define { function poly(x: uint256) -> uint256 { let t = x + x; return t * x; }; }
storage { [0] a: uint256; [1] r: uint256; }
create { a = 3; r = 0; }
code { r = poly(a); }`;

describe("inlined invoke/return are bracketed on emitted bytecode", () => {
  it("dbl@2-sites: one push and one pop per site, never both on an op", async () => {
    const ops = await runtimeInstructions(dblTwoSites, 2);
    const counts = tally(ops.map((op) => flags(op)));

    // Two inlined sites, each contributing exactly one invoke op and
    // one return op.
    expect(counts.invoke).toBe(2);
    expect(counts.ret).toBe(2);
    // An op that is both a push and a pop would break push/pop
    // reconstruction.
    expect(counts.both).toBe(0);
    // The membership marker must cover the body ops too, i.e. more
    // than just the 4 boundary ops.
    expect(counts.inline).toBeGreaterThan(4);
  });

  it("dbl@2-sites: each site's invoke op precedes its return op", async () => {
    const ops = await runtimeInstructions(dblTwoSites, 2);

    // Collect boundary ops in emission order; an op flagged invoke is
    // recorded as "invoke", otherwise a return-flagged op as "return".
    const boundaries: string[] = [];
    for (const op of ops) {
      const f = flags(op);
      if (f.invoke) {
        boundaries.push("invoke");
      } else if (f.return) {
        boundaries.push("return");
      }
    }

    // Bracketed order across two sites: push, pop, push, pop.
    expect(boundaries).toEqual(["invoke", "return", "invoke", "return"]);
  });

  it("multi-instruction body: invoke on entry, return on exit, both=0", async () => {
    const ops = await runtimeInstructions(multiInstrBody, 2);
    const counts = tally(ops.map((op) => flags(op)));

    expect(counts.invoke).toBe(1);
    expect(counts.ret).toBe(1);
    expect(counts.both).toBe(0);
  });

  it("preserves runtime behavior at every level", async () => {
    const levels = [0, 1, 2, 3] as const;
    for (const optimizationLevel of levels) {
      const run = await executeProgram(dblTwoSites, {
        calldata: "",
        optimizationLevel,
      });
      expect(run.callSuccess).toBe(true);
      // dbl(5) = 10, dbl(10) = 20, so r = 30.
      const stored = await run.getStorage(0n);
      expect(stored).toBe(30n);
    }
  });
});

// A self-tail-recursive accumulator: TCO turns the recursive call
// into a single back-edge JUMP that legitimately carries BOTH invoke
// and return on its one op (end one iteration + begin the next).
// The program stores sum(5, 0) in `result` (storage slot 0).
const tailRecursive = `name TailSum;
define {
function sum(n: uint256, acc: uint256) -> uint256 {
if (n == 0) { return acc; }
else { return sum(n - 1, acc + n); }
};
}
storage { [0] result: uint256; }
create { result = 0; }
code { result = sum(5, 0); }`;

// Mutually recursive functions never inline, so their calls stay real
// (invoke on a 1-op JUMP, return on a 1-op JUMPDEST).
// `isEven` and `isOdd` call each other with n - 1 until n reaches 0;
// the program stores isEven(4) in `result` (storage slot 0).
const mutualRecursion = `name EvenOdd;
define {
function isEven(n: uint256) -> uint256 {
if (n == 0) { return 1; } else { return isOdd(n - 1); }
};
function isOdd(n: uint256) -> uint256 {
if (n == 0) { return 0; } else { return isEven(n - 1); }
};
}
storage { [0] result: uint256; }
create { result = 0; }
code { result = isEven(4); }`;

describe("bracketing is a no-op for single-op invoke/return carriers", () => {
  it("tailcall back-edge keeps its combined invoke+return on one op", async () => {
    // The back-edge JUMP is one op carrying both markers. Bracketing
    // to first-op/last-op means first === last here, so both markers
    // have to survive on it.
    const ops = await runtimeInstructions(tailRecursive, 2);
    const { both } = tally(ops.map((op) => flags(op)));

    expect(both).toBeGreaterThanOrEqual(1);
  });

  it("real (non-inlined) calls never carry both on an op", async () => {
    const ops = await runtimeInstructions(mutualRecursion, 2);
    const counts = tally(ops.map((op) => flags(op)));

    // Real calls put invoke on a 1-op JUMP and return on a 1-op
    // JUMPDEST, which are distinct ops, so no op may end up with both.
    expect(counts.both).toBe(0);
    expect(counts.invoke).toBeGreaterThan(0);
    expect(counts.ret).toBeGreaterThan(0);
  });
});
Loading